edsger 0.1.5__cp39-cp39-win32.whl → 0.1.6__cp39-cp39-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edsger/path.py CHANGED
@@ -2,11 +2,13 @@
2
2
  Path-related methods.
3
3
  """
4
4
 
5
+ from typing import Optional, Union, List, Any
5
6
  import warnings
6
7
 
7
8
  import numpy as np
8
9
  import pandas as pd
9
10
 
11
+ from edsger.graph_importer import standardize_graph_dataframe
10
12
  from edsger.commons import (
11
13
  A_VERY_SMALL_TIME_INTERVAL_PY,
12
14
  DTYPE_INF_PY,
@@ -40,6 +42,7 @@ from edsger.star import (
40
42
  convert_graph_to_csr_float64,
41
43
  convert_graph_to_csr_uint32,
42
44
  )
45
+ from edsger.bfs import bfs_csr, bfs_csc # pylint: disable=no-name-in-module
43
46
 
44
47
 
45
48
  class Dijkstra:
@@ -78,19 +81,20 @@ class Dijkstra:
78
81
 
79
82
  def __init__(
80
83
  self,
81
- edges,
82
- tail="tail",
83
- head="head",
84
- weight="weight",
85
- orientation="out",
86
- check_edges=False,
87
- permute=False,
88
- verbose=False,
89
- ):
84
+ edges: pd.DataFrame,
85
+ tail: str = "tail",
86
+ head: str = "head",
87
+ weight: str = "weight",
88
+ orientation: str = "out",
89
+ check_edges: bool = False,
90
+ permute: bool = False,
91
+ verbose: bool = False,
92
+ ) -> None:
90
93
  # load the edges
91
94
  if check_edges:
92
95
  self._check_edges(edges, tail, head, weight)
93
- self._edges = edges[[tail, head, weight]].copy(deep=True)
96
+ # Convert to standardized NumPy-backed pandas DataFrame
97
+ self._edges = standardize_graph_dataframe(edges, tail, head, weight)
94
98
  self._n_edges = len(self._edges)
95
99
  self._verbose = verbose
96
100
 
@@ -99,7 +103,12 @@ class Dijkstra:
99
103
 
100
104
  # reindex the vertices
101
105
  self._permute = permute
102
- if self._permute:
106
+ if len(self._edges) == 0:
107
+ # Handle empty graphs
108
+ self._permutation = None
109
+ self._n_vertices = 0
110
+ self.__n_vertices_init = 0
111
+ elif self._permute:
103
112
  self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
104
113
  self._permutation = self._permute_graph(tail, head)
105
114
  self._n_vertices = len(self._permutation)
@@ -138,7 +147,7 @@ class Dijkstra:
138
147
  self._path_links = None
139
148
 
140
149
  @property
141
- def edges(self):
150
+ def edges(self) -> Any:
142
151
  """
143
152
  Getter for the graph edge dataframe.
144
153
 
@@ -150,7 +159,7 @@ class Dijkstra:
150
159
  return self._edges
151
160
 
152
161
  @property
153
- def n_edges(self):
162
+ def n_edges(self) -> int:
154
163
  """
155
164
  Getter for the number of graph edges.
156
165
 
@@ -162,7 +171,7 @@ class Dijkstra:
162
171
  return self._n_edges
163
172
 
164
173
  @property
165
- def n_vertices(self):
174
+ def n_vertices(self) -> int:
166
175
  """
167
176
  Getter for the number of graph vertices.
168
177
 
@@ -174,7 +183,7 @@ class Dijkstra:
174
183
  return self._n_vertices
175
184
 
176
185
  @property
177
- def orientation(self):
186
+ def orientation(self) -> str:
178
187
  """
179
188
  Getter of Dijkstra's algorithm orientation ("in" or "out").
180
189
 
@@ -186,7 +195,7 @@ class Dijkstra:
186
195
  return self._orientation
187
196
 
188
197
  @property
189
- def permute(self):
198
+ def permute(self) -> bool:
190
199
  """
191
200
  Getter for the graph permutation/reindexing option.
192
201
 
@@ -198,7 +207,7 @@ class Dijkstra:
198
207
  return self._permute
199
208
 
200
209
  @property
201
- def path_links(self):
210
+ def path_links(self) -> Optional[np.ndarray]:
202
211
  """
203
212
  Getter for the path links (predecessors or successors).
204
213
 
@@ -239,7 +248,7 @@ class Dijkstra:
239
248
 
240
249
  def _check_edges(self, edges, tail, head, weight):
241
250
  """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
242
- if not isinstance(edges, pd.core.frame.DataFrame):
251
+ if not isinstance(edges, pd.DataFrame):
243
252
  raise TypeError("edges should be a pandas DataFrame")
244
253
 
245
254
  if tail not in edges:
@@ -257,7 +266,7 @@ class Dijkstra:
257
266
  f"edge weight column '{weight}' not found in graph edges dataframe"
258
267
  )
259
268
 
260
- if edges[[tail, head, weight]].isna().any().any():
269
+ if edges[[tail, head, weight]].isnull().to_numpy().any():
261
270
  raise ValueError(
262
271
  " ".join(
263
272
  [
@@ -287,7 +296,7 @@ class Dijkstra:
287
296
  permutation = pd.DataFrame(
288
297
  data={
289
298
  "vert_idx": np.union1d(
290
- self._edges[tail].values, self._edges[head].values
299
+ np.asarray(self._edges[tail]), np.asarray(self._edges[head])
291
300
  )
292
301
  }
293
302
  )
@@ -330,13 +339,13 @@ class Dijkstra:
330
339
 
331
340
  def run(
332
341
  self,
333
- vertex_idx,
334
- path_tracking=False,
335
- return_inf=True,
336
- return_series=False,
337
- heap_length_ratio=1.0,
338
- termination_nodes=None,
339
- ):
342
+ vertex_idx: int,
343
+ path_tracking: bool = False,
344
+ return_inf: bool = True,
345
+ return_series: bool = False,
346
+ heap_length_ratio: float = 1.0,
347
+ termination_nodes: Optional[List[int]] = None,
348
+ ) -> Union[np.ndarray, pd.Series]:
340
349
  """
341
350
  Runs shortest path algorithm between a given vertex and all other vertices in the graph.
342
351
 
@@ -369,17 +378,10 @@ class Dijkstra:
369
378
  Pandas Series object with the same data and the vertex indices as index.
370
379
 
371
380
  """
372
- # validate the input arguments
373
- if not isinstance(vertex_idx, int):
374
- try:
375
- vertex_idx = int(vertex_idx)
376
- except ValueError as exc:
377
- raise TypeError(
378
- f"argument 'vertex_idx={vertex_idx}' must be an integer"
379
- ) from exc
381
+ # validate the input arguments (the type of vertex_idx is only annotated, not checked or coerced at runtime)
380
382
  if vertex_idx < 0:
381
383
  raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
382
- if self._permute:
384
+ if self._permute and self._permutation is not None:
383
385
  if vertex_idx not in self._permutation.vert_idx_old.values:
384
386
  raise ValueError(f"vertex {vertex_idx} not found in graph")
385
387
  vertex_new = self._permutation.loc[
@@ -389,16 +391,7 @@ class Dijkstra:
389
391
  if vertex_idx >= self._n_vertices:
390
392
  raise ValueError(f"vertex {vertex_idx} not found in graph")
391
393
  vertex_new = vertex_idx
392
- if not isinstance(path_tracking, bool):
393
- raise TypeError(
394
- f"argument 'path_tracking=f{path_tracking}' must be of bool type"
395
- )
396
- if not isinstance(return_inf, bool):
397
- raise TypeError(f"argument 'return_inf=f{return_inf}' must be of bool type")
398
- if not isinstance(return_series, bool):
399
- raise TypeError(
400
- f"argument 'return_series=f{return_series}' must be of bool type"
401
- )
394
+ # The bool arguments are not type-checked at runtime; their types are only declared by annotations
402
395
  if not isinstance(heap_length_ratio, float):
403
396
  raise TypeError(
404
397
  f"argument 'heap_length_ratio=f{heap_length_ratio}' must be of float type"
@@ -428,7 +421,7 @@ class Dijkstra:
428
421
  raise ValueError("argument 'termination_nodes' must not be empty")
429
422
 
430
423
  # handle vertex permutation if needed
431
- if self._permute:
424
+ if self._permute and self._permutation is not None:
432
425
  termination_nodes_permuted = []
433
426
  for termination_node in termination_nodes_array:
434
427
  if termination_node not in self._permutation.vert_idx_old.values:
@@ -546,7 +539,7 @@ class Dijkstra:
546
539
  heap_length,
547
540
  )
548
541
 
549
- if self._permute:
542
+ if self._permute and self._permutation is not None:
550
543
  # permute back the path vertex indices
551
544
  path_df = pd.DataFrame(
552
545
  data={
@@ -592,11 +585,17 @@ class Dijkstra:
592
585
 
593
586
  # reorder path lengths
594
587
  if return_series:
595
- if self._permute and termination_nodes_array is None:
588
+ if (
589
+ self._permute
590
+ and termination_nodes_array is None
591
+ and self._permutation is not None
592
+ ):
596
593
  self._permutation["path_length"] = path_length_values
597
- path_lengths_df = self._permutation[
598
- ["vert_idx_old", "path_length"]
599
- ].sort_values(by="vert_idx_old")
594
+ path_lengths_df = (
595
+ self._permutation[["vert_idx_old", "path_length"]]
596
+ .copy()
597
+ .sort_values("vert_idx_old")
598
+ ) # type: ignore
600
599
  path_lengths_df.set_index("vert_idx_old", drop=True, inplace=True)
601
600
  path_lengths_df.index.name = "vertex_idx"
602
601
  path_lengths_series = path_lengths_df.path_length
@@ -604,7 +603,11 @@ class Dijkstra:
604
603
  path_lengths_series = pd.Series(path_length_values)
605
604
  path_lengths_series.index.name = "vertex_idx"
606
605
  path_lengths_series.name = "path_length"
607
- if self._permute and termination_nodes_array is not None:
606
+ if (
607
+ self._permute
608
+ and termination_nodes_array is not None
609
+ and termination_nodes is not None
610
+ ):
608
611
  # For early termination with permutation, use original termination node indices
609
612
  path_lengths_series.index = termination_nodes
610
613
 
@@ -614,19 +617,20 @@ class Dijkstra:
614
617
  if termination_nodes_array is not None:
615
618
  return path_length_values
616
619
 
617
- if self._permute:
620
+ if self._permute and self._permutation is not None:
618
621
  self._permutation["path_length"] = path_length_values
619
622
  if return_inf:
620
623
  path_length_values = np.inf * np.ones(self.__n_vertices_init)
621
624
  else:
622
625
  path_length_values = DTYPE_INF_PY * np.ones(self.__n_vertices_init)
626
+ assert self._permutation is not None # guaranteed by condition above
623
627
  path_length_values[self._permutation.vert_idx_old.values] = (
624
628
  self._permutation.path_length.values
625
629
  )
626
630
 
627
631
  return path_length_values
628
632
 
629
- def get_vertices(self):
633
+ def get_vertices(self) -> Any:
630
634
  """
631
635
  Get the unique vertices from the graph.
632
636
 
@@ -638,11 +642,13 @@ class Dijkstra:
638
642
  vertices : ndarray
639
643
  A 1-D array containing the unique vertices.
640
644
  """
641
- if self._permute:
642
- return self._permutation.vert_idx_old.values
643
- return np.union1d(self._edges["tail"], self._edges["head"])
645
+ if self._permute and self._permutation is not None:
646
+ return np.asarray(self._permutation.vert_idx_old)
647
+ return np.union1d(
648
+ np.asarray(self._edges["tail"]), np.asarray(self._edges["head"])
649
+ )
644
650
 
645
- def get_path(self, vertex_idx):
651
+ def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
646
652
  """Compute path from predecessors or successors.
647
653
 
648
654
  Parameters:
@@ -715,19 +721,20 @@ class BellmanFord:
715
721
 
716
722
  def __init__(
717
723
  self,
718
- edges,
719
- tail="tail",
720
- head="head",
721
- weight="weight",
722
- orientation="out",
723
- check_edges=False,
724
- permute=False,
725
- verbose=False,
726
- ):
724
+ edges: pd.DataFrame,
725
+ tail: str = "tail",
726
+ head: str = "head",
727
+ weight: str = "weight",
728
+ orientation: str = "out",
729
+ check_edges: bool = False,
730
+ permute: bool = False,
731
+ verbose: bool = False,
732
+ ) -> None:
727
733
  # load the edges
728
734
  if check_edges:
729
735
  self._check_edges(edges, tail, head, weight)
730
- self._edges = edges[[tail, head, weight]].copy(deep=True)
736
+ # Convert to standardized NumPy-backed pandas DataFrame
737
+ self._edges = standardize_graph_dataframe(edges, tail, head, weight)
731
738
  self._n_edges = len(self._edges)
732
739
  self._verbose = verbose
733
740
 
@@ -736,7 +743,12 @@ class BellmanFord:
736
743
 
737
744
  # reindex the vertices
738
745
  self._permute = permute
739
- if self._permute:
746
+ if len(self._edges) == 0:
747
+ # Handle empty graphs
748
+ self._permutation = None
749
+ self._n_vertices = 0
750
+ self.__n_vertices_init = 0
751
+ elif self._permute:
740
752
  self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
741
753
  self._permutation = self._permute_graph(tail, head)
742
754
  self._n_vertices = len(self._permutation)
@@ -770,7 +782,7 @@ class BellmanFord:
770
782
  self._has_negative_cycle = False
771
783
 
772
784
  @property
773
- def edges(self):
785
+ def edges(self) -> Any:
774
786
  """
775
787
  Getter for the graph edge dataframe.
776
788
 
@@ -782,7 +794,7 @@ class BellmanFord:
782
794
  return self._edges
783
795
 
784
796
  @property
785
- def n_edges(self):
797
+ def n_edges(self) -> int:
786
798
  """
787
799
  Getter for the number of graph edges.
788
800
 
@@ -794,7 +806,7 @@ class BellmanFord:
794
806
  return self._n_edges
795
807
 
796
808
  @property
797
- def n_vertices(self):
809
+ def n_vertices(self) -> int:
798
810
  """
799
811
  Getter for the number of graph vertices.
800
812
 
@@ -806,7 +818,7 @@ class BellmanFord:
806
818
  return self._n_vertices
807
819
 
808
820
  @property
809
- def orientation(self):
821
+ def orientation(self) -> str:
810
822
  """
811
823
  Getter of Bellman-Ford's algorithm orientation ("in" or "out").
812
824
 
@@ -818,7 +830,7 @@ class BellmanFord:
818
830
  return self._orientation
819
831
 
820
832
  @property
821
- def permute(self):
833
+ def permute(self) -> bool:
822
834
  """
823
835
  Getter for the graph permutation/reindexing option.
824
836
 
@@ -830,7 +842,7 @@ class BellmanFord:
830
842
  return self._permute
831
843
 
832
844
  @property
833
- def path_links(self):
845
+ def path_links(self) -> Optional[np.ndarray]:
834
846
  """
835
847
  Getter for the path links (predecessors or successors).
836
848
 
@@ -871,7 +883,7 @@ class BellmanFord:
871
883
 
872
884
  def _check_edges(self, edges, tail, head, weight):
873
885
  """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
874
- if not isinstance(edges, pd.core.frame.DataFrame):
886
+ if not isinstance(edges, pd.DataFrame):
875
887
  raise TypeError("edges should be a pandas DataFrame")
876
888
 
877
889
  if tail not in edges:
@@ -889,7 +901,7 @@ class BellmanFord:
889
901
  f"edge weight column '{weight}' not found in graph edges dataframe"
890
902
  )
891
903
 
892
- if edges[[tail, head, weight]].isna().any().any():
904
+ if edges[[tail, head, weight]].isnull().to_numpy().any():
893
905
  raise ValueError(
894
906
  " ".join(
895
907
  [
@@ -917,7 +929,7 @@ class BellmanFord:
917
929
  permutation = pd.DataFrame(
918
930
  data={
919
931
  "vert_idx": np.union1d(
920
- self._edges[tail].values, self._edges[head].values
932
+ np.asarray(self._edges[tail]), np.asarray(self._edges[head])
921
933
  )
922
934
  }
923
935
  )
@@ -960,12 +972,12 @@ class BellmanFord:
960
972
 
961
973
  def run(
962
974
  self,
963
- vertex_idx,
964
- path_tracking=False,
965
- return_inf=True,
966
- return_series=False,
967
- detect_negative_cycles=True,
968
- ):
975
+ vertex_idx: int,
976
+ path_tracking: bool = False,
977
+ return_inf: bool = True,
978
+ return_series: bool = False,
979
+ detect_negative_cycles: bool = True,
980
+ ) -> Union[np.ndarray, pd.Series]:
969
981
  """
970
982
  Runs Bellman-Ford shortest path algorithm between a given vertex and all other vertices
971
983
  in the graph.
@@ -1000,17 +1012,10 @@ class BellmanFord:
1000
1012
  ValueError
1001
1013
  If detect_negative_cycles is True and a negative cycle is detected in the graph.
1002
1014
  """
1003
- # validate the input arguments
1004
- if not isinstance(vertex_idx, int):
1005
- try:
1006
- vertex_idx = int(vertex_idx)
1007
- except ValueError as exc:
1008
- raise TypeError(
1009
- f"argument 'vertex_idx={vertex_idx}' must be an integer"
1010
- ) from exc
1015
+ # validate the input arguments (the type of vertex_idx is only annotated, not checked or coerced at runtime)
1011
1016
  if vertex_idx < 0:
1012
1017
  raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
1013
- if self._permute:
1018
+ if self._permute and self._permutation is not None:
1014
1019
  if vertex_idx not in self._permutation.vert_idx_old.values:
1015
1020
  raise ValueError(f"vertex {vertex_idx} not found in graph")
1016
1021
  vertex_new = self._permutation.loc[
@@ -1020,20 +1025,7 @@ class BellmanFord:
1020
1025
  if vertex_idx >= self._n_vertices:
1021
1026
  raise ValueError(f"vertex {vertex_idx} not found in graph")
1022
1027
  vertex_new = vertex_idx
1023
- if not isinstance(path_tracking, bool):
1024
- raise TypeError(
1025
- f"argument 'path_tracking=f{path_tracking}' must be of bool type"
1026
- )
1027
- if not isinstance(return_inf, bool):
1028
- raise TypeError(f"argument 'return_inf=f{return_inf}' must be of bool type")
1029
- if not isinstance(return_series, bool):
1030
- raise TypeError(
1031
- f"argument 'return_series=f{return_series}' must be of bool type"
1032
- )
1033
- if not isinstance(detect_negative_cycles, bool):
1034
- raise TypeError(
1035
- f"argument 'detect_negative_cycles={detect_negative_cycles}' must be of bool type"
1036
- )
1028
+ # The bool arguments are not type-checked at runtime; their types are only declared by annotations
1037
1029
 
1038
1030
  # compute path length
1039
1031
  if not path_tracking:
@@ -1075,7 +1067,7 @@ class BellmanFord:
1075
1067
  self._n_vertices,
1076
1068
  )
1077
1069
 
1078
- if self._permute:
1070
+ if self._permute and self._permutation is not None:
1079
1071
  # permute back the path vertex indices
1080
1072
  path_df = pd.DataFrame(
1081
1073
  data={
@@ -1146,7 +1138,7 @@ class BellmanFord:
1146
1138
 
1147
1139
  # reorder path lengths
1148
1140
  if return_series:
1149
- if self._permute:
1141
+ if self._permute and self._permutation is not None:
1150
1142
  path_df = pd.DataFrame(
1151
1143
  data={"path_length": path_length_values[: self._n_vertices]}
1152
1144
  )
@@ -1170,7 +1162,7 @@ class BellmanFord:
1170
1162
  return path_lengths_series
1171
1163
 
1172
1164
  # The return_series branch above has already returned, so no else is needed here
1173
- if self._permute:
1165
+ if self._permute and self._permutation is not None:
1174
1166
  path_df = pd.DataFrame(
1175
1167
  data={"path_length": path_length_values[: self._n_vertices]}
1176
1168
  )
@@ -1191,7 +1183,7 @@ class BellmanFord:
1191
1183
  )
1192
1184
  return path_length_values
1193
1185
 
1194
- def get_path(self, vertex_idx):
1186
+ def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
1195
1187
  """Compute path from predecessors or successors.
1196
1188
 
1197
1189
  Parameters:
@@ -1301,18 +1293,21 @@ class HyperpathGenerating:
1301
1293
 
1302
1294
  def __init__(
1303
1295
  self,
1304
- edges,
1305
- tail="tail",
1306
- head="head",
1307
- trav_time="trav_time",
1308
- freq="freq",
1309
- check_edges=False,
1310
- orientation="in",
1311
- ):
1296
+ edges: pd.DataFrame,
1297
+ tail: str = "tail",
1298
+ head: str = "head",
1299
+ trav_time: str = "trav_time",
1300
+ freq: str = "freq",
1301
+ check_edges: bool = False,
1302
+ orientation: str = "in",
1303
+ ) -> None:
1312
1304
  # load the edges
1313
1305
  if check_edges:
1314
1306
  self._check_edges(edges, tail, head, trav_time, freq)
1315
- self._edges = edges[[tail, head, trav_time, freq]].copy(deep=True)
1307
+ # Convert to standardized NumPy-backed pandas DataFrame
1308
+ self._edges = standardize_graph_dataframe(
1309
+ edges, tail, head, trav_time=trav_time, freq=freq
1310
+ )
1316
1311
  self.edge_count = len(self._edges)
1317
1312
 
1318
1313
  # remove inf values if any, and values close to zero
@@ -1354,15 +1349,21 @@ class HyperpathGenerating:
1354
1349
  self._edge_idx = rs_data.astype(np.uint32)
1355
1350
 
1356
1351
  # edge attributes
1357
- self._trav_time = self._edges[trav_time].values.astype(DTYPE_PY)
1358
- self._freq = self._edges[freq].values.astype(DTYPE_PY)
1359
- self._tail = self._edges[tail].values.astype(np.uint32)
1360
- self._head = self._edges[head].values.astype(np.uint32)
1352
+ self._trav_time = np.asarray(self._edges[trav_time]).astype(DTYPE_PY)
1353
+ self._freq = np.asarray(self._edges[freq]).astype(DTYPE_PY)
1354
+ self._tail = np.asarray(self._edges[tail]).astype(np.uint32)
1355
+ self._head = np.asarray(self._edges[head]).astype(np.uint32)
1361
1356
 
1362
1357
  # node attribute
1363
1358
  self.u_i_vec = None
1364
1359
 
1365
- def run(self, origin, destination, volume, return_inf=False):
1360
+ def run(
1361
+ self,
1362
+ origin: Union[int, List[int]],
1363
+ destination: int,
1364
+ volume: Union[float, List[float]],
1365
+ return_inf: bool = False,
1366
+ ) -> None:
1366
1367
  """
1367
1368
  Computes the hyperpath and updates edge volumes based on the input demand and configuration.
1368
1369
 
@@ -1439,7 +1440,7 @@ class HyperpathGenerating:
1439
1440
  self._head,
1440
1441
  demand_indices, # source vertex indices
1441
1442
  demand_values,
1442
- self._edges["volume"].values,
1443
+ np.asarray(self._edges["volume"]),
1443
1444
  u_i_vec,
1444
1445
  self.vertex_count,
1445
1446
  destination,
@@ -1456,7 +1457,7 @@ class HyperpathGenerating:
1456
1457
  assert v >= 0.0
1457
1458
 
1458
1459
  def _check_edges(self, edges, tail, head, trav_time, freq):
1459
- if not isinstance(edges, pd.core.frame.DataFrame):
1460
+ if not isinstance(edges, pd.DataFrame):
1460
1461
  raise TypeError("edges should be a pandas DataFrame")
1461
1462
 
1462
1463
  for col in [tail, head, trav_time, freq]:
@@ -1465,7 +1466,7 @@ class HyperpathGenerating:
1465
1466
  f"edge column '{col}' not found in graph edges dataframe"
1466
1467
  )
1467
1468
 
1468
- if edges[[tail, head, trav_time, freq]].isna().any().any():
1469
+ if edges[[tail, head, trav_time, freq]].isnull().to_numpy().any():
1469
1470
  raise ValueError(
1470
1471
  " ".join(
1471
1472
  [
@@ -1487,6 +1488,552 @@ class HyperpathGenerating:
1487
1488
  raise ValueError(f"column '{col}' should be nonnegative")
1488
1489
 
1489
1490
 
1491
+ class BFS:
1492
+ """
1493
+ Breadth-First Search algorithm for finding shortest paths in directed graphs.
1494
+
1495
+ BFS ignores edge weights (treats all edges as having equal weight) and finds the shortest
1496
+ path in terms of the minimum number of edges/hops between vertices. This implementation
1497
+ works on directed graphs using CSR format for forward traversal and CSC format for
1498
+ backward traversal.
1499
+
1500
+ Note: If parallel edges exist between the same pair of vertices, only one edge will be
1501
+ kept automatically during initialization.
1502
+
1503
+ Parameters:
1504
+ -----------
1505
+ edges: pandas.DataFrame
1506
+ DataFrame containing the edges of the graph. It should have two columns: 'tail' and 'head'.
1507
+ The 'tail' column should contain the IDs of the starting nodes, and the 'head' column
1508
+ should contain the IDs of the ending nodes. If a 'weight' column is present, it will be
1509
+ ignored.
1510
+ tail: str, optional (default='tail')
1511
+ The name of the column in the DataFrame that contains the IDs of the edge starting nodes.
1512
+ head: str, optional (default='head')
1513
+ The name of the column in the DataFrame that contains the IDs of the edge ending nodes.
1514
+ orientation: str, optional (default='out')
1515
+ The orientation of BFS algorithm. It can be either 'out' for single source shortest
1516
+ paths or 'in' for single target shortest paths.
1517
+ check_edges: bool, optional (default=False)
1518
+ Whether to check if the edges DataFrame is well-formed. If set to True, the edges
1519
+ DataFrame will be checked for missing values and invalid data types.
1520
+ permute: bool, optional (default=False)
1521
+ Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed
1522
+ to start from 0 and be contiguous.
1523
+ verbose: bool, optional (default=False)
1524
+ Whether to print messages about parallel edge removal.
1525
+ sentinel: int, optional (default=-9999)
1526
+ Sentinel value for unreachable nodes and the start vertex in the predecessor array.
1527
+ Must be a negative integer that fits in int32 range.
1528
+ """
1529
+
1530
+ def __init__(
1531
+ self,
1532
+ edges: pd.DataFrame,
1533
+ tail: str = "tail",
1534
+ head: str = "head",
1535
+ orientation: str = "out",
1536
+ check_edges: bool = False,
1537
+ permute: bool = False,
1538
+ verbose: bool = False,
1539
+ sentinel: int = -9999,
1540
+ ) -> None:
1541
+ # Validate sentinel value
1542
+ if not isinstance(sentinel, int):
1543
+ raise TypeError(
1544
+ f"sentinel must be an integer, got {type(sentinel).__name__}"
1545
+ )
1546
+ if sentinel >= 0:
1547
+ raise ValueError(f"sentinel must be negative, got {sentinel}")
1548
+ if sentinel < np.iinfo(np.int32).min or sentinel > np.iinfo(np.int32).max:
1549
+ raise ValueError(
1550
+ f"sentinel must fit in int32 range [{np.iinfo(np.int32).min}, "
1551
+ f"{np.iinfo(np.int32).max}], got {sentinel}"
1552
+ )
1553
+ self._sentinel = sentinel
1554
+
1555
+ # load the edges
1556
+ if check_edges:
1557
+ self._check_edges(edges, tail, head)
1558
+ # Convert to standardized NumPy-backed pandas DataFrame
1559
+ # Note: BFS doesn't need weights, but standardize_graph_dataframe handles that
1560
+ self._edges = standardize_graph_dataframe(edges, tail, head)
1561
+ self._n_edges = len(self._edges)
1562
+ self._verbose = verbose
1563
+
1564
+ # preprocess edges to handle parallel edges
1565
+ self._preprocess_edges(tail, head)
1566
+
1567
+ # reindex the vertices
1568
+ self._permute = permute
1569
+ if len(self._edges) == 0:
1570
+ # Handle empty graphs
1571
+ self._permutation = None
1572
+ self._n_vertices = 0
1573
+ self.__n_vertices_init = 0
1574
+ elif self._permute:
1575
+ self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
1576
+ self._permutation = self._permute_graph(tail, head)
1577
+ self._n_vertices = len(self._permutation)
1578
+ else:
1579
+ self._permutation = None
1580
+ self._n_vertices = self._edges[[tail, head]].max(axis=0).max() + 1
1581
+ self.__n_vertices_init = self._n_vertices
1582
+
1583
+ # convert to CSR/CSC
1584
+ self._check_orientation(orientation)
1585
+ self._orientation = orientation
1586
+ if self._orientation == "out":
1587
+ # Use dummy weight column for conversion (BFS doesn't use weights)
1588
+ self._edges["_bfs_dummy_weight"] = 1.0
1589
+ fs_indptr, fs_indices, _ = convert_graph_to_csr_float64(
1590
+ self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
1591
+ )
1592
+ self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
1593
+ self.__indices = fs_indices.astype(np.uint32)
1594
+ self.__indptr = fs_indptr.astype(np.uint32)
1595
+ else:
1596
+ self._edges["_bfs_dummy_weight"] = 1.0
1597
+ rs_indptr, rs_indices, _ = convert_graph_to_csc_float64(
1598
+ self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
1599
+ )
1600
+ self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
1601
+ self.__indices = rs_indices.astype(np.uint32)
1602
+ self.__indptr = rs_indptr.astype(np.uint32)
1603
+
1604
+ self._path_links = None
1605
+
1606
+ @property
1607
+ def UNREACHABLE(self) -> int:
1608
+ """
1609
+ Getter for the sentinel value used for unreachable nodes.
1610
+
1611
+ Returns
1612
+ -------
1613
+ sentinel : int
1614
+ The sentinel value for unreachable nodes and the start vertex.
1615
+ """
1616
+ return self._sentinel
1617
+
1618
+ @property
1619
+ def edges(self) -> Any:
1620
+ """
1621
+ Getter for the graph edge dataframe.
1622
+
1623
+ Returns
1624
+ -------
1625
+ edges: pandas.DataFrame
1626
+ DataFrame containing the edges of the graph.
1627
+ """
1628
+ return self._edges
1629
+
1630
+ @property
1631
+ def n_edges(self) -> int:
1632
+ """
1633
+ Getter for the number of graph edges.
1634
+
1635
+ Returns
1636
+ -------
1637
+ n_edges: int
1638
+ The number of edges in the graph.
1639
+ """
1640
+ return self._n_edges
1641
+
1642
+ @property
1643
+ def n_vertices(self) -> int:
1644
+ """
1645
+ Getter for the number of graph vertices.
1646
+
1647
+ Returns
1648
+ -------
1649
+ n_vertices: int
1650
+ The number of nodes in the graph (after permutation, if _permute is True).
1651
+ """
1652
+ return self._n_vertices
1653
+
1654
+ @property
1655
+ def orientation(self) -> str:
1656
+ """
1657
+ Getter of BFS algorithm orientation ("in" or "out").
1658
+
1659
+ Returns
1660
+ -------
1661
+ orientation : str
1662
+ The orientation of BFS algorithm.
1663
+ """
1664
+ return self._orientation
1665
+
1666
+ @property
1667
+ def permute(self) -> bool:
1668
+ """
1669
+ Getter for the graph permutation/reindexing option.
1670
+
1671
+ Returns
1672
+ -------
1673
+ permute : bool
1674
+ Whether to permute the IDs of the nodes.
1675
+ """
1676
+ return self._permute
1677
+
1678
+ @property
1679
+ def path_links(self) -> Optional[np.ndarray]:
1680
+ """
1681
+ Getter for the path links (predecessors or successors).
1682
+
1683
+ Returns
1684
+ -------
1685
+ path_links: numpy.ndarray
1686
+ predecessors or successors node index if the path tracking is activated.
1687
+ """
1688
+ return self._path_links
1689
+
1690
+ def _preprocess_edges(self, tail, head):
1691
+ """
1692
+ Preprocess edges to handle parallel edges by keeping only one edge
1693
+ between any pair of vertices (BFS doesn't use weights).
1694
+
1695
+ Parameters
1696
+ ----------
1697
+ tail : str
1698
+ The column name for tail vertices
1699
+ head : str
1700
+ The column name for head vertices
1701
+ """
1702
+ original_count = len(self._edges)
1703
+ self._edges = self._edges.groupby([tail, head], as_index=False).first()
1704
+ final_count = len(self._edges)
1705
+
1706
+ if original_count > final_count:
1707
+ parallel_edges_removed = original_count - final_count
1708
+ if self._verbose:
1709
+ print(
1710
+ f"Automatically removed {parallel_edges_removed} parallel edge(s). "
1711
+ f"BFS treats all edges equally."
1712
+ )
1713
+
1714
+ self._n_edges = len(self._edges)
1715
+
1716
+ def _check_edges(self, edges, tail, head):
1717
+ """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
1718
+ if not isinstance(edges, pd.DataFrame):
1719
+ raise TypeError("edges should be a pandas DataFrame")
1720
+
1721
+ if tail not in edges:
1722
+ raise KeyError(
1723
+ f"edge tail column '{tail}' not found in graph edges dataframe"
1724
+ )
1725
+
1726
+ if head not in edges:
1727
+ raise KeyError(
1728
+ f"edge head column '{head}' not found in graph edges dataframe"
1729
+ )
1730
+
1731
+ if edges[[tail, head]].isnull().to_numpy().any():
1732
+ raise ValueError(
1733
+ " ".join(
1734
+ [
1735
+ f"edges[[{tail}, {head}]] ",
1736
+ "should not have any missing value",
1737
+ ]
1738
+ )
1739
+ )
1740
+
1741
+ for col in [tail, head]:
1742
+ if not pd.api.types.is_integer_dtype(edges[col].dtype):
1743
+ raise TypeError(f"edges['{col}'] should be of integer type")
1744
+
1745
+ def _permute_graph(self, tail, head):
1746
+ """Permute the IDs of the nodes to start from 0 and be contiguous.
1747
+ Returns a DataFrame with the permuted IDs."""
1748
+
1749
+ permutation = pd.DataFrame(
1750
+ data={
1751
+ "vert_idx": np.union1d(
1752
+ np.asarray(self._edges[tail]), np.asarray(self._edges[head])
1753
+ )
1754
+ }
1755
+ )
1756
+ permutation["vert_idx_new"] = permutation.index
1757
+ permutation.index.name = "index"
1758
+
1759
+ self._edges = pd.merge(
1760
+ self._edges,
1761
+ permutation[["vert_idx", "vert_idx_new"]],
1762
+ left_on=tail,
1763
+ right_on="vert_idx",
1764
+ how="left",
1765
+ )
1766
+ self._edges.drop([tail, "vert_idx"], axis=1, inplace=True)
1767
+ self._edges.rename(columns={"vert_idx_new": tail}, inplace=True)
1768
+
1769
+ self._edges = pd.merge(
1770
+ self._edges,
1771
+ permutation[["vert_idx", "vert_idx_new"]],
1772
+ left_on=head,
1773
+ right_on="vert_idx",
1774
+ how="left",
1775
+ )
1776
+ self._edges.drop([head, "vert_idx"], axis=1, inplace=True)
1777
+ self._edges.rename(columns={"vert_idx_new": head}, inplace=True)
1778
+
1779
+ permutation.rename(columns={"vert_idx": "vert_idx_old"}, inplace=True)
1780
+ permutation.reset_index(drop=True, inplace=True)
1781
+ permutation.sort_values(by="vert_idx_new", inplace=True)
1782
+
1783
+ permutation.index.name = "index"
1784
+ self._edges.index.name = "index"
1785
+
1786
+ return permutation
1787
+
1788
+ def _check_orientation(self, orientation):
1789
+ """Checks the orientation attribute."""
1790
+ if orientation not in ["in", "out"]:
1791
+ raise ValueError("orientation should be either 'in' on 'out'")
1792
+
1793
def run(
    self,
    vertex_idx: int,
    path_tracking: bool = False,
    return_series: bool = False,
) -> Union[np.ndarray, pd.Series]:
    """
    Runs BFS algorithm between a given vertex and all other vertices in the graph.

    Parameters
    ----------
    vertex_idx : int
        The index of the source/target vertex.
    path_tracking : bool, optional (default=False)
        Whether to track the shortest path(s) from the source vertex to all other vertices
        in the graph. When True, predecessors are stored and can be retrieved with get_path().
        When False, any previously stored path links are cleared.
    return_series : bool, optional (default=False)
        Whether to return a Pandas Series object indexed by vertex indices with predecessors
        as values.

    Returns
    -------
    predecessors : np.ndarray or pd.Series
        If `return_series=False`, a 1D Numpy array of shape (n_vertices,) with the
        predecessor of each vertex in the BFS tree (`orientation="out"`), or
        the successor of each vertex (`orientation="in"`).
        Unreachable vertices and the start vertex have the sentinel value (default: -9999).
        If `return_series=True`, a Pandas Series object with the same data and the
        vertex indices as index.

    Raises
    ------
    ValueError
        If `vertex_idx` is negative or is not a vertex of the graph.
    """
    # validate the input arguments
    if vertex_idx < 0:
        raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be non-negative")

    permuted = self._permute and self._permutation is not None
    if permuted:
        # a single scan both validates the vertex and yields its internal ID
        matches = self._permutation.loc[
            self._permutation.vert_idx_old == vertex_idx, "vert_idx_new"
        ]
        if matches.empty:
            raise ValueError(f"vertex {vertex_idx} not found in graph")
        vertex_new = matches.iloc[0]
    else:
        if vertex_idx >= self._n_vertices:
            raise ValueError(f"vertex {vertex_idx} not found in graph")
        vertex_new = vertex_idx

    # compute BFS predecessors
    bfs_kernel = bfs_csr if self._orientation == "out" else bfs_csc
    predecessors = bfs_kernel(
        self.__indptr,
        self.__indices,
        vertex_new,
        self._n_vertices,
        self._sentinel,
    )

    # store path links if tracking is enabled
    if path_tracking:
        # Convert predecessors to path_links format (uint32)
        # Replace sentinel value with vertex's own index (like Dijkstra does)
        self._path_links = np.arange(self._n_vertices, dtype=np.uint32)
        reachable_mask = predecessors != self._sentinel
        self._path_links[reachable_mask] = predecessors[reachable_mask].astype(
            np.uint32
        )

        if permuted:
            # permute back the path vertex indices (same approach as Dijkstra):
            # each pass swaps one column of internal IDs for the original IDs
            path_df = pd.DataFrame(
                data={
                    "vertex_idx": np.arange(self._n_vertices),
                    "associated_idx": self._path_links,
                }
            )
            for column in ("vertex_idx", "associated_idx"):
                path_df = pd.merge(
                    path_df,
                    self._permutation,
                    left_on=column,
                    right_on="vert_idx_new",
                    how="left",
                )
                path_df.drop([column, "vert_idx_new"], axis=1, inplace=True)
                path_df.rename(columns={"vert_idx_old": column}, inplace=True)

            if return_series:
                path_df.set_index("vertex_idx", inplace=True)
                self._path_links = path_df.associated_idx.astype(np.uint32)
            else:
                # dense array addressed by original ID; IDs that are not in
                # the graph keep pointing to themselves
                self._path_links = np.arange(
                    self.__n_vertices_init, dtype=np.uint32
                )
                self._path_links[path_df.vertex_idx.values] = (
                    path_df.associated_idx.values
                )
    else:
        self._path_links = None

    # without permutation the raw result is already in the caller's numbering
    if not permuted:
        if not return_series:
            return predecessors
        predecessors_series = pd.Series(predecessors, dtype=np.int32)
        predecessors_series.index.name = "vertex_idx"
        predecessors_series.name = "predecessor"
        return predecessors_series

    # reorder predecessors for permuted graphs
    pred_df = self._predecessors_with_original_ids(predecessors)

    if return_series:
        pred_df.set_index("vert_idx_old", inplace=True)
        predecessors_series = pred_df.predecessor.astype(np.int32)
        predecessors_series.index.name = "vertex_idx"
        predecessors_series.name = "predecessor"
        return predecessors_series

    # array output: one slot per original ID, sentinel where no vertex exists
    predecessors_array = np.full(
        self.__n_vertices_init, self._sentinel, dtype=np.int32
    )
    predecessors_array[pred_df.vert_idx_old.values] = (
        pred_df.predecessor.values.astype(np.int32)
    )
    return predecessors_array

def _predecessors_with_original_ids(self, predecessors):
    """Express a BFS result computed on the permuted graph with original IDs.

    Parameters
    ----------
    predecessors : np.ndarray
        Predecessor of each vertex, positions and values both expressed
        with the internal (permuted) vertex IDs; `self._sentinel` marks
        vertices without predecessor.

    Returns
    -------
    pred_df : pd.DataFrame
        One row per internal vertex, in internal ID order, with the columns
        `predecessor` (original ID of the predecessor, or the sentinel),
        `vert_idx_new` (internal ID) and `vert_idx_old` (original ID).
        Assumes `self._permutation` only has the `vert_idx_old` and
        `vert_idx_new` columns.
    """
    pred_df = pd.DataFrame(data={"predecessor": predecessors})
    pred_df["vert_idx_new"] = pred_df.index
    pred_df = pd.merge(
        pred_df,
        self._permutation,
        left_on="vert_idx_new",
        right_on="vert_idx_new",
        how="left",
    )

    # Map predecessor values back to original IDs; sentinel entries are
    # left untouched since they do not designate a vertex
    valid_mask = pred_df["predecessor"] != self._sentinel
    if valid_mask.any():
        pred_df_valid = pred_df[valid_mask].copy()
        pred_df_valid = pd.merge(
            pred_df_valid,
            self._permutation,
            left_on="predecessor",
            right_on="vert_idx_new",
            how="left",
            suffixes=("", "_pred"),
        )
        pred_df.loc[valid_mask, "predecessor"] = pred_df_valid[
            "vert_idx_old_pred"
        ].values.astype(np.int32)

    return pred_df
1982
+
1983
def get_vertices(self) -> Any:
    """
    Return the unique vertices of the graph.

    For a permuted graph the original vertex IDs are read from the
    permutation table. Otherwise the vertices are the sorted union of the
    "tail" and "head" columns of the edges.

    Returns
    -------
    vertices : ndarray
        A 1-D array containing the unique vertices.
    """
    if not self._permute or self._permutation is None:
        # NOTE(review): the column names are hard-coded here; confirm that
        # the edges frame always exposes "tail" and "head" at this point.
        tails = np.asarray(self._edges["tail"])
        heads = np.asarray(self._edges["head"])
        return np.union1d(tails, heads)
    return np.asarray(self._permutation.vert_idx_old)
2000
+
2001
def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
    """Compute path from predecessors or successors.

    Parameters
    ----------
    vertex_idx : int
        source or target vertex index.

    Returns
    -------
    path_vertices : numpy.ndarray or None
        Array, as produced by `compute_path`, storing the path from or to
        the given vertex index. If we are dealing with BFS from a source
        (orientation="out"), the input vertex is the target vertex and the
        path to the source is given backward from the target to the source
        using the predecessors. If we are dealing with BFS to a target
        (orientation="in"), the input vertex is the source vertex and the
        path to the target is given backward from the target to the source
        using the successors.
        None is returned, with a UserWarning, when no path links are stored.
    """
    if self._path_links is None:
        warnings.warn(
            "Current BFS instance has no path attribute: "
            "make sure path_tracking is set to True, and run the "
            "BFS algorithm",
            UserWarning,
        )
        return None

    path_links = self._path_links
    if isinstance(path_links, pd.Series):
        # The Series is labelled by vertex ID and its values are vertex IDs
        # of the same numbering, whereas compute_path receives a plain array
        # together with `vertex_idx`. Handing over the raw values is only
        # right when the labels are exactly 0..n-1, so the links are
        # scattered into a dense array addressed by vertex ID; IDs that are
        # absent from the Series point to themselves.
        labels = np.asarray(path_links.index, dtype=np.int64)
        links = np.asarray(path_links.values, dtype=np.uint32)
        if labels.size:
            dense_links = np.arange(int(labels.max()) + 1, dtype=np.uint32)
            dense_links[labels] = links
            path_links = dense_links
        else:
            path_links = links

    return compute_path(path_links, vertex_idx)
2035
+
2036
+
1490
2037
  # author : Francois Pacull
1491
2038
  # copyright : Architecture & Performance
1492
2039
  # email: francois.pacull@architecture-performance.fr