edsger 0.1.4__cp311-cp311-macosx_11_0_arm64.whl → 0.1.6__cp311-cp311-macosx_11_0_arm64.whl
- edsger/_version.py +1 -1
- edsger/bellman_ford.c +35284 -0
- edsger/bellman_ford.cpython-311-darwin.so +0 -0
- edsger/bellman_ford.pyx +551 -0
- edsger/bfs.c +33575 -0
- edsger/bfs.cpython-311-darwin.so +0 -0
- edsger/bfs.pyx +243 -0
- edsger/commons.c +286 -278
- edsger/commons.cpython-311-darwin.so +0 -0
- edsger/commons.pyx +7 -0
- edsger/dijkstra.c +2433 -1857
- edsger/dijkstra.cpython-311-darwin.so +0 -0
- edsger/dijkstra.pyx +7 -0
- edsger/graph_importer.py +340 -0
- edsger/networks.py +4 -2
- edsger/path.py +1410 -264
- edsger/path_tracking.c +423 -302
- edsger/path_tracking.cpython-311-darwin.so +0 -0
- edsger/path_tracking.pyx +7 -0
- edsger/pq_4ary_dec_0b.c +1175 -1016
- edsger/pq_4ary_dec_0b.cpython-311-darwin.so +0 -0
- edsger/pq_4ary_dec_0b.pyx +7 -0
- edsger/spiess_florian.c +1410 -1140
- edsger/spiess_florian.cpython-311-darwin.so +0 -0
- edsger/spiess_florian.pyx +7 -0
- edsger/star.c +1240 -767
- edsger/star.cpython-311-darwin.so +0 -0
- edsger/star.pyx +7 -0
- edsger/utils.py +69 -4
- edsger-0.1.6.dist-info/METADATA +304 -0
- edsger-0.1.6.dist-info/RECORD +40 -0
- edsger-0.1.4.dist-info/METADATA +0 -125
- edsger-0.1.4.dist-info/RECORD +0 -33
- {edsger-0.1.4.dist-info → edsger-0.1.6.dist-info}/WHEEL +0 -0
- {edsger-0.1.4.dist-info → edsger-0.1.6.dist-info}/licenses/AUTHORS.rst +0 -0
- {edsger-0.1.4.dist-info → edsger-0.1.6.dist-info}/licenses/LICENSE +0 -0
- {edsger-0.1.4.dist-info → edsger-0.1.6.dist-info}/top_level.txt +0 -0
edsger/path.py
CHANGED
@@ -2,11 +2,13 @@
 Path-related methods.
 """
 
+from typing import Optional, Union, List, Any
 import warnings
 
 import numpy as np
 import pandas as pd
 
+from edsger.graph_importer import standardize_graph_dataframe
 from edsger.commons import (
     A_VERY_SMALL_TIME_INTERVAL_PY,
     DTYPE_INF_PY,
@@ -14,6 +16,14 @@ from edsger.commons import (
     INF_FREQ_PY,
     MIN_FREQ_PY,
 )
+from edsger.bellman_ford import (
+    compute_bf_sssp,
+    compute_bf_sssp_w_path,
+    compute_bf_stsp,
+    compute_bf_stsp_w_path,
+    detect_negative_cycle,
+    detect_negative_cycle_csc,
+)
 from edsger.dijkstra import (
     compute_sssp,
     compute_sssp_w_path,
@@ -32,6 +42,7 @@ from edsger.star import (
     convert_graph_to_csr_float64,
     convert_graph_to_csr_uint32,
 )
+from edsger.bfs import bfs_csr, bfs_csc  # pylint: disable=no-name-in-module
 
 
 class Dijkstra:
@@ -39,6 +50,9 @@ class Dijkstra:
     Dijkstra's algorithm for finding the shortest paths between nodes in directed graphs with
     positive edge weights.
 
+    Note: If parallel edges exist between the same pair of vertices, only the edge with the minimum
+    weight will be kept automatically during initialization.
+
     Parameters:
     -----------
     edges: pandas.DataFrame
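The hunk above documents the new parallel-edge behaviour and, further down, the new `verbose` flag. A minimal usage sketch of the updated `Dijkstra` class, based only on the signatures shown in this diff (the three-edge DataFrame and its output are made up for illustration): two parallel edges between vertices 0 and 1 collapse to the cheaper one, and `verbose=True` reports the removal.

```python
import pandas as pd
from edsger.path import Dijkstra

# Hypothetical toy graph with a parallel edge 0 -> 1 (weights 2.0 and 5.0).
edges = pd.DataFrame({
    "tail": [0, 0, 1],
    "head": [1, 1, 2],
    "weight": [2.0, 5.0, 1.0],
})

# Only the minimum-weight parallel edge is kept; verbose=True prints the removal message.
sp = Dijkstra(edges, orientation="out", verbose=True)
dist = sp.run(vertex_idx=0, return_series=True)
print(dist)  # shortest path lengths from vertex 0 to every vertex
```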
@@ -61,27 +75,40 @@ class Dijkstra:
     permute: bool, optional (default=False)
         Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed to
         start from 0 and be contiguous.
+    verbose: bool, optional (default=False)
+        Whether to print messages about parallel edge removal.
     """
 
     def __init__(
         self,
-        edges,
-        tail="tail",
-        head="head",
-        weight="weight",
-        orientation="out",
-        check_edges=False,
-        permute=False,
-
+        edges: pd.DataFrame,
+        tail: str = "tail",
+        head: str = "head",
+        weight: str = "weight",
+        orientation: str = "out",
+        check_edges: bool = False,
+        permute: bool = False,
+        verbose: bool = False,
+    ) -> None:
         # load the edges
         if check_edges:
             self._check_edges(edges, tail, head, weight)
-
+        # Convert to standardized NumPy-backed pandas DataFrame
+        self._edges = standardize_graph_dataframe(edges, tail, head, weight)
         self._n_edges = len(self._edges)
+        self._verbose = verbose
+
+        # preprocess edges to handle parallel edges
+        self._preprocess_edges(tail, head, weight)
 
         # reindex the vertices
         self._permute = permute
-        if self.
+        if len(self._edges) == 0:
+            # Handle empty graphs
+            self._permutation = None
+            self._n_vertices = 0
+            self.__n_vertices_init = 0
+        elif self._permute:
             self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
             self._permutation = self._permute_graph(tail, head)
             self._n_vertices = len(self._permutation)
@@ -120,7 +147,7 @@ class Dijkstra:
         self._path_links = None
 
     @property
-    def edges(self):
+    def edges(self) -> Any:
         """
         Getter for the graph edge dataframe.
 
@@ -132,7 +159,7 @@ class Dijkstra:
         return self._edges
 
     @property
-    def n_edges(self):
+    def n_edges(self) -> int:
         """
         Getter for the number of graph edges.
 
@@ -144,7 +171,7 @@ class Dijkstra:
         return self._n_edges
 
     @property
-    def n_vertices(self):
+    def n_vertices(self) -> int:
         """
         Getter for the number of graph vertices.
 
@@ -156,7 +183,7 @@ class Dijkstra:
         return self._n_vertices
 
     @property
-    def orientation(self):
+    def orientation(self) -> str:
         """
         Getter of Dijkstra's algorithm orientation ("in" or "out").
 
@@ -168,7 +195,7 @@ class Dijkstra:
         return self._orientation
 
     @property
-    def permute(self):
+    def permute(self) -> bool:
         """
         Getter for the graph permutation/reindexing option.
 
@@ -180,7 +207,7 @@ class Dijkstra:
         return self._permute
 
     @property
-    def path_links(self):
+    def path_links(self) -> Optional[np.ndarray]:
         """
         Getter for the graph permutation/reindexing option.
 
@@ -191,9 +218,37 @@ class Dijkstra:
         """
         return self._path_links
 
+    def _preprocess_edges(self, tail, head, weight):
+        """
+        Preprocess edges to handle parallel edges by keeping only the minimum weight edge
+        between any pair of vertices.
+
+        Parameters
+        ----------
+        tail : str
+            The column name for tail vertices
+        head : str
+            The column name for head vertices
+        weight : str
+            The column name for edge weights
+        """
+        original_count = len(self._edges)
+        self._edges = self._edges.groupby([tail, head], as_index=False)[weight].min()
+        final_count = len(self._edges)
+
+        if original_count > final_count:
+            parallel_edges_removed = original_count - final_count
+            if self._verbose:
+                print(
+                    f"Automatically removed {parallel_edges_removed} parallel edge(s). "
+                    f"For each pair of vertices, kept the edge with minimum weight."
+                )
+
+        self._n_edges = len(self._edges)
+
     def _check_edges(self, edges, tail, head, weight):
         """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
-        if not isinstance(edges, pd.
+        if not isinstance(edges, pd.DataFrame):
             raise TypeError("edges should be a pandas DataFrame")
 
         if tail not in edges:
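The parallel-edge handling added in this hunk is a plain pandas group-by: for every (tail, head) pair only the minimum weight survives. A standalone sketch of that single step, with made-up numbers:

```python
import pandas as pd

edges = pd.DataFrame({
    "tail":   [0, 0, 1, 1],
    "head":   [1, 1, 2, 2],
    "weight": [2.0, 5.0, 1.0, 0.5],
})

# Same statement as in _preprocess_edges: keep the cheapest edge per (tail, head) pair.
deduped = edges.groupby(["tail", "head"], as_index=False)["weight"].min()
print(deduped)
#    tail  head  weight
# 0     0     1     2.0
# 1     1     2     0.5
```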
@@ -211,7 +266,7 @@ class Dijkstra:
                 f"edge weight column '{weight}' not found in graph edges dataframe"
             )
 
-        if edges[[tail, head, weight]].
+        if edges[[tail, head, weight]].isnull().to_numpy().any():
             raise ValueError(
                 " ".join(
                     [
@@ -241,7 +296,7 @@ class Dijkstra:
         permutation = pd.DataFrame(
             data={
                 "vert_idx": np.union1d(
-                    self._edges[tail]
+                    np.asarray(self._edges[tail]), np.asarray(self._edges[head])
                 )
             }
         )
@@ -284,13 +339,13 @@ class Dijkstra:
 
     def run(
         self,
-        vertex_idx,
-        path_tracking=False,
-        return_inf=True,
-        return_series=False,
-        heap_length_ratio=1.0,
-        termination_nodes=None,
-    ):
+        vertex_idx: int,
+        path_tracking: bool = False,
+        return_inf: bool = True,
+        return_series: bool = False,
+        heap_length_ratio: float = 1.0,
+        termination_nodes: Optional[List[int]] = None,
+    ) -> Union[np.ndarray, pd.Series]:
         """
         Runs shortest path algorithm between a given vertex and all other vertices in the graph.
 
@@ -323,17 +378,10 @@ class Dijkstra:
             Pandas Series object with the same data and the vertex indices as index.
 
         """
-        # validate the input arguments
-        if not isinstance(vertex_idx, int):
-            try:
-                vertex_idx = int(vertex_idx)
-            except ValueError as exc:
-                raise TypeError(
-                    f"argument 'vertex_idx={vertex_idx}' must be an integer"
-                ) from exc
+        # validate the input arguments - type checking handled by static typing
         if vertex_idx < 0:
             raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
-        if self._permute:
+        if self._permute and self._permutation is not None:
             if vertex_idx not in self._permutation.vert_idx_old.values:
                 raise ValueError(f"vertex {vertex_idx} not found in graph")
             vertex_new = self._permutation.loc[
@@ -343,16 +391,7 @@ class Dijkstra:
             if vertex_idx >= self._n_vertices:
                 raise ValueError(f"vertex {vertex_idx} not found in graph")
             vertex_new = vertex_idx
-
-            raise TypeError(
-                f"argument 'path_tracking=f{path_tracking}' must be of bool type"
-            )
-        if not isinstance(return_inf, bool):
-            raise TypeError(f"argument 'return_inf=f{return_inf}' must be of bool type")
-        if not isinstance(return_series, bool):
-            raise TypeError(
-                f"argument 'return_series=f{return_series}' must be of bool type"
-            )
+        # Type checking is now handled by static typing
         if not isinstance(heap_length_ratio, float):
             raise TypeError(
                 f"argument 'heap_length_ratio=f{heap_length_ratio}' must be of float type"
@@ -370,10 +409,10 @@ class Dijkstra:
         if termination_nodes is not None:
             try:
                 termination_nodes_array = np.array(termination_nodes, dtype=np.uint32)
-            except (ValueError, TypeError):
+            except (ValueError, TypeError) as exc:
                 raise TypeError(
                     "argument 'termination_nodes' must be array-like of integers"
-                )
+                ) from exc
 
             if termination_nodes_array.ndim != 1:
                 raise ValueError("argument 'termination_nodes' must be 1-dimensional")
@@ -382,7 +421,7 @@ class Dijkstra:
                 raise ValueError("argument 'termination_nodes' must not be empty")
 
             # handle vertex permutation if needed
-            if self._permute:
+            if self._permute and self._permutation is not None:
                 termination_nodes_permuted = []
                 for termination_node in termination_nodes_array:
                     if termination_node not in self._permutation.vert_idx_old.values:
@@ -500,7 +539,7 @@ class Dijkstra:
                 heap_length,
             )
 
-        if self._permute:
+        if self._permute and self._permutation is not None:
             # permute back the path vertex indices
             path_df = pd.DataFrame(
                 data={
@@ -546,11 +585,17 @@ class Dijkstra:
 
         # reorder path lengths
         if return_series:
-            if
+            if (
+                self._permute
+                and termination_nodes_array is None
+                and self._permutation is not None
+            ):
                 self._permutation["path_length"] = path_length_values
-                path_lengths_df =
-                    ["vert_idx_old", "path_length"]
-
+                path_lengths_df = (
+                    self._permutation[["vert_idx_old", "path_length"]]
+                    .copy()
+                    .sort_values("vert_idx_old")
+                )  # type: ignore
                 path_lengths_df.set_index("vert_idx_old", drop=True, inplace=True)
                 path_lengths_df.index.name = "vertex_idx"
                 path_lengths_series = path_lengths_df.path_length
@@ -558,7 +603,11 @@ class Dijkstra:
                 path_lengths_series = pd.Series(path_length_values)
                 path_lengths_series.index.name = "vertex_idx"
                 path_lengths_series.name = "path_length"
-                if
+                if (
+                    self._permute
+                    and termination_nodes_array is not None
+                    and termination_nodes is not None
+                ):
                     # For early termination with permutation, use original termination node indices
                     path_lengths_series.index = termination_nodes
 
@@ -568,19 +617,20 @@ class Dijkstra:
         if termination_nodes_array is not None:
             return path_length_values
 
-        if self._permute:
+        if self._permute and self._permutation is not None:
             self._permutation["path_length"] = path_length_values
             if return_inf:
                 path_length_values = np.inf * np.ones(self.__n_vertices_init)
             else:
                 path_length_values = DTYPE_INF_PY * np.ones(self.__n_vertices_init)
+            assert self._permutation is not None  # guaranteed by condition above
             path_length_values[self._permutation.vert_idx_old.values] = (
                 self._permutation.path_length.values
             )
 
         return path_length_values
 
-    def get_vertices(self):
+    def get_vertices(self) -> Any:
         """
         Get the unique vertices from the graph.
 
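When `permute=True`, the path lengths are computed on reindexed vertices and then scattered back to the original vertex IDs, which is what the `path_length_values[self._permutation.vert_idx_old.values] = ...` assignment in the hunk above does. A small NumPy illustration of that scatter step (toy values, not taken from the package):

```python
import numpy as np

n_vertices_init = 6                            # original ID space: 0..5
vert_idx_old = np.array([1, 3, 5])             # original IDs actually present in the graph
permuted_lengths = np.array([0.0, 2.5, 4.0])   # results indexed by the new, contiguous IDs

# Vertices absent from the graph default to infinity, then results are scattered back.
path_length_values = np.inf * np.ones(n_vertices_init)
path_length_values[vert_idx_old] = permuted_lengths
print(path_length_values)  # [inf 0.  inf 2.5 inf 4. ]
```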
@@ -592,11 +642,13 @@ class Dijkstra:
         vertices : ndarray
             A 1-D array containing the unique vertices.
         """
-        if self._permute:
-            return self._permutation.vert_idx_old
-        return np.union1d(
+        if self._permute and self._permutation is not None:
+            return np.asarray(self._permutation.vert_idx_old)
+        return np.union1d(
+            np.asarray(self._edges["tail"]), np.asarray(self._edges["head"])
+        )
 
-    def get_path(self, vertex_idx):
+    def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
         """Compute path from predecessors or successors.
 
         Parameters:
@@ -632,245 +684,1055 @@ class Dijkstra:
|
|
632
684
|
return path_vertices
|
633
685
|
|
634
686
|
|
635
|
-
class
|
687
|
+
class BellmanFord:
|
636
688
|
"""
|
637
|
-
|
638
|
-
|
639
|
-
|
640
|
-
Parameters
|
641
|
-
----------
|
642
|
-
edges : pandas.DataFrame
|
643
|
-
A DataFrame containing graph edge information with columns specified by `tail`, `head`,
|
644
|
-
`trav_time`, and `freq`. Must not contain missing values.
|
645
|
-
tail : str, optional
|
646
|
-
Name of the column in `edges` representing the tail nodes (source nodes), by default "tail".
|
647
|
-
head : str, optional
|
648
|
-
Name of the column in `edges` representing the head nodes (target nodes), by default "head".
|
649
|
-
trav_time : str, optional
|
650
|
-
Name of the column in `edges` representing travel times for edges, by default "trav_time".
|
651
|
-
freq : str, optional
|
652
|
-
Name of the column in `edges` representing frequencies of edges, by default "freq".
|
653
|
-
check_edges : bool, optional
|
654
|
-
Whether to validate the structure and data types of `edges`, by default False.
|
655
|
-
orientation : {"in", "out"}, optional
|
656
|
-
Determines the orientation of the graph structure for traversal.
|
657
|
-
- "in": Graph traversal is from destination to origin.
|
658
|
-
- "out": Graph traversal is from origin to destination.
|
659
|
-
By default "in".
|
689
|
+
Bellman-Ford algorithm for finding the shortest paths between nodes in directed graphs.
|
690
|
+
Supports negative edge weights and detects negative cycles.
|
660
691
|
|
661
|
-
|
662
|
-
|
663
|
-
edge_count : int
|
664
|
-
The number of edges in the graph.
|
665
|
-
vertex_count : int
|
666
|
-
The total number of vertices in the graph.
|
667
|
-
u_i_vec : numpy.ndarray
|
668
|
-
An array storing the least travel time for each vertex after running the algorithm.
|
669
|
-
_edges : pandas.DataFrame
|
670
|
-
Internal DataFrame containing the edges with additional metadata.
|
671
|
-
_trav_time : numpy.ndarray
|
672
|
-
Array of travel times for edges.
|
673
|
-
_freq : numpy.ndarray
|
674
|
-
Array of frequencies for edges.
|
675
|
-
_tail : numpy.ndarray
|
676
|
-
Array of tail nodes (source nodes) for edges.
|
677
|
-
_head : numpy.ndarray
|
678
|
-
Array of head nodes (target nodes) for edges.
|
679
|
-
__indptr : numpy.ndarray
|
680
|
-
Array for compressed row (or column) pointers in the CSR/CSC representation.
|
681
|
-
_edge_idx : numpy.ndarray
|
682
|
-
Array of edge indices in the CSR/CSC representation.
|
692
|
+
Note: If parallel edges exist between the same pair of vertices, only the edge with the minimum
|
693
|
+
weight will be kept automatically during initialization.
|
683
694
|
|
684
|
-
|
685
|
-
|
686
|
-
|
687
|
-
|
688
|
-
|
689
|
-
|
690
|
-
|
691
|
-
|
692
|
-
|
693
|
-
|
695
|
+
Parameters:
|
696
|
+
-----------
|
697
|
+
edges: pandas.DataFrame
|
698
|
+
DataFrame containing the edges of the graph. It should have three columns: 'tail', 'head',
|
699
|
+
and 'weight'. The 'tail' column should contain the IDs of the starting nodes, the 'head'
|
700
|
+
column should contain the IDs of the ending nodes, and the 'weight' column should contain
|
701
|
+
the weights of the edges (can be negative).
|
702
|
+
tail: str, optional (default='tail')
|
703
|
+
The name of the column in the DataFrame that contains the IDs of the edge starting nodes.
|
704
|
+
head: str, optional (default='head')
|
705
|
+
The name of the column in the DataFrame that contains the IDs of the edge ending nodes.
|
706
|
+
weight: str, optional (default='weight')
|
707
|
+
The name of the column in the DataFrame that contains the weights of the edges.
|
708
|
+
orientation: str, optional (default='out')
|
709
|
+
The orientation of Bellman-Ford's algorithm. It can be either 'out' for single source
|
710
|
+
shortest paths or 'in' for single target shortest path.
|
711
|
+
check_edges: bool, optional (default=False)
|
712
|
+
Whether to check if the edges DataFrame is well-formed. If set to True, the edges
|
713
|
+
DataFrame will be checked for missing values and invalid data types. Note: negative
|
714
|
+
weights are allowed.
|
715
|
+
permute: bool, optional (default=False)
|
716
|
+
Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed to
|
717
|
+
start from 0 and be contiguous.
|
718
|
+
verbose: bool, optional (default=False)
|
719
|
+
Whether to print messages about parallel edge removal.
|
694
720
|
"""
|
695
721
|
|
696
722
|
def __init__(
|
697
723
|
self,
|
698
|
-
edges,
|
699
|
-
tail="tail",
|
700
|
-
head="head",
|
701
|
-
|
702
|
-
|
703
|
-
check_edges=False,
|
704
|
-
|
705
|
-
|
724
|
+
edges: pd.DataFrame,
|
725
|
+
tail: str = "tail",
|
726
|
+
head: str = "head",
|
727
|
+
weight: str = "weight",
|
728
|
+
orientation: str = "out",
|
729
|
+
check_edges: bool = False,
|
730
|
+
permute: bool = False,
|
731
|
+
verbose: bool = False,
|
732
|
+
) -> None:
|
706
733
|
# load the edges
|
707
734
|
if check_edges:
|
708
|
-
self._check_edges(edges, tail, head,
|
709
|
-
|
710
|
-
self.
|
735
|
+
self._check_edges(edges, tail, head, weight)
|
736
|
+
# Convert to standardized NumPy-backed pandas DataFrame
|
737
|
+
self._edges = standardize_graph_dataframe(edges, tail, head, weight)
|
738
|
+
self._n_edges = len(self._edges)
|
739
|
+
self._verbose = verbose
|
711
740
|
|
712
|
-
#
|
713
|
-
self.
|
714
|
-
self._edges[trav_time] > DTYPE_INF_PY, DTYPE_INF_PY, self._edges[trav_time]
|
715
|
-
)
|
716
|
-
self._edges[trav_time] = np.where(
|
717
|
-
self._edges[trav_time] < A_VERY_SMALL_TIME_INTERVAL_PY,
|
718
|
-
A_VERY_SMALL_TIME_INTERVAL_PY,
|
719
|
-
self._edges[trav_time],
|
720
|
-
)
|
721
|
-
self._edges[freq] = np.where(
|
722
|
-
self._edges[freq] > INF_FREQ_PY, INF_FREQ_PY, self._edges[freq]
|
723
|
-
)
|
724
|
-
self._edges[freq] = np.where(
|
725
|
-
self._edges[freq] < MIN_FREQ_PY, MIN_FREQ_PY, self._edges[freq]
|
726
|
-
)
|
741
|
+
# preprocess edges to handle parallel edges
|
742
|
+
self._preprocess_edges(tail, head, weight)
|
727
743
|
|
728
|
-
#
|
729
|
-
self.
|
730
|
-
|
731
|
-
|
744
|
+
# reindex the vertices
|
745
|
+
self._permute = permute
|
746
|
+
if len(self._edges) == 0:
|
747
|
+
# Handle empty graphs
|
748
|
+
self._permutation = None
|
749
|
+
self._n_vertices = 0
|
750
|
+
self.__n_vertices_init = 0
|
751
|
+
elif self._permute:
|
752
|
+
self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
|
753
|
+
self._permutation = self._permute_graph(tail, head)
|
754
|
+
self._n_vertices = len(self._permutation)
|
755
|
+
else:
|
756
|
+
self._permutation = None
|
757
|
+
self._n_vertices = self._edges[[tail, head]].max(axis=0).max() + 1
|
758
|
+
self.__n_vertices_init = self._n_vertices
|
732
759
|
|
733
|
-
# convert to CSR/CSC
|
734
|
-
self.
|
735
|
-
assert orientation in ["out", "in"]
|
760
|
+
# convert to CSR/CSC:
|
761
|
+
self._check_orientation(orientation)
|
736
762
|
self._orientation = orientation
|
737
763
|
if self._orientation == "out":
|
738
|
-
fs_indptr,
|
739
|
-
self._edges, tail, head,
|
764
|
+
fs_indptr, fs_indices, fs_data = convert_graph_to_csr_float64(
|
765
|
+
self._edges, tail, head, weight, self._n_vertices
|
740
766
|
)
|
767
|
+
self.__indices = fs_indices.astype(np.uint32)
|
741
768
|
self.__indptr = fs_indptr.astype(np.uint32)
|
742
|
-
self.
|
769
|
+
self.__edge_weights = fs_data.astype(DTYPE_PY)
|
743
770
|
else:
|
744
|
-
rs_indptr,
|
745
|
-
self._edges, tail, head,
|
771
|
+
rs_indptr, rs_indices, rs_data = convert_graph_to_csc_float64(
|
772
|
+
self._edges, tail, head, weight, self._n_vertices
|
746
773
|
)
|
774
|
+
self.__indices = rs_indices.astype(np.uint32)
|
747
775
|
self.__indptr = rs_indptr.astype(np.uint32)
|
748
|
-
self.
|
776
|
+
self.__edge_weights = rs_data.astype(DTYPE_PY)
|
749
777
|
|
750
|
-
#
|
751
|
-
self.
|
752
|
-
self._freq = self._edges[freq].values.astype(DTYPE_PY)
|
753
|
-
self._tail = self._edges[tail].values.astype(np.uint32)
|
754
|
-
self._head = self._edges[head].values.astype(np.uint32)
|
778
|
+
# Check if graph has any negative weights (for optimization)
|
779
|
+
self._has_negative_weights = np.any(self.__edge_weights < 0)
|
755
780
|
|
756
|
-
|
757
|
-
self.
|
781
|
+
self._path_links = None
|
782
|
+
self._has_negative_cycle = False
|
758
783
|
|
759
|
-
|
784
|
+
@property
|
785
|
+
def edges(self) -> Any:
|
760
786
|
"""
|
761
|
-
|
762
|
-
|
763
|
-
Parameters
|
764
|
-
----------
|
765
|
-
origin : int or list of int
|
766
|
-
The starting vertex or vertices of the demand. If `self._orientation` is "in",
|
767
|
-
this can be a list of origins corresponding to the demand volumes.
|
768
|
-
destination : int or list of int
|
769
|
-
The target vertex or vertices of the demand. If `self._orientation` is "out",
|
770
|
-
this can be a list of destinations corresponding to the demand volumes.
|
771
|
-
volume : float or list of float
|
772
|
-
The demand volume associated with each origin or destination. Must be non-negative.
|
773
|
-
If a single float is provided, it is applied to a single origin-destination pair.
|
774
|
-
return_inf : bool, optional
|
775
|
-
If True, returns additional information from the computation (not yet implemented).
|
776
|
-
Default is False.
|
787
|
+
Getter for the graph edge dataframe.
|
777
788
|
|
778
|
-
|
779
|
-
|
780
|
-
|
781
|
-
|
782
|
-
|
783
|
-
|
784
|
-
If any vertex index or volume is invalid.
|
785
|
-
TypeError
|
786
|
-
If `volume` is not a float or list of floats.
|
787
|
-
ValueError
|
788
|
-
If any volume value is negative.
|
789
|
+
Returns
|
790
|
+
-------
|
791
|
+
edges: pandas.DataFrame
|
792
|
+
DataFrame containing the edges of the graph.
|
793
|
+
"""
|
794
|
+
return self._edges
|
789
795
|
|
790
|
-
|
791
|
-
|
792
|
-
- The method modifies the `self._edges` DataFrame by adding a "volume" column representing
|
793
|
-
edge volumes based on the computed hyperpath.
|
794
|
-
- The `self.u_i_vec` array is updated to store the least travel time for each vertex.
|
795
|
-
- Only "in" orientation is currently supported.
|
796
|
+
@property
|
797
|
+
def n_edges(self) -> int:
|
796
798
|
"""
|
797
|
-
|
798
|
-
self._edges["volume"] = 0.0
|
799
|
-
self.u_i_vec = None
|
799
|
+
Getter for the number of graph edges.
|
800
800
|
|
801
|
-
|
802
|
-
|
801
|
+
Returns
|
802
|
+
-------
|
803
|
+
n_edges: int
|
804
|
+
The number of edges in the graph.
|
805
|
+
"""
|
806
|
+
return self._n_edges
|
803
807
|
|
804
|
-
|
805
|
-
|
806
|
-
|
807
|
-
|
808
|
-
self._check_vertex_idx(origin)
|
809
|
-
if not isinstance(destination, list):
|
810
|
-
destination = [destination]
|
811
|
-
assert len(destination) == len(volume)
|
812
|
-
for i, item in enumerate(destination):
|
813
|
-
self._check_vertex_idx(item)
|
814
|
-
self._check_volume(volume[i])
|
815
|
-
demand_indices = np.array(destination, dtype=np.uint32)
|
816
|
-
elif self._orientation == "in":
|
817
|
-
if not isinstance(origin, list):
|
818
|
-
origin = [origin]
|
819
|
-
assert len(origin) == len(volume)
|
820
|
-
for i, item in enumerate(origin):
|
821
|
-
self._check_vertex_idx(item)
|
822
|
-
self._check_volume(volume[i])
|
823
|
-
self._check_vertex_idx(destination)
|
824
|
-
demand_indices = np.array(origin, dtype=np.uint32)
|
825
|
-
assert isinstance(return_inf, bool)
|
808
|
+
@property
|
809
|
+
def n_vertices(self) -> int:
|
810
|
+
"""
|
811
|
+
Getter for the number of graph vertices.
|
826
812
|
|
827
|
-
|
813
|
+
Returns
|
814
|
+
-------
|
815
|
+
n_vertices: int
|
816
|
+
The number of nodes in the graph (after permutation, if _permute is True).
|
817
|
+
"""
|
818
|
+
return self._n_vertices
|
828
819
|
|
829
|
-
|
830
|
-
|
831
|
-
|
832
|
-
|
820
|
+
@property
|
821
|
+
def orientation(self) -> str:
|
822
|
+
"""
|
823
|
+
Getter of Bellman-Ford's algorithm orientation ("in" or "out").
|
833
824
|
|
834
|
-
|
835
|
-
|
836
|
-
|
837
|
-
|
838
|
-
|
839
|
-
|
840
|
-
self._head,
|
841
|
-
demand_indices, # source vertex indices
|
842
|
-
demand_values,
|
843
|
-
self._edges["volume"].values,
|
844
|
-
u_i_vec,
|
845
|
-
self.vertex_count,
|
846
|
-
destination,
|
847
|
-
)
|
848
|
-
self.u_i_vec = u_i_vec
|
825
|
+
Returns
|
826
|
+
-------
|
827
|
+
orientation : str
|
828
|
+
The orientation of Bellman-Ford's algorithm.
|
829
|
+
"""
|
830
|
+
return self._orientation
|
849
831
|
|
850
|
-
|
851
|
-
|
852
|
-
|
853
|
-
|
832
|
+
@property
|
833
|
+
def permute(self) -> bool:
|
834
|
+
"""
|
835
|
+
Getter for the graph permutation/reindexing option.
|
854
836
|
|
855
|
-
|
856
|
-
|
837
|
+
Returns
|
838
|
+
-------
|
839
|
+
permute : bool
|
840
|
+
Whether to permute the IDs of the nodes.
|
841
|
+
"""
|
842
|
+
return self._permute
|
843
|
+
|
844
|
+
@property
|
845
|
+
def path_links(self) -> Optional[np.ndarray]:
|
846
|
+
"""
|
847
|
+
Getter for the path links (predecessors or successors).
|
848
|
+
|
849
|
+
Returns
|
850
|
+
-------
|
851
|
+
path_links: numpy.ndarray
|
852
|
+
predecessors or successors node index if the path tracking is activated.
|
853
|
+
"""
|
854
|
+
return self._path_links
|
855
|
+
|
856
|
+
def _preprocess_edges(self, tail, head, weight):
|
857
|
+
"""
|
858
|
+
Preprocess edges to handle parallel edges by keeping only the minimum weight edge
|
859
|
+
between any pair of vertices.
|
860
|
+
|
861
|
+
Parameters
|
862
|
+
----------
|
863
|
+
tail : str
|
864
|
+
The column name for tail vertices
|
865
|
+
head : str
|
866
|
+
The column name for head vertices
|
867
|
+
weight : str
|
868
|
+
The column name for edge weights
|
869
|
+
"""
|
870
|
+
original_count = len(self._edges)
|
871
|
+
self._edges = self._edges.groupby([tail, head], as_index=False)[weight].min()
|
872
|
+
final_count = len(self._edges)
|
873
|
+
|
874
|
+
if original_count > final_count:
|
875
|
+
parallel_edges_removed = original_count - final_count
|
876
|
+
if self._verbose:
|
877
|
+
print(
|
878
|
+
f"Automatically removed {parallel_edges_removed} parallel edge(s). "
|
879
|
+
f"For each pair of vertices, kept the edge with minimum weight."
|
880
|
+
)
|
881
|
+
|
882
|
+
self._n_edges = len(self._edges)
|
883
|
+
|
884
|
+
def _check_edges(self, edges, tail, head, weight):
|
885
|
+
"""Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
|
886
|
+
if not isinstance(edges, pd.DataFrame):
|
887
|
+
raise TypeError("edges should be a pandas DataFrame")
|
888
|
+
|
889
|
+
if tail not in edges:
|
890
|
+
raise KeyError(
|
891
|
+
f"edge tail column '{tail}' not found in graph edges dataframe"
|
892
|
+
)
|
893
|
+
|
894
|
+
if head not in edges:
|
895
|
+
raise KeyError(
|
896
|
+
f"edge head column '{head}' not found in graph edges dataframe"
|
897
|
+
)
|
898
|
+
|
899
|
+
if weight not in edges:
|
900
|
+
raise KeyError(
|
901
|
+
f"edge weight column '{weight}' not found in graph edges dataframe"
|
902
|
+
)
|
903
|
+
|
904
|
+
if edges[[tail, head, weight]].isnull().to_numpy().any():
|
905
|
+
raise ValueError(
|
906
|
+
" ".join(
|
907
|
+
[
|
908
|
+
f"edges[[{tail}, {head}, {weight}]] ",
|
909
|
+
"should not have any missing value",
|
910
|
+
]
|
911
|
+
)
|
912
|
+
)
|
913
|
+
|
914
|
+
for col in [tail, head]:
|
915
|
+
if not pd.api.types.is_integer_dtype(edges[col].dtype):
|
916
|
+
raise TypeError(f"edges['{col}'] should be of integer type")
|
917
|
+
|
918
|
+
if not pd.api.types.is_numeric_dtype(edges[weight].dtype):
|
919
|
+
raise TypeError(f"edges['{weight}'] should be of numeric type")
|
920
|
+
|
921
|
+
# Note: Unlike Dijkstra, we allow negative weights for Bellman-Ford
|
922
|
+
if not np.isfinite(edges[weight]).all():
|
923
|
+
raise ValueError(f"edges['{weight}'] should be finite")
|
924
|
+
|
925
|
+
def _permute_graph(self, tail, head):
|
926
|
+
"""Permute the IDs of the nodes to start from 0 and be contiguous.
|
927
|
+
Returns a DataFrame with the permuted IDs."""
|
928
|
+
|
929
|
+
permutation = pd.DataFrame(
|
930
|
+
data={
|
931
|
+
"vert_idx": np.union1d(
|
932
|
+
np.asarray(self._edges[tail]), np.asarray(self._edges[head])
|
933
|
+
)
|
934
|
+
}
|
935
|
+
)
|
936
|
+
permutation["vert_idx_new"] = permutation.index
|
937
|
+
permutation.index.name = "index"
|
938
|
+
|
939
|
+
self._edges = pd.merge(
|
940
|
+
self._edges,
|
941
|
+
permutation[["vert_idx", "vert_idx_new"]],
|
942
|
+
left_on=tail,
|
943
|
+
right_on="vert_idx",
|
944
|
+
how="left",
|
945
|
+
)
|
946
|
+
self._edges.drop([tail, "vert_idx"], axis=1, inplace=True)
|
947
|
+
self._edges.rename(columns={"vert_idx_new": tail}, inplace=True)
|
948
|
+
|
949
|
+
self._edges = pd.merge(
|
950
|
+
self._edges,
|
951
|
+
permutation[["vert_idx", "vert_idx_new"]],
|
952
|
+
left_on=head,
|
953
|
+
right_on="vert_idx",
|
954
|
+
how="left",
|
955
|
+
)
|
956
|
+
self._edges.drop([head, "vert_idx"], axis=1, inplace=True)
|
957
|
+
self._edges.rename(columns={"vert_idx_new": head}, inplace=True)
|
958
|
+
|
959
|
+
permutation.rename(columns={"vert_idx": "vert_idx_old"}, inplace=True)
|
960
|
+
permutation.reset_index(drop=True, inplace=True)
|
961
|
+
permutation.sort_values(by="vert_idx_new", inplace=True)
|
962
|
+
|
963
|
+
permutation.index.name = "index"
|
964
|
+
self._edges.index.name = "index"
|
965
|
+
|
966
|
+
return permutation
|
967
|
+
|
968
|
+
def _check_orientation(self, orientation):
|
969
|
+
"""Checks the orientation attribute."""
|
970
|
+
if orientation not in ["in", "out"]:
|
971
|
+
raise ValueError("orientation should be either 'in' on 'out'")
|
972
|
+
|
973
|
+
def run(
|
974
|
+
self,
|
975
|
+
vertex_idx: int,
|
976
|
+
path_tracking: bool = False,
|
977
|
+
return_inf: bool = True,
|
978
|
+
return_series: bool = False,
|
979
|
+
detect_negative_cycles: bool = True,
|
980
|
+
) -> Union[np.ndarray, pd.Series]:
|
981
|
+
"""
|
982
|
+
Runs Bellman-Ford shortest path algorithm between a given vertex and all other vertices
|
983
|
+
in the graph.
|
984
|
+
|
985
|
+
Parameters
|
986
|
+
----------
|
987
|
+
vertex_idx : int
|
988
|
+
The index of the source/target vertex.
|
989
|
+
path_tracking : bool, optional (default=False)
|
990
|
+
Whether to track the shortest path(s) from the source vertex to all other vertices in
|
991
|
+
the graph.
|
992
|
+
return_inf : bool, optional (default=True)
|
993
|
+
Whether to return path length(s) as infinity (np.inf) when no path exists.
|
994
|
+
return_series : bool, optional (default=False)
|
995
|
+
Whether to return a Pandas Series object indexed by vertex indices with path length(s)
|
996
|
+
as values.
|
997
|
+
detect_negative_cycles : bool, optional (default=True)
|
998
|
+
Whether to detect negative cycles in the graph. If True and a negative cycle is
|
999
|
+
detected,
|
1000
|
+
raises a ValueError.
|
1001
|
+
|
1002
|
+
Returns
|
1003
|
+
-------
|
1004
|
+
path_length_values or path_lengths_series : array_like or Pandas Series
|
1005
|
+
If `return_series=False`, a 1D Numpy array of shape (n_vertices,) with the shortest
|
1006
|
+
path length from the source vertex to each vertex in the graph (`orientation="out"`), or
|
1007
|
+
from each vertex to the target vertex (`orientation="in"`). If `return_series=True`, a
|
1008
|
+
Pandas Series object with the same data and the vertex indices as index.
|
1009
|
+
|
1010
|
+
Raises
|
1011
|
+
------
|
1012
|
+
ValueError
|
1013
|
+
If detect_negative_cycles is True and a negative cycle is detected in the graph.
|
1014
|
+
"""
|
1015
|
+
# validate the input arguments - type checking handled by static typing
|
1016
|
+
if vertex_idx < 0:
|
1017
|
+
raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be positive")
|
1018
|
+
if self._permute and self._permutation is not None:
|
1019
|
+
if vertex_idx not in self._permutation.vert_idx_old.values:
|
1020
|
+
raise ValueError(f"vertex {vertex_idx} not found in graph")
|
1021
|
+
vertex_new = self._permutation.loc[
|
1022
|
+
self._permutation.vert_idx_old == vertex_idx, "vert_idx_new"
|
1023
|
+
].iloc[0]
|
1024
|
+
else:
|
1025
|
+
if vertex_idx >= self._n_vertices:
|
1026
|
+
raise ValueError(f"vertex {vertex_idx} not found in graph")
|
1027
|
+
vertex_new = vertex_idx
|
1028
|
+
# Type checking is now handled by static typing
|
1029
|
+
|
1030
|
+
# compute path length
|
1031
|
+
if not path_tracking:
|
1032
|
+
self._path_links = None
|
1033
|
+
if self._orientation == "in":
|
1034
|
+
path_length_values = compute_bf_stsp(
|
1035
|
+
self.__indptr,
|
1036
|
+
self.__indices,
|
1037
|
+
self.__edge_weights,
|
1038
|
+
vertex_new,
|
1039
|
+
self._n_vertices,
|
1040
|
+
)
|
1041
|
+
else:
|
1042
|
+
path_length_values = compute_bf_sssp(
|
1043
|
+
self.__indptr,
|
1044
|
+
self.__indices,
|
1045
|
+
self.__edge_weights,
|
1046
|
+
vertex_new,
|
1047
|
+
self._n_vertices,
|
1048
|
+
)
|
1049
|
+
else:
|
1050
|
+
self._path_links = np.arange(0, self._n_vertices, dtype=np.uint32)
|
1051
|
+
if self._orientation == "in":
|
1052
|
+
path_length_values = compute_bf_stsp_w_path(
|
1053
|
+
self.__indptr,
|
1054
|
+
self.__indices,
|
1055
|
+
self.__edge_weights,
|
1056
|
+
self._path_links,
|
1057
|
+
vertex_new,
|
1058
|
+
self._n_vertices,
|
1059
|
+
)
|
1060
|
+
else:
|
1061
|
+
path_length_values = compute_bf_sssp_w_path(
|
1062
|
+
self.__indptr,
|
1063
|
+
self.__indices,
|
1064
|
+
self.__edge_weights,
|
1065
|
+
self._path_links,
|
1066
|
+
vertex_new,
|
1067
|
+
self._n_vertices,
|
1068
|
+
)
|
1069
|
+
|
1070
|
+
if self._permute and self._permutation is not None:
|
1071
|
+
# permute back the path vertex indices
|
1072
|
+
path_df = pd.DataFrame(
|
1073
|
+
data={
|
1074
|
+
"vertex_idx": np.arange(self._n_vertices),
|
1075
|
+
"associated_idx": self._path_links,
|
1076
|
+
}
|
1077
|
+
)
|
1078
|
+
path_df = pd.merge(
|
1079
|
+
path_df,
|
1080
|
+
self._permutation,
|
1081
|
+
left_on="vertex_idx",
|
1082
|
+
right_on="vert_idx_new",
|
1083
|
+
how="left",
|
1084
|
+
)
|
1085
|
+
path_df.drop(["vertex_idx", "vert_idx_new"], axis=1, inplace=True)
|
1086
|
+
path_df.rename(columns={"vert_idx_old": "vertex_idx"}, inplace=True)
|
1087
|
+
path_df = pd.merge(
|
1088
|
+
path_df,
|
1089
|
+
self._permutation,
|
1090
|
+
left_on="associated_idx",
|
1091
|
+
right_on="vert_idx_new",
|
1092
|
+
how="left",
|
1093
|
+
)
|
1094
|
+
path_df.drop(["associated_idx", "vert_idx_new"], axis=1, inplace=True)
|
1095
|
+
path_df.rename(columns={"vert_idx_old": "associated_idx"}, inplace=True)
|
1096
|
+
|
1097
|
+
if return_series:
|
1098
|
+
path_df.set_index("vertex_idx", inplace=True)
|
1099
|
+
self._path_links = path_df.associated_idx.astype(np.uint32)
|
1100
|
+
else:
|
1101
|
+
self._path_links = np.arange(
|
1102
|
+
self.__n_vertices_init, dtype=np.uint32
|
1103
|
+
)
|
1104
|
+
self._path_links[path_df.vertex_idx.values] = (
|
1105
|
+
path_df.associated_idx.values
|
1106
|
+
)
|
1107
|
+
|
1108
|
+
# detect negative cycles if requested (only if negative weights exist)
|
1109
|
+
if detect_negative_cycles and self._has_negative_weights:
|
1110
|
+
if self._orientation == "out":
|
1111
|
+
# CSR format - can use detect_negative_cycle directly
|
1112
|
+
self._has_negative_cycle = detect_negative_cycle(
|
1113
|
+
self.__indptr,
|
1114
|
+
self.__indices,
|
1115
|
+
self.__edge_weights,
|
1116
|
+
path_length_values,
|
1117
|
+
self._n_vertices,
|
1118
|
+
)
|
1119
|
+
else:
|
1120
|
+
# CSC format - use CSC-specific negative cycle detection
|
1121
|
+
# Much more efficient than converting CSC→CSR
|
1122
|
+
self._has_negative_cycle = detect_negative_cycle_csc(
|
1123
|
+
self.__indptr,
|
1124
|
+
self.__indices,
|
1125
|
+
self.__edge_weights,
|
1126
|
+
path_length_values,
|
1127
|
+
self._n_vertices,
|
1128
|
+
)
|
1129
|
+
|
1130
|
+
if self._has_negative_cycle:
|
1131
|
+
raise ValueError("Negative cycle detected in the graph")
|
1132
|
+
|
1133
|
+
# deal with infinity
|
1134
|
+
if return_inf:
|
1135
|
+
path_length_values = np.where(
|
1136
|
+
path_length_values == DTYPE_INF_PY, np.inf, path_length_values
|
1137
|
+
)
|
1138
|
+
|
1139
|
+
# reorder path lengths
|
1140
|
+
if return_series:
|
1141
|
+
if self._permute and self._permutation is not None:
|
1142
|
+
path_df = pd.DataFrame(
|
1143
|
+
data={"path_length": path_length_values[: self._n_vertices]}
|
1144
|
+
)
|
1145
|
+
path_df["vert_idx_new"] = path_df.index
|
1146
|
+
path_df = pd.merge(
|
1147
|
+
path_df,
|
1148
|
+
self._permutation,
|
1149
|
+
left_on="vert_idx_new",
|
1150
|
+
right_on="vert_idx_new",
|
1151
|
+
how="left",
|
1152
|
+
)
|
1153
|
+
path_df.drop(["vert_idx_new"], axis=1, inplace=True)
|
1154
|
+
path_df.set_index("vert_idx_old", inplace=True)
|
1155
|
+
path_lengths_series = path_df.path_length.astype(DTYPE_PY)
|
1156
|
+
else:
|
1157
|
+
path_lengths_series = pd.Series(
|
1158
|
+
data=path_length_values[: self._n_vertices], dtype=DTYPE_PY
|
1159
|
+
)
|
1160
|
+
path_lengths_series.index = np.arange(self._n_vertices)
|
1161
|
+
path_lengths_series.index.name = None
|
1162
|
+
return path_lengths_series
|
1163
|
+
|
1164
|
+
# No else needed - de-indent the code
|
1165
|
+
if self._permute and self._permutation is not None:
|
1166
|
+
path_df = pd.DataFrame(
|
1167
|
+
data={"path_length": path_length_values[: self._n_vertices]}
|
1168
|
+
)
|
1169
|
+
path_df["vert_idx_new"] = path_df.index
|
1170
|
+
path_df = pd.merge(
|
1171
|
+
path_df,
|
1172
|
+
self._permutation,
|
1173
|
+
left_on="vert_idx_new",
|
1174
|
+
right_on="vert_idx_new",
|
1175
|
+
how="left",
|
1176
|
+
)
|
1177
|
+
path_df.drop(["vert_idx_new"], axis=1, inplace=True)
|
1178
|
+
path_length_values = np.full(self.__n_vertices_init, DTYPE_INF_PY)
|
1179
|
+
path_length_values[path_df.vert_idx_old.values] = path_df.path_length.values
|
1180
|
+
if return_inf:
|
1181
|
+
path_length_values = np.where(
|
1182
|
+
path_length_values == DTYPE_INF_PY, np.inf, path_length_values
|
1183
|
+
)
|
1184
|
+
return path_length_values
|
1185
|
+
|
1186
|
+
def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
|
1187
|
+
"""Compute path from predecessors or successors.
|
1188
|
+
|
1189
|
+
Parameters:
|
1190
|
+
-----------
|
1191
|
+
|
1192
|
+
vertex_idx : int
|
1193
|
+
source or target vertex index.
|
1194
|
+
|
1195
|
+
Returns
|
1196
|
+
-------
|
1197
|
+
|
1198
|
+
path_vertices : numpy.ndarray
|
1199
|
+
Array of np.uint32 type storing the path from or to the given vertex index. If we are
|
1200
|
+
dealing with the sssp algorithm, the input vertex is the target vertex and the path to
|
1201
|
+
the source is given backward from the target to the source using the predecessors. If
|
1202
|
+
we are dealing with the stsp algorithm, the input vertex is the source vertex and the
|
1203
|
+
path to the target is given backward from the target to the source using the
|
1204
|
+
successors.
|
1205
|
+
|
1206
|
+
"""
|
1207
|
+
if self._path_links is None:
|
1208
|
+
warnings.warn(
|
1209
|
+
"Current BellmanFord instance has not path attribute : \
|
1210
|
+
make sure path_tracking is set to True, and run the \
|
1211
|
+
shortest path algorithm",
|
1212
|
+
UserWarning,
|
1213
|
+
)
|
1214
|
+
return None
|
1215
|
+
if isinstance(self._path_links, pd.Series):
|
1216
|
+
path_vertices = compute_path(self._path_links.values, vertex_idx)
|
1217
|
+
else:
|
1218
|
+
path_vertices = compute_path(self._path_links, vertex_idx)
|
1219
|
+
return path_vertices
|
1220
|
+
|
1221
|
+
def has_negative_cycle(self):
|
1222
|
+
"""
|
1223
|
+
Check if the last run detected a negative cycle.
|
1224
|
+
|
1225
|
+
Returns
|
1226
|
+
-------
|
1227
|
+
has_negative_cycle : bool
|
1228
|
+
True if a negative cycle was detected in the last run, False otherwise.
|
1229
|
+
"""
|
1230
|
+
return self._has_negative_cycle
|
1231
|
+
|
1232
|
+
|
1233
|
+
class HyperpathGenerating:
|
1234
|
+
"""
|
1235
|
+
A class for constructing and managing hyperpath-based routing and analysis in transportation
|
1236
|
+
or graph-based systems.
|
1237
|
+
|
1238
|
+
Parameters
|
1239
|
+
----------
|
1240
|
+
edges : pandas.DataFrame
|
1241
|
+
A DataFrame containing graph edge information with columns specified by `tail`, `head`,
|
1242
|
+
`trav_time`, and `freq`. Must not contain missing values.
|
1243
|
+
tail : str, optional
|
1244
|
+
Name of the column in `edges` representing the tail nodes (source nodes), by default "tail".
|
1245
|
+
head : str, optional
|
1246
|
+
Name of the column in `edges` representing the head nodes (target nodes), by default "head".
|
1247
|
+
trav_time : str, optional
|
1248
|
+
Name of the column in `edges` representing travel times for edges, by default "trav_time".
|
1249
|
+
freq : str, optional
|
1250
|
+
Name of the column in `edges` representing frequencies of edges, by default "freq".
|
1251
|
+
check_edges : bool, optional
|
1252
|
+
Whether to validate the structure and data types of `edges`, by default False.
|
1253
|
+
orientation : {"in", "out"}, optional
|
1254
|
+
Determines the orientation of the graph structure for traversal.
|
1255
|
+
- "in": Graph traversal is from destination to origin.
|
1256
|
+
- "out": Graph traversal is from origin to destination.
|
1257
|
+
By default "in".
|
1258
|
+
|
1259
|
+
Attributes
|
1260
|
+
----------
|
1261
|
+
edge_count : int
|
1262
|
+
The number of edges in the graph.
|
1263
|
+
vertex_count : int
|
1264
|
+
The total number of vertices in the graph.
|
1265
|
+
u_i_vec : numpy.ndarray
|
1266
|
+
An array storing the least travel time for each vertex after running the algorithm.
|
1267
|
+
_edges : pandas.DataFrame
|
1268
|
+
Internal DataFrame containing the edges with additional metadata.
|
1269
|
+
_trav_time : numpy.ndarray
|
1270
|
+
Array of travel times for edges.
|
1271
|
+
_freq : numpy.ndarray
|
1272
|
+
Array of frequencies for edges.
|
1273
|
+
_tail : numpy.ndarray
|
1274
|
+
Array of tail nodes (source nodes) for edges.
|
1275
|
+
_head : numpy.ndarray
|
1276
|
+
Array of head nodes (target nodes) for edges.
|
1277
|
+
__indptr : numpy.ndarray
|
1278
|
+
Array for compressed row (or column) pointers in the CSR/CSC representation.
|
1279
|
+
_edge_idx : numpy.ndarray
|
1280
|
+
Array of edge indices in the CSR/CSC representation.
|
1281
|
+
|
1282
|
+
Methods
|
1283
|
+
-------
|
1284
|
+
run(origin, destination, volume, return_inf=False)
|
1285
|
+
Computes the hyperpath and updates edge volumes based on the input demand and configuration.
|
1286
|
+
_check_vertex_idx(idx)
|
1287
|
+
Validates a vertex index to ensure it is within the graph's bounds.
|
1288
|
+
_check_volume(v)
|
1289
|
+
Validates a volume value to ensure it is a non-negative float.
|
1290
|
+
_check_edges(edges, tail, head, trav_time, freq)
|
1291
|
+
Validates the structure and data types of the input edges DataFrame.
|
1292
|
+
"""
|
1293
|
+
|
1294
|
+
def __init__(
|
1295
|
+
self,
|
1296
|
+
edges: pd.DataFrame,
|
1297
|
+
tail: str = "tail",
|
1298
|
+
head: str = "head",
|
1299
|
+
trav_time: str = "trav_time",
|
1300
|
+
freq: str = "freq",
|
1301
|
+
check_edges: bool = False,
|
1302
|
+
orientation: str = "in",
|
1303
|
+
) -> None:
|
1304
|
+
# load the edges
|
1305
|
+
if check_edges:
|
1306
|
+
self._check_edges(edges, tail, head, trav_time, freq)
|
1307
|
+
# Convert to standardized NumPy-backed pandas DataFrame
|
1308
|
+
self._edges = standardize_graph_dataframe(
|
1309
|
+
edges, tail, head, trav_time=trav_time, freq=freq
|
1310
|
+
)
|
1311
|
+
self.edge_count = len(self._edges)
|
1312
|
+
|
1313
|
+
# remove inf values if any, and values close to zero
|
1314
|
+
self._edges[trav_time] = np.where(
|
1315
|
+
self._edges[trav_time] > DTYPE_INF_PY, DTYPE_INF_PY, self._edges[trav_time]
|
1316
|
+
)
|
1317
|
+
self._edges[trav_time] = np.where(
|
1318
|
+
self._edges[trav_time] < A_VERY_SMALL_TIME_INTERVAL_PY,
|
1319
|
+
A_VERY_SMALL_TIME_INTERVAL_PY,
|
1320
|
+
self._edges[trav_time],
|
1321
|
+
)
|
1322
|
+
self._edges[freq] = np.where(
|
1323
|
+
self._edges[freq] > INF_FREQ_PY, INF_FREQ_PY, self._edges[freq]
|
1324
|
+
)
|
1325
|
+
self._edges[freq] = np.where(
|
1326
|
+
self._edges[freq] < MIN_FREQ_PY, MIN_FREQ_PY, self._edges[freq]
|
1327
|
+
)
|
1328
|
+
|
1329
|
+
# create an edge index column
|
1330
|
+
self._edges = self._edges.reset_index(drop=True)
|
1331
|
+
data_col = "edge_idx"
|
1332
|
+
self._edges[data_col] = self._edges.index
|
1333
|
+
|
1334
|
+
# convert to CSR/CSC format
|
1335
|
+
self.vertex_count = self._edges[[tail, head]].max().max() + 1
|
1336
|
+
assert orientation in ["out", "in"]
|
1337
|
+
self._orientation = orientation
|
1338
|
+
if self._orientation == "out":
|
1339
|
+
fs_indptr, _, fs_data = convert_graph_to_csr_uint32(
|
1340
|
+
self._edges, tail, head, data_col, self.vertex_count
|
1341
|
+
)
|
1342
|
+
self.__indptr = fs_indptr.astype(np.uint32)
|
1343
|
+
self._edge_idx = fs_data.astype(np.uint32)
|
1344
|
+
else:
|
1345
|
+
rs_indptr, _, rs_data = convert_graph_to_csc_uint32(
|
1346
|
+
self._edges, tail, head, data_col, self.vertex_count
|
1347
|
+
)
|
1348
|
+
self.__indptr = rs_indptr.astype(np.uint32)
|
1349
|
+
self._edge_idx = rs_data.astype(np.uint32)
|
1350
|
+
|
1351
|
+
# edge attributes
|
1352
|
+
self._trav_time = np.asarray(self._edges[trav_time]).astype(DTYPE_PY)
|
1353
|
+
self._freq = np.asarray(self._edges[freq]).astype(DTYPE_PY)
|
1354
|
+
self._tail = np.asarray(self._edges[tail]).astype(np.uint32)
|
1355
|
+
self._head = np.asarray(self._edges[head]).astype(np.uint32)
|
1356
|
+
|
1357
|
+
# node attribute
|
1358
|
+
self.u_i_vec = None
|
1359
|
+
|
1360
|
+
def run(
|
1361
|
+
self,
|
1362
|
+
origin: Union[int, List[int]],
|
1363
|
+
destination: int,
|
1364
|
+
volume: Union[float, List[float]],
|
1365
|
+
return_inf: bool = False,
|
1366
|
+
) -> None:
|
1367
|
+
"""
|
1368
|
+
Computes the hyperpath and updates edge volumes based on the input demand and configuration.
|
1369
|
+
|
1370
|
+
Parameters
|
1371
|
+
----------
|
1372
|
+
origin : int or list of int
|
1373
|
+
The starting vertex or vertices of the demand. If `self._orientation` is "in",
|
1374
|
+
this can be a list of origins corresponding to the demand volumes.
|
1375
|
+
destination : int or list of int
|
1376
|
+
The target vertex or vertices of the demand. If `self._orientation` is "out",
|
1377
|
+
this can be a list of destinations corresponding to the demand volumes.
|
1378
|
+
volume : float or list of float
|
1379
|
+
The demand volume associated with each origin or destination. Must be non-negative.
|
1380
|
+
If a single float is provided, it is applied to a single origin-destination pair.
|
1381
|
+
return_inf : bool, optional
|
1382
|
+
If True, returns additional information from the computation (not yet implemented).
|
1383
|
+
Default is False.
|
1384
|
+
|
1385
|
+
Raises
|
1386
|
+
------
|
1387
|
+
NotImplementedError
|
1388
|
+
If `self._orientation` is "out", as the one-to-many algorithm is not yet implemented.
|
1389
|
+
AssertionError
|
1390
|
+
If the lengths of `origin` or `destination` and `volume` do not match.
|
1391
|
+
If any vertex index or volume is invalid.
|
1392
|
+
TypeError
|
1393
|
+
If `volume` is not a float or list of floats.
|
1394
|
+
ValueError
|
1395
|
+
If any volume value is negative.
|
1396
|
+
|
1397
|
+
Notes
|
1398
|
+
-----
|
1399
|
+
- The method modifies the `self._edges` DataFrame by adding a "volume" column representing
|
1400
|
+
edge volumes based on the computed hyperpath.
|
1401
|
+
- The `self.u_i_vec` array is updated to store the least travel time for each vertex.
|
1402
|
+
- Only "in" orientation is currently supported.
|
1403
|
+
"""
|
1404
|
+
# column storing the resulting edge volumes
|
1405
|
+
self._edges["volume"] = 0.0
|
1406
|
+
self.u_i_vec = None
|
1407
|
+
|
1408
|
+
# vertex least travel time
|
1409
|
+
u_i_vec = DTYPE_INF_PY * np.ones(self.vertex_count, dtype=DTYPE_PY)
|
1410
|
+
|
1411
|
+
# input check
|
1412
|
+
if not isinstance(volume, list):
|
1413
|
+
volume = [volume]
|
1414
|
+
|
1415
|
+
if self._orientation == "out":
|
1416
|
+
raise NotImplementedError(
|
1417
|
+
"one-to-many Spiess & Florian's algorithm not implemented yet"
|
1418
|
+
)
|
1419
|
+
|
1420
|
+
# Only "in" orientation is supported currently
|
1421
|
+
if not isinstance(origin, list):
|
1422
|
+
origin = [origin]
|
1423
|
+
assert len(origin) == len(volume)
|
1424
|
+
for i, item in enumerate(origin):
|
1425
|
+
self._check_vertex_idx(item)
|
1426
|
+
self._check_volume(volume[i])
|
1427
|
+
self._check_vertex_idx(destination)
|
1428
|
+
demand_indices = np.array(origin, dtype=np.uint32)
|
1429
|
+
|
1430
|
+
assert isinstance(return_inf, bool)
|
1431
|
+
|
1432
|
+
demand_values = np.array(volume, dtype=DTYPE_PY)
|
1433
|
+
|
1434
|
+
compute_SF_in(
|
1435
|
+
self.__indptr,
|
1436
|
+
self._edge_idx,
|
1437
|
+
self._trav_time,
|
1438
|
+
self._freq,
|
1439
|
+
self._tail,
|
1440
|
+
self._head,
|
1441
|
+
demand_indices, # source vertex indices
|
1442
|
+
demand_values,
|
1443
|
+
np.asarray(self._edges["volume"]),
|
1444
|
+
u_i_vec,
|
1445
|
+
self.vertex_count,
|
1446
|
+
destination,
|
1447
|
+
)
|
1448
|
+
self.u_i_vec = u_i_vec
|
1449
|
+
|
1450
|
+
def _check_vertex_idx(self, idx):
|
1451
|
+
assert isinstance(idx, int)
|
1452
|
+
assert idx >= 0
|
1453
|
+
assert idx < self.vertex_count
|
1454
|
+
|
1455
|
+
def _check_volume(self, v):
|
1456
|
+
assert isinstance(v, float)
|
857
1457
|
assert v >= 0.0
|
858
1458
|
|
859
|
-
def _check_edges(self, edges, tail, head, trav_time, freq):
|
860
|
-
if not isinstance(edges, pd.
|
861
|
-
raise TypeError("edges should be a pandas DataFrame")
|
1459
|
+
def _check_edges(self, edges, tail, head, trav_time, freq):
|
1460
|
+
if not isinstance(edges, pd.DataFrame):
|
1461
|
+
raise TypeError("edges should be a pandas DataFrame")
|
1462
|
+
|
1463
|
+
for col in [tail, head, trav_time, freq]:
|
1464
|
+
if col not in edges:
|
1465
|
+
raise KeyError(
|
1466
|
+
f"edge column '{col}' not found in graph edges dataframe"
|
1467
|
+
)
|
1468
|
+
|
1469
|
+
if edges[[tail, head, trav_time, freq]].isnull().to_numpy().any():
|
1470
|
+
raise ValueError(
|
1471
|
+
" ".join(
|
1472
|
+
[
|
1473
|
+
f"edges[[{tail}, {head}, {trav_time}, {freq}]] ",
|
1474
|
+
"should not have any missing value",
|
1475
|
+
]
|
1476
|
+
)
|
1477
|
+
)
|
1478
|
+
|
1479
|
+
for col in [tail, head]:
|
1480
|
+
if not pd.api.types.is_integer_dtype(edges[col].dtype):
|
1481
|
+
raise TypeError(f"column '{col}' should be of integer type")
|
1482
|
+
|
1483
|
+
for col in [trav_time, freq]:
|
1484
|
+
if not pd.api.types.is_numeric_dtype(edges[col].dtype):
|
1485
|
+
raise TypeError(f"column '{col}' should be of numeric type")
|
1486
|
+
|
1487
|
+
if edges[col].min() < 0.0:
|
1488
|
+
raise ValueError(f"column '{col}' should be nonnegative")
|
1489
|
+
|
1490
|
+
+class BFS:
+    """
+    Breadth-First Search algorithm for finding shortest paths in directed graphs.
+
+    BFS ignores edge weights (treats all edges as having equal weight) and finds the shortest
+    path in terms of the minimum number of edges/hops between vertices. This implementation
+    works on directed graphs using CSR format for forward traversal and CSC format for
+    backward traversal.
+
+    Note: If parallel edges exist between the same pair of vertices, only one edge will be
+    kept automatically during initialization.
+
+    Parameters:
+    -----------
+    edges: pandas.DataFrame
+        DataFrame containing the edges of the graph. It should have two columns: 'tail' and 'head'.
+        The 'tail' column should contain the IDs of the starting nodes, and the 'head' column
+        should contain the IDs of the ending nodes. If a 'weight' column is present, it will be
+        ignored.
+    tail: str, optional (default='tail')
+        The name of the column in the DataFrame that contains the IDs of the edge starting nodes.
+    head: str, optional (default='head')
+        The name of the column in the DataFrame that contains the IDs of the edge ending nodes.
+    orientation: str, optional (default='out')
+        The orientation of BFS algorithm. It can be either 'out' for single source shortest
+        paths or 'in' for single target shortest path.
+    check_edges: bool, optional (default=False)
+        Whether to check if the edges DataFrame is well-formed. If set to True, the edges
+        DataFrame will be checked for missing values and invalid data types.
+    permute: bool, optional (default=False)
+        Whether to permute the IDs of the nodes. If set to True, the node IDs will be reindexed
+        to start from 0 and be contiguous.
+    verbose: bool, optional (default=False)
+        Whether to print messages about parallel edge removal.
+    sentinel: int, optional (default=-9999)
+        Sentinel value for unreachable nodes and the start vertex in the predecessor array.
+        Must be a negative integer that fits in int32 range.
+    """
+
+    def __init__(
+        self,
+        edges: pd.DataFrame,
+        tail: str = "tail",
+        head: str = "head",
+        orientation: str = "out",
+        check_edges: bool = False,
+        permute: bool = False,
+        verbose: bool = False,
+        sentinel: int = -9999,
+    ) -> None:
+        # Validate sentinel value
+        if not isinstance(sentinel, int):
+            raise TypeError(
+                f"sentinel must be an integer, got {type(sentinel).__name__}"
+            )
+        if sentinel >= 0:
+            raise ValueError(f"sentinel must be negative, got {sentinel}")
+        if sentinel < np.iinfo(np.int32).min or sentinel > np.iinfo(np.int32).max:
+            raise ValueError(
+                f"sentinel must fit in int32 range [{np.iinfo(np.int32).min}, "
+                f"{np.iinfo(np.int32).max}], got {sentinel}"
+            )
+        self._sentinel = sentinel
+
+        # load the edges
+        if check_edges:
+            self._check_edges(edges, tail, head)
+        # Convert to standardized NumPy-backed pandas DataFrame
+        # Note: BFS doesn't need weights, but standardize_graph_dataframe handles that
+        self._edges = standardize_graph_dataframe(edges, tail, head)
+        self._n_edges = len(self._edges)
+        self._verbose = verbose
+
+        # preprocess edges to handle parallel edges
+        self._preprocess_edges(tail, head)
+
+        # reindex the vertices
+        self._permute = permute
+        if len(self._edges) == 0:
+            # Handle empty graphs
+            self._permutation = None
+            self._n_vertices = 0
+            self.__n_vertices_init = 0
+        elif self._permute:
+            self.__n_vertices_init = self._edges[[tail, head]].max(axis=0).max() + 1
+            self._permutation = self._permute_graph(tail, head)
+            self._n_vertices = len(self._permutation)
+        else:
+            self._permutation = None
+            self._n_vertices = self._edges[[tail, head]].max(axis=0).max() + 1
+            self.__n_vertices_init = self._n_vertices
+
+        # convert to CSR/CSC
+        self._check_orientation(orientation)
+        self._orientation = orientation
+        if self._orientation == "out":
+            # Use dummy weight column for conversion (BFS doesn't use weights)
+            self._edges["_bfs_dummy_weight"] = 1.0
+            fs_indptr, fs_indices, _ = convert_graph_to_csr_float64(
+                self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
+            )
+            self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
+            self.__indices = fs_indices.astype(np.uint32)
+            self.__indptr = fs_indptr.astype(np.uint32)
+        else:
+            self._edges["_bfs_dummy_weight"] = 1.0
+            rs_indptr, rs_indices, _ = convert_graph_to_csc_float64(
+                self._edges, tail, head, "_bfs_dummy_weight", self._n_vertices
+            )
+            self._edges.drop("_bfs_dummy_weight", axis=1, inplace=True)
+            self.__indices = rs_indices.astype(np.uint32)
+            self.__indptr = rs_indptr.astype(np.uint32)
+
+        self._path_links = None
+
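Illustrative sketch (not part of the diff): constructing BFS objects through the public interface documented above, on a small made-up graph.

import pandas as pd
from edsger.path import BFS

# hypothetical directed graph: 0 -> 1, 0 -> 2, 1 -> 3
edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 2, 3]})

# forward (single-source) traversal; the sentinel marks unreachable vertices
bfs_out = BFS(edges, orientation="out", check_edges=True)

# backward (single-target) traversal over the same edge table
bfs_in = BFS(edges, orientation="in")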
+    @property
+    def UNREACHABLE(self) -> int:
+        """
+        Getter for the sentinel value used for unreachable nodes.
+
+        Returns
+        -------
+        sentinel : int
+            The sentinel value for unreachable nodes and the start vertex.
+        """
+        return self._sentinel
+
+    @property
+    def edges(self) -> Any:
+        """
+        Getter for the graph edge dataframe.
+
+        Returns
+        -------
+        edges: pandas.DataFrame
+            DataFrame containing the edges of the graph.
+        """
+        return self._edges
+
+    @property
+    def n_edges(self) -> int:
+        """
+        Getter for the number of graph edges.
+
+        Returns
+        -------
+        n_edges: int
+            The number of edges in the graph.
+        """
+        return self._n_edges
 
-
-
-
-
+    @property
+    def n_vertices(self) -> int:
+        """
+        Getter for the number of graph vertices.
+
+        Returns
+        -------
+        n_vertices: int
+            The number of nodes in the graph (after permutation, if _permute is True).
+        """
+        return self._n_vertices
+
+    @property
+    def orientation(self) -> str:
+        """
+        Getter of BFS algorithm orientation ("in" or "out").
+
+        Returns
+        -------
+        orientation : str
+            The orientation of BFS algorithm.
+        """
+        return self._orientation
+
+    @property
+    def permute(self) -> bool:
+        """
+        Getter for the graph permutation/reindexing option.
+
+        Returns
+        -------
+        permute : bool
+            Whether to permute the IDs of the nodes.
+        """
+        return self._permute
+
+    @property
+    def path_links(self) -> Optional[np.ndarray]:
+        """
+        Getter for the path links (predecessors or successors).
+
+        Returns
+        -------
+        path_links: numpy.ndarray
+            predecessors or successors node index if the path tracking is activated.
+        """
+        return self._path_links
+
+    def _preprocess_edges(self, tail, head):
+        """
+        Preprocess edges to handle parallel edges by keeping only one edge
+        between any pair of vertices (BFS doesn't use weights).
+
+        Parameters
+        ----------
+        tail : str
+            The column name for tail vertices
+        head : str
+            The column name for head vertices
+        """
+        original_count = len(self._edges)
+        self._edges = self._edges.groupby([tail, head], as_index=False).first()
+        final_count = len(self._edges)
+
+        if original_count > final_count:
+            parallel_edges_removed = original_count - final_count
+            if self._verbose:
+                print(
+                    f"Automatically removed {parallel_edges_removed} parallel edge(s). "
+                    f"BFS treats all edges equally."
                 )
 
-
+        self._n_edges = len(self._edges)
+
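Illustrative sketch (not from the package sources) of the parallel-edge deduplication performed above with groupby(...).first():

import pandas as pd

# hypothetical edge table with a duplicated (tail, head) pair
edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 1, 2]})
deduped = edges.groupby(["tail", "head"], as_index=False).first()
print(len(edges), len(deduped))  # 3 2 -- one parallel edge dropped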
+    def _check_edges(self, edges, tail, head):
+        """Checks if the edges DataFrame is well-formed. If not, raises an appropriate error."""
+        if not isinstance(edges, pd.DataFrame):
+            raise TypeError("edges should be a pandas DataFrame")
+
+        if tail not in edges:
+            raise KeyError(
+                f"edge tail column '{tail}' not found in graph edges dataframe"
+            )
+
+        if head not in edges:
+            raise KeyError(
+                f"edge head column '{head}' not found in graph edges dataframe"
+            )
+
+        if edges[[tail, head]].isnull().to_numpy().any():
             raise ValueError(
                 " ".join(
                     [
-                        f"edges[[{tail}, {head}
+                        f"edges[[{tail}, {head}]] ",
                         "should not have any missing value",
                     ]
                 )
@@ -878,14 +1740,298 @@ class HyperpathGenerating:
 
         for col in [tail, head]:
             if not pd.api.types.is_integer_dtype(edges[col].dtype):
-                raise TypeError(f"
+                raise TypeError(f"edges['{col}'] should be of integer type")
 
-
-
-
+    def _permute_graph(self, tail, head):
+        """Permute the IDs of the nodes to start from 0 and be contiguous.
+        Returns a DataFrame with the permuted IDs."""
 
-
-
+        permutation = pd.DataFrame(
+            data={
+                "vert_idx": np.union1d(
+                    np.asarray(self._edges[tail]), np.asarray(self._edges[head])
+                )
+            }
+        )
+        permutation["vert_idx_new"] = permutation.index
+        permutation.index.name = "index"
+
+        self._edges = pd.merge(
+            self._edges,
+            permutation[["vert_idx", "vert_idx_new"]],
+            left_on=tail,
+            right_on="vert_idx",
+            how="left",
+        )
+        self._edges.drop([tail, "vert_idx"], axis=1, inplace=True)
+        self._edges.rename(columns={"vert_idx_new": tail}, inplace=True)
+
+        self._edges = pd.merge(
+            self._edges,
+            permutation[["vert_idx", "vert_idx_new"]],
+            left_on=head,
+            right_on="vert_idx",
+            how="left",
+        )
+        self._edges.drop([head, "vert_idx"], axis=1, inplace=True)
+        self._edges.rename(columns={"vert_idx_new": head}, inplace=True)
+
+        permutation.rename(columns={"vert_idx": "vert_idx_old"}, inplace=True)
+        permutation.reset_index(drop=True, inplace=True)
+        permutation.sort_values(by="vert_idx_new", inplace=True)
+
+        permutation.index.name = "index"
+        self._edges.index.name = "index"
+
+        return permutation
+
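A rough standalone sketch (not from the package) of the reindexing idea used above, here with numpy.searchsorted instead of the pd.merge pipeline:

import numpy as np
import pandas as pd

# hypothetical non-contiguous node IDs
edges = pd.DataFrame({"tail": [10, 10, 42], "head": [42, 7, 7]})
old_ids = np.union1d(edges["tail"].values, edges["head"].values)  # sorted unique IDs
# map each old ID to its position in the sorted array -> contiguous IDs 0..n-1
edges["tail"] = np.searchsorted(old_ids, edges["tail"].values)
edges["head"] = np.searchsorted(old_ids, edges["head"].values)
print(edges)  # IDs are now in {0, 1, 2}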
+    def _check_orientation(self, orientation):
+        """Checks the orientation attribute."""
+        if orientation not in ["in", "out"]:
+            raise ValueError("orientation should be either 'in' or 'out'")
+
+    def run(
+        self,
+        vertex_idx: int,
+        path_tracking: bool = False,
+        return_series: bool = False,
+    ) -> Union[np.ndarray, pd.Series]:
+        """
+        Runs BFS algorithm between a given vertex and all other vertices in the graph.
+
+        Parameters
+        ----------
+        vertex_idx : int
+            The index of the source/target vertex.
+        path_tracking : bool, optional (default=False)
+            Whether to track the shortest path(s) from the source vertex to all other vertices
+            in the graph. When True, predecessors are stored and can be retrieved with get_path().
+        return_series : bool, optional (default=False)
+            Whether to return a Pandas Series object indexed by vertex indices with predecessors
+            as values.
+
+        Returns
+        -------
+        predecessors : np.ndarray or pd.Series
+            If `return_series=False`, a 1D Numpy array of shape (n_vertices,) with the
+            predecessor of each vertex in the BFS tree (`orientation="out"`), or
+            the successor of each vertex (`orientation="in"`).
+            Unreachable vertices and the start vertex have the sentinel value (default: -9999).
+            If `return_series=True`, a Pandas Series object with the same data and the
+            vertex indices as index.
+        """
+        # validate the input arguments
+        if vertex_idx < 0:
+            raise ValueError(f"argument 'vertex_idx={vertex_idx}' must be non-negative")
+        if self._permute and self._permutation is not None:
+            if vertex_idx not in self._permutation.vert_idx_old.values:
+                raise ValueError(f"vertex {vertex_idx} not found in graph")
+            vertex_new = self._permutation.loc[
+                self._permutation.vert_idx_old == vertex_idx, "vert_idx_new"
+            ].iloc[0]
+        else:
+            if vertex_idx >= self._n_vertices:
+                raise ValueError(f"vertex {vertex_idx} not found in graph")
+            vertex_new = vertex_idx
+
+        # compute BFS predecessors
+        if self._orientation == "out":
+            predecessors = bfs_csr(
+                self.__indptr,
+                self.__indices,
+                vertex_new,
+                self._n_vertices,
+                self._sentinel,
+            )
+        else:
+            predecessors = bfs_csc(
+                self.__indptr,
+                self.__indices,
+                vertex_new,
+                self._n_vertices,
+                self._sentinel,
+            )
+
+        # store path links if tracking is enabled
+        if path_tracking:
+            # Convert predecessors to path_links format (uint32)
+            # Replace sentinel value with vertex's own index (like Dijkstra does)
+            self._path_links = np.arange(self._n_vertices, dtype=np.uint32)
+            reachable_mask = predecessors != self._sentinel
+            self._path_links[reachable_mask] = predecessors[reachable_mask].astype(
+                np.uint32
+            )
+
+            if self._permute and self._permutation is not None:
+                # permute back the path vertex indices (same approach as Dijkstra)
+                path_df = pd.DataFrame(
+                    data={
+                        "vertex_idx": np.arange(self._n_vertices),
+                        "associated_idx": self._path_links,
+                    }
+                )
+                path_df = pd.merge(
+                    path_df,
+                    self._permutation,
+                    left_on="vertex_idx",
+                    right_on="vert_idx_new",
+                    how="left",
+                )
+                path_df.drop(["vertex_idx", "vert_idx_new"], axis=1, inplace=True)
+                path_df.rename(columns={"vert_idx_old": "vertex_idx"}, inplace=True)
+                path_df = pd.merge(
+                    path_df,
+                    self._permutation,
+                    left_on="associated_idx",
+                    right_on="vert_idx_new",
+                    how="left",
+                )
+                path_df.drop(["associated_idx", "vert_idx_new"], axis=1, inplace=True)
+                path_df.rename(columns={"vert_idx_old": "associated_idx"}, inplace=True)
+
+                if return_series:
+                    path_df.set_index("vertex_idx", inplace=True)
+                    self._path_links = path_df.associated_idx.astype(np.uint32)
+                else:
+                    self._path_links = np.arange(
+                        self.__n_vertices_init, dtype=np.uint32
+                    )
+                    self._path_links[path_df.vertex_idx.values] = (
+                        path_df.associated_idx.values
+                    )
+        else:
+            self._path_links = None
+
+        # reorder predecessors for permuted graphs
+        if return_series:
+            if self._permute and self._permutation is not None:
+                pred_df = pd.DataFrame(data={"predecessor": predecessors})
+                pred_df["vert_idx_new"] = pred_df.index
+                pred_df = pd.merge(
+                    pred_df,
+                    self._permutation,
+                    left_on="vert_idx_new",
+                    right_on="vert_idx_new",
+                    how="left",
+                )
+
+                # Map predecessor values back to original IDs
+                valid_mask = pred_df["predecessor"] != self._sentinel
+                if valid_mask.any():
+                    pred_df_valid = pred_df[valid_mask].copy()
+                    pred_df_valid = pd.merge(
+                        pred_df_valid,
+                        self._permutation,
+                        left_on="predecessor",
+                        right_on="vert_idx_new",
+                        how="left",
+                        suffixes=("", "_pred"),
+                    )
+                    pred_df.loc[valid_mask, "predecessor"] = pred_df_valid[
+                        "vert_idx_old_pred"
+                    ].values.astype(np.int32)
+
+                pred_df.set_index("vert_idx_old", inplace=True)
+                predecessors_series = pred_df.predecessor.astype(np.int32)
+                predecessors_series.index.name = "vertex_idx"
+                predecessors_series.name = "predecessor"
+            else:
+                predecessors_series = pd.Series(predecessors, dtype=np.int32)
+                predecessors_series.index.name = "vertex_idx"
+                predecessors_series.name = "predecessor"
+
+            return predecessors_series
+
+        # For array output with permutation
+        if self._permute and self._permutation is not None:
+            pred_df = pd.DataFrame(data={"predecessor": predecessors})
+            pred_df["vert_idx_new"] = pred_df.index
+            pred_df = pd.merge(
+                pred_df,
+                self._permutation,
+                left_on="vert_idx_new",
+                right_on="vert_idx_new",
+                how="left",
+            )
+
+            # Map predecessor values back to original IDs
+            valid_mask = pred_df["predecessor"] != self._sentinel
+            if valid_mask.any():
+                pred_df_valid = pred_df[valid_mask].copy()
+                pred_df_valid = pd.merge(
+                    pred_df_valid,
+                    self._permutation,
+                    left_on="predecessor",
+                    right_on="vert_idx_new",
+                    how="left",
+                    suffixes=("", "_pred"),
+                )
+                pred_df.loc[valid_mask, "predecessor"] = pred_df_valid[
+                    "vert_idx_old_pred"
+                ].values.astype(np.int32)
+
+            predecessors_array = np.full(
+                self.__n_vertices_init, self._sentinel, dtype=np.int32
+            )
+            predecessors_array[pred_df.vert_idx_old.values] = (
+                pred_df.predecessor.values.astype(np.int32)
+            )
+            return predecessors_array
+
+        return predecessors
+
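Illustrative usage sketch (not part of the diff) of run() with the predecessor semantics documented above; the commented values are what would be expected for this small made-up graph.

import pandas as pd
from edsger.path import BFS

# hypothetical graph: 0 -> 1, 0 -> 2, 1 -> 3
edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 2, 3]})
bfs = BFS(edges, orientation="out")
pred = bfs.run(vertex_idx=0)
# pred[0] is the sentinel (-9999) for the start vertex, while
# pred[1] == 0, pred[2] == 0 and pred[3] == 1 in the BFS tree rooted at 0
print(pred)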
+    def get_vertices(self) -> Any:
+        """
+        Get the unique vertices from the graph.
+
+        If the graph has been permuted, this method returns the vertices based on the original
+        indexing. Otherwise, it returns the union of tail and head vertices from the edges.
+
+        Returns
+        -------
+        vertices : ndarray
+            A 1-D array containing the unique vertices.
+        """
+        if self._permute and self._permutation is not None:
+            return np.asarray(self._permutation.vert_idx_old)
+        return np.union1d(
+            np.asarray(self._edges["tail"]), np.asarray(self._edges["head"])
+        )
+
+    def get_path(self, vertex_idx: int) -> Optional[np.ndarray]:
+        """Compute path from predecessors or successors.
+
+        Parameters:
+        -----------
+
+        vertex_idx : int
+            source or target vertex index.
+
+        Returns
+        -------
+
+        path_vertices : numpy.ndarray
+            Array of np.int32 type storing the path from or to the given vertex index. If we are
+            dealing with BFS from a source (orientation="out"), the input vertex is the target
+            vertex and the path to the source is given backward from the target to the source
+            using the predecessors. If we are dealing with BFS to a target (orientation="in"),
+            the input vertex is the source vertex and the path to the target is given backward
+            from the target to the source using the successors.
+
+        """
+        if self._path_links is None:
+            warnings.warn(
+                "Current BFS instance has no path attribute: "
+                "make sure path_tracking is set to True, and run the "
+                "BFS algorithm",
+                UserWarning,
+            )
+            return None
+        if isinstance(self._path_links, pd.Series):
+            path_vertices = compute_path(self._path_links.values, vertex_idx)
+        else:
+            path_vertices = compute_path(self._path_links, vertex_idx)
+        return path_vertices
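A small sketch (illustrative only) combining path_tracking and get_path() as documented above:

import pandas as pd
from edsger.path import BFS

edges = pd.DataFrame({"tail": [0, 0, 1], "head": [1, 2, 3]})
bfs = BFS(edges, orientation="out")
bfs.run(vertex_idx=0, path_tracking=True)
# the path is returned backward, from the target vertex to the source
path = bfs.get_path(3)
print(path)  # expected to be along the lines of [3, 1, 0]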
 
 
 # author : Francois Pacull
|