edsger-0.1.5-cp311-cp311-win_amd64.whl → edsger-0.1.6-cp311-cp311-win_amd64.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- edsger/_version.py +1 -1
- edsger/bellman_ford.cp311-win_amd64.pyd +0 -0
- edsger/bellman_ford.pyx +7 -0
- edsger/bfs.cp311-win_amd64.pyd +0 -0
- edsger/bfs.pyx +243 -0
- edsger/commons.cp311-win_amd64.pyd +0 -0
- edsger/commons.pyx +7 -0
- edsger/dijkstra.cp311-win_amd64.pyd +0 -0
- edsger/dijkstra.pyx +7 -0
- edsger/graph_importer.py +340 -0
- edsger/networks.py +4 -2
- edsger/path.py +676 -129
- edsger/path_tracking.cp311-win_amd64.pyd +0 -0
- edsger/path_tracking.pyx +7 -0
- edsger/pq_4ary_dec_0b.cp311-win_amd64.pyd +0 -0
- edsger/pq_4ary_dec_0b.pyx +7 -0
- edsger/spiess_florian.cp311-win_amd64.pyd +0 -0
- edsger/spiess_florian.pyx +7 -0
- edsger/star.cp311-win_amd64.pyd +0 -0
- edsger/star.pyx +7 -0
- edsger/utils.py +9 -8
- {edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/METADATA +124 -2
- edsger-0.1.6.dist-info/RECORD +32 -0
- edsger-0.1.5.dist-info/RECORD +0 -29
- {edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/WHEEL +0 -0
- {edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/licenses/AUTHORS.rst +0 -0
- {edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/licenses/LICENSE +0 -0
- {edsger-0.1.5.dist-info → edsger-0.1.6.dist-info}/top_level.txt +0 -0
edsger/_version.py
CHANGED
@@ -1 +1 @@
-__version__ = "0.1.5"
+__version__ = "0.1.6"
edsger/bellman_ford.cp311-win_amd64.pyd
CHANGED
Binary file
edsger/bellman_ford.pyx
CHANGED
@@ -19,6 +19,13 @@ cpdef functions:
     Detect negative cycles in the graph.
 """
 
+# cython: language_level=3
+# cython: boundscheck=False
+# cython: wraparound=False
+# cython: embedsignature=False
+# cython: cdivision=True
+# cython: initializedcheck=False
+
 cimport numpy as cnp
 import numpy as np
 
edsger/bfs.cp311-win_amd64.pyd
CHANGED
Binary file
edsger/bfs.pyx
ADDED
@@ -0,0 +1,243 @@
+"""
+Breadth-First Search (BFS) implementation.
+
+cpdef functions:
+
+- bfs_csr
+    Compute BFS tree using CSR format (forward traversal). Returns predecessors.
+- bfs_csc
+    Compute BFS tree using CSC format (backward traversal). Returns predecessors.
+"""
+
+# cython: language_level=3
+# cython: boundscheck=False
+# cython: wraparound=False
+# cython: embedsignature=False
+# cython: cdivision=True
+# cython: initializedcheck=False
+
+cimport numpy as cnp
+import numpy as np
+
+cpdef cnp.ndarray bfs_csr(
+    cnp.uint32_t[::1] csr_indptr,
+    cnp.uint32_t[::1] csr_indices,
+    int start_vert_idx,
+    int vertex_count,
+    int sentinel=-9999):
+    """
+    Compute BFS tree using CSR format (forward traversal from start vertex).
+
+    Parameters
+    ----------
+    csr_indptr : cnp.uint32_t[::1]
+        Pointers in the CSR format
+    csr_indices : cnp.uint32_t[::1]
+        Indices in the CSR format
+    start_vert_idx : int
+        Starting vertex index
+    vertex_count : int
+        Total number of vertices
+    sentinel : int, optional
+        Sentinel value for unreachable nodes and start vertex (default: -9999)
+
+    Returns
+    -------
+    predecessors : cnp.ndarray
+        Predecessor array where predecessors[i] contains the predecessor
+        of vertex i in the BFS tree. Unreachable vertices and the start
+        vertex have the sentinel value.
+    """
+
+    cdef:
+        size_t tail_vert_idx, head_vert_idx, idx
+        size_t queue_head = 0, queue_tail = 0
+        size_t start = <size_t>start_vert_idx
+        cnp.uint32_t[::1] queue
+        cnp.int8_t[::1] visited
+        cnp.int32_t[::1] predecessors
+
+    # Allocate arrays
+    queue = np.empty(vertex_count, dtype=np.uint32)
+    visited = np.zeros(vertex_count, dtype=np.int8)
+    predecessors = np.full(vertex_count, sentinel, dtype=np.int32)
+
+    with nogil:
+        # Initialize: mark start vertex as visited and enqueue it
+        visited[start] = 1
+        queue[queue_tail] = start
+        queue_tail += 1
+
+        # BFS main loop
+        while queue_head < queue_tail:
+            tail_vert_idx = queue[queue_head]
+            queue_head += 1
+
+            # Process all outgoing edges from tail_vert_idx
+            for idx in range(<size_t>csr_indptr[tail_vert_idx],
+                             <size_t>csr_indptr[tail_vert_idx + 1]):
+                head_vert_idx = <size_t>csr_indices[idx]
+
+                # If not visited, mark as visited and enqueue
+                if visited[head_vert_idx] == 0:
+                    visited[head_vert_idx] = 1
+                    predecessors[head_vert_idx] = <int>tail_vert_idx
+                    queue[queue_tail] = head_vert_idx
+                    queue_tail += 1
+
+    # Convert to numpy array
+    return np.asarray(predecessors)
+
+
+cpdef cnp.ndarray bfs_csc(
+    cnp.uint32_t[::1] csc_indptr,
+    cnp.uint32_t[::1] csc_indices,
+    int start_vert_idx,
+    int vertex_count,
+    int sentinel=-9999):
+    """
+    Compute BFS tree using CSC format (backward traversal from start vertex).
+
+    Parameters
+    ----------
+    csc_indptr : cnp.uint32_t[::1]
+        Pointers in the CSC format
+    csc_indices : cnp.uint32_t[::1]
+        Indices in the CSC format
+    start_vert_idx : int
+        Starting vertex index
+    vertex_count : int
+        Total number of vertices
+    sentinel : int, optional
+        Sentinel value for unreachable nodes and start vertex (default: -9999)
+
+    Returns
+    -------
+    predecessors : cnp.ndarray
+        Predecessor array where predecessors[i] contains the successor
+        of vertex i in the BFS tree (since we're traversing backward).
+        Unreachable vertices and the start vertex have the sentinel value.
+    """
+
+    cdef:
+        size_t tail_vert_idx, head_vert_idx, idx
+        size_t queue_head = 0, queue_tail = 0
+        size_t start = <size_t>start_vert_idx
+        cnp.uint32_t[::1] queue
+        cnp.int8_t[::1] visited
+        cnp.int32_t[::1] predecessors
+
+    # Allocate arrays
+    queue = np.empty(vertex_count, dtype=np.uint32)
+    visited = np.zeros(vertex_count, dtype=np.int8)
+    predecessors = np.full(vertex_count, sentinel, dtype=np.int32)
+
+    with nogil:
+        # Initialize: mark start vertex as visited and enqueue it
+        visited[start] = 1
+        queue[queue_tail] = start
+        queue_tail += 1
+
+        # BFS main loop (processing incoming edges using CSC)
+        while queue_head < queue_tail:
+            head_vert_idx = queue[queue_head]
+            queue_head += 1
+
+            # Process all incoming edges to head_vert_idx
+            for idx in range(<size_t>csc_indptr[head_vert_idx],
+                             <size_t>csc_indptr[head_vert_idx + 1]):
+                tail_vert_idx = <size_t>csc_indices[idx]
+
+                # If not visited, mark as visited and enqueue
+                if visited[tail_vert_idx] == 0:
+                    visited[tail_vert_idx] = 1
+                    predecessors[tail_vert_idx] = <int>head_vert_idx
+                    queue[queue_tail] = tail_vert_idx
+                    queue_tail += 1
+
+    # Convert to numpy array
+    return np.asarray(predecessors)
+
+
+# ============================================================================ #
+# tests                                                                        #
+# ============================================================================ #
+
+
+cdef generate_simple_graph_csr():
+    """
+    Generate a simple directed graph in CSR format.
+
+    Graph structure:
+        0 -> 1 -> 3
+        0 -> 2 -> 3
+
+    4 vertices, 4 edges
+    """
+    csr_indptr = np.array([0, 2, 3, 4, 4], dtype=np.uint32)
+    csr_indices = np.array([1, 2, 3, 3], dtype=np.uint32)
+    return csr_indptr, csr_indices
+
+
+cdef generate_simple_graph_csc():
+    """
+    Generate a simple directed graph in CSC format.
+
+    Graph structure (same as CSR version):
+        0 -> 1 -> 3
+        0 -> 2 -> 3
+
+    4 vertices, 4 edges
+    """
+    csc_indptr = np.array([0, 0, 1, 2, 4], dtype=np.uint32)
+    csc_indices = np.array([0, 0, 1, 2], dtype=np.uint32)
+    return csc_indptr, csc_indices
+
+
+cpdef test_bfs_csr_01():
+    """Test BFS CSR on simple graph from vertex 0."""
+    cdef int UNREACHABLE = -9999
+    csr_indptr, csr_indices = generate_simple_graph_csr()
+
+    predecessors = bfs_csr(csr_indptr, csr_indices, 0, 4)
+
+    # Expected: 0 is start, 1 and 2 have predecessor 0, 3 has predecessor 1 or 2
+    assert predecessors[0] == UNREACHABLE  # start vertex
+    assert predecessors[1] == 0
+    assert predecessors[2] == 0
+    assert predecessors[3] in [1, 2]  # could be reached from either 1 or 2
+
+
+cpdef test_bfs_csc_01():
+    """Test BFS CSC on simple graph to vertex 3."""
+    cdef int UNREACHABLE = -9999
+    csc_indptr, csc_indices = generate_simple_graph_csc()
+
+    predecessors = bfs_csc(csc_indptr, csc_indices, 3, 4)
+
+    # Expected: working backward from 3
+    assert predecessors[3] == UNREACHABLE  # start vertex
+    assert predecessors[1] in [3, UNREACHABLE] or predecessors[2] in [3, UNREACHABLE]
+    assert predecessors[0] in [1, 2, UNREACHABLE]
+
+
+cpdef test_bfs_unreachable():
+    """Test BFS with unreachable vertices."""
+    cdef int UNREACHABLE = -9999
+    # Graph: 0 -> 1, 2 -> 3 (two disconnected components)
+    csr_indptr = np.array([0, 1, 1, 2, 2], dtype=np.uint32)
+    csr_indices = np.array([1, 3], dtype=np.uint32)
+
+    predecessors = bfs_csr(csr_indptr, csr_indices, 0, 4)
+
+    # From 0, can reach 1 but not 2 or 3
+    assert predecessors[0] == UNREACHABLE  # start
+    assert predecessors[1] == 0
+    assert predecessors[2] == UNREACHABLE  # unreachable
+    assert predecessors[3] == UNREACHABLE  # unreachable
+
+
+# author : Francois Pacull
+# copyright : Architecture & Performance
+# email: francois.pacull@architecture-performance.fr
+# license : MIT
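For orientation, here is a minimal usage sketch of the new bfs_csr function based on the signature and test graph above. It assumes the compiled extension is importable as edsger.bfs (matching the bundled bfs.cp311-win_amd64.pyd); it is illustrative, not part of the diff.

import numpy as np
from edsger.bfs import bfs_csr  # assumes the built extension module is importable

# CSR graph with edges 0->1, 0->2, 1->3, 2->3 (4 vertices, 4 edges)
csr_indptr = np.array([0, 2, 3, 4, 4], dtype=np.uint32)
csr_indices = np.array([1, 2, 3, 3], dtype=np.uint32)

# BFS tree from vertex 0 over 4 vertices
predecessors = bfs_csr(csr_indptr, csr_indices, 0, 4)
# predecessors[0] == -9999 (sentinel for the start vertex),
# predecessors[1] == 0, predecessors[2] == 0, predecessors[3] is 1 or 2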
edsger/commons.cp311-win_amd64.pyd
CHANGED
Binary file
edsger/commons.pyx
CHANGED
@@ -2,6 +2,13 @@
 Common definitions.
 """
 
+# cython: language_level=3
+# cython: boundscheck=False
+# cython: wraparound=False
+# cython: embedsignature=False
+# cython: cdivision=True
+# cython: initializedcheck=False
+
 import numpy as np
 
 DTYPE_PY = np.float64
edsger/dijkstra.cp311-win_amd64.pyd
CHANGED
Binary file
edsger/dijkstra.pyx
CHANGED
@@ -29,6 +29,13 @@ cpdef functions:
     are reached. Compute successors.
 """
 
+# cython: language_level=3
+# cython: boundscheck=False
+# cython: wraparound=False
+# cython: embedsignature=False
+# cython: cdivision=True
+# cython: initializedcheck=False
+
 cimport numpy as cnp
 
 from edsger.commons cimport (
edsger/graph_importer.py
ADDED
@@ -0,0 +1,340 @@
+"""
+Graph importer module for converting various DataFrame formats to NumPy-backed pandas DataFrames.
+
+This module provides a unified interface for importing graph data from different DataFrame libraries
+(pandas with NumPy backend, pandas with Arrow backend, Polars, etc.) and converting them to a
+standardized NumPy-backed pandas DataFrame format that is optimal for the graph algorithms.
+"""
+
+from abc import ABC, abstractmethod
+from typing import Optional, List
+import warnings
+
+import numpy as np
+import pandas as pd
+
+
+class GraphImporter(ABC):
+    """
+    Abstract base class for importing graph data from various DataFrame libraries.
+
+    All importers convert their input format to a NumPy-backed pandas DataFrame
+    with contiguous memory layout for optimal performance in Cython algorithms.
+    """
+
+    def __init__(self, edges_df):
+        """
+        Initialize the importer with a DataFrame.
+
+        Parameters
+        ----------
+        edges_df : DataFrame-like
+            The edges DataFrame in the specific library format.
+        """
+        self.edges_df = edges_df
+
+    @staticmethod
+    def from_dataframe(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+        edges,
+        tail: str = "tail",
+        head: str = "head",
+        weight: Optional[str] = None,
+        trav_time: Optional[str] = None,
+        freq: Optional[str] = None,
+    ) -> "GraphImporter":
+        """
+        Factory method to create the appropriate importer based on DataFrame type.
+
+        Parameters
+        ----------
+        edges : DataFrame-like
+            The edges DataFrame to import.
+        tail : str
+            Column name for tail vertices.
+        head : str
+            Column name for head vertices.
+        weight : str, optional
+            Column name for edge weights (for shortest path algorithms).
+        trav_time : str, optional
+            Column name for travel time (for hyperpath algorithms).
+        freq : str, optional
+            Column name for frequency (for hyperpath algorithms).
+
+        Returns
+        -------
+        GraphImporter
+            An instance of the appropriate importer subclass.
+        """
+        # Note: tail, head, weight, trav_time, freq are part of API but not used here
+        # They're used by calling code after factory creates the importer
+        # pylint: disable=unused-argument
+        try:
+            # Check for Polars DataFrame
+            if hasattr(edges, "__class__") and edges.__class__.__module__.startswith(
+                "polars"
+            ):
+                return PolarsImporter(edges)
+        except (AttributeError, TypeError):
+            # If __class__ or __module__ access fails, continue to other checks
+            pass
+
+        # Check for pandas DataFrame
+        if isinstance(edges, pd.DataFrame):
+            try:
+                # Check if any column has Arrow backend
+                has_arrow = any(
+                    hasattr(dtype, "pyarrow_dtype") for dtype in edges.dtypes
+                )
+
+                if has_arrow:
+                    return PandasArrowImporter(edges)
+                return PandasNumpyImporter(edges)
+            except (AttributeError, TypeError):
+                # If dtype checking fails, assume NumPy backend
+                return PandasNumpyImporter(edges)
+
+        # Unknown type - try to convert to pandas
+        warnings.warn(
+            f"Unknown DataFrame type {type(edges)}. Attempting to convert to pandas.",
+            UserWarning,
+        )
+        return PandasNumpyImporter(pd.DataFrame(edges))
+
+    @abstractmethod
+    def to_numpy_edges(self, columns: List[str]) -> pd.DataFrame:
+        """
+        Convert the DataFrame to a NumPy-backed pandas DataFrame.
+
+        Parameters
+        ----------
+        columns : List[str]
+            List of column names to extract.
+
+        Returns
+        -------
+        pd.DataFrame
+            A pandas DataFrame with NumPy backend and contiguous memory.
+        """
+
+    def _ensure_contiguous(self, array: np.ndarray) -> np.ndarray:
+        """
+        Ensure the array is C-contiguous.
+
+        Parameters
+        ----------
+        array : np.ndarray
+            Input array.
+
+        Returns
+        -------
+        np.ndarray
+            C-contiguous array.
+        """
+        if not array.flags["C_CONTIGUOUS"]:
+            return np.ascontiguousarray(array)
+        return array
+
+
+class PandasNumpyImporter(GraphImporter):
+    """
+    Importer for pandas DataFrames with NumPy backend.
+
+    This is the most efficient case as it requires minimal conversion.
+    """
+
+    def to_numpy_edges(self, columns: List[str]) -> pd.DataFrame:
+        """
+        Extract columns and ensure they are NumPy-backed.
+
+        For NumPy-backed pandas, this is mostly a pass-through operation
+        with validation to ensure contiguous memory.
+        """
+        # Extract only the needed columns
+        result_df = self.edges_df[columns].copy(deep=True)
+
+        # Ensure all columns are contiguous NumPy arrays
+        for col in columns:
+            if not isinstance(result_df[col].values, np.ndarray):
+                # Convert to NumPy if somehow not already
+                result_df[col] = result_df[col].to_numpy()
+
+        return result_df
+
+
+class PandasArrowImporter(GraphImporter):
+    """
+    Importer for pandas DataFrames with Arrow backend.
+
+    Converts Arrow-backed columns to NumPy arrays with proper data types.
+    """
+
+    def to_numpy_edges(self, columns: List[str]) -> pd.DataFrame:
+        """
+        Convert Arrow-backed columns to NumPy arrays.
+
+        Uses to_numpy() method which ensures contiguous memory layout.
+        """
+        result_data = {}
+
+        for col in columns:
+            series = self.edges_df[col]
+
+            # Determine target dtype based on column values
+            if col in columns[:2]:  # Assume first two are vertex indices (tail, head)
+                # Try to use uint32 for vertex indices if possible
+                max_val = series.max()
+                if max_val < np.iinfo(np.uint32).max:
+                    target_dtype = np.uint32
+                else:
+                    target_dtype = np.uint64
+            else:
+                # Use float64 for weights/times/frequencies
+                target_dtype = np.float64
+
+            # Convert to NumPy with specified dtype
+            if hasattr(series, "to_numpy"):
+                # Use to_numpy() for Arrow-backed series
+                result_data[col] = series.to_numpy(dtype=target_dtype, copy=True)
+            else:
+                # Fallback for older pandas versions
+                result_data[col] = series.values.astype(target_dtype)
+
+            # Ensure contiguous
+            result_data[col] = self._ensure_contiguous(result_data[col])
+
+        return pd.DataFrame(result_data)
+
+
+class PolarsImporter(GraphImporter):
+    """
+    Importer for Polars DataFrames.
+
+    Converts Polars DataFrames to NumPy-backed pandas DataFrames.
+    """
+
+    def to_numpy_edges(
+        self, columns: List[str]
+    ) -> pd.DataFrame:  # pylint: disable=too-many-branches
+        """
+        Convert Polars DataFrame to NumPy-backed pandas DataFrame.
+
+        Uses Polars' to_pandas() method or to_numpy() depending on what's available.
+        """
+        try:
+            import polars  # pylint: disable=import-outside-toplevel,unused-import
+        except ImportError as exc:
+            raise ImportError(
+                "Polars is required to import Polars DataFrames. "
+                "Install it with: pip install polars"
+            ) from exc
+
+        # Select only needed columns
+        selected_df = self.edges_df.select(columns)
+
+        # Method 1: Direct to_pandas() conversion (simplest)
+        if hasattr(selected_df, "to_pandas"):
+            result_df = selected_df.to_pandas()
+
+            # Handle empty DataFrames
+            if len(result_df) == 0:
+                return result_df
+
+            # Ensure proper dtypes
+            for i, col in enumerate(columns):
+                if i < 2:  # Vertex indices
+                    # Check if column contains numeric data
+                    if np.issubdtype(result_df[col].dtype, np.integer):
+                        # Try to use uint32 for efficiency
+                        max_val = result_df[col].max()
+                        if not pd.isna(max_val) and max_val < np.iinfo(np.uint32).max:
+                            result_df[col] = result_df[col].astype(np.uint32)
+                    # If not numeric (e.g., strings), leave as is
+                else:
+                    # Weights/times/frequencies
+                    result_df[col] = result_df[col].astype(np.float64)
+
+            return result_df
+
+        # Method 2: Column-by-column conversion
+        result_data = {}
+
+        # Handle empty DataFrames
+        if len(selected_df) == 0:
+            return selected_df.to_pandas()
+
+        for i, col in enumerate(columns):
+            series = selected_df[col]
+
+            # Determine target dtype
+            if i < 2:  # Vertex indices
+                # Check if the series contains numeric data
+                if hasattr(series, "dtype") and series.dtype.is_integer():
+                    max_val = series.max()
+                    if max_val is not None and max_val < np.iinfo(np.uint32).max:
+                        target_dtype = np.uint32
+                    else:
+                        target_dtype = np.uint64
+                else:
+                    # Non-numeric columns, convert to pandas as is
+                    result_data[col] = series.to_pandas()
+                    continue
+            else:
+                target_dtype = np.float64
+
+            # Convert to NumPy
+            if hasattr(series, "to_numpy"):
+                np_array = series.to_numpy().astype(target_dtype)
+            else:
+                # Fallback for older Polars versions
+                np_array = series.to_list()
+                np_array = np.array(np_array, dtype=target_dtype)
+
+            # Ensure contiguous
+            result_data[col] = self._ensure_contiguous(np_array)
+
+        return pd.DataFrame(result_data)
+
+
+def standardize_graph_dataframe(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+    edges,
+    tail: str = "tail",
+    head: str = "head",
+    weight: Optional[str] = None,
+    trav_time: Optional[str] = None,
+    freq: Optional[str] = None,
+) -> pd.DataFrame:
+    """
+    Convenience function to standardize any DataFrame format to NumPy-backed pandas.
+
+    Parameters
+    ----------
+    edges : DataFrame-like
+        Input edges DataFrame in any supported format.
+    tail : str
+        Column name for tail vertices.
+    head : str
+        Column name for head vertices.
+    weight : str, optional
+        Column name for edge weights.
+    trav_time : str, optional
+        Column name for travel time.
+    freq : str, optional
+        Column name for frequency.
+
+    Returns
+    -------
+    pd.DataFrame
+        NumPy-backed pandas DataFrame with only the specified columns.
+    """
+    # Determine which columns to extract
+    columns = [tail, head]
+    if weight is not None:
+        columns.append(weight)
+    if trav_time is not None:
+        columns.append(trav_time)
+    if freq is not None:
+        columns.append(freq)
+
+    # Create appropriate importer and convert
+    importer = GraphImporter.from_dataframe(edges, tail, head, weight, trav_time, freq)
+    return importer.to_numpy_edges(columns)
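For orientation, here is a minimal usage sketch of the new standardize_graph_dataframe helper, assuming a plain NumPy-backed pandas DataFrame as input; the edge data and column values below are illustrative and not part of the diff.

import pandas as pd
from edsger.graph_importer import standardize_graph_dataframe

# Small edge list with tail/head vertex indices and a travel-time attribute
edges = pd.DataFrame({
    "tail": [0, 0, 1, 2],
    "head": [1, 2, 3, 3],
    "trav_time": [1.0, 2.0, 1.5, 0.5],
})

# Returns a NumPy-backed pandas DataFrame restricted to the requested columns,
# ready to be handed to the Cython shortest-path / hyperpath routines.
edges_np = standardize_graph_dataframe(edges, tail="tail", head="head", trav_time="trav_time")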
edsger/networks.py
CHANGED
@@ -31,7 +31,7 @@ class SiouxFalls:
     """
 
     @property
-    def edges(self):
+    def edges(self) -> pd.DataFrame:
         """
         A DataFrame containing the edges of the Sioux Falls network.
 
@@ -130,7 +130,9 @@ class SiouxFalls:
         return graph_edges
 
 
-def create_sf_network(
+def create_sf_network(
+    dwell_time: float = 1.0e-6, board_alight_ratio: float = 0.5
+) -> pd.DataFrame:
     """
     Example network from Spiess, H. and Florian, M. (1989).
     Optimal strategies: A new assignment model for transit networks.
|