edsger-0.1.4-cp311-cp311-win32.whl → edsger-0.1.6-cp311-cp311-win32.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
edsger/bfs.pyx ADDED
@@ -0,0 +1,243 @@
+ """
+ Breadth-First Search (BFS) implementation.
+
+ cpdef functions:
+
+ - bfs_csr
+     Compute BFS tree using CSR format (forward traversal). Returns predecessors.
+ - bfs_csc
+     Compute BFS tree using CSC format (backward traversal). Returns predecessors.
+ """
+
+ # cython: language_level=3
+ # cython: boundscheck=False
+ # cython: wraparound=False
+ # cython: embedsignature=False
+ # cython: cdivision=True
+ # cython: initializedcheck=False
+
+ cimport numpy as cnp
+ import numpy as np
+
+ cpdef cnp.ndarray bfs_csr(
+     cnp.uint32_t[::1] csr_indptr,
+     cnp.uint32_t[::1] csr_indices,
+     int start_vert_idx,
+     int vertex_count,
+     int sentinel=-9999):
+     """
+     Compute BFS tree using CSR format (forward traversal from start vertex).
+
+     Parameters
+     ----------
+     csr_indptr : cnp.uint32_t[::1]
+         Pointers in the CSR format
+     csr_indices : cnp.uint32_t[::1]
+         Indices in the CSR format
+     start_vert_idx : int
+         Starting vertex index
+     vertex_count : int
+         Total number of vertices
+     sentinel : int, optional
+         Sentinel value for unreachable nodes and start vertex (default: -9999)
+
+     Returns
+     -------
+     predecessors : cnp.ndarray
+         Predecessor array where predecessors[i] contains the predecessor
+         of vertex i in the BFS tree. Unreachable vertices and the start
+         vertex have the sentinel value.
+     """
+
+     cdef:
+         size_t tail_vert_idx, head_vert_idx, idx
+         size_t queue_head = 0, queue_tail = 0
+         size_t start = <size_t>start_vert_idx
+         cnp.uint32_t[::1] queue
+         cnp.int8_t[::1] visited
+         cnp.int32_t[::1] predecessors
+
+     # Allocate arrays
+     queue = np.empty(vertex_count, dtype=np.uint32)
+     visited = np.zeros(vertex_count, dtype=np.int8)
+     predecessors = np.full(vertex_count, sentinel, dtype=np.int32)
+
+     with nogil:
+         # Initialize: mark start vertex as visited and enqueue it
+         visited[start] = 1
+         queue[queue_tail] = start
+         queue_tail += 1
+
+         # BFS main loop
+         while queue_head < queue_tail:
+             tail_vert_idx = queue[queue_head]
+             queue_head += 1
+
+             # Process all outgoing edges from tail_vert_idx
+             for idx in range(<size_t>csr_indptr[tail_vert_idx],
+                              <size_t>csr_indptr[tail_vert_idx + 1]):
+                 head_vert_idx = <size_t>csr_indices[idx]
+
+                 # If not visited, mark as visited and enqueue
+                 if visited[head_vert_idx] == 0:
+                     visited[head_vert_idx] = 1
+                     predecessors[head_vert_idx] = <int>tail_vert_idx
+                     queue[queue_tail] = head_vert_idx
+                     queue_tail += 1
+
+     # Convert to numpy array
+     return np.asarray(predecessors)
+
+
+ cpdef cnp.ndarray bfs_csc(
+     cnp.uint32_t[::1] csc_indptr,
+     cnp.uint32_t[::1] csc_indices,
+     int start_vert_idx,
+     int vertex_count,
+     int sentinel=-9999):
+     """
+     Compute BFS tree using CSC format (backward traversal from start vertex).
+
+     Parameters
+     ----------
+     csc_indptr : cnp.uint32_t[::1]
+         Pointers in the CSC format
+     csc_indices : cnp.uint32_t[::1]
+         Indices in the CSC format
+     start_vert_idx : int
+         Starting vertex index
+     vertex_count : int
+         Total number of vertices
+     sentinel : int, optional
+         Sentinel value for unreachable nodes and start vertex (default: -9999)
+
+     Returns
+     -------
+     predecessors : cnp.ndarray
+         Predecessor array where predecessors[i] contains the successor
+         of vertex i in the BFS tree (since we're traversing backward).
+         Unreachable vertices and the start vertex have the sentinel value.
+     """
+
+     cdef:
+         size_t tail_vert_idx, head_vert_idx, idx
+         size_t queue_head = 0, queue_tail = 0
+         size_t start = <size_t>start_vert_idx
+         cnp.uint32_t[::1] queue
+         cnp.int8_t[::1] visited
+         cnp.int32_t[::1] predecessors
+
+     # Allocate arrays
+     queue = np.empty(vertex_count, dtype=np.uint32)
+     visited = np.zeros(vertex_count, dtype=np.int8)
+     predecessors = np.full(vertex_count, sentinel, dtype=np.int32)
+
+     with nogil:
+         # Initialize: mark start vertex as visited and enqueue it
+         visited[start] = 1
+         queue[queue_tail] = start
+         queue_tail += 1
+
+         # BFS main loop (processing incoming edges using CSC)
+         while queue_head < queue_tail:
+             head_vert_idx = queue[queue_head]
+             queue_head += 1
+
+             # Process all incoming edges to head_vert_idx
+             for idx in range(<size_t>csc_indptr[head_vert_idx],
+                              <size_t>csc_indptr[head_vert_idx + 1]):
+                 tail_vert_idx = <size_t>csc_indices[idx]
+
+                 # If not visited, mark as visited and enqueue
+                 if visited[tail_vert_idx] == 0:
+                     visited[tail_vert_idx] = 1
+                     predecessors[tail_vert_idx] = <int>head_vert_idx
+                     queue[queue_tail] = tail_vert_idx
+                     queue_tail += 1
+
+     # Convert to numpy array
+     return np.asarray(predecessors)
+
+
+ # ============================================================================ #
+ # tests                                                                        #
+ # ============================================================================ #
+
+
+ cdef generate_simple_graph_csr():
+     """
+     Generate a simple directed graph in CSR format.
+
+     Graph structure:
+         0 -> 1 -> 3
+         0 -> 2 -> 3
+
+     4 vertices, 4 edges
+     """
+     csr_indptr = np.array([0, 2, 3, 4, 4], dtype=np.uint32)
+     csr_indices = np.array([1, 2, 3, 3], dtype=np.uint32)
+     return csr_indptr, csr_indices
+
+
+ cdef generate_simple_graph_csc():
+     """
+     Generate a simple directed graph in CSC format.
+
+     Graph structure (same as CSR version):
+         0 -> 1 -> 3
+         0 -> 2 -> 3
+
+     4 vertices, 4 edges
+     """
+     csc_indptr = np.array([0, 0, 1, 2, 4], dtype=np.uint32)
+     csc_indices = np.array([0, 0, 1, 2], dtype=np.uint32)
+     return csc_indptr, csc_indices
+
+
+ cpdef test_bfs_csr_01():
+     """Test BFS CSR on simple graph from vertex 0."""
+     cdef int UNREACHABLE = -9999
+     csr_indptr, csr_indices = generate_simple_graph_csr()
+
+     predecessors = bfs_csr(csr_indptr, csr_indices, 0, 4)
+
+     # Expected: 0 is start, 1 and 2 have predecessor 0, 3 has predecessor 1 or 2
+     assert predecessors[0] == UNREACHABLE  # start vertex
+     assert predecessors[1] == 0
+     assert predecessors[2] == 0
+     assert predecessors[3] in [1, 2]  # could be reached from either 1 or 2
+
+
+ cpdef test_bfs_csc_01():
+     """Test BFS CSC on simple graph to vertex 3."""
+     cdef int UNREACHABLE = -9999
+     csc_indptr, csc_indices = generate_simple_graph_csc()
+
+     predecessors = bfs_csc(csc_indptr, csc_indices, 3, 4)
+
+     # Expected: working backward from 3
+     assert predecessors[3] == UNREACHABLE  # start vertex
+     assert predecessors[1] in [3, UNREACHABLE] or predecessors[2] in [3, UNREACHABLE]
+     assert predecessors[0] in [1, 2, UNREACHABLE]
+
+
+ cpdef test_bfs_unreachable():
+     """Test BFS with unreachable vertices."""
+     cdef int UNREACHABLE = -9999
+     # Graph: 0 -> 1, 2 -> 3 (two disconnected components)
+     csr_indptr = np.array([0, 1, 1, 2, 2], dtype=np.uint32)
+     csr_indices = np.array([1, 3], dtype=np.uint32)
+
+     predecessors = bfs_csr(csr_indptr, csr_indices, 0, 4)
+
+     # From 0, can reach 1 but not 2 or 3
+     assert predecessors[0] == UNREACHABLE  # start
+     assert predecessors[1] == 0
+     assert predecessors[2] == UNREACHABLE  # unreachable
+     assert predecessors[3] == UNREACHABLE  # unreachable
+
+
+ # author : Francois Pacull
+ # copyright : Architecture & Performance
+ # email: francois.pacull@architecture-performance.fr
+ # license : MIT
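
A minimal usage sketch for the new routines, reusing the four-vertex graph from the in-file tests. The edsger.bfs import path is an assumption inferred from the file location edsger/bfs.pyx and is not otherwise confirmed by this diff:

    import numpy as np
    from edsger.bfs import bfs_csr  # assumed import path for the compiled module

    # Same graph as generate_simple_graph_csr: 0 -> 1, 0 -> 2, 1 -> 3, 2 -> 3
    csr_indptr = np.array([0, 2, 3, 4, 4], dtype=np.uint32)
    csr_indices = np.array([1, 2, 3, 3], dtype=np.uint32)

    predecessors = bfs_csr(csr_indptr, csr_indices, 0, 4)
    # predecessors == [-9999, 0, 0, 1]; the default sentinel -9999 marks
    # the start vertex and any unreachable vertices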
Binary file
edsger/commons.pyx CHANGED
@@ -2,6 +2,13 @@
  Common definitions.
  """
 
+ # cython: language_level=3
+ # cython: boundscheck=False
+ # cython: wraparound=False
+ # cython: embedsignature=False
+ # cython: cdivision=True
+ # cython: initializedcheck=False
+
  import numpy as np
 
  DTYPE_PY = np.float64
Binary file
edsger/dijkstra.pyx CHANGED
@@ -29,6 +29,13 @@ cpdef functions:
      are reached. Compute successors.
  """
 
+ # cython: language_level=3
+ # cython: boundscheck=False
+ # cython: wraparound=False
+ # cython: embedsignature=False
+ # cython: cdivision=True
+ # cython: initializedcheck=False
+
  cimport numpy as cnp
 
  from edsger.commons cimport (
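
The header comments added to commons.pyx and dijkstra.pyx above are Cython compiler directive comments. As a point of reference, the same directives can also be passed globally at build time; a sketch assuming a standard setuptools + Cython build (this package's actual setup script is not shown in the diff):

    # Hypothetical setup.py fragment: the directives from the headers above,
    # passed once to cythonize() for every .pyx module in the package.
    from setuptools import setup
    from Cython.Build import cythonize

    setup(
        ext_modules=cythonize(
            ["edsger/*.pyx"],
            compiler_directives={
                "language_level": 3,
                "boundscheck": False,
                "wraparound": False,
                "embedsignature": False,
                "cdivision": True,
                "initializedcheck": False,
            },
        ),
    )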
@@ -0,0 +1,340 @@
+ """
+ Graph importer module for converting various DataFrame formats to NumPy-backed pandas DataFrames.
+
+ This module provides a unified interface for importing graph data from different DataFrame libraries
+ (pandas with NumPy backend, pandas with Arrow backend, Polars, etc.) and converting them to a
+ standardized NumPy-backed pandas DataFrame format that is optimal for the graph algorithms.
+ """
+
+ from abc import ABC, abstractmethod
+ from typing import Optional, List
+ import warnings
+
+ import numpy as np
+ import pandas as pd
+
+
+ class GraphImporter(ABC):
+     """
+     Abstract base class for importing graph data from various DataFrame libraries.
+
+     All importers convert their input format to a NumPy-backed pandas DataFrame
+     with contiguous memory layout for optimal performance in Cython algorithms.
+     """
+
+     def __init__(self, edges_df):
+         """
+         Initialize the importer with a DataFrame.
+
+         Parameters
+         ----------
+         edges_df : DataFrame-like
+             The edges DataFrame in the specific library format.
+         """
+         self.edges_df = edges_df
+
+     @staticmethod
+     def from_dataframe(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+         edges,
+         tail: str = "tail",
+         head: str = "head",
+         weight: Optional[str] = None,
+         trav_time: Optional[str] = None,
+         freq: Optional[str] = None,
+     ) -> "GraphImporter":
+         """
+         Factory method to create the appropriate importer based on DataFrame type.
+
+         Parameters
+         ----------
+         edges : DataFrame-like
+             The edges DataFrame to import.
+         tail : str
+             Column name for tail vertices.
+         head : str
+             Column name for head vertices.
+         weight : str, optional
+             Column name for edge weights (for shortest path algorithms).
+         trav_time : str, optional
+             Column name for travel time (for hyperpath algorithms).
+         freq : str, optional
+             Column name for frequency (for hyperpath algorithms).
+
+         Returns
+         -------
+         GraphImporter
+             An instance of the appropriate importer subclass.
+         """
+         # Note: tail, head, weight, trav_time, freq are part of API but not used here
+         # They're used by calling code after factory creates the importer
+         # pylint: disable=unused-argument
+         try:
+             # Check for Polars DataFrame
+             if hasattr(edges, "__class__") and edges.__class__.__module__.startswith(
+                 "polars"
+             ):
+                 return PolarsImporter(edges)
+         except (AttributeError, TypeError):
+             # If __class__ or __module__ access fails, continue to other checks
+             pass
+
+         # Check for pandas DataFrame
+         if isinstance(edges, pd.DataFrame):
+             try:
+                 # Check if any column has Arrow backend
+                 has_arrow = any(
+                     hasattr(dtype, "pyarrow_dtype") for dtype in edges.dtypes
+                 )
+
+                 if has_arrow:
+                     return PandasArrowImporter(edges)
+                 return PandasNumpyImporter(edges)
+             except (AttributeError, TypeError):
+                 # If dtype checking fails, assume NumPy backend
+                 return PandasNumpyImporter(edges)
+
+         # Unknown type - try to convert to pandas
+         warnings.warn(
+             f"Unknown DataFrame type {type(edges)}. Attempting to convert to pandas.",
+             UserWarning,
+         )
+         return PandasNumpyImporter(pd.DataFrame(edges))
+
+     @abstractmethod
+     def to_numpy_edges(self, columns: List[str]) -> pd.DataFrame:
+         """
+         Convert the DataFrame to a NumPy-backed pandas DataFrame.
+
+         Parameters
+         ----------
+         columns : List[str]
+             List of column names to extract.
+
+         Returns
+         -------
+         pd.DataFrame
+             A pandas DataFrame with NumPy backend and contiguous memory.
+         """
+
+     def _ensure_contiguous(self, array: np.ndarray) -> np.ndarray:
+         """
+         Ensure the array is C-contiguous.
+
+         Parameters
+         ----------
+         array : np.ndarray
+             Input array.
+
+         Returns
+         -------
+         np.ndarray
+             C-contiguous array.
+         """
+         if not array.flags["C_CONTIGUOUS"]:
+             return np.ascontiguousarray(array)
+         return array
+
+
+ class PandasNumpyImporter(GraphImporter):
+     """
+     Importer for pandas DataFrames with NumPy backend.
+
+     This is the most efficient case as it requires minimal conversion.
+     """
+
+     def to_numpy_edges(self, columns: List[str]) -> pd.DataFrame:
+         """
+         Extract columns and ensure they are NumPy-backed.
+
+         For NumPy-backed pandas, this is mostly a pass-through operation
+         with validation to ensure contiguous memory.
+         """
+         # Extract only the needed columns
+         result_df = self.edges_df[columns].copy(deep=True)
+
+         # Ensure all columns are contiguous NumPy arrays
+         for col in columns:
+             if not isinstance(result_df[col].values, np.ndarray):
+                 # Convert to NumPy if somehow not already
+                 result_df[col] = result_df[col].to_numpy()
+
+         return result_df
+
+
+ class PandasArrowImporter(GraphImporter):
+     """
+     Importer for pandas DataFrames with Arrow backend.
+
+     Converts Arrow-backed columns to NumPy arrays with proper data types.
+     """
+
+     def to_numpy_edges(self, columns: List[str]) -> pd.DataFrame:
+         """
+         Convert Arrow-backed columns to NumPy arrays.
+
+         Uses to_numpy() method which ensures contiguous memory layout.
+         """
+         result_data = {}
+
+         for col in columns:
+             series = self.edges_df[col]
+
+             # Determine target dtype based on column values
+             if col in columns[:2]:  # Assume first two are vertex indices (tail, head)
+                 # Try to use uint32 for vertex indices if possible
+                 max_val = series.max()
+                 if max_val < np.iinfo(np.uint32).max:
+                     target_dtype = np.uint32
+                 else:
+                     target_dtype = np.uint64
+             else:
+                 # Use float64 for weights/times/frequencies
+                 target_dtype = np.float64
+
+             # Convert to NumPy with specified dtype
+             if hasattr(series, "to_numpy"):
+                 # Use to_numpy() for Arrow-backed series
+                 result_data[col] = series.to_numpy(dtype=target_dtype, copy=True)
+             else:
+                 # Fallback for older pandas versions
+                 result_data[col] = series.values.astype(target_dtype)
+
+             # Ensure contiguous
+             result_data[col] = self._ensure_contiguous(result_data[col])
+
+         return pd.DataFrame(result_data)
+
+
+ class PolarsImporter(GraphImporter):
+     """
+     Importer for Polars DataFrames.
+
+     Converts Polars DataFrames to NumPy-backed pandas DataFrames.
+     """
+
+     def to_numpy_edges(
+         self, columns: List[str]
+     ) -> pd.DataFrame:  # pylint: disable=too-many-branches
+         """
+         Convert Polars DataFrame to NumPy-backed pandas DataFrame.
+
+         Uses Polars' to_pandas() method or to_numpy() depending on what's available.
+         """
+         try:
+             import polars  # pylint: disable=import-outside-toplevel,unused-import
+         except ImportError as exc:
+             raise ImportError(
+                 "Polars is required to import Polars DataFrames. "
+                 "Install it with: pip install polars"
+             ) from exc
+
+         # Select only needed columns
+         selected_df = self.edges_df.select(columns)
+
+         # Method 1: Direct to_pandas() conversion (simplest)
+         if hasattr(selected_df, "to_pandas"):
+             result_df = selected_df.to_pandas()
+
+             # Handle empty DataFrames
+             if len(result_df) == 0:
+                 return result_df
+
+             # Ensure proper dtypes
+             for i, col in enumerate(columns):
+                 if i < 2:  # Vertex indices
+                     # Check if column contains numeric data
+                     if np.issubdtype(result_df[col].dtype, np.integer):
+                         # Try to use uint32 for efficiency
+                         max_val = result_df[col].max()
+                         if not pd.isna(max_val) and max_val < np.iinfo(np.uint32).max:
+                             result_df[col] = result_df[col].astype(np.uint32)
+                     # If not numeric (e.g., strings), leave as is
+                 else:
+                     # Weights/times/frequencies
+                     result_df[col] = result_df[col].astype(np.float64)
+
+             return result_df
+
+         # Method 2: Column-by-column conversion
+         result_data = {}
+
+         # Handle empty DataFrames
+         if len(selected_df) == 0:
+             return selected_df.to_pandas()
+
+         for i, col in enumerate(columns):
+             series = selected_df[col]
+
+             # Determine target dtype
+             if i < 2:  # Vertex indices
+                 # Check if the series contains numeric data
+                 if hasattr(series, "dtype") and series.dtype.is_integer():
+                     max_val = series.max()
+                     if max_val is not None and max_val < np.iinfo(np.uint32).max:
+                         target_dtype = np.uint32
+                     else:
+                         target_dtype = np.uint64
+                 else:
+                     # Non-numeric columns, convert to pandas as is
+                     result_data[col] = series.to_pandas()
+                     continue
+             else:
+                 target_dtype = np.float64
+
+             # Convert to NumPy
+             if hasattr(series, "to_numpy"):
+                 np_array = series.to_numpy().astype(target_dtype)
+             else:
+                 # Fallback for older Polars versions
+                 np_array = series.to_list()
+                 np_array = np.array(np_array, dtype=target_dtype)
+
+             # Ensure contiguous
+             result_data[col] = self._ensure_contiguous(np_array)
+
+         return pd.DataFrame(result_data)
+
+
+ def standardize_graph_dataframe(  # pylint: disable=too-many-arguments,too-many-positional-arguments
+     edges,
+     tail: str = "tail",
+     head: str = "head",
+     weight: Optional[str] = None,
+     trav_time: Optional[str] = None,
+     freq: Optional[str] = None,
+ ) -> pd.DataFrame:
+     """
+     Convenience function to standardize any DataFrame format to NumPy-backed pandas.
+
+     Parameters
+     ----------
+     edges : DataFrame-like
+         Input edges DataFrame in any supported format.
+     tail : str
+         Column name for tail vertices.
+     head : str
+         Column name for head vertices.
+     weight : str, optional
+         Column name for edge weights.
+     trav_time : str, optional
+         Column name for travel time.
+     freq : str, optional
+         Column name for frequency.
+
+     Returns
+     -------
+     pd.DataFrame
+         NumPy-backed pandas DataFrame with only the specified columns.
+     """
+     # Determine which columns to extract
+     columns = [tail, head]
+     if weight is not None:
+         columns.append(weight)
+     if trav_time is not None:
+         columns.append(trav_time)
+     if freq is not None:
+         columns.append(freq)
+
+     # Create appropriate importer and convert
+     importer = GraphImporter.from_dataframe(edges, tail, head, weight, trav_time, freq)
+     return importer.to_numpy_edges(columns)
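
A minimal usage sketch for the importer module added above. The module path edsger.graph_importer is an assumption, since the new file's name is not shown in this diff:

    import pandas as pd
    # Assumed module path for the file added above
    from edsger.graph_importer import standardize_graph_dataframe

    edges = pd.DataFrame(
        {"tail": [0, 0, 1, 2], "head": [1, 2, 3, 3], "weight": [1.0, 2.0, 1.5, 0.5]}
    )
    # The factory picks the matching importer (NumPy pandas, Arrow pandas, or Polars)
    # and returns a NumPy-backed pandas DataFrame with just the requested columns.
    std_edges = standardize_graph_dataframe(edges, tail="tail", head="head", weight="weight")
    print(std_edges.dtypes)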
edsger/networks.py CHANGED
@@ -31,7 +31,7 @@ class SiouxFalls:
      """
 
      @property
-     def edges(self):
+     def edges(self) -> pd.DataFrame:
          """
          A DataFrame containing the edges of the Sioux Falls network.
 
@@ -130,7 +130,9 @@ class SiouxFalls:
          return graph_edges
 
 
- def create_sf_network(dwell_time=1.0e-6, board_alight_ratio=0.5):
+ def create_sf_network(
+     dwell_time: float = 1.0e-6, board_alight_ratio: float = 0.5
+ ) -> pd.DataFrame:
      """
      Example network from Spiess, H. and Florian, M. (1989).
      Optimal strategies: A new assignment model for transit networks.
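
A small sketch of the newly annotated call sites, assuming SiouxFalls can be instantiated without arguments (its constructor is not shown in these hunks):

    from edsger.networks import SiouxFalls, create_sf_network

    sf_edges = SiouxFalls().edges        # property now annotated as -> pd.DataFrame
    transit_edges = create_sf_network()  # now annotated as -> pd.DataFrame
    print(type(sf_edges), type(transit_edges))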