rxgraph 0.2.0__tar.gz → 0.3.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (39) hide show
  1. {rxgraph-0.2.0 → rxgraph-0.3.0}/Cargo.lock +2 -2
  2. {rxgraph-0.2.0 → rxgraph-0.3.0}/PKG-INFO +1 -1
  3. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/Cargo.toml +5 -1
  4. rxgraph-0.3.0/crates/rxgraph/benches/memory.rs +121 -0
  5. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/csr.rs +16 -5
  6. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/graph.rs +13 -2
  7. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/repo.rs +165 -46
  8. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/traversal/algo.rs +17 -0
  9. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/traversal/config.rs +15 -0
  10. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/traversal/mod.rs +1 -0
  11. rxgraph-0.3.0/crates/rxgraph/src/traversal/progress.rs +160 -0
  12. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph-python/Cargo.toml +1 -1
  13. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph-python/src/lib.rs +20 -2
  14. {rxgraph-0.2.0 → rxgraph-0.3.0}/pyproject.toml +1 -0
  15. {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/__init__.py +172 -24
  16. {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/__init__.pyi +14 -1
  17. {rxgraph-0.2.0 → rxgraph-0.3.0}/Cargo.toml +0 -0
  18. {rxgraph-0.2.0 → rxgraph-0.3.0}/README.md +0 -0
  19. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/README.md +0 -0
  20. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/benches/flight_routes.rs +0 -0
  21. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/benches/payment_risk.rs +0 -0
  22. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/examples/flight_routes.rs +0 -0
  23. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/arrow.rs +0 -0
  24. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/arrow_value.rs +0 -0
  25. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/bind.rs +0 -0
  26. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/eval.rs +0 -0
  27. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/expr.rs +0 -0
  28. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/mod.rs +0 -0
  29. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/list.rs +0 -0
  30. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/mod.rs +0 -0
  31. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/scalar.rs +0 -0
  32. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/string.rs +0 -0
  33. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/struct_.rs +0 -0
  34. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/polars_json.rs +0 -0
  35. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/value.rs +0 -0
  36. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/mod.rs +0 -0
  37. {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/lib.rs +0 -0
  38. {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/_graph_tables.py +0 -0
  39. {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/py.typed +0 -0
@@ -1293,7 +1293,7 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
1293
1293
 
1294
1294
  [[package]]
1295
1295
  name = "rxgraph"
1296
- version = "0.2.0"
1296
+ version = "0.3.0"
1297
1297
  dependencies = [
1298
1298
  "anyhow",
1299
1299
  "arrow",
@@ -1311,7 +1311,7 @@ dependencies = [
1311
1311
 
1312
1312
  [[package]]
1313
1313
  name = "rxgraph-python"
1314
- version = "0.2.0"
1314
+ version = "0.3.0"
1315
1315
  dependencies = [
1316
1316
  "anyhow",
1317
1317
  "arrow",
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: rxgraph
3
- Version: 0.2.0
3
+ Version: 0.3.0
4
4
  Classifier: Programming Language :: Rust
5
5
  Classifier: Programming Language :: Python :: 3.11
6
6
  Classifier: Programming Language :: Python :: 3.12
@@ -1,6 +1,6 @@
1
1
  [package]
2
2
  name = "rxgraph"
3
- version = "0.2.0"
3
+ version = "0.3.0"
4
4
  edition.workspace = true
5
5
  rust-version.workspace = true
6
6
  description = "High-performance graph traversal engine"
@@ -40,3 +40,7 @@ harness = false
40
40
  [[bench]]
41
41
  name = "flight_routes"
42
42
  harness = false
43
+
44
+ [[bench]]
45
+ name = "memory"
46
+ harness = false
@@ -0,0 +1,121 @@
1
+ //! Memory benchmarking for graph construction, holding, and from-source search.
2
+ //!
3
+ //! Unlike the criterion benches (which measure time), this binary uses `stats_alloc` to
4
+ //! report allocation deltas at each stage. Run with:
5
+ //!
6
+ //! ```sh
7
+ //! cargo bench -p rxgraph --bench memory
8
+ //! ```
9
+ //!
10
+ //! It builds a large, sparse graph where only a small subset is reachable from the source,
11
+ //! and reports bytes allocated/RSS.
12
+
13
+ use std::{alloc::System, hint::black_box, sync::Arc, time::Instant};
14
+
15
+ use arrow::{
16
+ array::{ArrayRef, UInt64Array},
17
+ datatypes::{DataType, Field, Schema},
18
+ record_batch::RecordBatch,
19
+ };
20
+ use rxgraph::Graph;
21
+ use stats_alloc::{INSTRUMENTED_SYSTEM, Region, StatsAlloc};
22
+
23
+ #[global_allocator]
24
+ static GLOBAL: &StatsAlloc<System> = &INSTRUMENTED_SYSTEM;
25
+
26
+ /// Number of nodes in the synthetic graph.
27
+ const NODES: u64 = 5_000_000;
28
+ /// Length of the single reachable chain from node 0 (the "working set").
29
+ const REACHABLE_CHAIN: u64 = 5_000;
30
+
31
+ fn batch(fields: Vec<Field>, columns: Vec<ArrayRef>) -> RecordBatch {
32
+ RecordBatch::try_new(Arc::new(Schema::new(fields)), columns).unwrap()
33
+ }
34
+
35
+ /// Builds a graph with `NODES` contiguous u64 node ids and a single linear chain of
36
+ /// `REACHABLE_CHAIN` edges from node 0. Everything past the chain is unreachable, so a
37
+ /// search from node 0 only ever needs a tiny working set.
38
+ fn tables() -> (RecordBatch, RecordBatch) {
39
+ let node_ids: Vec<u64> = (0..NODES).collect();
40
+ let nodes = batch(
41
+ vec![Field::new("id", DataType::UInt64, false)],
42
+ vec![Arc::new(UInt64Array::from(node_ids)) as ArrayRef],
43
+ );
44
+
45
+ let edge_count = REACHABLE_CHAIN;
46
+ let edge_ids: Vec<u64> = (0..edge_count).collect();
47
+ let srcs: Vec<u64> = (0..edge_count).collect();
48
+ let dests: Vec<u64> = (1..=edge_count).collect();
49
+ let edges = batch(
50
+ vec![
51
+ Field::new("id", DataType::UInt64, false),
52
+ Field::new("src", DataType::UInt64, false),
53
+ Field::new("dest", DataType::UInt64, false),
54
+ ],
55
+ vec![
56
+ Arc::new(UInt64Array::from(edge_ids)) as ArrayRef,
57
+ Arc::new(UInt64Array::from(srcs)),
58
+ Arc::new(UInt64Array::from(dests)),
59
+ ],
60
+ );
61
+
62
+ (nodes, edges)
63
+ }
64
+
65
+ fn mib(bytes: isize) -> f64 {
66
+ bytes as f64 / (1024.0 * 1024.0)
67
+ }
68
+
69
+ /// Runs the received callback inside an allocation-tracking region and reports memory use.
70
+ ///
71
+ /// Two distinct numbers are reported:
72
+ /// - `retained`: net bytes still held after the call returns (allocated minus freed). This
73
+ /// is what grows the resident set. A value that escapes the closure (e.g. the graph)
74
+ /// counts here; a result that is dropped inside the closure does not.
75
+ /// - `churn`: total bytes allocated during the call, regardless of whether they were freed
76
+ /// again before returning. High churn with ~zero retained means lots of short-lived
77
+ /// allocations (e.g. WCC building one Vec per component, then handing it back and dropping
78
+ /// it), not a memory leak.
79
+ fn measure<T>(label: &str, f: impl FnOnce() -> T) -> T {
80
+ let region = Region::new(GLOBAL);
81
+ let started = Instant::now();
82
+ let value = f();
83
+ let stats = region.change();
84
+ let elapsed = started.elapsed();
85
+ let retained = stats.bytes_allocated as isize - stats.bytes_deallocated as isize;
86
+ eprintln!(
87
+ "{label:<28} retained={:>9.2} MiB churn={:>8.2} MiB in {:<8} allocs (freed={:.2} MiB) {elapsed:?}",
88
+ mib(retained),
89
+ mib(stats.bytes_allocated as isize),
90
+ stats.allocations,
91
+ mib(stats.bytes_deallocated as isize),
92
+ );
93
+ value
94
+ }
95
+
96
+ fn main() {
97
+ eprintln!("memory profile: nodes={NODES} reachable_chain={REACHABLE_CHAIN}\n");
98
+
99
+ let (nodes, edges) = tables();
100
+
101
+ // Construction: forward CSR + identity only. Reverse CSR is NOT built here (lazy).
102
+ let graph = measure("construct", || Graph::new(nodes, edges).unwrap());
103
+
104
+ // Forward-only BFS from node 0: touches only the reachable chain.
105
+ measure("bfs_from_source", || {
106
+ black_box(graph.bfs_u64(0, None).unwrap());
107
+ });
108
+
109
+ // First degree query forces the lazy reverse CSR to materialize.
110
+ measure("in_degrees (builds rev CSR)", || {
111
+ black_box(graph.in_degrees());
112
+ });
113
+
114
+ // Subsequent reverse-adjacency use is free (cached).
115
+ measure("weakly_connected (rev cached)", || {
116
+ black_box(graph.weakly_connected_components_u64());
117
+ });
118
+
119
+ // Keep the graph alive so its footprint is attributed to the stages above.
120
+ black_box(&graph);
121
+ }
@@ -1,17 +1,29 @@
1
- use anyhow::Result;
1
+ use anyhow::{Result, bail};
2
2
 
3
3
  use super::repo::{EdgeId, NodeId};
4
4
 
5
+ /// Type used for CSR row offsets. `u32` keeps per-node overhead small; edge counts are
6
+ /// already bounded by `EdgeId = u32`, so offsets cannot exceed `u32::MAX`.
7
+ pub(crate) type Offset = u32;
8
+
5
9
  #[derive(Debug, Clone, PartialEq, Eq)]
6
10
  pub(crate) struct Csr {
7
- pub(crate) offsets: Vec<usize>,
11
+ pub(crate) offsets: Vec<Offset>,
8
12
  pub(crate) edge_ids: Vec<EdgeId>,
9
13
  pub(crate) dests: Vec<NodeId>,
10
14
  }
11
15
 
12
16
  /// Constructs a CSR (Compressed Sparse Row) data structure for outgoing edges.
13
17
  pub(crate) fn build_csr(node_count: usize, edges: &[(NodeId, NodeId)]) -> Result<Csr> {
14
- let mut offsets = vec![0usize; node_count + 1];
18
+ if edges.len() > Offset::MAX as usize {
19
+ bail!(
20
+ "too many edges for u32 CSR offsets ({} > {})",
21
+ edges.len(),
22
+ Offset::MAX
23
+ );
24
+ }
25
+
26
+ let mut offsets = vec![0 as Offset; node_count + 1];
15
27
 
16
28
  for &(src, _dest) in edges {
17
29
  offsets[src as usize + 1] += 1;
@@ -26,8 +38,7 @@ pub(crate) fn build_csr(node_count: usize, edges: &[(NodeId, NodeId)]) -> Result
26
38
  let mut cursor = offsets.clone();
27
39
 
28
40
  for (edge_id, &(src, dest)) in edges.iter().enumerate() {
29
- let pos = cursor[src as usize];
30
- // TODO: Check?
41
+ let pos = cursor[src as usize] as usize;
31
42
  edge_ids[pos] = edge_id as EdgeId;
32
43
  dests[pos] = dest;
33
44
  cursor[src as usize] += 1;
@@ -35,6 +35,11 @@ impl Graph {
35
35
  })
36
36
  }
37
37
 
38
+ /// Replaces the payload (attribute) tables, reusing the existing topology.
39
+ pub fn set_payloads(&mut self, nodes: RecordBatch, edges: RecordBatch) -> Result<()> {
40
+ self.repo.set_payloads(nodes, edges)
41
+ }
42
+
38
43
  /// Number of node rows.
39
44
  pub fn node_count(&self) -> usize {
40
45
  self.repo.nodes.num_rows()
@@ -211,6 +216,8 @@ impl Graph {
211
216
  pub fn weakly_connected_components(&self) -> Vec<Vec<GraphId<'_>>> {
212
217
  let mut visited = vec![0u8; self.node_count()];
213
218
  let mut components = Vec::new();
219
+ // Reused across components to reduce allocations.
220
+ let mut frontier = Vec::new();
214
221
 
215
222
  for start in 0..self.node_count() {
216
223
  if visited[start] != 0 {
@@ -218,7 +225,8 @@ impl Graph {
218
225
  }
219
226
 
220
227
  let mut component = Vec::new();
221
- let mut frontier = vec![start as NodeId];
228
+ frontier.clear();
229
+ frontier.push(start as NodeId);
222
230
  let mut head = 0;
223
231
  visited[start] = 1;
224
232
 
@@ -253,6 +261,8 @@ impl Graph {
253
261
  pub fn weakly_connected_components_u64(&self) -> Option<Vec<Vec<u64>>> {
254
262
  let mut visited = vec![0u8; self.node_count()];
255
263
  let mut components = Vec::new();
264
+ // Reused across components to reduce allocations.
265
+ let mut frontier = Vec::new();
256
266
 
257
267
  for start in 0..self.node_count() {
258
268
  if visited[start] != 0 {
@@ -260,7 +270,8 @@ impl Graph {
260
270
  }
261
271
 
262
272
  let mut component = Vec::new();
263
- let mut frontier = vec![start as NodeId];
273
+ frontier.clear();
274
+ frontier.push(start as NodeId);
264
275
  let mut head = 0;
265
276
  visited[start] = 1;
266
277
 
@@ -1,4 +1,4 @@
1
- use std::{collections::HashMap, fmt};
1
+ use std::{collections::HashMap, fmt, sync::OnceLock};
2
2
 
3
3
  use anyhow::{Context, Result, bail};
4
4
  use arrow::array::{
@@ -8,13 +8,13 @@ use arrow_schema::DataType;
8
8
 
9
9
  use crate::{
10
10
  arrow::validate_field_exists,
11
- graph::csr::{Csr, build_csr},
11
+ graph::csr::{Csr, Offset, build_csr},
12
12
  };
13
13
 
14
- /// Compact internal node identifier used by traversal code.
14
+ /// Compact internal node identifier used for traversal.
15
15
  pub type NodeId = u32;
16
16
 
17
- /// Compact internal edge identifier used by traversal code.
17
+ /// Compact internal edge identifier used for traversal.
18
18
  pub type EdgeId = u32;
19
19
 
20
20
  pub const ID_COL: &str = "id";
@@ -100,17 +100,34 @@ pub trait GraphRepo {
100
100
 
101
101
  #[derive(Debug)]
102
102
  pub(crate) struct Repo {
103
- csr_offsets: Vec<usize>,
103
+ csr_offsets: Vec<Offset>,
104
104
  csr_dests: Vec<NodeId>,
105
105
  edge_ids: Vec<EdgeId>,
106
- incoming_offsets: Vec<usize>,
107
- incoming_srcs: Vec<NodeId>,
108
- out_degrees: Vec<usize>,
109
- in_degrees: Vec<usize>,
110
- degrees: Vec<usize>,
106
+
111
107
  identity: Identity,
112
108
  pub nodes: RecordBatch,
113
109
  pub edges: RecordBatch,
110
+
111
+ /// Reverse adjacency (incoming edges).
112
+ /// Used for optimization - only some searches require it and it's built lazily on first use
113
+ /// to keep construction memory and time low (and proportional) for forward-only workloads
114
+ /// (like BFS, as opposed to WCC or degrees).
115
+ incoming: OnceLock<IncomingCsr>,
116
+ /// Endpoints retained to build the reverse CSR lazily without re-reading Arrow columns.
117
+ edge_endpoints: Vec<(NodeId, NodeId)>,
118
+
119
+ /// Degree vectors, built only on the first whole-graph degree query and cached afterwards.
120
+ /// Search-only workloads never touch these, so construction stays cheap;
121
+ /// degree-heavy workloads pay the O(n) build once instead of on every call.
122
+ out_degrees: OnceLock<Vec<usize>>,
123
+ in_degrees: OnceLock<Vec<usize>>,
124
+ degrees: OnceLock<Vec<usize>>,
125
+ }
126
+
127
+ #[derive(Debug)]
128
+ struct IncomingCsr {
129
+ offsets: Vec<Offset>,
130
+ srcs: Vec<NodeId>,
114
131
  }
115
132
 
116
133
  #[derive(Debug)]
@@ -203,6 +220,33 @@ impl Repo {
203
220
  self.identity.is_contiguous_u64()
204
221
  }
205
222
 
223
+ /// Replaces the payload (attribute) tables without rebuilding topology.
224
+ ///
225
+ /// Used by lazy graphs to swap in column-projected payload batches for a single search.
226
+ /// The new batches must keep the original row order and count: DSL column reads index
227
+ /// payload arrays by internal node/edge ID, which equals the Arrow row position.
228
+ /// Identity (`id`/`src`/`dest`) is resolved from the precomputed mapping, not these
229
+ /// batches, so the projected batches only need the columns the kernel references.
230
+ pub(crate) fn set_payloads(&mut self, nodes: RecordBatch, edges: RecordBatch) -> Result<()> {
231
+ if nodes.num_rows() != self.nodes.num_rows() {
232
+ bail!(
233
+ "projected nodes table has {} rows but topology expects {}",
234
+ nodes.num_rows(),
235
+ self.nodes.num_rows()
236
+ );
237
+ }
238
+ if edges.num_rows() != self.edges.num_rows() {
239
+ bail!(
240
+ "projected edges table has {} rows but topology expects {}",
241
+ edges.num_rows(),
242
+ self.edges.num_rows()
243
+ );
244
+ }
245
+ self.nodes = nodes;
246
+ self.edges = edges;
247
+ Ok(())
248
+ }
249
+
206
250
  pub(crate) fn internal_node_u64(&self, external: u64) -> Option<NodeId> {
207
251
  self.identity.internal_node_u64(external)
208
252
  }
@@ -215,8 +259,8 @@ impl Repo {
215
259
  impl GraphRepo for Repo {
216
260
  fn outgoing(&self, node: NodeId) -> impl Iterator<Item = (EdgeId, NodeId)> {
217
261
  let i = node as usize;
218
- let start = self.csr_offsets[i];
219
- let end = self.csr_offsets[i + 1];
262
+ let start = self.csr_offsets[i] as usize;
263
+ let end = self.csr_offsets[i + 1] as usize;
220
264
 
221
265
  self.edge_ids[start..end]
222
266
  .iter()
@@ -226,16 +270,17 @@ impl GraphRepo for Repo {
226
270
 
227
271
  fn outgoing_slice(&self, node: NodeId) -> (&[EdgeId], &[NodeId]) {
228
272
  let i = node as usize;
229
- let start = self.csr_offsets[i];
230
- let end = self.csr_offsets[i + 1];
273
+ let start = self.csr_offsets[i] as usize;
274
+ let end = self.csr_offsets[i + 1] as usize;
231
275
  (&self.edge_ids[start..end], &self.csr_dests[start..end])
232
276
  }
233
277
 
234
278
  fn incoming(&self, node: NodeId) -> impl Iterator<Item = NodeId> {
279
+ let incoming = self.incoming();
235
280
  let i = node as usize;
236
- let start = self.incoming_offsets[i];
237
- let end = self.incoming_offsets[i + 1];
238
- self.incoming_srcs[start..end].iter().copied()
281
+ let start = incoming.offsets[i] as usize;
282
+ let end = incoming.offsets[i + 1] as usize;
283
+ incoming.srcs[start..end].iter().copied()
239
284
  }
240
285
 
241
286
  fn internal_node(&self, external: GraphId<'_>) -> Option<NodeId> {
@@ -251,25 +296,61 @@ impl GraphRepo for Repo {
251
296
  }
252
297
 
253
298
  fn out_degree(&self, node: NodeId) -> usize {
254
- self.out_degrees[node as usize]
299
+ let i = node as usize;
300
+ (self.csr_offsets[i + 1] - self.csr_offsets[i]) as usize
255
301
  }
256
302
 
257
303
  fn in_degree(&self, node: NodeId) -> usize {
258
- self.in_degrees[node as usize]
304
+ let incoming = self.incoming();
305
+ let i = node as usize;
306
+ (incoming.offsets[i + 1] - incoming.offsets[i]) as usize
259
307
  }
260
308
  }
261
309
 
262
310
  impl Repo {
311
+ /// Returns the reverse-adjacency CSR, building it on first use.
312
+ fn incoming(&self) -> &IncomingCsr {
313
+ self.incoming.get_or_init(|| {
314
+ let incoming_edges = self
315
+ .edge_endpoints
316
+ .iter()
317
+ .map(|&(src, dest)| (dest, src))
318
+ .collect::<Vec<_>>();
319
+ let Csr { offsets, dests, .. } = build_csr(self.nodes.num_rows(), &incoming_edges)
320
+ .expect("incoming CSR has the same edge count as the forward CSR");
321
+ IncomingCsr {
322
+ offsets,
323
+ srcs: dests,
324
+ }
325
+ })
326
+ }
327
+
263
328
  pub(crate) fn out_degrees(&self) -> Vec<usize> {
264
- self.out_degrees.clone()
329
+ self.out_degrees
330
+ .get_or_init(|| degrees_from_offsets(&self.csr_offsets))
331
+ .clone()
265
332
  }
266
333
 
267
334
  pub(crate) fn in_degrees(&self) -> Vec<usize> {
268
- self.in_degrees.clone()
335
+ self.in_degrees
336
+ .get_or_init(|| degrees_from_offsets(&self.incoming().offsets))
337
+ .clone()
269
338
  }
270
339
 
271
340
  pub(crate) fn degrees(&self) -> Vec<usize> {
272
- self.degrees.clone()
341
+ self.degrees.get_or_init(|| self.compute_degrees()).clone()
342
+ }
343
+
344
+ fn compute_degrees(&self) -> Vec<usize> {
345
+ let out = &self.csr_offsets;
346
+ let incoming = &self.incoming().offsets;
347
+ (0..self.nodes.num_rows())
348
+ .map(|i| {
349
+ let out_deg = (out[i + 1] - out[i]) as usize;
350
+ let in_deg = (incoming[i + 1] - incoming[i]) as usize;
351
+ out_deg + in_deg
352
+ })
353
+ .collect()
273
354
  }
274
355
  }
275
356
 
@@ -285,23 +366,6 @@ impl Repo {
285
366
  edge_ids,
286
367
  dests: csr_dests,
287
368
  } = build_csr(nodes.num_rows(), &edge_endpoints).context("failed to construct CSR")?;
288
- let incoming_edges = edge_endpoints
289
- .iter()
290
- .map(|&(src, dest)| (dest, src))
291
- .collect::<Vec<_>>();
292
- let Csr {
293
- offsets: incoming_offsets,
294
- dests: incoming_srcs,
295
- ..
296
- } = build_csr(nodes.num_rows(), &incoming_edges)
297
- .context("failed to construct incoming CSR")?;
298
- let out_degrees = degrees_from_offsets(&csr_offsets);
299
- let in_degrees = degrees_from_offsets(&incoming_offsets);
300
- let degrees = out_degrees
301
- .iter()
302
- .zip(&in_degrees)
303
- .map(|(out, incoming)| out + incoming)
304
- .collect();
305
369
 
306
370
  Ok(Self {
307
371
  nodes,
@@ -309,18 +373,21 @@ impl Repo {
309
373
  csr_offsets,
310
374
  csr_dests,
311
375
  edge_ids,
312
- incoming_offsets,
313
- incoming_srcs,
314
- out_degrees,
315
- in_degrees,
316
- degrees,
376
+ incoming: OnceLock::new(),
377
+ edge_endpoints,
317
378
  identity,
379
+ out_degrees: OnceLock::new(),
380
+ in_degrees: OnceLock::new(),
381
+ degrees: OnceLock::new(),
318
382
  })
319
383
  }
320
384
  }
321
385
 
322
- fn degrees_from_offsets(offsets: &[usize]) -> Vec<usize> {
323
- offsets.windows(2).map(|pair| pair[1] - pair[0]).collect()
386
+ fn degrees_from_offsets(offsets: &[Offset]) -> Vec<usize> {
387
+ offsets
388
+ .windows(2)
389
+ .map(|pair| (pair[1] - pair[0]) as usize)
390
+ .collect()
324
391
  }
325
392
 
326
393
  struct Preprocessed {
@@ -688,4 +755,56 @@ mod tests {
688
755
  .contains("missing dest")
689
756
  );
690
757
  }
758
+
759
+ #[test]
760
+ fn set_payloads_swaps_columns_and_keeps_topology() {
761
+ let nodes = record_batch!((ID_COL, UInt64, [0, 1, 2])).unwrap();
762
+ let edges = record_batch!(
763
+ (ID_COL, UInt64, [0, 1]),
764
+ (EDGE_SRC_COL, UInt64, [0, 1]),
765
+ (EDGE_DEST_COL, UInt64, [1, 2])
766
+ )
767
+ .unwrap();
768
+ let mut repo = Repo::from_tables(nodes, edges).unwrap();
769
+
770
+ // Project to a different set of payload columns (same row counts).
771
+ let new_nodes =
772
+ record_batch!((ID_COL, UInt64, [0, 1, 2]), ("score", Int64, [10, 20, 30])).unwrap();
773
+ let new_edges = record_batch!(
774
+ (ID_COL, UInt64, [0, 1]),
775
+ (EDGE_SRC_COL, UInt64, [0, 1]),
776
+ (EDGE_DEST_COL, UInt64, [1, 2])
777
+ )
778
+ .unwrap();
779
+ repo.set_payloads(new_nodes, new_edges).unwrap();
780
+
781
+ // Topology is unchanged after the swap.
782
+ assert_eq!(outgoing_for(&repo, GraphId::U64(0)), vec![GraphId::U64(1)]);
783
+ assert!(repo.nodes.column_by_name("score").is_some());
784
+ }
785
+
786
+ #[test]
787
+ fn set_payloads_rejects_row_count_mismatch() {
788
+ let nodes = record_batch!((ID_COL, UInt64, [0, 1, 2])).unwrap();
789
+ let edges = record_batch!(
790
+ (ID_COL, UInt64, [0]),
791
+ (EDGE_SRC_COL, UInt64, [0]),
792
+ (EDGE_DEST_COL, UInt64, [1])
793
+ )
794
+ .unwrap();
795
+ let mut repo = Repo::from_tables(nodes, edges).unwrap();
796
+
797
+ let bad_nodes = record_batch!((ID_COL, UInt64, [0, 1])).unwrap();
798
+ let same_edges = record_batch!(
799
+ (ID_COL, UInt64, [0]),
800
+ (EDGE_SRC_COL, UInt64, [0]),
801
+ (EDGE_DEST_COL, UInt64, [1])
802
+ )
803
+ .unwrap();
804
+ let err = repo
805
+ .set_payloads(bad_nodes, same_edges)
806
+ .unwrap_err()
807
+ .to_string();
808
+ assert!(err.contains("projected nodes table has 2 rows"));
809
+ }
691
810
  }
@@ -12,6 +12,7 @@ use crate::{
12
12
  traversal::{
13
13
  GraphPath, SearchResult, SearchStats,
14
14
  config::{TraversalConfig, TraversalStrategy},
15
+ progress::Progress,
15
16
  },
16
17
  };
17
18
 
@@ -36,6 +37,7 @@ impl Graph {
36
37
  max_revisits_per_node,
37
38
  parallel,
38
39
  intermediate_states,
40
+ progress,
39
41
  } = config;
40
42
  let kernel = kernel.bind(self)?;
41
43
  let cfg = RunConfig {
@@ -45,6 +47,7 @@ impl Graph {
45
47
  strategy,
46
48
  max_revisits_per_node,
47
49
  intermediate_states,
50
+ progress,
48
51
  };
49
52
 
50
53
  match (parallel, strategy) {
@@ -65,6 +68,7 @@ struct RunConfig {
65
68
  strategy: TraversalStrategy,
66
69
  max_revisits_per_node: usize,
67
70
  intermediate_states: bool,
71
+ progress: bool,
68
72
  }
69
73
 
70
74
  #[derive(Debug, Clone)]
@@ -104,8 +108,10 @@ fn search_serial<'a>(
104
108
  ) -> Result<SearchResult<'a>> {
105
109
  let (mut arena, mut frontier, mut stats) = initial_arena(graph, cfg, kernel)?;
106
110
  let mut paths = Vec::new();
111
+ let mut progress = Progress::new(cfg.progress);
107
112
 
108
113
  while let Some(parent) = pop(&mut frontier, cfg.strategy) {
114
+ progress.tick(&stats);
109
115
  if arena[parent].depth >= cfg.max_depth {
110
116
  continue;
111
117
  }
@@ -128,6 +134,7 @@ fn search_serial<'a>(
128
134
  paths.push(materialize(graph, &arena, child, cfg, kernel)?);
129
135
  stats.stopped_paths += 1;
130
136
  if cfg.max_paths.is_some_and(|max| paths.len() >= max) {
137
+ progress.finish(&stats);
131
138
  return Ok(SearchResult { paths, stats });
132
139
  }
133
140
  } else {
@@ -136,6 +143,7 @@ fn search_serial<'a>(
136
143
  }
137
144
  }
138
145
 
146
+ progress.finish(&stats);
139
147
  Ok(SearchResult { paths, stats })
140
148
  }
141
149
 
@@ -147,8 +155,10 @@ fn search_bfs_parallel<'a>(
147
155
  let (mut arena, frontier, mut stats) = initial_arena(graph, cfg, kernel)?;
148
156
  let mut frontier = frontier.into_iter().collect::<Vec<_>>();
149
157
  let mut paths = Vec::new();
158
+ let mut progress = Progress::new(cfg.progress);
150
159
 
151
160
  while !frontier.is_empty() {
161
+ progress.tick(&stats);
152
162
  let edge_count = frontier
153
163
  .iter()
154
164
  .map(|&p| graph.repo.out_degree(arena[p].node))
@@ -187,11 +197,13 @@ fn search_bfs_parallel<'a>(
187
197
  && paths.len() >= max
188
198
  {
189
199
  paths.truncate(max);
200
+ progress.finish(&stats);
190
201
  return Ok(SearchResult { paths, stats });
191
202
  }
192
203
  frontier = next;
193
204
  }
194
205
 
206
+ progress.finish(&stats);
195
207
  Ok(SearchResult { paths, stats })
196
208
  }
197
209
 
@@ -202,12 +214,16 @@ fn search_dfs_parallel<'a>(
202
214
  ) -> Result<SearchResult<'a>> {
203
215
  let (queue, mut stats) = initial_tasks(graph, cfg, kernel)?;
204
216
  let mut seed_paths = Vec::new();
217
+ let mut progress = Progress::new(cfg.progress);
218
+ progress.tick(&stats);
205
219
  let seeds = build_dfs_seeds(graph, cfg, kernel, queue, &mut seed_paths, &mut stats)?;
220
+ progress.tick(&stats);
206
221
 
207
222
  if let Some(max) = cfg.max_paths
208
223
  && seed_paths.len() >= max
209
224
  {
210
225
  seed_paths.truncate(max);
226
+ progress.finish(&stats);
211
227
  return Ok(SearchResult {
212
228
  paths: seed_paths,
213
229
  stats,
@@ -235,6 +251,7 @@ fn search_dfs_parallel<'a>(
235
251
  if let Some(max) = cfg.max_paths {
236
252
  paths.truncate(max);
237
253
  }
254
+ progress.finish(&stats);
238
255
  Ok(SearchResult { paths, stats })
239
256
  }
240
257