rxgraph 0.2.0__tar.gz → 0.3.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rxgraph-0.2.0 → rxgraph-0.3.0}/Cargo.lock +2 -2
- {rxgraph-0.2.0 → rxgraph-0.3.0}/PKG-INFO +1 -1
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/Cargo.toml +5 -1
- rxgraph-0.3.0/crates/rxgraph/benches/memory.rs +121 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/csr.rs +16 -5
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/graph.rs +13 -2
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/repo.rs +165 -46
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/traversal/algo.rs +17 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/traversal/config.rs +15 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/traversal/mod.rs +1 -0
- rxgraph-0.3.0/crates/rxgraph/src/traversal/progress.rs +160 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph-python/Cargo.toml +1 -1
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph-python/src/lib.rs +20 -2
- {rxgraph-0.2.0 → rxgraph-0.3.0}/pyproject.toml +1 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/__init__.py +172 -24
- {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/__init__.pyi +14 -1
- {rxgraph-0.2.0 → rxgraph-0.3.0}/Cargo.toml +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/README.md +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/README.md +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/benches/flight_routes.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/benches/payment_risk.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/examples/flight_routes.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/arrow.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/arrow_value.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/bind.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/eval.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/expr.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/mod.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/list.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/mod.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/scalar.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/string.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/ops/struct_.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/polars_json.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/dsl/value.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/graph/mod.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/crates/rxgraph/src/lib.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/_graph_tables.py +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.0}/python/rxgraph/py.typed +0 -0
|
@@ -1293,7 +1293,7 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
|
|
1293
1293
|
|
|
1294
1294
|
[[package]]
|
|
1295
1295
|
name = "rxgraph"
|
|
1296
|
-
version = "0.2.0"
|
|
1296
|
+
version = "0.3.0"
|
|
1297
1297
|
dependencies = [
|
|
1298
1298
|
"anyhow",
|
|
1299
1299
|
"arrow",
|
|
@@ -1311,7 +1311,7 @@ dependencies = [
|
|
|
1311
1311
|
|
|
1312
1312
|
[[package]]
|
|
1313
1313
|
name = "rxgraph-python"
|
|
1314
|
-
version = "0.2.0"
|
|
1314
|
+
version = "0.3.0"
|
|
1315
1315
|
dependencies = [
|
|
1316
1316
|
"anyhow",
|
|
1317
1317
|
"arrow",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "rxgraph"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.3.0"
|
|
4
4
|
edition.workspace = true
|
|
5
5
|
rust-version.workspace = true
|
|
6
6
|
description = "High-performance graph traversal engine"
|
|
@@ -40,3 +40,7 @@ harness = false
|
|
|
40
40
|
[[bench]]
|
|
41
41
|
name = "flight_routes"
|
|
42
42
|
harness = false
|
|
43
|
+
|
|
44
|
+
[[bench]]
|
|
45
|
+
name = "memory"
|
|
46
|
+
harness = false
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
//! Memory benchmarking for graph construction, holding, and from-source search.
|
|
2
|
+
//!
|
|
3
|
+
//! Unlike the criterion benches (which measure time), this binary uses `stats_alloc` to
|
|
4
|
+
//! report allocation deltas at each stage. Run with:
|
|
5
|
+
//!
|
|
6
|
+
//! ```sh
|
|
7
|
+
//! cargo bench -p rxgraph --bench memory
|
|
8
|
+
//! ```
|
|
9
|
+
//!
|
|
10
|
+
//! It builds a large, sparse graph where only a small subset is reachable from the source,
|
|
11
|
+
//! and reports bytes allocated/RSS.
|
|
12
|
+
|
|
13
|
+
use std::{alloc::System, hint::black_box, sync::Arc, time::Instant};
|
|
14
|
+
|
|
15
|
+
use arrow::{
|
|
16
|
+
array::{ArrayRef, UInt64Array},
|
|
17
|
+
datatypes::{DataType, Field, Schema},
|
|
18
|
+
record_batch::RecordBatch,
|
|
19
|
+
};
|
|
20
|
+
use rxgraph::Graph;
|
|
21
|
+
use stats_alloc::{INSTRUMENTED_SYSTEM, Region, StatsAlloc};
|
|
22
|
+
|
|
23
|
+
#[global_allocator]
|
|
24
|
+
static GLOBAL: &StatsAlloc<System> = &INSTRUMENTED_SYSTEM;
|
|
25
|
+
|
|
26
|
+
/// Number of nodes in the synthetic graph.
|
|
27
|
+
const NODES: u64 = 5_000_000;
|
|
28
|
+
/// Length of the single reachable chain from node 0 (the "working set").
|
|
29
|
+
const REACHABLE_CHAIN: u64 = 5_000;
|
|
30
|
+
|
|
31
|
+
fn batch(fields: Vec<Field>, columns: Vec<ArrayRef>) -> RecordBatch {
|
|
32
|
+
RecordBatch::try_new(Arc::new(Schema::new(fields)), columns).unwrap()
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/// Builds a graph with `NODES` contiguous u64 node ids and a single linear chain of
|
|
36
|
+
/// `REACHABLE_CHAIN` edges from node 0. Everything past the chain is unreachable, so a
|
|
37
|
+
/// search from node 0 only ever needs a tiny working set.
|
|
38
|
+
fn tables() -> (RecordBatch, RecordBatch) {
|
|
39
|
+
let node_ids: Vec<u64> = (0..NODES).collect();
|
|
40
|
+
let nodes = batch(
|
|
41
|
+
vec![Field::new("id", DataType::UInt64, false)],
|
|
42
|
+
vec![Arc::new(UInt64Array::from(node_ids)) as ArrayRef],
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
let edge_count = REACHABLE_CHAIN;
|
|
46
|
+
let edge_ids: Vec<u64> = (0..edge_count).collect();
|
|
47
|
+
let srcs: Vec<u64> = (0..edge_count).collect();
|
|
48
|
+
let dests: Vec<u64> = (1..=edge_count).collect();
|
|
49
|
+
let edges = batch(
|
|
50
|
+
vec![
|
|
51
|
+
Field::new("id", DataType::UInt64, false),
|
|
52
|
+
Field::new("src", DataType::UInt64, false),
|
|
53
|
+
Field::new("dest", DataType::UInt64, false),
|
|
54
|
+
],
|
|
55
|
+
vec![
|
|
56
|
+
Arc::new(UInt64Array::from(edge_ids)) as ArrayRef,
|
|
57
|
+
Arc::new(UInt64Array::from(srcs)),
|
|
58
|
+
Arc::new(UInt64Array::from(dests)),
|
|
59
|
+
],
|
|
60
|
+
);
|
|
61
|
+
|
|
62
|
+
(nodes, edges)
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
fn mib(bytes: isize) -> f64 {
|
|
66
|
+
bytes as f64 / (1024.0 * 1024.0)
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/// Runs the received callback inside an allocation-tracking region and reports memory use.
|
|
70
|
+
///
|
|
71
|
+
/// Two distinct numbers are reported:
|
|
72
|
+
/// - `retained`: net bytes still held after the call returns (allocated minus freed). This
|
|
73
|
+
/// is what grows the resident set. A value that escapes the closure (e.g. the graph)
|
|
74
|
+
/// counts here; a result that is dropped inside the closure does not.
|
|
75
|
+
/// - `churn`: total bytes allocated during the call, regardless of whether they were freed
|
|
76
|
+
/// again before returning. High churn with ~zero retained means lots of short-lived
|
|
77
|
+
/// allocations (e.g. WCC building one Vec per component, then handing it back and dropping
|
|
78
|
+
/// it), not a memory leak.
|
|
79
|
+
fn measure<T>(label: &str, f: impl FnOnce() -> T) -> T {
|
|
80
|
+
let region = Region::new(GLOBAL);
|
|
81
|
+
let started = Instant::now();
|
|
82
|
+
let value = f();
|
|
83
|
+
let stats = region.change();
|
|
84
|
+
let elapsed = started.elapsed();
|
|
85
|
+
let retained = stats.bytes_allocated as isize - stats.bytes_deallocated as isize;
|
|
86
|
+
eprintln!(
|
|
87
|
+
"{label:<28} retained={:>9.2} MiB churn={:>8.2} MiB in {:<8} allocs (freed={:.2} MiB) {elapsed:?}",
|
|
88
|
+
mib(retained),
|
|
89
|
+
mib(stats.bytes_allocated as isize),
|
|
90
|
+
stats.allocations,
|
|
91
|
+
mib(stats.bytes_deallocated as isize),
|
|
92
|
+
);
|
|
93
|
+
value
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
fn main() {
|
|
97
|
+
eprintln!("memory profile: nodes={NODES} reachable_chain={REACHABLE_CHAIN}\n");
|
|
98
|
+
|
|
99
|
+
let (nodes, edges) = tables();
|
|
100
|
+
|
|
101
|
+
// Construction: forward CSR + identity only. Reverse CSR is NOT built here (lazy).
|
|
102
|
+
let graph = measure("construct", || Graph::new(nodes, edges).unwrap());
|
|
103
|
+
|
|
104
|
+
// Forward-only BFS from node 0: touches only the reachable chain.
|
|
105
|
+
measure("bfs_from_source", || {
|
|
106
|
+
black_box(graph.bfs_u64(0, None).unwrap());
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
// First degree query forces the lazy reverse CSR to materialize.
|
|
110
|
+
measure("in_degrees (builds rev CSR)", || {
|
|
111
|
+
black_box(graph.in_degrees());
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
// Subsequent reverse-adjacency use is free (cached).
|
|
115
|
+
measure("weakly_connected (rev cached)", || {
|
|
116
|
+
black_box(graph.weakly_connected_components_u64());
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// Keep the graph alive so its footprint is attributed to the stages above.
|
|
120
|
+
black_box(&graph);
|
|
121
|
+
}
|
|
@@ -1,17 +1,29 @@
|
|
|
1
|
-
use anyhow::Result;
|
|
1
|
+
use anyhow::{Result, bail};
|
|
2
2
|
|
|
3
3
|
use super::repo::{EdgeId, NodeId};
|
|
4
4
|
|
|
5
|
+
/// Type used for CSR row offsets. `u32` keeps per-node overhead small; edge counts are
|
|
6
|
+
/// already bounded by `EdgeId = u32`, so offsets cannot exceed `u32::MAX`.
|
|
7
|
+
pub(crate) type Offset = u32;
|
|
8
|
+
|
|
5
9
|
#[derive(Debug, Clone, PartialEq, Eq)]
|
|
6
10
|
pub(crate) struct Csr {
|
|
7
|
-
pub(crate) offsets: Vec<usize>,
|
|
11
|
+
pub(crate) offsets: Vec<Offset>,
|
|
8
12
|
pub(crate) edge_ids: Vec<EdgeId>,
|
|
9
13
|
pub(crate) dests: Vec<NodeId>,
|
|
10
14
|
}
|
|
11
15
|
|
|
12
16
|
/// Constructs a CSR (Compressed Sparse Row) data structure for outgoing edges.
|
|
13
17
|
pub(crate) fn build_csr(node_count: usize, edges: &[(NodeId, NodeId)]) -> Result<Csr> {
|
|
14
|
-
|
|
18
|
+
if edges.len() > Offset::MAX as usize {
|
|
19
|
+
bail!(
|
|
20
|
+
"too many edges for u32 CSR offsets ({} > {})",
|
|
21
|
+
edges.len(),
|
|
22
|
+
Offset::MAX
|
|
23
|
+
);
|
|
24
|
+
}
|
|
25
|
+
|
|
26
|
+
let mut offsets = vec![0 as Offset; node_count + 1];
|
|
15
27
|
|
|
16
28
|
for &(src, _dest) in edges {
|
|
17
29
|
offsets[src as usize + 1] += 1;
|
|
@@ -26,8 +38,7 @@ pub(crate) fn build_csr(node_count: usize, edges: &[(NodeId, NodeId)]) -> Result
|
|
|
26
38
|
let mut cursor = offsets.clone();
|
|
27
39
|
|
|
28
40
|
for (edge_id, &(src, dest)) in edges.iter().enumerate() {
|
|
29
|
-
let pos = cursor[src as usize];
|
|
30
|
-
// TODO: Check?
|
|
41
|
+
let pos = cursor[src as usize] as usize;
|
|
31
42
|
edge_ids[pos] = edge_id as EdgeId;
|
|
32
43
|
dests[pos] = dest;
|
|
33
44
|
cursor[src as usize] += 1;
|
|
@@ -35,6 +35,11 @@ impl Graph {
|
|
|
35
35
|
})
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
/// Replaces the payload (attribute) tables, reusing the existing topology.
|
|
39
|
+
pub fn set_payloads(&mut self, nodes: RecordBatch, edges: RecordBatch) -> Result<()> {
|
|
40
|
+
self.repo.set_payloads(nodes, edges)
|
|
41
|
+
}
|
|
42
|
+
|
|
38
43
|
/// Number of node rows.
|
|
39
44
|
pub fn node_count(&self) -> usize {
|
|
40
45
|
self.repo.nodes.num_rows()
|
|
@@ -211,6 +216,8 @@ impl Graph {
|
|
|
211
216
|
pub fn weakly_connected_components(&self) -> Vec<Vec<GraphId<'_>>> {
|
|
212
217
|
let mut visited = vec![0u8; self.node_count()];
|
|
213
218
|
let mut components = Vec::new();
|
|
219
|
+
// Reused across components to reduce allocations.
|
|
220
|
+
let mut frontier = Vec::new();
|
|
214
221
|
|
|
215
222
|
for start in 0..self.node_count() {
|
|
216
223
|
if visited[start] != 0 {
|
|
@@ -218,7 +225,8 @@ impl Graph {
|
|
|
218
225
|
}
|
|
219
226
|
|
|
220
227
|
let mut component = Vec::new();
|
|
221
|
-
|
|
228
|
+
frontier.clear();
|
|
229
|
+
frontier.push(start as NodeId);
|
|
222
230
|
let mut head = 0;
|
|
223
231
|
visited[start] = 1;
|
|
224
232
|
|
|
@@ -253,6 +261,8 @@ impl Graph {
|
|
|
253
261
|
pub fn weakly_connected_components_u64(&self) -> Option<Vec<Vec<u64>>> {
|
|
254
262
|
let mut visited = vec![0u8; self.node_count()];
|
|
255
263
|
let mut components = Vec::new();
|
|
264
|
+
// Reused across components to reduce allocations.
|
|
265
|
+
let mut frontier = Vec::new();
|
|
256
266
|
|
|
257
267
|
for start in 0..self.node_count() {
|
|
258
268
|
if visited[start] != 0 {
|
|
@@ -260,7 +270,8 @@ impl Graph {
|
|
|
260
270
|
}
|
|
261
271
|
|
|
262
272
|
let mut component = Vec::new();
|
|
263
|
-
|
|
273
|
+
frontier.clear();
|
|
274
|
+
frontier.push(start as NodeId);
|
|
264
275
|
let mut head = 0;
|
|
265
276
|
visited[start] = 1;
|
|
266
277
|
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use std::{collections::HashMap, fmt};
|
|
1
|
+
use std::{collections::HashMap, fmt, sync::OnceLock};
|
|
2
2
|
|
|
3
3
|
use anyhow::{Context, Result, bail};
|
|
4
4
|
use arrow::array::{
|
|
@@ -8,13 +8,13 @@ use arrow_schema::DataType;
|
|
|
8
8
|
|
|
9
9
|
use crate::{
|
|
10
10
|
arrow::validate_field_exists,
|
|
11
|
-
graph::csr::{Csr, build_csr},
|
|
11
|
+
graph::csr::{Csr, Offset, build_csr},
|
|
12
12
|
};
|
|
13
13
|
|
|
14
|
-
/// Compact internal node identifier used
|
|
14
|
+
/// Compact internal node identifier used for traversal.
|
|
15
15
|
pub type NodeId = u32;
|
|
16
16
|
|
|
17
|
-
/// Compact internal edge identifier used
|
|
17
|
+
/// Compact internal edge identifier used for traversal.
|
|
18
18
|
pub type EdgeId = u32;
|
|
19
19
|
|
|
20
20
|
pub const ID_COL: &str = "id";
|
|
@@ -100,17 +100,34 @@ pub trait GraphRepo {
|
|
|
100
100
|
|
|
101
101
|
#[derive(Debug)]
|
|
102
102
|
pub(crate) struct Repo {
|
|
103
|
-
csr_offsets: Vec<usize>,
|
|
103
|
+
csr_offsets: Vec<Offset>,
|
|
104
104
|
csr_dests: Vec<NodeId>,
|
|
105
105
|
edge_ids: Vec<EdgeId>,
|
|
106
|
-
|
|
107
|
-
incoming_srcs: Vec<NodeId>,
|
|
108
|
-
out_degrees: Vec<usize>,
|
|
109
|
-
in_degrees: Vec<usize>,
|
|
110
|
-
degrees: Vec<usize>,
|
|
106
|
+
|
|
111
107
|
identity: Identity,
|
|
112
108
|
pub nodes: RecordBatch,
|
|
113
109
|
pub edges: RecordBatch,
|
|
110
|
+
|
|
111
|
+
/// Reverse adjacency (incoming edges).
|
|
112
|
+
/// Used for optimization - only some searches require it and it's built lazily on first use
|
|
113
|
+
/// to keep construction memory and time low (and proportional) for forward-only workloads
|
|
114
|
+
/// (like BFS, as opposed to WCC or degrees).
|
|
115
|
+
incoming: OnceLock<IncomingCsr>,
|
|
116
|
+
/// Endpoints retained to build the reverse CSR lazily without re-reading Arrow columns.
|
|
117
|
+
edge_endpoints: Vec<(NodeId, NodeId)>,
|
|
118
|
+
|
|
119
|
+
/// Degree vectors, built only on the first whole-graph degree query and cached afterwards.
|
|
120
|
+
/// Search-only workloads never touch these, so construction stays cheap;
|
|
121
|
+
/// degree-heavy workloads pay the O(n) build once instead of on every call.
|
|
122
|
+
out_degrees: OnceLock<Vec<usize>>,
|
|
123
|
+
in_degrees: OnceLock<Vec<usize>>,
|
|
124
|
+
degrees: OnceLock<Vec<usize>>,
|
|
125
|
+
}
|
|
126
|
+
|
|
127
|
+
#[derive(Debug)]
|
|
128
|
+
struct IncomingCsr {
|
|
129
|
+
offsets: Vec<Offset>,
|
|
130
|
+
srcs: Vec<NodeId>,
|
|
114
131
|
}
|
|
115
132
|
|
|
116
133
|
#[derive(Debug)]
|
|
@@ -203,6 +220,33 @@ impl Repo {
|
|
|
203
220
|
self.identity.is_contiguous_u64()
|
|
204
221
|
}
|
|
205
222
|
|
|
223
|
+
/// Replaces the payload (attribute) tables without rebuilding topology.
|
|
224
|
+
///
|
|
225
|
+
/// Used by lazy graphs to swap in column-projected payload batches for a single search.
|
|
226
|
+
/// The new batches must keep the original row order and count: DSL column reads index
|
|
227
|
+
/// payload arrays by internal node/edge ID, which equals the Arrow row position.
|
|
228
|
+
/// Identity (`id`/`src`/`dest`) is resolved from the precomputed mapping, not these
|
|
229
|
+
/// batches, so the projected batches only need the columns the kernel references.
|
|
230
|
+
pub(crate) fn set_payloads(&mut self, nodes: RecordBatch, edges: RecordBatch) -> Result<()> {
|
|
231
|
+
if nodes.num_rows() != self.nodes.num_rows() {
|
|
232
|
+
bail!(
|
|
233
|
+
"projected nodes table has {} rows but topology expects {}",
|
|
234
|
+
nodes.num_rows(),
|
|
235
|
+
self.nodes.num_rows()
|
|
236
|
+
);
|
|
237
|
+
}
|
|
238
|
+
if edges.num_rows() != self.edges.num_rows() {
|
|
239
|
+
bail!(
|
|
240
|
+
"projected edges table has {} rows but topology expects {}",
|
|
241
|
+
edges.num_rows(),
|
|
242
|
+
self.edges.num_rows()
|
|
243
|
+
);
|
|
244
|
+
}
|
|
245
|
+
self.nodes = nodes;
|
|
246
|
+
self.edges = edges;
|
|
247
|
+
Ok(())
|
|
248
|
+
}
|
|
249
|
+
|
|
206
250
|
pub(crate) fn internal_node_u64(&self, external: u64) -> Option<NodeId> {
|
|
207
251
|
self.identity.internal_node_u64(external)
|
|
208
252
|
}
|
|
@@ -215,8 +259,8 @@ impl Repo {
|
|
|
215
259
|
impl GraphRepo for Repo {
|
|
216
260
|
fn outgoing(&self, node: NodeId) -> impl Iterator<Item = (EdgeId, NodeId)> {
|
|
217
261
|
let i = node as usize;
|
|
218
|
-
let start = self.csr_offsets[i];
|
|
219
|
-
let end = self.csr_offsets[i + 1];
|
|
262
|
+
let start = self.csr_offsets[i] as usize;
|
|
263
|
+
let end = self.csr_offsets[i + 1] as usize;
|
|
220
264
|
|
|
221
265
|
self.edge_ids[start..end]
|
|
222
266
|
.iter()
|
|
@@ -226,16 +270,17 @@ impl GraphRepo for Repo {
|
|
|
226
270
|
|
|
227
271
|
fn outgoing_slice(&self, node: NodeId) -> (&[EdgeId], &[NodeId]) {
|
|
228
272
|
let i = node as usize;
|
|
229
|
-
let start = self.csr_offsets[i];
|
|
230
|
-
let end = self.csr_offsets[i + 1];
|
|
273
|
+
let start = self.csr_offsets[i] as usize;
|
|
274
|
+
let end = self.csr_offsets[i + 1] as usize;
|
|
231
275
|
(&self.edge_ids[start..end], &self.csr_dests[start..end])
|
|
232
276
|
}
|
|
233
277
|
|
|
234
278
|
fn incoming(&self, node: NodeId) -> impl Iterator<Item = NodeId> {
|
|
279
|
+
let incoming = self.incoming();
|
|
235
280
|
let i = node as usize;
|
|
236
|
-
let start =
|
|
237
|
-
let end =
|
|
238
|
-
|
|
281
|
+
let start = incoming.offsets[i] as usize;
|
|
282
|
+
let end = incoming.offsets[i + 1] as usize;
|
|
283
|
+
incoming.srcs[start..end].iter().copied()
|
|
239
284
|
}
|
|
240
285
|
|
|
241
286
|
fn internal_node(&self, external: GraphId<'_>) -> Option<NodeId> {
|
|
@@ -251,25 +296,61 @@ impl GraphRepo for Repo {
|
|
|
251
296
|
}
|
|
252
297
|
|
|
253
298
|
fn out_degree(&self, node: NodeId) -> usize {
|
|
254
|
-
|
|
299
|
+
let i = node as usize;
|
|
300
|
+
(self.csr_offsets[i + 1] - self.csr_offsets[i]) as usize
|
|
255
301
|
}
|
|
256
302
|
|
|
257
303
|
fn in_degree(&self, node: NodeId) -> usize {
|
|
258
|
-
self.
|
|
304
|
+
let incoming = self.incoming();
|
|
305
|
+
let i = node as usize;
|
|
306
|
+
(incoming.offsets[i + 1] - incoming.offsets[i]) as usize
|
|
259
307
|
}
|
|
260
308
|
}
|
|
261
309
|
|
|
262
310
|
impl Repo {
|
|
311
|
+
/// Returns the reverse-adjacency CSR, building it on first use.
|
|
312
|
+
fn incoming(&self) -> &IncomingCsr {
|
|
313
|
+
self.incoming.get_or_init(|| {
|
|
314
|
+
let incoming_edges = self
|
|
315
|
+
.edge_endpoints
|
|
316
|
+
.iter()
|
|
317
|
+
.map(|&(src, dest)| (dest, src))
|
|
318
|
+
.collect::<Vec<_>>();
|
|
319
|
+
let Csr { offsets, dests, .. } = build_csr(self.nodes.num_rows(), &incoming_edges)
|
|
320
|
+
.expect("incoming CSR has the same edge count as the forward CSR");
|
|
321
|
+
IncomingCsr {
|
|
322
|
+
offsets,
|
|
323
|
+
srcs: dests,
|
|
324
|
+
}
|
|
325
|
+
})
|
|
326
|
+
}
|
|
327
|
+
|
|
263
328
|
pub(crate) fn out_degrees(&self) -> Vec<usize> {
|
|
264
|
-
self.out_degrees
|
|
329
|
+
self.out_degrees
|
|
330
|
+
.get_or_init(|| degrees_from_offsets(&self.csr_offsets))
|
|
331
|
+
.clone()
|
|
265
332
|
}
|
|
266
333
|
|
|
267
334
|
pub(crate) fn in_degrees(&self) -> Vec<usize> {
|
|
268
|
-
self.in_degrees
|
|
335
|
+
self.in_degrees
|
|
336
|
+
.get_or_init(|| degrees_from_offsets(&self.incoming().offsets))
|
|
337
|
+
.clone()
|
|
269
338
|
}
|
|
270
339
|
|
|
271
340
|
pub(crate) fn degrees(&self) -> Vec<usize> {
|
|
272
|
-
self.degrees.clone()
|
|
341
|
+
self.degrees.get_or_init(|| self.compute_degrees()).clone()
|
|
342
|
+
}
|
|
343
|
+
|
|
344
|
+
fn compute_degrees(&self) -> Vec<usize> {
|
|
345
|
+
let out = &self.csr_offsets;
|
|
346
|
+
let incoming = &self.incoming().offsets;
|
|
347
|
+
(0..self.nodes.num_rows())
|
|
348
|
+
.map(|i| {
|
|
349
|
+
let out_deg = (out[i + 1] - out[i]) as usize;
|
|
350
|
+
let in_deg = (incoming[i + 1] - incoming[i]) as usize;
|
|
351
|
+
out_deg + in_deg
|
|
352
|
+
})
|
|
353
|
+
.collect()
|
|
273
354
|
}
|
|
274
355
|
}
|
|
275
356
|
|
|
@@ -285,23 +366,6 @@ impl Repo {
|
|
|
285
366
|
edge_ids,
|
|
286
367
|
dests: csr_dests,
|
|
287
368
|
} = build_csr(nodes.num_rows(), &edge_endpoints).context("failed to construct CSR")?;
|
|
288
|
-
let incoming_edges = edge_endpoints
|
|
289
|
-
.iter()
|
|
290
|
-
.map(|&(src, dest)| (dest, src))
|
|
291
|
-
.collect::<Vec<_>>();
|
|
292
|
-
let Csr {
|
|
293
|
-
offsets: incoming_offsets,
|
|
294
|
-
dests: incoming_srcs,
|
|
295
|
-
..
|
|
296
|
-
} = build_csr(nodes.num_rows(), &incoming_edges)
|
|
297
|
-
.context("failed to construct incoming CSR")?;
|
|
298
|
-
let out_degrees = degrees_from_offsets(&csr_offsets);
|
|
299
|
-
let in_degrees = degrees_from_offsets(&incoming_offsets);
|
|
300
|
-
let degrees = out_degrees
|
|
301
|
-
.iter()
|
|
302
|
-
.zip(&in_degrees)
|
|
303
|
-
.map(|(out, incoming)| out + incoming)
|
|
304
|
-
.collect();
|
|
305
369
|
|
|
306
370
|
Ok(Self {
|
|
307
371
|
nodes,
|
|
@@ -309,18 +373,21 @@ impl Repo {
|
|
|
309
373
|
csr_offsets,
|
|
310
374
|
csr_dests,
|
|
311
375
|
edge_ids,
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
out_degrees,
|
|
315
|
-
in_degrees,
|
|
316
|
-
degrees,
|
|
376
|
+
incoming: OnceLock::new(),
|
|
377
|
+
edge_endpoints,
|
|
317
378
|
identity,
|
|
379
|
+
out_degrees: OnceLock::new(),
|
|
380
|
+
in_degrees: OnceLock::new(),
|
|
381
|
+
degrees: OnceLock::new(),
|
|
318
382
|
})
|
|
319
383
|
}
|
|
320
384
|
}
|
|
321
385
|
|
|
322
|
-
fn degrees_from_offsets(offsets: &[usize]) -> Vec<usize> {
|
|
323
|
-
offsets
|
|
386
|
+
fn degrees_from_offsets(offsets: &[Offset]) -> Vec<usize> {
|
|
387
|
+
offsets
|
|
388
|
+
.windows(2)
|
|
389
|
+
.map(|pair| (pair[1] - pair[0]) as usize)
|
|
390
|
+
.collect()
|
|
324
391
|
}
|
|
325
392
|
|
|
326
393
|
struct Preprocessed {
|
|
@@ -688,4 +755,56 @@ mod tests {
|
|
|
688
755
|
.contains("missing dest")
|
|
689
756
|
);
|
|
690
757
|
}
|
|
758
|
+
|
|
759
|
+
#[test]
|
|
760
|
+
fn set_payloads_swaps_columns_and_keeps_topology() {
|
|
761
|
+
let nodes = record_batch!((ID_COL, UInt64, [0, 1, 2])).unwrap();
|
|
762
|
+
let edges = record_batch!(
|
|
763
|
+
(ID_COL, UInt64, [0, 1]),
|
|
764
|
+
(EDGE_SRC_COL, UInt64, [0, 1]),
|
|
765
|
+
(EDGE_DEST_COL, UInt64, [1, 2])
|
|
766
|
+
)
|
|
767
|
+
.unwrap();
|
|
768
|
+
let mut repo = Repo::from_tables(nodes, edges).unwrap();
|
|
769
|
+
|
|
770
|
+
// Project to a different set of payload columns (same row counts).
|
|
771
|
+
let new_nodes =
|
|
772
|
+
record_batch!((ID_COL, UInt64, [0, 1, 2]), ("score", Int64, [10, 20, 30])).unwrap();
|
|
773
|
+
let new_edges = record_batch!(
|
|
774
|
+
(ID_COL, UInt64, [0, 1]),
|
|
775
|
+
(EDGE_SRC_COL, UInt64, [0, 1]),
|
|
776
|
+
(EDGE_DEST_COL, UInt64, [1, 2])
|
|
777
|
+
)
|
|
778
|
+
.unwrap();
|
|
779
|
+
repo.set_payloads(new_nodes, new_edges).unwrap();
|
|
780
|
+
|
|
781
|
+
// Topology is unchanged after the swap.
|
|
782
|
+
assert_eq!(outgoing_for(&repo, GraphId::U64(0)), vec![GraphId::U64(1)]);
|
|
783
|
+
assert!(repo.nodes.column_by_name("score").is_some());
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
#[test]
|
|
787
|
+
fn set_payloads_rejects_row_count_mismatch() {
|
|
788
|
+
let nodes = record_batch!((ID_COL, UInt64, [0, 1, 2])).unwrap();
|
|
789
|
+
let edges = record_batch!(
|
|
790
|
+
(ID_COL, UInt64, [0]),
|
|
791
|
+
(EDGE_SRC_COL, UInt64, [0]),
|
|
792
|
+
(EDGE_DEST_COL, UInt64, [1])
|
|
793
|
+
)
|
|
794
|
+
.unwrap();
|
|
795
|
+
let mut repo = Repo::from_tables(nodes, edges).unwrap();
|
|
796
|
+
|
|
797
|
+
let bad_nodes = record_batch!((ID_COL, UInt64, [0, 1])).unwrap();
|
|
798
|
+
let same_edges = record_batch!(
|
|
799
|
+
(ID_COL, UInt64, [0]),
|
|
800
|
+
(EDGE_SRC_COL, UInt64, [0]),
|
|
801
|
+
(EDGE_DEST_COL, UInt64, [1])
|
|
802
|
+
)
|
|
803
|
+
.unwrap();
|
|
804
|
+
let err = repo
|
|
805
|
+
.set_payloads(bad_nodes, same_edges)
|
|
806
|
+
.unwrap_err()
|
|
807
|
+
.to_string();
|
|
808
|
+
assert!(err.contains("projected nodes table has 2 rows"));
|
|
809
|
+
}
|
|
691
810
|
}
|
|
@@ -12,6 +12,7 @@ use crate::{
|
|
|
12
12
|
traversal::{
|
|
13
13
|
GraphPath, SearchResult, SearchStats,
|
|
14
14
|
config::{TraversalConfig, TraversalStrategy},
|
|
15
|
+
progress::Progress,
|
|
15
16
|
},
|
|
16
17
|
};
|
|
17
18
|
|
|
@@ -36,6 +37,7 @@ impl Graph {
|
|
|
36
37
|
max_revisits_per_node,
|
|
37
38
|
parallel,
|
|
38
39
|
intermediate_states,
|
|
40
|
+
progress,
|
|
39
41
|
} = config;
|
|
40
42
|
let kernel = kernel.bind(self)?;
|
|
41
43
|
let cfg = RunConfig {
|
|
@@ -45,6 +47,7 @@ impl Graph {
|
|
|
45
47
|
strategy,
|
|
46
48
|
max_revisits_per_node,
|
|
47
49
|
intermediate_states,
|
|
50
|
+
progress,
|
|
48
51
|
};
|
|
49
52
|
|
|
50
53
|
match (parallel, strategy) {
|
|
@@ -65,6 +68,7 @@ struct RunConfig {
|
|
|
65
68
|
strategy: TraversalStrategy,
|
|
66
69
|
max_revisits_per_node: usize,
|
|
67
70
|
intermediate_states: bool,
|
|
71
|
+
progress: bool,
|
|
68
72
|
}
|
|
69
73
|
|
|
70
74
|
#[derive(Debug, Clone)]
|
|
@@ -104,8 +108,10 @@ fn search_serial<'a>(
|
|
|
104
108
|
) -> Result<SearchResult<'a>> {
|
|
105
109
|
let (mut arena, mut frontier, mut stats) = initial_arena(graph, cfg, kernel)?;
|
|
106
110
|
let mut paths = Vec::new();
|
|
111
|
+
let mut progress = Progress::new(cfg.progress);
|
|
107
112
|
|
|
108
113
|
while let Some(parent) = pop(&mut frontier, cfg.strategy) {
|
|
114
|
+
progress.tick(&stats);
|
|
109
115
|
if arena[parent].depth >= cfg.max_depth {
|
|
110
116
|
continue;
|
|
111
117
|
}
|
|
@@ -128,6 +134,7 @@ fn search_serial<'a>(
|
|
|
128
134
|
paths.push(materialize(graph, &arena, child, cfg, kernel)?);
|
|
129
135
|
stats.stopped_paths += 1;
|
|
130
136
|
if cfg.max_paths.is_some_and(|max| paths.len() >= max) {
|
|
137
|
+
progress.finish(&stats);
|
|
131
138
|
return Ok(SearchResult { paths, stats });
|
|
132
139
|
}
|
|
133
140
|
} else {
|
|
@@ -136,6 +143,7 @@ fn search_serial<'a>(
|
|
|
136
143
|
}
|
|
137
144
|
}
|
|
138
145
|
|
|
146
|
+
progress.finish(&stats);
|
|
139
147
|
Ok(SearchResult { paths, stats })
|
|
140
148
|
}
|
|
141
149
|
|
|
@@ -147,8 +155,10 @@ fn search_bfs_parallel<'a>(
|
|
|
147
155
|
let (mut arena, frontier, mut stats) = initial_arena(graph, cfg, kernel)?;
|
|
148
156
|
let mut frontier = frontier.into_iter().collect::<Vec<_>>();
|
|
149
157
|
let mut paths = Vec::new();
|
|
158
|
+
let mut progress = Progress::new(cfg.progress);
|
|
150
159
|
|
|
151
160
|
while !frontier.is_empty() {
|
|
161
|
+
progress.tick(&stats);
|
|
152
162
|
let edge_count = frontier
|
|
153
163
|
.iter()
|
|
154
164
|
.map(|&p| graph.repo.out_degree(arena[p].node))
|
|
@@ -187,11 +197,13 @@ fn search_bfs_parallel<'a>(
|
|
|
187
197
|
&& paths.len() >= max
|
|
188
198
|
{
|
|
189
199
|
paths.truncate(max);
|
|
200
|
+
progress.finish(&stats);
|
|
190
201
|
return Ok(SearchResult { paths, stats });
|
|
191
202
|
}
|
|
192
203
|
frontier = next;
|
|
193
204
|
}
|
|
194
205
|
|
|
206
|
+
progress.finish(&stats);
|
|
195
207
|
Ok(SearchResult { paths, stats })
|
|
196
208
|
}
|
|
197
209
|
|
|
@@ -202,12 +214,16 @@ fn search_dfs_parallel<'a>(
|
|
|
202
214
|
) -> Result<SearchResult<'a>> {
|
|
203
215
|
let (queue, mut stats) = initial_tasks(graph, cfg, kernel)?;
|
|
204
216
|
let mut seed_paths = Vec::new();
|
|
217
|
+
let mut progress = Progress::new(cfg.progress);
|
|
218
|
+
progress.tick(&stats);
|
|
205
219
|
let seeds = build_dfs_seeds(graph, cfg, kernel, queue, &mut seed_paths, &mut stats)?;
|
|
220
|
+
progress.tick(&stats);
|
|
206
221
|
|
|
207
222
|
if let Some(max) = cfg.max_paths
|
|
208
223
|
&& seed_paths.len() >= max
|
|
209
224
|
{
|
|
210
225
|
seed_paths.truncate(max);
|
|
226
|
+
progress.finish(&stats);
|
|
211
227
|
return Ok(SearchResult {
|
|
212
228
|
paths: seed_paths,
|
|
213
229
|
stats,
|
|
@@ -235,6 +251,7 @@ fn search_dfs_parallel<'a>(
|
|
|
235
251
|
if let Some(max) = cfg.max_paths {
|
|
236
252
|
paths.truncate(max);
|
|
237
253
|
}
|
|
254
|
+
progress.finish(&stats);
|
|
238
255
|
Ok(SearchResult { paths, stats })
|
|
239
256
|
}
|
|
240
257
|
|