rxgraph 0.2.0__tar.gz → 0.3.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {rxgraph-0.2.0 → rxgraph-0.3.1}/Cargo.lock +2 -2
- {rxgraph-0.2.0 → rxgraph-0.3.1}/PKG-INFO +1 -1
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/Cargo.toml +5 -1
- rxgraph-0.3.1/crates/rxgraph/benches/memory.rs +121 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/arrow_value.rs +167 -2
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/bind.rs +40 -9
- rxgraph-0.3.1/crates/rxgraph/src/dsl/eval.rs +170 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/mod.rs +123 -1
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/ops/list.rs +139 -28
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/value.rs +30 -10
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/graph/csr.rs +16 -5
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/graph/graph.rs +250 -13
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/graph/mod.rs +2 -1
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/graph/repo.rs +247 -59
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/traversal/algo.rs +289 -57
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/traversal/config.rs +15 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/traversal/mod.rs +1 -0
- rxgraph-0.3.1/crates/rxgraph/src/traversal/progress.rs +160 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph-python/Cargo.toml +1 -1
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph-python/src/lib.rs +20 -2
- {rxgraph-0.2.0 → rxgraph-0.3.1}/pyproject.toml +1 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/python/rxgraph/__init__.py +176 -24
- {rxgraph-0.2.0 → rxgraph-0.3.1}/python/rxgraph/__init__.pyi +14 -1
- {rxgraph-0.2.0 → rxgraph-0.3.1}/python/rxgraph/_graph_tables.py +4 -10
- rxgraph-0.2.0/crates/rxgraph/src/dsl/eval.rs +0 -95
- {rxgraph-0.2.0 → rxgraph-0.3.1}/Cargo.toml +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/README.md +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/README.md +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/benches/flight_routes.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/benches/payment_risk.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/examples/flight_routes.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/arrow.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/expr.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/ops/mod.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/ops/scalar.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/ops/string.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/ops/struct_.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/dsl/polars_json.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/crates/rxgraph/src/lib.rs +0 -0
- {rxgraph-0.2.0 → rxgraph-0.3.1}/python/rxgraph/py.typed +0 -0
|
@@ -1293,7 +1293,7 @@ checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
|
|
|
1293
1293
|
|
|
1294
1294
|
[[package]]
|
|
1295
1295
|
name = "rxgraph"
|
|
1296
|
-
version = "0.2.0"
|
|
1296
|
+
version = "0.3.1"
|
|
1297
1297
|
dependencies = [
|
|
1298
1298
|
"anyhow",
|
|
1299
1299
|
"arrow",
|
|
@@ -1311,7 +1311,7 @@ dependencies = [
|
|
|
1311
1311
|
|
|
1312
1312
|
[[package]]
|
|
1313
1313
|
name = "rxgraph-python"
|
|
1314
|
-
version = "0.2.0"
|
|
1314
|
+
version = "0.3.1"
|
|
1315
1315
|
dependencies = [
|
|
1316
1316
|
"anyhow",
|
|
1317
1317
|
"arrow",
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
[package]
|
|
2
2
|
name = "rxgraph"
|
|
3
|
-
version = "0.2.0"
|
|
3
|
+
version = "0.3.1"
|
|
4
4
|
edition.workspace = true
|
|
5
5
|
rust-version.workspace = true
|
|
6
6
|
description = "High-performance graph traversal engine"
|
|
@@ -40,3 +40,7 @@ harness = false
|
|
|
40
40
|
[[bench]]
|
|
41
41
|
name = "flight_routes"
|
|
42
42
|
harness = false
|
|
43
|
+
|
|
44
|
+
[[bench]]
|
|
45
|
+
name = "memory"
|
|
46
|
+
harness = false
|
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
//! Memory benchmarking for graph construction, holding, and from-source search.
|
|
2
|
+
//!
|
|
3
|
+
//! Unlike the criterion benches (which measure time), this binary uses `stats_alloc` to
|
|
4
|
+
//! report allocation deltas at each stage. Run with:
|
|
5
|
+
//!
|
|
6
|
+
//! ```sh
|
|
7
|
+
//! cargo bench -p rxgraph --bench memory
|
|
8
|
+
//! ```
|
|
9
|
+
//!
|
|
10
|
+
//! It builds a large, sparse graph where only a small subset is reachable from the source,
|
|
11
|
+
//! and reports bytes allocated/RSS.
|
|
12
|
+
|
|
13
|
+
use std::{alloc::System, hint::black_box, sync::Arc, time::Instant};
|
|
14
|
+
|
|
15
|
+
use arrow::{
|
|
16
|
+
array::{ArrayRef, UInt64Array},
|
|
17
|
+
datatypes::{DataType, Field, Schema},
|
|
18
|
+
record_batch::RecordBatch,
|
|
19
|
+
};
|
|
20
|
+
use rxgraph::Graph;
|
|
21
|
+
use stats_alloc::{INSTRUMENTED_SYSTEM, Region, StatsAlloc};
|
|
22
|
+
|
|
23
|
+
#[global_allocator]
|
|
24
|
+
static GLOBAL: &StatsAlloc<System> = &INSTRUMENTED_SYSTEM;
|
|
25
|
+
|
|
26
|
+
/// Number of nodes in the synthetic graph.
|
|
27
|
+
const NODES: u64 = 5_000_000;
|
|
28
|
+
/// Length of the single reachable chain from node 0 (the "working set").
|
|
29
|
+
const REACHABLE_CHAIN: u64 = 5_000;
|
|
30
|
+
|
|
31
|
+
fn batch(fields: Vec<Field>, columns: Vec<ArrayRef>) -> RecordBatch {
|
|
32
|
+
RecordBatch::try_new(Arc::new(Schema::new(fields)), columns).unwrap()
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
/// Builds a graph with `NODES` contiguous u64 node ids and a single linear chain of
|
|
36
|
+
/// `REACHABLE_CHAIN` edges from node 0. Everything past the chain is unreachable, so a
|
|
37
|
+
/// search from node 0 only ever needs a tiny working set.
|
|
38
|
+
fn tables() -> (RecordBatch, RecordBatch) {
|
|
39
|
+
let node_ids: Vec<u64> = (0..NODES).collect();
|
|
40
|
+
let nodes = batch(
|
|
41
|
+
vec![Field::new("id", DataType::UInt64, false)],
|
|
42
|
+
vec![Arc::new(UInt64Array::from(node_ids)) as ArrayRef],
|
|
43
|
+
);
|
|
44
|
+
|
|
45
|
+
let edge_count = REACHABLE_CHAIN;
|
|
46
|
+
let edge_ids: Vec<u64> = (0..edge_count).collect();
|
|
47
|
+
let srcs: Vec<u64> = (0..edge_count).collect();
|
|
48
|
+
let dests: Vec<u64> = (1..=edge_count).collect();
|
|
49
|
+
let edges = batch(
|
|
50
|
+
vec![
|
|
51
|
+
Field::new("id", DataType::UInt64, false),
|
|
52
|
+
Field::new("src", DataType::UInt64, false),
|
|
53
|
+
Field::new("dest", DataType::UInt64, false),
|
|
54
|
+
],
|
|
55
|
+
vec![
|
|
56
|
+
Arc::new(UInt64Array::from(edge_ids)) as ArrayRef,
|
|
57
|
+
Arc::new(UInt64Array::from(srcs)),
|
|
58
|
+
Arc::new(UInt64Array::from(dests)),
|
|
59
|
+
],
|
|
60
|
+
);
|
|
61
|
+
|
|
62
|
+
(nodes, edges)
|
|
63
|
+
}
|
|
64
|
+
|
|
65
|
+
fn mib(bytes: isize) -> f64 {
|
|
66
|
+
bytes as f64 / (1024.0 * 1024.0)
|
|
67
|
+
}
|
|
68
|
+
|
|
69
|
+
/// Runs the received callback inside an allocation-tracking region and reports memory use.
|
|
70
|
+
///
|
|
71
|
+
/// Two distinct numbers are reported:
|
|
72
|
+
/// - `retained`: net bytes still held after the call returns (allocated minus freed). This
|
|
73
|
+
/// is what grows the resident set. A value that escapes the closure (e.g. the graph)
|
|
74
|
+
/// counts here; a result that is dropped inside the closure does not.
|
|
75
|
+
/// - `churn`: total bytes allocated during the call, regardless of whether they were freed
|
|
76
|
+
/// again before returning. High churn with ~zero retained means lots of short-lived
|
|
77
|
+
/// allocations (e.g. WCC building one Vec per component, then handing it back and dropping
|
|
78
|
+
/// it), not a memory leak.
|
|
79
|
+
fn measure<T>(label: &str, f: impl FnOnce() -> T) -> T {
|
|
80
|
+
let region = Region::new(GLOBAL);
|
|
81
|
+
let started = Instant::now();
|
|
82
|
+
let value = f();
|
|
83
|
+
let stats = region.change();
|
|
84
|
+
let elapsed = started.elapsed();
|
|
85
|
+
let retained = stats.bytes_allocated as isize - stats.bytes_deallocated as isize;
|
|
86
|
+
eprintln!(
|
|
87
|
+
"{label:<28} retained={:>9.2} MiB churn={:>8.2} MiB in {:<8} allocs (freed={:.2} MiB) {elapsed:?}",
|
|
88
|
+
mib(retained),
|
|
89
|
+
mib(stats.bytes_allocated as isize),
|
|
90
|
+
stats.allocations,
|
|
91
|
+
mib(stats.bytes_deallocated as isize),
|
|
92
|
+
);
|
|
93
|
+
value
|
|
94
|
+
}
|
|
95
|
+
|
|
96
|
+
fn main() {
|
|
97
|
+
eprintln!("memory profile: nodes={NODES} reachable_chain={REACHABLE_CHAIN}\n");
|
|
98
|
+
|
|
99
|
+
let (nodes, edges) = tables();
|
|
100
|
+
|
|
101
|
+
// Construction: forward CSR + identity only. Reverse CSR is NOT built here (lazy).
|
|
102
|
+
let graph = measure("construct", || Graph::new(nodes, edges).unwrap());
|
|
103
|
+
|
|
104
|
+
// Forward-only BFS from node 0: touches only the reachable chain.
|
|
105
|
+
measure("bfs_from_source", || {
|
|
106
|
+
black_box(graph.bfs_u64(0, None).unwrap());
|
|
107
|
+
});
|
|
108
|
+
|
|
109
|
+
// First degree query forces the lazy reverse CSR to materialize.
|
|
110
|
+
measure("in_degrees (builds rev CSR)", || {
|
|
111
|
+
black_box(graph.in_degrees());
|
|
112
|
+
});
|
|
113
|
+
|
|
114
|
+
// Subsequent reverse-adjacency use is free (cached).
|
|
115
|
+
measure("weakly_connected (rev cached)", || {
|
|
116
|
+
black_box(graph.weakly_connected_components_u64());
|
|
117
|
+
});
|
|
118
|
+
|
|
119
|
+
// Keep the graph alive so its footprint is attributed to the stages above.
|
|
120
|
+
black_box(&graph);
|
|
121
|
+
}
|
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
use std::{io::Cursor, sync::Arc};
|
|
1
|
+
use std::{cmp::Ordering, io::Cursor, sync::Arc};
|
|
2
2
|
|
|
3
3
|
use anyhow::{Context, Result, bail};
|
|
4
4
|
use arrow::{
|
|
@@ -12,7 +12,7 @@ use arrow::{
|
|
|
12
12
|
record_batch::RecordBatch,
|
|
13
13
|
};
|
|
14
14
|
|
|
15
|
-
use crate::dsl::Value;
|
|
15
|
+
use crate::dsl::{Value, ops::scalar::ScalarOp};
|
|
16
16
|
|
|
17
17
|
#[derive(Debug, Clone)]
|
|
18
18
|
pub(crate) enum ColumnReader {
|
|
@@ -35,6 +35,13 @@ pub(crate) enum ColumnReader {
|
|
|
35
35
|
Struct(StructArray),
|
|
36
36
|
}
|
|
37
37
|
|
|
38
|
+
enum ScalarValueRef<'a> {
|
|
39
|
+
Null,
|
|
40
|
+
Bool(bool),
|
|
41
|
+
Number(f64),
|
|
42
|
+
Str(&'a str),
|
|
43
|
+
}
|
|
44
|
+
|
|
38
45
|
impl ColumnReader {
|
|
39
46
|
pub(crate) fn bind(batch: &RecordBatch, name: &str) -> Result<Self> {
|
|
40
47
|
let column = batch
|
|
@@ -106,6 +113,164 @@ impl ColumnReader {
|
|
|
106
113
|
Self::Struct(array) => nullable!(array, struct_row_to_value(array, row)?),
|
|
107
114
|
})
|
|
108
115
|
}
|
|
116
|
+
|
|
117
|
+
pub(crate) fn eval_scalar_literal(
|
|
118
|
+
&self,
|
|
119
|
+
row: usize,
|
|
120
|
+
op: ScalarOp,
|
|
121
|
+
literal: &Value,
|
|
122
|
+
reverse: bool,
|
|
123
|
+
) -> Result<Option<Value>> {
|
|
124
|
+
let Some(value) = self.scalar_value(row) else {
|
|
125
|
+
return Ok(None);
|
|
126
|
+
};
|
|
127
|
+
Ok(Some(match value {
|
|
128
|
+
ScalarValueRef::Null => eval_null_literal(op, literal),
|
|
129
|
+
ScalarValueRef::Bool(value) => eval_bool_literal(value, op, literal, reverse)?,
|
|
130
|
+
ScalarValueRef::Number(value) => eval_number_literal(value, op, literal, reverse)?,
|
|
131
|
+
ScalarValueRef::Str(value) => eval_str_literal(value, op, literal, reverse)?,
|
|
132
|
+
}))
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
fn scalar_value(&self, row: usize) -> Option<ScalarValueRef<'_>> {
|
|
136
|
+
macro_rules! nullable {
|
|
137
|
+
($array:expr, $value:expr) => {
|
|
138
|
+
if $array.is_null(row) {
|
|
139
|
+
ScalarValueRef::Null
|
|
140
|
+
} else {
|
|
141
|
+
$value
|
|
142
|
+
}
|
|
143
|
+
};
|
|
144
|
+
}
|
|
145
|
+
|
|
146
|
+
Some(match self {
|
|
147
|
+
Self::Bool(array) => nullable!(array, ScalarValueRef::Bool(array.value(row))),
|
|
148
|
+
Self::I8(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
149
|
+
Self::I16(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
150
|
+
Self::I32(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
151
|
+
Self::I64(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
152
|
+
Self::U8(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
153
|
+
Self::U16(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
154
|
+
Self::U32(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
155
|
+
Self::U64(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
156
|
+
Self::F32(array) => nullable!(array, ScalarValueRef::Number(array.value(row) as f64)),
|
|
157
|
+
Self::F64(array) => nullable!(array, ScalarValueRef::Number(array.value(row))),
|
|
158
|
+
Self::Utf8(array) => nullable!(array, ScalarValueRef::Str(array.value(row))),
|
|
159
|
+
Self::LargeUtf8(array) => nullable!(array, ScalarValueRef::Str(array.value(row))),
|
|
160
|
+
Self::Utf8View(array) => nullable!(array, ScalarValueRef::Str(array.value(row))),
|
|
161
|
+
Self::List(_) | Self::LargeList(_) | Self::Struct(_) => return None,
|
|
162
|
+
})
|
|
163
|
+
}
|
|
164
|
+
}
|
|
165
|
+
|
|
166
|
+
fn eval_null_literal(op: ScalarOp, literal: &Value) -> Value {
|
|
167
|
+
match op {
|
|
168
|
+
ScalarOp::Eq => Value::Bool(literal.is_null()),
|
|
169
|
+
ScalarOp::NotEq => Value::Bool(!literal.is_null()),
|
|
170
|
+
ScalarOp::Lt | ScalarOp::LtEq | ScalarOp::Gt | ScalarOp::GtEq => Value::Null,
|
|
171
|
+
_ => unreachable!("fast scalar literal only handles comparison ops"),
|
|
172
|
+
}
|
|
173
|
+
}
|
|
174
|
+
|
|
175
|
+
fn eval_non_null_null_literal(op: ScalarOp) -> Value {
|
|
176
|
+
match op {
|
|
177
|
+
ScalarOp::Eq => Value::Bool(false),
|
|
178
|
+
ScalarOp::NotEq => Value::Bool(true),
|
|
179
|
+
ScalarOp::Lt | ScalarOp::LtEq | ScalarOp::Gt | ScalarOp::GtEq => Value::Null,
|
|
180
|
+
_ => unreachable!("fast scalar literal only handles comparison ops"),
|
|
181
|
+
}
|
|
182
|
+
}
|
|
183
|
+
|
|
184
|
+
fn eval_bool_literal(value: bool, op: ScalarOp, literal: &Value, reverse: bool) -> Result<Value> {
|
|
185
|
+
if literal.is_null() {
|
|
186
|
+
return Ok(eval_non_null_null_literal(op));
|
|
187
|
+
}
|
|
188
|
+
let Some(rhs) = literal_bool(literal) else {
|
|
189
|
+
return eval_incomparable_literal(op);
|
|
190
|
+
};
|
|
191
|
+
Ok(eval_ordering_or_eq(
|
|
192
|
+
op,
|
|
193
|
+
value == rhs,
|
|
194
|
+
value.cmp(&rhs),
|
|
195
|
+
reverse,
|
|
196
|
+
))
|
|
197
|
+
}
|
|
198
|
+
|
|
199
|
+
fn eval_number_literal(value: f64, op: ScalarOp, literal: &Value, reverse: bool) -> Result<Value> {
|
|
200
|
+
if literal.is_null() {
|
|
201
|
+
return Ok(eval_non_null_null_literal(op));
|
|
202
|
+
}
|
|
203
|
+
let Some(rhs) = literal.as_f64() else {
|
|
204
|
+
return eval_incomparable_literal(op);
|
|
205
|
+
};
|
|
206
|
+
match op {
|
|
207
|
+
ScalarOp::Eq => Ok(Value::Bool(value == rhs)),
|
|
208
|
+
ScalarOp::NotEq => Ok(Value::Bool(value != rhs)),
|
|
209
|
+
ScalarOp::Lt | ScalarOp::LtEq | ScalarOp::Gt | ScalarOp::GtEq => {
|
|
210
|
+
let ordering = value.partial_cmp(&rhs).context("cannot compare values")?;
|
|
211
|
+
Ok(Value::Bool(apply_ordering(op, ordering, reverse)))
|
|
212
|
+
}
|
|
213
|
+
_ => unreachable!("fast scalar literal only handles comparison ops"),
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
|
|
217
|
+
fn eval_str_literal(value: &str, op: ScalarOp, literal: &Value, reverse: bool) -> Result<Value> {
|
|
218
|
+
if literal.is_null() {
|
|
219
|
+
return Ok(eval_non_null_null_literal(op));
|
|
220
|
+
}
|
|
221
|
+
let Value::Str(rhs) = literal else {
|
|
222
|
+
return eval_incomparable_literal(op);
|
|
223
|
+
};
|
|
224
|
+
Ok(eval_ordering_or_eq(
|
|
225
|
+
op,
|
|
226
|
+
value == rhs.as_ref(),
|
|
227
|
+
value.cmp(rhs.as_ref()),
|
|
228
|
+
reverse,
|
|
229
|
+
))
|
|
230
|
+
}
|
|
231
|
+
|
|
232
|
+
fn literal_bool(literal: &Value) -> Option<bool> {
|
|
233
|
+
match literal {
|
|
234
|
+
Value::Bool(value) => Some(*value),
|
|
235
|
+
_ => None,
|
|
236
|
+
}
|
|
237
|
+
}
|
|
238
|
+
|
|
239
|
+
fn eval_incomparable_literal(op: ScalarOp) -> Result<Value> {
|
|
240
|
+
match op {
|
|
241
|
+
ScalarOp::Eq => Ok(Value::Bool(false)),
|
|
242
|
+
ScalarOp::NotEq => Ok(Value::Bool(true)),
|
|
243
|
+
ScalarOp::Lt | ScalarOp::LtEq | ScalarOp::Gt | ScalarOp::GtEq => {
|
|
244
|
+
bail!("cannot compare values")
|
|
245
|
+
}
|
|
246
|
+
_ => unreachable!("fast scalar literal only handles comparison ops"),
|
|
247
|
+
}
|
|
248
|
+
}
|
|
249
|
+
|
|
250
|
+
fn eval_ordering_or_eq(op: ScalarOp, equal: bool, ordering: Ordering, reverse: bool) -> Value {
|
|
251
|
+
match op {
|
|
252
|
+
ScalarOp::Eq => Value::Bool(equal),
|
|
253
|
+
ScalarOp::NotEq => Value::Bool(!equal),
|
|
254
|
+
ScalarOp::Lt | ScalarOp::LtEq | ScalarOp::Gt | ScalarOp::GtEq => {
|
|
255
|
+
Value::Bool(apply_ordering(op, ordering, reverse))
|
|
256
|
+
}
|
|
257
|
+
_ => unreachable!("fast scalar literal only handles comparison ops"),
|
|
258
|
+
}
|
|
259
|
+
}
|
|
260
|
+
|
|
261
|
+
fn apply_ordering(op: ScalarOp, ordering: Ordering, reverse: bool) -> bool {
|
|
262
|
+
let ordering = if reverse {
|
|
263
|
+
ordering.reverse()
|
|
264
|
+
} else {
|
|
265
|
+
ordering
|
|
266
|
+
};
|
|
267
|
+
match op {
|
|
268
|
+
ScalarOp::Lt => ordering.is_lt(),
|
|
269
|
+
ScalarOp::LtEq => ordering.is_le(),
|
|
270
|
+
ScalarOp::Gt => ordering.is_gt(),
|
|
271
|
+
ScalarOp::GtEq => ordering.is_ge(),
|
|
272
|
+
_ => unreachable!("fast scalar literal only handles ordering ops"),
|
|
273
|
+
}
|
|
109
274
|
}
|
|
110
275
|
|
|
111
276
|
pub(crate) fn array_to_values(array: &dyn Array) -> Result<Vec<Value>> {
|
|
@@ -2,12 +2,13 @@ use anyhow::{Context, Result};
|
|
|
2
2
|
|
|
3
3
|
use crate::{
|
|
4
4
|
dsl::{
|
|
5
|
-
DslKernel, StateRow, StateValues, Value,
|
|
5
|
+
DslKernel, StateRow, StateValue, StateValues, Value,
|
|
6
6
|
arrow_value::ColumnReader,
|
|
7
7
|
eval::EvalCtx,
|
|
8
8
|
expr::{ColumnRef, Expr},
|
|
9
|
+
ops::scalar::ScalarOp,
|
|
9
10
|
},
|
|
10
|
-
graph::{Graph, GraphId, GraphRepo},
|
|
11
|
+
graph::{EDGE_DEST_COL, EDGE_SRC_COL, Graph, GraphId, GraphRepo, ID_COL},
|
|
11
12
|
};
|
|
12
13
|
|
|
13
14
|
#[derive(Debug)]
|
|
@@ -50,10 +51,14 @@ impl BoundKernel {
|
|
|
50
51
|
self.visit.eval(ctx)?.truthy()
|
|
51
52
|
}
|
|
52
53
|
|
|
53
|
-
pub(crate) fn next_state(&self, current: &[Value], ctx: &EvalCtx<'_>) -> Result<StateValues> {
|
|
54
|
+
pub(crate) fn next_state(
|
|
55
|
+
&self,
|
|
56
|
+
current: &[StateValue],
|
|
57
|
+
ctx: &EvalCtx<'_>,
|
|
58
|
+
) -> Result<StateValues> {
|
|
54
59
|
let mut next = current.iter().cloned().collect::<StateValues>();
|
|
55
60
|
for (index, expr) in &self.next_state {
|
|
56
|
-
next[*index] = expr.eval(ctx)?;
|
|
61
|
+
next[*index] = StateValue::new(expr.eval(ctx)?);
|
|
57
62
|
}
|
|
58
63
|
Ok(next)
|
|
59
64
|
}
|
|
@@ -62,11 +67,11 @@ impl BoundKernel {
|
|
|
62
67
|
self.stop.eval(ctx)?.truthy()
|
|
63
68
|
}
|
|
64
69
|
|
|
65
|
-
pub(crate) fn state_row(&self, state: &[Value]) -> StateRow {
|
|
70
|
+
pub(crate) fn state_row(&self, state: &[StateValue]) -> StateRow {
|
|
66
71
|
self.names
|
|
67
72
|
.iter()
|
|
68
73
|
.cloned()
|
|
69
|
-
.zip(state.iter().cloned())
|
|
74
|
+
.zip(state.iter().map(StateValue::to_value))
|
|
70
75
|
.collect()
|
|
71
76
|
}
|
|
72
77
|
}
|
|
@@ -89,8 +94,13 @@ impl BoundColumn {
|
|
|
89
94
|
ColumnRef::SrcId => Self::SrcId,
|
|
90
95
|
ColumnRef::DestId => Self::DestId,
|
|
91
96
|
ColumnRef::EdgeId => Self::EdgeId,
|
|
97
|
+
ColumnRef::SrcField(name) if name == ID_COL => Self::SrcId,
|
|
92
98
|
ColumnRef::SrcField(name) => Self::Src(ColumnReader::bind(&graph.repo.nodes, &name)?),
|
|
99
|
+
ColumnRef::DestField(name) if name == ID_COL => Self::DestId,
|
|
93
100
|
ColumnRef::DestField(name) => Self::Dest(ColumnReader::bind(&graph.repo.nodes, &name)?),
|
|
101
|
+
ColumnRef::EdgeField(name) if name == ID_COL => Self::EdgeId,
|
|
102
|
+
ColumnRef::EdgeField(name) if name == EDGE_SRC_COL => Self::SrcId,
|
|
103
|
+
ColumnRef::EdgeField(name) if name == EDGE_DEST_COL => Self::DestId,
|
|
94
104
|
ColumnRef::EdgeField(name) => Self::Edge(ColumnReader::bind(&graph.repo.edges, &name)?),
|
|
95
105
|
ColumnRef::State(name) => state_index(names, &name)
|
|
96
106
|
.map(Self::State)
|
|
@@ -121,10 +131,31 @@ impl BoundColumn {
|
|
|
121
131
|
Self::Src(reader) => reader.value(ctx.src as usize),
|
|
122
132
|
Self::Dest(reader) => reader.value(ctx.dest as usize),
|
|
123
133
|
Self::Edge(reader) => reader.value(ctx.edge as usize),
|
|
124
|
-
Self::State(index) => Ok(ctx.state[*index].clone()),
|
|
134
|
+
Self::State(index) => Ok(ctx.state[*index].to_value()),
|
|
125
135
|
Self::MissingState => Ok(Value::Null),
|
|
126
136
|
}
|
|
127
137
|
}
|
|
138
|
+
|
|
139
|
+
pub(crate) fn eval_scalar_literal(
|
|
140
|
+
&self,
|
|
141
|
+
ctx: &EvalCtx<'_>,
|
|
142
|
+
op: ScalarOp,
|
|
143
|
+
literal: &Value,
|
|
144
|
+
reverse: bool,
|
|
145
|
+
) -> Result<Option<Value>> {
|
|
146
|
+
match self {
|
|
147
|
+
Self::Src(reader) => reader.eval_scalar_literal(ctx.src as usize, op, literal, reverse),
|
|
148
|
+
Self::Dest(reader) => {
|
|
149
|
+
reader.eval_scalar_literal(ctx.dest as usize, op, literal, reverse)
|
|
150
|
+
}
|
|
151
|
+
Self::Edge(reader) => {
|
|
152
|
+
reader.eval_scalar_literal(ctx.edge as usize, op, literal, reverse)
|
|
153
|
+
}
|
|
154
|
+
Self::SrcId | Self::DestId | Self::EdgeId | Self::State(_) | Self::MissingState => {
|
|
155
|
+
Ok(None)
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
}
|
|
128
159
|
}
|
|
129
160
|
|
|
130
161
|
fn graph_id_value(id: GraphId<'_>) -> Result<Value> {
|
|
@@ -152,8 +183,8 @@ fn normalize_state(state: StateRow, names: &[String]) -> StateValues {
|
|
|
152
183
|
state
|
|
153
184
|
.binary_search_by(|(key, _)| key.as_str().cmp(name))
|
|
154
185
|
.ok()
|
|
155
|
-
.map(|i| state[i].1.clone())
|
|
156
|
-
.unwrap_or(Value::Null)
|
|
186
|
+
.map(|i| StateValue::new(state[i].1.clone()))
|
|
187
|
+
.unwrap_or_else(|| StateValue::new(Value::Null))
|
|
157
188
|
})
|
|
158
189
|
.collect::<StateValues>()
|
|
159
190
|
}
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
use anyhow::{Context, Result};
|
|
2
|
+
|
|
3
|
+
use crate::{
|
|
4
|
+
dsl::{StateValue, Value, bind::BoundColumn, expr::Expr, ops::scalar::ScalarOp},
|
|
5
|
+
graph::{EdgeId, Graph, NodeId},
|
|
6
|
+
};
|
|
7
|
+
|
|
8
|
+
pub(crate) struct EvalCtx<'a> {
|
|
9
|
+
pub(crate) graph: &'a Graph,
|
|
10
|
+
pub(crate) src: NodeId,
|
|
11
|
+
pub(crate) dest: NodeId,
|
|
12
|
+
pub(crate) edge: EdgeId,
|
|
13
|
+
pub(crate) state: &'a [StateValue],
|
|
14
|
+
element: Option<&'a Value>,
|
|
15
|
+
}
|
|
16
|
+
|
|
17
|
+
impl<'a> EvalCtx<'a> {
|
|
18
|
+
pub(crate) fn new(
|
|
19
|
+
graph: &'a Graph,
|
|
20
|
+
src: NodeId,
|
|
21
|
+
dest: NodeId,
|
|
22
|
+
edge: EdgeId,
|
|
23
|
+
state: &'a [StateValue],
|
|
24
|
+
) -> Self {
|
|
25
|
+
Self {
|
|
26
|
+
graph,
|
|
27
|
+
src,
|
|
28
|
+
dest,
|
|
29
|
+
edge,
|
|
30
|
+
state,
|
|
31
|
+
element: None,
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
pub(crate) fn with_state<'b>(&'b self, state: &'b [StateValue]) -> EvalCtx<'b> {
|
|
36
|
+
EvalCtx {
|
|
37
|
+
graph: self.graph,
|
|
38
|
+
src: self.src,
|
|
39
|
+
dest: self.dest,
|
|
40
|
+
edge: self.edge,
|
|
41
|
+
state,
|
|
42
|
+
element: self.element,
|
|
43
|
+
}
|
|
44
|
+
}
|
|
45
|
+
|
|
46
|
+
pub(crate) fn with_element<'b>(&'b self, element: &'b Value) -> EvalCtx<'b> {
|
|
47
|
+
EvalCtx {
|
|
48
|
+
graph: self.graph,
|
|
49
|
+
src: self.src,
|
|
50
|
+
dest: self.dest,
|
|
51
|
+
edge: self.edge,
|
|
52
|
+
state: self.state,
|
|
53
|
+
element: Some(element),
|
|
54
|
+
}
|
|
55
|
+
}
|
|
56
|
+
}
|
|
57
|
+
|
|
58
|
+
impl Expr<BoundColumn> {
|
|
59
|
+
pub(crate) fn eval(&self, ctx: &EvalCtx<'_>) -> Result<Value> {
|
|
60
|
+
match self {
|
|
61
|
+
Self::Column(column) => column.value(ctx),
|
|
62
|
+
Self::Element => ctx
|
|
63
|
+
.element
|
|
64
|
+
.cloned()
|
|
65
|
+
.context("pl.element() is only valid inside list.eval/list.filter"),
|
|
66
|
+
Self::Literal(value) => Ok(value.clone()),
|
|
67
|
+
Self::Alias(expr, _) => expr.eval(ctx),
|
|
68
|
+
Self::Ternary {
|
|
69
|
+
predicate,
|
|
70
|
+
truthy,
|
|
71
|
+
falsy,
|
|
72
|
+
} => {
|
|
73
|
+
if predicate.eval(ctx)?.truthy()? {
|
|
74
|
+
truthy.eval(ctx)
|
|
75
|
+
} else {
|
|
76
|
+
falsy.eval(ctx)
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
Self::Scalar(ScalarOp::And, args) => eval_and(args, ctx),
|
|
80
|
+
Self::Scalar(ScalarOp::Or, args) => eval_or(args, ctx),
|
|
81
|
+
Self::Scalar(op, args) => {
|
|
82
|
+
if let Some(value) = try_eval_scalar_fast_path(*op, args, ctx)? {
|
|
83
|
+
return Ok(value);
|
|
84
|
+
}
|
|
85
|
+
let args = eval_args(args, ctx)?;
|
|
86
|
+
op.eval(&args)
|
|
87
|
+
}
|
|
88
|
+
Self::String(op, args) => {
|
|
89
|
+
let args = eval_args(args, ctx)?;
|
|
90
|
+
op.eval(&args)
|
|
91
|
+
}
|
|
92
|
+
Self::List(op, args) => op.eval_with_exprs(args, ctx),
|
|
93
|
+
Self::Struct(op, args) => op.eval_with_exprs(args, ctx),
|
|
94
|
+
}
|
|
95
|
+
}
|
|
96
|
+
}
|
|
97
|
+
|
|
98
|
+
fn eval_args(args: &[Expr<BoundColumn>], ctx: &EvalCtx<'_>) -> Result<Vec<Value>> {
|
|
99
|
+
args.iter().map(|expr| expr.eval(ctx)).collect()
|
|
100
|
+
}
|
|
101
|
+
|
|
102
|
+
fn eval_and(args: &[Expr<BoundColumn>], ctx: &EvalCtx<'_>) -> Result<Value> {
|
|
103
|
+
let left = expr_arg(args, 0)?;
|
|
104
|
+
let right = expr_arg(args, 1)?;
|
|
105
|
+
// Short circuit (optimization)
|
|
106
|
+
if !left.eval(ctx)?.truthy()? {
|
|
107
|
+
return Ok(Value::Bool(false));
|
|
108
|
+
}
|
|
109
|
+
Ok(Value::Bool(right.eval(ctx)?.truthy()?))
|
|
110
|
+
}
|
|
111
|
+
|
|
112
|
+
fn eval_or(args: &[Expr<BoundColumn>], ctx: &EvalCtx<'_>) -> Result<Value> {
|
|
113
|
+
let left = expr_arg(args, 0)?;
|
|
114
|
+
let right = expr_arg(args, 1)?;
|
|
115
|
+
// Short circuit (optimization)
|
|
116
|
+
if left.eval(ctx)?.truthy()? {
|
|
117
|
+
return Ok(Value::Bool(true));
|
|
118
|
+
}
|
|
119
|
+
Ok(Value::Bool(right.eval(ctx)?.truthy()?))
|
|
120
|
+
}
|
|
121
|
+
|
|
122
|
+
// Handles primitive column/literal comparisons without boxing column values (optimization).
|
|
123
|
+
fn try_eval_scalar_fast_path(
|
|
124
|
+
op: ScalarOp,
|
|
125
|
+
args: &[Expr<BoundColumn>],
|
|
126
|
+
ctx: &EvalCtx<'_>,
|
|
127
|
+
) -> Result<Option<Value>> {
|
|
128
|
+
if !matches!(
|
|
129
|
+
op,
|
|
130
|
+
ScalarOp::Eq
|
|
131
|
+
| ScalarOp::NotEq
|
|
132
|
+
| ScalarOp::Lt
|
|
133
|
+
| ScalarOp::LtEq
|
|
134
|
+
| ScalarOp::Gt
|
|
135
|
+
| ScalarOp::GtEq
|
|
136
|
+
) {
|
|
137
|
+
return Ok(None);
|
|
138
|
+
}
|
|
139
|
+
|
|
140
|
+
let left = expr_arg(args, 0)?;
|
|
141
|
+
let right = expr_arg(args, 1)?;
|
|
142
|
+
if let (Some(column), Some(literal)) = (column_expr(left), literal_expr(right)) {
|
|
143
|
+
return column.eval_scalar_literal(ctx, op, literal, false);
|
|
144
|
+
}
|
|
145
|
+
if let (Some(literal), Some(column)) = (literal_expr(left), column_expr(right)) {
|
|
146
|
+
return column.eval_scalar_literal(ctx, op, literal, true);
|
|
147
|
+
}
|
|
148
|
+
Ok(None)
|
|
149
|
+
}
|
|
150
|
+
|
|
151
|
+
fn expr_arg(args: &[Expr<BoundColumn>], index: usize) -> Result<&Expr<BoundColumn>> {
|
|
152
|
+
args.get(index)
|
|
153
|
+
.with_context(|| format!("missing scalar op argument {index}"))
|
|
154
|
+
}
|
|
155
|
+
|
|
156
|
+
fn column_expr(expr: &Expr<BoundColumn>) -> Option<&BoundColumn> {
|
|
157
|
+
match expr {
|
|
158
|
+
Expr::Column(column) => Some(column),
|
|
159
|
+
Expr::Alias(expr, _) => column_expr(expr),
|
|
160
|
+
_ => None,
|
|
161
|
+
}
|
|
162
|
+
}
|
|
163
|
+
|
|
164
|
+
fn literal_expr(expr: &Expr<BoundColumn>) -> Option<&Value> {
|
|
165
|
+
match expr {
|
|
166
|
+
Expr::Literal(value) => Some(value),
|
|
167
|
+
Expr::Alias(expr, _) => literal_expr(expr),
|
|
168
|
+
_ => None,
|
|
169
|
+
}
|
|
170
|
+
}
|