flowquery 1.0.69 → 1.0.71
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +682 -86
- package/dist/compute/provenance.d.ts +201 -0
- package/dist/compute/provenance.d.ts.map +1 -0
- package/dist/compute/provenance.js +216 -0
- package/dist/compute/provenance.js.map +1 -0
- package/dist/compute/runner.d.ts +202 -3
- package/dist/compute/runner.d.ts.map +1 -1
- package/dist/compute/runner.js +309 -2
- package/dist/compute/runner.js.map +1 -1
- package/dist/flowquery.min.js +1 -1
- package/dist/graph/bindings.d.ts +78 -0
- package/dist/graph/bindings.d.ts.map +1 -0
- package/dist/graph/bindings.js +224 -0
- package/dist/graph/bindings.js.map +1 -0
- package/dist/graph/data_cache.d.ts +4 -0
- package/dist/graph/data_cache.d.ts.map +1 -1
- package/dist/graph/data_cache.js +15 -3
- package/dist/graph/data_cache.js.map +1 -1
- package/dist/graph/data_resolver.d.ts.map +1 -1
- package/dist/graph/data_resolver.js +23 -4
- package/dist/graph/data_resolver.js.map +1 -1
- package/dist/graph/database.d.ts +4 -2
- package/dist/graph/database.d.ts.map +1 -1
- package/dist/graph/database.js +45 -9
- package/dist/graph/database.js.map +1 -1
- package/dist/graph/physical_node.d.ts +7 -2
- package/dist/graph/physical_node.d.ts.map +1 -1
- package/dist/graph/physical_node.js +22 -26
- package/dist/graph/physical_node.js.map +1 -1
- package/dist/graph/physical_relationship.d.ts +7 -2
- package/dist/graph/physical_relationship.d.ts.map +1 -1
- package/dist/graph/physical_relationship.js +22 -26
- package/dist/graph/physical_relationship.js.map +1 -1
- package/dist/graph/relationship_match_collector.d.ts.map +1 -1
- package/dist/graph/relationship_match_collector.js +9 -0
- package/dist/graph/relationship_match_collector.js.map +1 -1
- package/dist/graph/virtual_sources.d.ts +8 -0
- package/dist/graph/virtual_sources.d.ts.map +1 -0
- package/dist/graph/virtual_sources.js +31 -0
- package/dist/graph/virtual_sources.js.map +1 -0
- package/dist/graph/virtual_statement.d.ts +41 -0
- package/dist/graph/virtual_statement.d.ts.map +1 -0
- package/dist/graph/virtual_statement.js +143 -0
- package/dist/graph/virtual_statement.js.map +1 -0
- package/dist/index.browser.d.ts +3 -3
- package/dist/index.browser.d.ts.map +1 -1
- package/dist/index.browser.js.map +1 -1
- package/dist/index.node.d.ts +3 -3
- package/dist/index.node.d.ts.map +1 -1
- package/dist/index.node.js.map +1 -1
- package/dist/parsing/ast_node.d.ts +9 -0
- package/dist/parsing/ast_node.d.ts.map +1 -1
- package/dist/parsing/ast_node.js +21 -4
- package/dist/parsing/ast_node.js.map +1 -1
- package/dist/parsing/expressions/binding_reference.d.ts +16 -0
- package/dist/parsing/expressions/binding_reference.d.ts.map +1 -0
- package/dist/parsing/expressions/binding_reference.js +34 -0
- package/dist/parsing/expressions/binding_reference.js.map +1 -0
- package/dist/parsing/operations/aggregated_return.d.ts +10 -0
- package/dist/parsing/operations/aggregated_return.d.ts.map +1 -1
- package/dist/parsing/operations/aggregated_return.js +56 -3
- package/dist/parsing/operations/aggregated_return.js.map +1 -1
- package/dist/parsing/operations/aggregated_with.d.ts +20 -0
- package/dist/parsing/operations/aggregated_with.d.ts.map +1 -1
- package/dist/parsing/operations/aggregated_with.js +33 -1
- package/dist/parsing/operations/aggregated_with.js.map +1 -1
- package/dist/parsing/operations/create_node.d.ts +5 -1
- package/dist/parsing/operations/create_node.d.ts.map +1 -1
- package/dist/parsing/operations/create_node.js +12 -2
- package/dist/parsing/operations/create_node.js.map +1 -1
- package/dist/parsing/operations/create_relationship.d.ts +5 -1
- package/dist/parsing/operations/create_relationship.d.ts.map +1 -1
- package/dist/parsing/operations/create_relationship.js +12 -2
- package/dist/parsing/operations/create_relationship.js.map +1 -1
- package/dist/parsing/operations/drop_binding.d.ts +15 -0
- package/dist/parsing/operations/drop_binding.d.ts.map +1 -0
- package/dist/parsing/operations/drop_binding.js +42 -0
- package/dist/parsing/operations/drop_binding.js.map +1 -0
- package/dist/parsing/operations/group_by.d.ts +32 -0
- package/dist/parsing/operations/group_by.d.ts.map +1 -1
- package/dist/parsing/operations/group_by.js +103 -0
- package/dist/parsing/operations/group_by.js.map +1 -1
- package/dist/parsing/operations/let.d.ts +36 -0
- package/dist/parsing/operations/let.d.ts.map +1 -0
- package/dist/parsing/operations/let.js +154 -0
- package/dist/parsing/operations/let.js.map +1 -0
- package/dist/parsing/operations/load.d.ts +31 -0
- package/dist/parsing/operations/load.d.ts.map +1 -1
- package/dist/parsing/operations/load.js +102 -2
- package/dist/parsing/operations/load.js.map +1 -1
- package/dist/parsing/operations/merge.d.ts +158 -0
- package/dist/parsing/operations/merge.d.ts.map +1 -0
- package/dist/parsing/operations/merge.js +338 -0
- package/dist/parsing/operations/merge.js.map +1 -0
- package/dist/parsing/operations/order_by.d.ts +6 -0
- package/dist/parsing/operations/order_by.d.ts.map +1 -1
- package/dist/parsing/operations/order_by.js +10 -1
- package/dist/parsing/operations/order_by.js.map +1 -1
- package/dist/parsing/operations/refresh_binding.d.ts +15 -0
- package/dist/parsing/operations/refresh_binding.d.ts.map +1 -0
- package/dist/parsing/operations/refresh_binding.js +42 -0
- package/dist/parsing/operations/refresh_binding.js.map +1 -0
- package/dist/parsing/operations/refresh_node.d.ts +11 -0
- package/dist/parsing/operations/refresh_node.d.ts.map +1 -0
- package/dist/parsing/operations/refresh_node.js +46 -0
- package/dist/parsing/operations/refresh_node.js.map +1 -0
- package/dist/parsing/operations/refresh_relationship.d.ts +11 -0
- package/dist/parsing/operations/refresh_relationship.d.ts.map +1 -0
- package/dist/parsing/operations/refresh_relationship.js +46 -0
- package/dist/parsing/operations/refresh_relationship.js.map +1 -0
- package/dist/parsing/operations/return.d.ts +34 -0
- package/dist/parsing/operations/return.d.ts.map +1 -1
- package/dist/parsing/operations/return.js +102 -3
- package/dist/parsing/operations/return.js.map +1 -1
- package/dist/parsing/operations/union.d.ts +19 -2
- package/dist/parsing/operations/union.d.ts.map +1 -1
- package/dist/parsing/operations/union.js +47 -11
- package/dist/parsing/operations/union.js.map +1 -1
- package/dist/parsing/operations/union_all.d.ts +5 -1
- package/dist/parsing/operations/union_all.d.ts.map +1 -1
- package/dist/parsing/operations/union_all.js +8 -2
- package/dist/parsing/operations/union_all.js.map +1 -1
- package/dist/parsing/operations/update.d.ts +27 -0
- package/dist/parsing/operations/update.d.ts.map +1 -0
- package/dist/parsing/operations/update.js +88 -0
- package/dist/parsing/operations/update.js.map +1 -0
- package/dist/parsing/operations/update_delete.d.ts +43 -0
- package/dist/parsing/operations/update_delete.d.ts.map +1 -0
- package/dist/parsing/operations/update_delete.js +105 -0
- package/dist/parsing/operations/update_delete.js.map +1 -0
- package/dist/parsing/parser.d.ts +70 -1
- package/dist/parsing/parser.d.ts.map +1 -1
- package/dist/parsing/parser.js +704 -10
- package/dist/parsing/parser.js.map +1 -1
- package/dist/parsing/statement_info_crawler.d.ts +106 -0
- package/dist/parsing/statement_info_crawler.d.ts.map +1 -1
- package/dist/parsing/statement_info_crawler.js +231 -12
- package/dist/parsing/statement_info_crawler.js.map +1 -1
- package/dist/tokenization/keyword.d.ts +21 -1
- package/dist/tokenization/keyword.d.ts.map +1 -1
- package/dist/tokenization/keyword.js +20 -0
- package/dist/tokenization/keyword.js.map +1 -1
- package/dist/tokenization/token.d.ts +24 -0
- package/dist/tokenization/token.d.ts.map +1 -1
- package/dist/tokenization/token.js +73 -0
- package/dist/tokenization/token.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -12,11 +12,11 @@ Beyond graphs, FlowQuery provides a full data processing pipeline language with
|
|
|
12
12
|
|
|
13
13
|
The combination of graph querying and pipeline processing makes FlowQuery ideal for the retrieval stage of Retrieval Augmented Generation (RAG). A typical graph RAG flow works as follows:
|
|
14
14
|
|
|
15
|
-
1. **User query**
|
|
16
|
-
2. **Schema retrieval**
|
|
17
|
-
3. **Query generation**
|
|
18
|
-
4. **Query execution**
|
|
19
|
-
5. **Response formulation**
|
|
15
|
+
1. **User query** - The user asks a question in natural language.
|
|
16
|
+
2. **Schema retrieval** - The application retrieves the virtual graph schema via `CALL schema()` and injects it into the system instructions of the query-generation LLM, so that the LLM knows which node labels, relationship types, and properties are available.
|
|
17
|
+
3. **Query generation** - The LLM, grounded in the schema, generates a precise OpenCypher query to retrieve the data needed to answer the question.
|
|
18
|
+
4. **Query execution** - The FlowQuery engine executes the generated OpenCypher query against the virtual graph and returns the results as grounding data.
|
|
19
|
+
5. **Response formulation** - The LLM formulates a final response informed by the grounding data.
|
|
20
20
|
|
|
21
21
|
```
|
|
22
22
|
┌───────────────────┐
|
|
@@ -37,7 +37,7 @@ The combination of graph querying and pipeline processing makes FlowQuery ideal
|
|
|
37
37
|
└──────────┘
|
|
38
38
|
```
|
|
39
39
|
|
|
40
|
-
The schema is retrieved using FlowQuery's built-in `schema()` function, which returns the structure of all registered virtual nodes and relationships
|
|
40
|
+
The schema is retrieved using FlowQuery's built-in `schema()` function, which returns the structure of all registered virtual nodes and relationships - including labels, types, endpoint labels, property names, and sample values. This schema is then included in the LLM's system instructions so it can generate correct queries grounded in the actual graph model:
|
|
41
41
|
|
|
42
42
|
```cypher
|
|
43
43
|
CALL schema() YIELD kind, label, type, from_label, to_label, properties, sample
|
|
@@ -227,7 +227,7 @@ WITH 1 AS x RETURN x UNION ALL WITH 1 AS x RETURN x
|
|
|
227
227
|
|
|
228
228
|
#### Multi-Statement Queries
|
|
229
229
|
|
|
230
|
-
Multiple statements can be separated by semicolons. Only `CREATE VIRTUAL`
|
|
230
|
+
Multiple statements can be separated by semicolons. Only declaration statements - `CREATE VIRTUAL`, `DELETE VIRTUAL` (alias: `DROP VIRTUAL`), `REFRESH VIRTUAL`, `LET`, `UPDATE`, and `MERGE INTO` - may appear before the last statement. The last statement can be any valid query.
|
|
231
231
|
|
|
232
232
|
```cypher
|
|
233
233
|
CREATE VIRTUAL (:Person) AS {
|
|
@@ -253,87 +253,96 @@ console.log(runner.metadata);
|
|
|
253
253
|
// info: { node_labels: ["X"], relationship_types: [], sources: [], ... } }
|
|
254
254
|
```
|
|
255
255
|
|
|
256
|
-
####
|
|
256
|
+
#### Caching Virtual Entities: `STATIC` and `REFRESH`
|
|
257
257
|
|
|
258
|
-
`
|
|
259
|
-
query
|
|
258
|
+
By default, every `MATCH` against a virtual node or relationship re-executes
|
|
259
|
+
its backing sub-query. For expensive sources (HTTP endpoints, large CSV files)
|
|
260
|
+
you can opt in to persistent caching with the `STATIC` keyword:
|
|
260
261
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
- The properties **declared** by each virtual's `RETURN` clause via
|
|
268
|
-
`info.declared`, so you can validate that a query references only
|
|
269
|
-
declared properties.
|
|
270
|
-
- Literal values supplied for properties at the call site via
|
|
271
|
-
`info.nodes[Label].literal_values` — collected from inline pattern
|
|
272
|
-
properties and from equality / `IN` predicates such as
|
|
273
|
-
`WHERE u.id = 'rick.o'` or `WHERE u.id IN ['a', 'b']`.
|
|
262
|
+
```cypher
|
|
263
|
+
CREATE STATIC VIRTUAL (:Country) AS {
|
|
264
|
+
LOAD JSON FROM 'https://restcountries.com/v3.1/all?fields=name,cca2,population' AS c
|
|
265
|
+
RETURN c.cca2 AS code, c.name.common AS name
|
|
266
|
+
};
|
|
267
|
+
```
|
|
274
268
|
|
|
275
|
-
|
|
276
|
-
|
|
269
|
+
The sub-query runs once on first access and the result is reused for every
|
|
270
|
+
subsequent query in the same process, even across `Runner` instances. STATIC
|
|
271
|
+
virtual entities are protected: re-running `CREATE STATIC VIRTUAL (:Country)`
|
|
272
|
+
without first dropping the existing entry raises an error. Use
|
|
273
|
+
`DROP VIRTUAL (:Country)` (an alias for `DELETE VIRTUAL`) to remove it.
|
|
277
274
|
|
|
278
|
-
|
|
279
|
-
|
|
275
|
+
To refresh on a schedule, add a `REFRESH EVERY <n> <unit>` clause. Supported
|
|
276
|
+
units are `SECOND[S]`, `MINUTE[S]`, `HOUR[S]`, and `DAY[S]`:
|
|
280
277
|
|
|
281
|
-
```
|
|
282
|
-
|
|
278
|
+
```cypher
|
|
279
|
+
CREATE STATIC VIRTUAL (:Country) AS {
|
|
280
|
+
LOAD JSON FROM 'https://restcountries.com/v3.1/all?fields=name,cca2,population' AS c
|
|
281
|
+
RETURN c.cca2 AS code, c.name.common AS name
|
|
282
|
+
} REFRESH EVERY 1 HOUR;
|
|
283
|
+
```
|
|
283
284
|
|
|
284
|
-
|
|
285
|
-
|
|
285
|
+
Refresh is lazy: the cache is re-populated on the first access after the TTL
|
|
286
|
+
elapses; no background timers are scheduled. `REFRESH EVERY` requires
|
|
287
|
+
`STATIC` (caching must be enabled to refresh).
|
|
288
|
+
|
|
289
|
+
To force a refresh on demand from anywhere in a query (without waiting for the TTL), use
|
|
290
|
+
`REFRESH VIRTUAL (...)`:
|
|
291
|
+
|
|
292
|
+
```cypher
|
|
293
|
+
REFRESH VIRTUAL (:Country);
|
|
294
|
+
MATCH (c:Country) RETURN c.name
|
|
286
295
|
```
|
|
287
296
|
|
|
288
|
-
|
|
289
|
-
|
|
297
|
+
`REFRESH VIRTUAL` works on both nodes and relationships and clears the cache
|
|
298
|
+
so that the next access re-executes the backing sub-query.
|
|
290
299
|
|
|
291
|
-
|
|
292
|
-
const runner = new FlowQuery(`
|
|
293
|
-
CREATE VIRTUAL (:City) AS {
|
|
294
|
-
LOAD JSON FROM "https://example.com/cities" AS c
|
|
295
|
-
RETURN c.id AS id, c.name AS name, c.country AS country
|
|
296
|
-
};
|
|
297
|
-
CREATE VIRTUAL (:City)-[:FLIGHT]-(:City) AS {
|
|
298
|
-
LOAD JSON FROM "https://example.com/flights" AS f
|
|
299
|
-
RETURN f.left_id AS left_id, f.right_id AS right_id, f.airline AS airline
|
|
300
|
-
};
|
|
301
|
-
MATCH (a:City {name: 'NYC'})-[r:FLIGHT]->(b:City)
|
|
302
|
-
WHERE b.country IN ['US', 'CA']
|
|
303
|
-
RETURN a.name AS origin, b.name AS destination, r.airline AS airline
|
|
304
|
-
`);
|
|
305
|
-
const { info } = runner.metadata;
|
|
300
|
+
#### Refreshable `LET` Bindings: `REFRESH EVERY`, `REFRESH BINDING`, `DROP BINDING`
|
|
306
301
|
|
|
307
|
-
|
|
308
|
-
|
|
309
|
-
|
|
310
|
-
|
|
311
|
-
|
|
312
|
-
|
|
313
|
-
|
|
314
|
-
|
|
315
|
-
|
|
316
|
-
|
|
317
|
-
|
|
318
|
-
|
|
319
|
-
|
|
320
|
-
|
|
321
|
-
|
|
322
|
-
|
|
323
|
-
|
|
324
|
-
|
|
325
|
-
|
|
326
|
-
|
|
302
|
+
`LET` bindings live for the lifetime of the process, just like virtual
|
|
303
|
+
nodes and relationships, and the same caching primitives apply. A plain
|
|
304
|
+
`LET name = { ... }` evaluates the sub-query once, when the `LET`
|
|
305
|
+
statement executes, and stores the result in the global binding store;
|
|
306
|
+
all subsequent reads return that cached value without re-running the
|
|
307
|
+
sub-query. To opt into TTL-based re-evaluation, add a trailing
|
|
308
|
+
`REFRESH EVERY <n> <unit>` clause:
|
|
309
|
+
|
|
310
|
+
```cypher
|
|
311
|
+
LET users = {
|
|
312
|
+
LOAD JSON FROM 'https://example.com/users.json' AS u
|
|
313
|
+
RETURN u.id AS id, u.name AS name
|
|
314
|
+
} REFRESH EVERY 5 MINUTES;
|
|
315
|
+
LOAD JSON FROM users AS u RETURN u.id AS id, u.name AS name
|
|
316
|
+
```
|
|
317
|
+
|
|
318
|
+
The sub-query still runs eagerly at `LET` time and the result is cached
|
|
319
|
+
just like a plain binding; the `REFRESH EVERY` clause additionally
|
|
320
|
+
arranges for the next read after the TTL has elapsed to re-execute the
|
|
321
|
+
sub-query.
|
|
322
|
+
|
|
323
|
+
Refreshable bindings (those with a `REFRESH EVERY` clause) cannot be
|
|
324
|
+
silently overwritten: re-running `LET name = { ... } REFRESH EVERY ...`
|
|
325
|
+
without first dropping the existing binding raises an error, and so
|
|
326
|
+
does `UPDATE name = ...` or `MERGE INTO name ...` against the same
|
|
327
|
+
name. `LET ... REFRESH EVERY` requires a sub-query right-hand side
|
|
328
|
+
(an expression like `42 REFRESH EVERY 1 MINUTE` is rejected). To force
|
|
329
|
+
an immediate refresh outside the TTL schedule, use `REFRESH BINDING`:
|
|
330
|
+
|
|
331
|
+
```cypher
|
|
332
|
+
REFRESH BINDING users;
|
|
333
|
+
LOAD JSON FROM users AS u RETURN u.id AS id
|
|
327
334
|
```
|
|
328
335
|
|
|
329
|
-
|
|
330
|
-
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
335
|
-
|
|
336
|
-
|
|
336
|
+
To remove any binding (plain or refreshable), use `DROP BINDING`:
|
|
337
|
+
|
|
338
|
+
```cypher
|
|
339
|
+
DROP BINDING users;
|
|
340
|
+
```
|
|
341
|
+
|
|
342
|
+
`UPDATE` and `MERGE INTO` against a refreshable binding are blocked
|
|
343
|
+
because the mutation would be invisibly overwritten by the next
|
|
344
|
+
refresh. Use `REFRESH BINDING name` to re-evaluate the source, or
|
|
345
|
+
`DROP BINDING name` and redefine the binding plainly.
|
|
337
346
|
|
|
338
347
|
### WHERE Clause
|
|
339
348
|
|
|
@@ -391,6 +400,115 @@ UNWIND [1, 1, 2, 2] AS i RETURN DISTINCT i
|
|
|
391
400
|
// [{ i: 1 }, { i: 2 }]
|
|
392
401
|
```
|
|
393
402
|
|
|
403
|
+
### Bindings (`LET` / `UPDATE` / `MERGE INTO`)
|
|
404
|
+
|
|
405
|
+
A **binding** is a named, mutable value that persists across statements in a multi-statement query. Bindings live in a flat per-query namespace, are introduced with `LET`, wholesale-replaced with `UPDATE`, row-filtered with `UPDATE … AS x DELETE WHERE …`, and per-row upserted/merged with `MERGE INTO … USING … ON … WHEN …`. Once bound, the value can be referenced anywhere an expression is allowed (e.g. as the source of `LOAD JSON FROM`, `UNWIND`, `MERGE INTO`'s `USING`, or directly inside an expression).
|
|
406
|
+
|
|
407
|
+
#### LET
|
|
408
|
+
|
|
409
|
+
`LET name = <expression-or-subquery>` introduces a new binding. The right-hand side can be any expression or a braced sub-query whose final `RETURN` provides the value.
|
|
410
|
+
|
|
411
|
+
```cypher
|
|
412
|
+
LET data = [{id: 1, name: 'Alice'}, {id: 2, name: 'Bob'}];
|
|
413
|
+
LET threshold = 10;
|
|
414
|
+
LET fresh = {
|
|
415
|
+
UNWIND [1, 2, 3] AS n
|
|
416
|
+
RETURN n AS n
|
|
417
|
+
};
|
|
418
|
+
LOAD JSON FROM data AS d
|
|
419
|
+
WITH d WHERE d.id >= threshold OR d.id <= 2
|
|
420
|
+
RETURN d.id AS id, d.name AS name
|
|
421
|
+
```
|
|
422
|
+
|
|
423
|
+
`LET` fails if the binding already exists - use `UPDATE` to overwrite.
|
|
424
|
+
|
|
425
|
+
#### UPDATE
|
|
426
|
+
|
|
427
|
+
`UPDATE name = <expression-or-subquery>` replaces the value of an existing binding wholesale. Works for any value (scalars, maps, arrays).
|
|
428
|
+
|
|
429
|
+
```cypher
|
|
430
|
+
LET counter = 0;
|
|
431
|
+
UPDATE counter = counter + 1;
|
|
432
|
+
RETURN counter AS counter
|
|
433
|
+
// [{ counter: 1 }]
|
|
434
|
+
```
|
|
435
|
+
|
|
436
|
+
`UPDATE` fails if the binding doesn't exist - use `LET` first.
|
|
437
|
+
|
|
438
|
+
#### UPDATE ... AS alias DELETE WHERE ...
|
|
439
|
+
|
|
440
|
+
Removes rows from an array binding in place: every row for which the `WHERE` predicate evaluates to true is deleted. The alias names each row during predicate evaluation.
|
|
441
|
+
|
|
442
|
+
```cypher
|
|
443
|
+
LET users = [
|
|
444
|
+
{id: 1, name: 'Alice', expired: false},
|
|
445
|
+
{id: 2, name: 'Bob', expired: true}
|
|
446
|
+
];
|
|
447
|
+
UPDATE users AS u DELETE WHERE u.expired;
|
|
448
|
+
LOAD JSON FROM users AS u
|
|
449
|
+
RETURN u.id AS id, u.name AS name
|
|
450
|
+
// [{ id: 1, name: 'Alice' }]
|
|
451
|
+
```
|
|
452
|
+
|
|
453
|
+
#### MERGE INTO ... USING ... ON ... WHEN ...
|
|
454
|
+
|
|
455
|
+
SQL-style keyed merge. For each row of the source, find matching rows in the target (by key or predicate) and apply per-row branches:
|
|
456
|
+
|
|
457
|
+
```
|
|
458
|
+
MERGE INTO target [AS t]
|
|
459
|
+
USING <source-expression-or-subquery> [AS s]
|
|
460
|
+
ON <key-or-key-list> | <predicate>
|
|
461
|
+
[WHEN MATCHED THEN UPDATE SET <field-list>]
|
|
462
|
+
[WHEN MATCHED THEN DELETE]
|
|
463
|
+
[WHEN NOT MATCHED THEN INSERT [<row-expression>]]
|
|
464
|
+
```
|
|
465
|
+
|
|
466
|
+
```cypher
|
|
467
|
+
// Upsert by key: replace listed fields on matches; append unmatched source rows
|
|
468
|
+
LET users = [{id: 1, name: 'Alice'}, {id: 2, name: 'Bob'}];
|
|
469
|
+
MERGE INTO users
|
|
470
|
+
USING [{id: 2, name: 'Bobby'}, {id: 3, name: 'Charlie'}]
|
|
471
|
+
ON id
|
|
472
|
+
WHEN MATCHED THEN UPDATE SET .id, .name
|
|
473
|
+
WHEN NOT MATCHED THEN INSERT;
|
|
474
|
+
// users → [{id:1,name:'Alice'}, {id:2,name:'Bobby'}, {id:3,name:'Charlie'}]
|
|
475
|
+
|
|
476
|
+
// Composite key
|
|
477
|
+
MERGE INTO rows
|
|
478
|
+
USING incoming AS s
|
|
479
|
+
ON (tenant, id)
|
|
480
|
+
WHEN MATCHED THEN UPDATE SET .v
|
|
481
|
+
WHEN NOT MATCHED THEN INSERT;
|
|
482
|
+
|
|
483
|
+
// Per-row expressions across target (u) and source (s) aliases
|
|
484
|
+
MERGE INTO users AS u
|
|
485
|
+
USING incoming AS s
|
|
486
|
+
ON id
|
|
487
|
+
WHEN MATCHED THEN UPDATE SET .name = s.name + ' (' + u.name + ')'
|
|
488
|
+
WHEN NOT MATCHED THEN INSERT {id: s.id, name: 'New: ' + s.name};
|
|
489
|
+
|
|
490
|
+
// Predicate-based join
|
|
491
|
+
MERGE INTO users AS u
|
|
492
|
+
USING incoming AS s
|
|
493
|
+
ON u.tenant = s.tenant AND u.email = s.email
|
|
494
|
+
WHEN MATCHED THEN UPDATE SET .v = s.v
|
|
495
|
+
WHEN NOT MATCHED THEN INSERT;
|
|
496
|
+
|
|
497
|
+
// Tombstone delete: rows in target that also appear in source are removed
|
|
498
|
+
MERGE INTO users
|
|
499
|
+
USING [{id: 2}, {id: 3}]
|
|
500
|
+
ON id
|
|
501
|
+
WHEN MATCHED THEN DELETE;
|
|
502
|
+
```
|
|
503
|
+
|
|
504
|
+
Notes:
|
|
505
|
+
|
|
506
|
+
- The source may be any expression (array literal, binding name) or a braced sub-query. When the source is a bare binding name, give it an alias: `USING incoming AS s`.
|
|
507
|
+
- `ON id` is shorthand for the equality predicate `t.id = s.id`; `ON (a, b)` requires equality on every listed key. Anything else is treated as a Boolean predicate evaluated per `(target, source)` pair.
|
|
508
|
+
- `WHEN MATCHED THEN UPDATE SET .field` overwrites only the listed fields, preserving the rest from the existing row. `SET .field = expr` evaluates `expr` per matched pair, with target and source aliases in scope.
|
|
509
|
+
- `WHEN NOT MATCHED THEN INSERT` (no row expression) appends the source row as-is. `INSERT { … }` appends an explicit row expression instead.
|
|
510
|
+
- A `MERGE INTO` must declare at least one `WHEN` clause. Branches are independent - omit `WHEN NOT MATCHED` to skip insertion, omit `WHEN MATCHED` to skip updates/deletes.
|
|
511
|
+
|
|
394
512
|
### Expressions
|
|
395
513
|
|
|
396
514
|
#### Arithmetic
|
|
@@ -504,16 +622,16 @@ RETURN sum(n IN [1+2+3, 2, 3] | n^2) AS sum // 49
|
|
|
504
622
|
Test list elements against a condition. Follow standard Cypher syntax.
|
|
505
623
|
|
|
506
624
|
```cypher
|
|
507
|
-
// any
|
|
625
|
+
// any - true if at least one element matches
|
|
508
626
|
RETURN any(n IN [1, 2, 3] WHERE n > 2) // true
|
|
509
627
|
|
|
510
|
-
// all
|
|
628
|
+
// all - true if every element matches
|
|
511
629
|
RETURN all(n IN [2, 4, 6] WHERE n > 0) // true
|
|
512
630
|
|
|
513
|
-
// none
|
|
631
|
+
// none - true if no element matches
|
|
514
632
|
RETURN none(n IN [1, 2, 3] WHERE n > 5) // true
|
|
515
633
|
|
|
516
|
-
// single
|
|
634
|
+
// single - true if exactly one element matches
|
|
517
635
|
RETURN single(n IN [1, 2, 3] WHERE n > 2) // true
|
|
518
636
|
|
|
519
637
|
// In a WHERE clause
|
|
@@ -698,7 +816,7 @@ MATCH (a:Person) WHERE NOT (a)-[:KNOWS]->(:Person) RETURN a.name
|
|
|
698
816
|
**Subquery Expressions:** `EXISTS`, `COUNT`, and `COLLECT` evaluate a full subquery as an expression. The subquery can reference outer-scope variables and supports the complete FlowQuery pipeline (MATCH, WITH, WHERE, UNWIND, LOAD, etc.).
|
|
699
817
|
|
|
700
818
|
```cypher
|
|
701
|
-
// EXISTS
|
|
819
|
+
// EXISTS - returns true if the subquery produces any rows
|
|
702
820
|
MATCH (p:Person)
|
|
703
821
|
WHERE EXISTS {
|
|
704
822
|
MATCH (p)-[:KNOWS]->(friend:Person)
|
|
@@ -706,12 +824,12 @@ WHERE EXISTS {
|
|
|
706
824
|
}
|
|
707
825
|
RETURN p.name
|
|
708
826
|
|
|
709
|
-
// NOT EXISTS
|
|
827
|
+
// NOT EXISTS - negate with NOT
|
|
710
828
|
MATCH (p:Person)
|
|
711
829
|
WHERE NOT EXISTS { MATCH (p)-[:KNOWS]->(:Person) }
|
|
712
830
|
RETURN p.name
|
|
713
831
|
|
|
714
|
-
// COUNT
|
|
832
|
+
// COUNT - returns the number of rows the subquery produces
|
|
715
833
|
MATCH (p:Person)
|
|
716
834
|
WHERE COUNT { MATCH (p)-[:KNOWS]->(:Person) } > 2
|
|
717
835
|
RETURN p.name
|
|
@@ -720,7 +838,7 @@ RETURN p.name
|
|
|
720
838
|
MATCH (p:Person)
|
|
721
839
|
RETURN p.name, COUNT { MATCH (p)-[:KNOWS]->(:Person) } AS friendCount
|
|
722
840
|
|
|
723
|
-
// COLLECT
|
|
841
|
+
// COLLECT - returns a list of single-column values from the subquery
|
|
724
842
|
MATCH (p:Person)
|
|
725
843
|
RETURN COLLECT {
|
|
726
844
|
MATCH (p)-[:KNOWS]->(friend:Person)
|
|
@@ -832,6 +950,18 @@ RETURN f.name, f.description, f.category
|
|
|
832
950
|
│ stmt1; stmt2; ... stmtN -- multi-statement │
|
|
833
951
|
│ LIMIT n │
|
|
834
952
|
├─────────────────────────────────────────────────────────────┤
|
|
953
|
+
│ BINDINGS │
|
|
954
|
+
├─────────────────────────────────────────────────────────────┤
|
|
955
|
+
│ LET name = expr | { subquery } -- new binding │
|
|
956
|
+
│ UPDATE name = expr | { subquery } -- replace existing │
|
|
957
|
+
│ UPDATE name AS x DELETE WHERE cond -- row-filter binding │
|
|
958
|
+
│ MERGE INTO target [AS t] │
|
|
959
|
+
│ USING <expr | subquery> [AS s] │
|
|
960
|
+
│ ON key | (k1,k2,...) | predicate │
|
|
961
|
+
│ [WHEN MATCHED THEN UPDATE SET .f [= expr], ...] │
|
|
962
|
+
│ [WHEN MATCHED THEN DELETE] │
|
|
963
|
+
│ [WHEN NOT MATCHED THEN INSERT [ {row-expr} ]] │
|
|
964
|
+
├─────────────────────────────────────────────────────────────┤
|
|
835
965
|
│ GRAPH OPERATIONS │
|
|
836
966
|
├─────────────────────────────────────────────────────────────┤
|
|
837
967
|
│ CREATE VIRTUAL (:Label) AS { subquery } │
|
|
@@ -1084,7 +1214,7 @@ RETURN f.name AS name, f.description AS description, f.category AS category
|
|
|
1084
1214
|
|
|
1085
1215
|
### Virtual Org Chart
|
|
1086
1216
|
|
|
1087
|
-
This single multi-statement query creates a virtual graph for a fictitious company
|
|
1217
|
+
This single multi-statement query creates a virtual graph for a fictitious company - complete with employees, skills, phone numbers, and a management chain - then queries it to produce an org chart. [Try live!](https://microsoft.github.io/FlowQuery/?rZXPbtNAEMbFhUMOReXAeW4OwkH521ZBQgolrVQlSogDQlRVtdjTeKm9a603Bavqw_AAPEVfDO16s_EmEVElfPHuzGf7N9-M7UN4BfX-MM0SXiC-hkEA9zUAgJdwqc_quKdRH1o-MJJiH7yACAKnMTLP1-kf_PucykSlTocTE6weEWZEyBSZ7IM3_IXhUtI71MIs5kxd-KbV6PV6jWar2fR8yG9pkuR9uPQCKYjEReH54I2QRCjymGZqd0YZYSF6Vw--C9q2oGMiwmUOM3qHgnh-FfTLFPgNDNmCMkRB2ULTuKBOchO03Ww5oAMRxlRiKJdCVeadJnwZqcUYmeT6JlukHUs6FbQgMCUSEw2yRToVPFqG0prrkLrJTdLOBumMkyglWVbW5Q0YSQpJw1xtPn_dpuxayguSYg4fBOe3Sl2lDJBRLqyhBuVJfrYdynmRYRAKmqmyvGkhY67mzTsXJIs_jbYxexZzRBl8i4np6f_G7LhmLnMNGBS5xFR7-BHvJlm-DXhkAQepeoMueMxyXVQF0LQSxoSRxW7Afd12fQxCsUw1FpEEdLNzqjkDSW4x5kmEAsaLVG4TH1viOU8ff-dwTkT4-Ids9p7fyJ9EoGvrk0ztuqYi0fVtDMEcc7nzPTqxnGdE0pTAIGkEpMDI9XYX55Moew7leibHI2vwlGaYUIaq_xrySn1SBYZcRHp_YDZvaaQyNPJXAVWCCqmzDa7wVWK1XlcPsNKt61DK9c7eSFejcnphw2U5Kl6uag_vaofuP6Fx2Z8Np5PZPLieT64a-_8WCd7Ia_MlFnQRl5tWtWtW0tkv6VYl7Z2S3n7JUVXS2Sk5rkq6OyUnruTfHTZXqbRZWttXN9GXmrWy_gXUce1v7RnUt_x_X08XoqI50A_FcnzWxyAANBrfKOwsWYVcjxNWR8ikK2NkNOUUVR9SjpNJm2mqpMtImU8XwqXUvmVcyHzOa88dBN9U9Bc)
|
|
1088
1218
|
|
|
1089
1219
|
```cypher
|
|
1090
1220
|
CREATE VIRTUAL (:Employee) AS {
|
|
@@ -1138,7 +1268,7 @@ Output:
|
|
|
1138
1268
|
| Amara Johnson | Product Manager | Product | +1-555-0302 | [Scrum, Data Analysis, Stakeholder Mgmt] | Priya Patel |
|
|
1139
1269
|
| Priya Patel | VP of Product | Product | +1-555-0301 | [Roadmapping, Analytics, UX] | Sara Chen |
|
|
1140
1270
|
|
|
1141
|
-
You can further explore the graph
|
|
1271
|
+
You can further explore the graph - for example, find the full management chain from any employee up to the CEO:
|
|
1142
1272
|
|
|
1143
1273
|
```cypher
|
|
1144
1274
|
MATCH (e:Employee)-[:REPORTS_TO*1..]->(mgr:Employee)
|
|
@@ -1166,6 +1296,472 @@ RETURN a.name AS employee1, b.name AS employee2, shared AS sharedSkills
|
|
|
1166
1296
|
ORDER BY size(shared) DESC
|
|
1167
1297
|
```
|
|
1168
1298
|
|
|
1299
|
+
### Virtual Country Borders Graph
|
|
1300
|
+
|
|
1301
|
+
This example pulls live data from the public [REST Countries](https://restcountries.com/) API and projects it into a `(:Country)-[:BORDERS]-(:Country)` virtual graph in a single semicolon-chained query, then ranks European countries by how many direct neighbors they have. [Try live!](https://microsoft.github.io/FlowQuery/?nZBRa4MwFIXf8yvumx1YXelbRxlWM-qwCtGujDGKxrQVrJFoB2Xsv49Eq3Ure1iekuvxnu8cD0dA-amoRcYqmMMnAgDwAsuB5zDw4YkEK9AOdV1WM9MUrKo7tUH50fyYGhMzzvPHXcbytJoX8ZHplMZTXbB9xgu95OUpj2t5TbhImag0sEKgyofgaE18oIb8A9pjhZClOoL-UEOulX5HXsjvymWoaOy6Da37UNOzKM0V2lDXgl52tU_09YCQTbAVYXhxSbS2PBjNbFXH-U4Kb5bXt_s7dpa2aduIXbY-UJ9lGGDA_zfa-G22CIiDSfg-_h_w2t-4vnPVjGzlVpSc7eqtzJMo8Gx_kE-Ft7Iiewmj-DZW0o3RZokJhvhSwBw0fBK8ZBpqzeKuqwb1rDeXUaJCFSzbHxIutmqIlAUsXn_MwcGhjTx35UYwuf8G)
|
|
1302
|
+
|
|
1303
|
+
A single shared `LET` binding fetches the data once. Both virtuals (the `(:Country)` nodes and the `(:Country)-[:BORDERS]-(:Country)` relationships) are projected from the same binding, so there is exactly one HTTP round-trip per Runner invocation. Add `REFRESH EVERY 1 HOUR` to the `LET` if you want the cache to auto-refresh on a schedule.
|
|
1304
|
+
|
|
1305
|
+
```cypher
|
|
1306
|
+
LET countries = {
|
|
1307
|
+
LOAD JSON FROM 'https://restcountries.com/v3.1/all?fields=name,cca3,region,population,borders' AS c
|
|
1308
|
+
RETURN c.cca3 AS id,
|
|
1309
|
+
c.name.common AS name,
|
|
1310
|
+
c.region AS region,
|
|
1311
|
+
c.population AS population,
|
|
1312
|
+
c.borders AS borders
|
|
1313
|
+
};
|
|
1314
|
+
CREATE VIRTUAL (:Country) AS {
|
|
1315
|
+
LOAD JSON FROM countries AS c
|
|
1316
|
+
RETURN c.id AS id, c.name AS name, c.region AS region, c.population AS population
|
|
1317
|
+
};
|
|
1318
|
+
CREATE VIRTUAL (:Country)-[:BORDERS]-(:Country) AS {
|
|
1319
|
+
LOAD JSON FROM countries AS c
|
|
1320
|
+
UNWIND c.borders AS b
|
|
1321
|
+
RETURN c.id AS left_id, b AS right_id
|
|
1322
|
+
};
|
|
1323
|
+
MATCH (a:Country)-[:BORDERS]-(b:Country)
|
|
1324
|
+
WHERE a.region = 'Europe'
|
|
1325
|
+
RETURN a.name AS country, count(b) AS neighbor_count
|
|
1326
|
+
ORDER BY neighbor_count DESC
|
|
1327
|
+
LIMIT 10
|
|
1328
|
+
```
|
|
1329
|
+
|
|
1330
|
+
## Lineage and Provenance
|
|
1331
|
+
|
|
1332
|
+
FlowQuery exposes two complementary forms of lineage:
|
|
1333
|
+
|
|
1334
|
+
- **Structural lineage** (`metadata.info`) - what labels, types,
|
|
1335
|
+
properties, and sources the parsed query touches. Available without
|
|
1336
|
+
running the query and with zero runtime overhead.
|
|
1337
|
+
- **Row-level provenance** (`runner.provenance`) - opt-in via
|
|
1338
|
+
`{ provenance: true }`; for every emitted result row, the concrete
|
|
1339
|
+
node ids and relationship hops bound to it, their matched property
|
|
1340
|
+
values, and (for virtual-backed records) the inner sub-query lineage
|
|
1341
|
+
that produced them.
|
|
1342
|
+
|
|
1343
|
+
Combined, they let you trace every cell of a result back to the source
|
|
1344
|
+
record, the source virtual, and ultimately the URL or call-site that
|
|
1345
|
+
backs it.
|
|
1346
|
+
|
|
1347
|
+
### Statement Info: Labels, Properties, and Source Lineage
|
|
1348
|
+
|
|
1349
|
+
`metadata.info` carries a `StatementInfo` describing the _structure_ the
|
|
1350
|
+
query touches - independent of execution. It captures:
|
|
1351
|
+
|
|
1352
|
+
- The node labels and relationship types referenced.
|
|
1353
|
+
- The data sources backing the underlying virtual definitions.
|
|
1354
|
+
- The node/relationship properties **consumed** by the query -
|
|
1355
|
+
`alias.prop` accesses anywhere in `MATCH`, `WHERE`, `WITH`, `RETURN`,
|
|
1356
|
+
`ORDER BY`, or function arguments, plus inline pattern properties
|
|
1357
|
+
like `(u:User {id: 'rick.o'})`.
|
|
1358
|
+
- The properties **declared** by each virtual's `RETURN` clause via
|
|
1359
|
+
`info.declared`, so you can validate that a query references only
|
|
1360
|
+
declared properties.
|
|
1361
|
+
- Literal values supplied for properties at the call site via
|
|
1362
|
+
`info.nodes[Label].literal_values` - collected from inline pattern
|
|
1363
|
+
properties and from equality / `IN` predicates such as
|
|
1364
|
+
`WHERE u.id = 'rick.o'` or `WHERE u.id IN ['a', 'b']`.
|
|
1365
|
+
|
|
1366
|
+
This is useful for governance, lineage UIs, query-cost estimation, schema
|
|
1367
|
+
validation, or routing decisions before the query runs.
|
|
1368
|
+
|
|
1369
|
+
The same `StatementInfoCrawler` can also be used directly on any parsed
|
|
1370
|
+
AST without going through a `Runner`:
|
|
1371
|
+
|
|
1372
|
+
```javascript
|
|
1373
|
+
import { StatementInfoCrawler } from "flowquery";
|
|
1374
|
+
|
|
1375
|
+
const crawler = new StatementInfoCrawler();
|
|
1376
|
+
const info = crawler.crawl(parsedAst);
|
|
1377
|
+
```
|
|
1378
|
+
|
|
1379
|
+
For end-to-end lineage from a property to its data source, use the
|
|
1380
|
+
per-entity `nodes` and `relationships` maps:
|
|
1381
|
+
|
|
1382
|
+
```javascript
|
|
1383
|
+
const runner = new FlowQuery(`
|
|
1384
|
+
CREATE VIRTUAL (:City) AS {
|
|
1385
|
+
LOAD JSON FROM "https://example.com/cities" AS c
|
|
1386
|
+
RETURN c.id AS id, c.name AS name, c.country AS country
|
|
1387
|
+
};
|
|
1388
|
+
CREATE VIRTUAL (:City)-[:FLIGHT]-(:City) AS {
|
|
1389
|
+
LOAD JSON FROM "https://example.com/flights" AS f
|
|
1390
|
+
RETURN f.left_id AS left_id, f.right_id AS right_id, f.airline AS airline
|
|
1391
|
+
};
|
|
1392
|
+
MATCH (a:City {name: 'NYC'})-[r:FLIGHT]->(b:City)
|
|
1393
|
+
WHERE b.country IN ['US', 'CA']
|
|
1394
|
+
RETURN a.name AS origin, b.name AS destination, r.airline AS airline
|
|
1395
|
+
`);
|
|
1396
|
+
const { info } = runner.metadata;
|
|
1397
|
+
|
|
1398
|
+
console.log(info.nodes);
|
|
1399
|
+
// {
|
|
1400
|
+
// City: {
|
|
1401
|
+
// properties: ["country", "name"],
|
|
1402
|
+
// sources: ["https://example.com/cities"],
|
|
1403
|
+
// literal_values: { country: ["US", "CA"], name: ["NYC"] }
|
|
1404
|
+
// }
|
|
1405
|
+
// }
|
|
1406
|
+
console.log(info.relationships);
|
|
1407
|
+
// {
|
|
1408
|
+
// FLIGHT: {
|
|
1409
|
+
// properties: ["airline"],
|
|
1410
|
+
// sources: ["https://example.com/flights"],
|
|
1411
|
+
// literal_values: {}
|
|
1412
|
+
// }
|
|
1413
|
+
// }
|
|
1414
|
+
console.log(info.declared.nodes.City);
|
|
1415
|
+
// { properties: ["country", "id", "name"], sources: ["https://example.com/cities"] }
|
|
1416
|
+
console.log(info.sources);
|
|
1417
|
+
// ["https://example.com/cities", "https://example.com/flights"]
|
|
1418
|
+
```
|
|
1419
|
+
|
|
1420
|
+
`StatementInfo` resolves sources and declared schemas for **any** virtual
|
|
1421
|
+
the query touches - both inline `CREATE VIRTUAL` clauses and
|
|
1422
|
+
previously-registered virtuals reached via `MATCH` or `DELETE`. The flat
|
|
1423
|
+
`node_labels`, `relationship_types`, `sources`, `node_properties`, and
|
|
1424
|
+
`relationship_properties` fields stay in sync with the per-entity `nodes` /
|
|
1425
|
+
`relationships` maps and are convenient for quick aggregate checks. Only
|
|
1426
|
+
purely literal AST subtrees end up in `literal_values` - values that depend
|
|
1427
|
+
on parameters, references, f-strings, or subqueries are skipped.
|
|
1428
|
+
|
|
1429
|
+
### Row-level Provenance: Node and Relationship IDs Behind Each Result
|
|
1430
|
+
|
|
1431
|
+
`StatementInfo` describes the _structural_ lineage of a query - which
|
|
1432
|
+
labels, types, and sources back it. To get the _row-level_ lineage -
|
|
1433
|
+
which concrete node ids and relationship `(left_id, right_id, type)`
|
|
1434
|
+
hops actually flowed into each result row - pass `{ provenance: true }`
|
|
1435
|
+
when constructing the runner and read `runner.provenance`:
|
|
1436
|
+
|
|
1437
|
+
```javascript
|
|
1438
|
+
const fq = new FlowQuery(
|
|
1439
|
+
`
|
|
1440
|
+
MATCH (a:City {name: 'NYC'})-[r:FLIGHT]->(b:City)
|
|
1441
|
+
RETURN a.name AS origin, b.name AS destination
|
|
1442
|
+
`,
|
|
1443
|
+
null,
|
|
1444
|
+
null,
|
|
1445
|
+
{ provenance: true }
|
|
1446
|
+
);
|
|
1447
|
+
await fq.run();
|
|
1448
|
+
|
|
1449
|
+
fq.results;
|
|
1450
|
+
// [{ origin: 'NYC', destination: 'LAX' }, { origin: 'NYC', destination: 'YYZ' }]
|
|
1451
|
+
|
|
1452
|
+
fq.provenance;
|
|
1453
|
+
// [
|
|
1454
|
+
// {
|
|
1455
|
+
// nodes: [
|
|
1456
|
+
// { alias: 'a', label: 'City', id: 'nyc' },
|
|
1457
|
+
// { alias: 'b', label: 'City', id: 'lax' }
|
|
1458
|
+
// ],
|
|
1459
|
+
// relationships: [
|
|
1460
|
+
// { alias: 'r', type: 'FLIGHT',
|
|
1461
|
+
// hops: [{ left_id: 'nyc', right_id: 'lax', type: 'FLIGHT' }],
|
|
1462
|
+
// path: ['nyc', 'lax'] }
|
|
1463
|
+
// ],
|
|
1464
|
+
// rows: [/* per-input-row segments, see below */]
|
|
1465
|
+
// },
|
|
1466
|
+
// { nodes: [...], relationships: [{ alias: 'r', type: 'FLIGHT',
|
|
1467
|
+
// hops: [{ left_id: 'nyc', right_id: 'yyz', type: 'FLIGHT' }],
|
|
1468
|
+
// path: ['nyc', 'yyz'] }], rows: [...] }
|
|
1469
|
+
// ]
|
|
1470
|
+
```
|
|
1471
|
+
|
|
1472
|
+
Semantics:
|
|
1473
|
+
|
|
1474
|
+
- `runner.provenance` is aligned **by index** with `runner.results`.
|
|
1475
|
+
- Each `NodeBinding.id` preserves the original scalar type of the
|
|
1476
|
+
underlying record (a numeric id stays a number).
|
|
1477
|
+
- Anonymous nodes / relationships from the pattern are included with
|
|
1478
|
+
`alias: null`.
|
|
1479
|
+
- Variable-length matches (`[:T*m..n]`) populate `hops` with every
|
|
1480
|
+
traversed edge in path order.
|
|
1481
|
+
- Every `RelationshipBinding` also carries a `path` field listing every
|
|
1482
|
+
visited node id in order: `[hops[0].left_id, hops[0].right_id,
|
|
1483
|
+
hops[1].right_id, …]`. For single-hop matches `path` has exactly two
|
|
1484
|
+
entries; for variable-length matches `path.length === hops.length + 1`.
|
|
1485
|
+
- `OPTIONAL MATCH` misses surface as `id: null` for the unmatched node
|
|
1486
|
+
and an empty `hops: []` for the unmatched relationship.
|
|
1487
|
+
- `ORDER BY` and `LIMIT` permute and truncate `provenance` in lockstep
|
|
1488
|
+
with `results`.
|
|
1489
|
+
- Aggregate `RETURN` (e.g. `count`, `collect`, `sum`) unions all
|
|
1490
|
+
contributing bindings into the output group's provenance, deduplicated
|
|
1491
|
+
per `(alias, id)` for nodes and per `(alias, hops)` for relationships.
|
|
1492
|
+
- `UNION ALL` concatenates branch provenance; `UNION` keeps the first
|
|
1493
|
+
branch's lineage for deduplicated rows.
|
|
1494
|
+
- Aggregating `WITH` clauses carry lineage forward. Inside the group, the
|
|
1495
|
+
contributing bindings (the upstream `MATCH` nodes and relationships)
|
|
1496
|
+
are deduplicated and frozen; any subsequent `MATCH` adds its own
|
|
1497
|
+
live bindings on top, so the final `RETURN` row's provenance shows
|
|
1498
|
+
both the pre-aggregation sources and the post-aggregation bindings.
|
|
1499
|
+
Chained aggregating `WITH` clauses compose transitively - the original
|
|
1500
|
+
ids and hops survive every aggregation hop.
|
|
1501
|
+
|
|
1502
|
+
When the option is omitted or set to `false`, the runner has zero
|
|
1503
|
+
provenance overhead and `runner.provenance` returns an empty array.
|
|
1504
|
+
|
|
1505
|
+
#### Per-Input-Row Segments: Aligning `collect()` with its Sources
|
|
1506
|
+
|
|
1507
|
+
Each `RowProvenance` also carries a `rows` array: one **segment** per
|
|
1508
|
+
input row that contributed to the result. A segment is just the
|
|
1509
|
+
`{ nodes, relationships }` slice for that single contributing row.
|
|
1510
|
+
|
|
1511
|
+
For non-aggregate rows `rows` always has length 1 and mirrors the
|
|
1512
|
+
top-level `nodes`/`relationships`. For aggregate rows the array
|
|
1513
|
+
positionally aligns with array-valued aggregates such as `collect`:
|
|
1514
|
+
|
|
1515
|
+
```cypher
|
|
1516
|
+
MATCH (a:City)-[:FLIGHT]->(b:City)
|
|
1517
|
+
RETURN a.country AS country, collect(b.name) AS destinations
|
|
1518
|
+
```
|
|
1519
|
+
|
|
1520
|
+
```javascript
|
|
1521
|
+
fq.results[0];
|
|
1522
|
+
// { country: 'US', destinations: ['LAX', 'YYZ'] }
|
|
1523
|
+
fq.provenance[0].rows.length; // 2
|
|
1524
|
+
fq.provenance[0].rows[0].nodes; // contributed LAX: includes b = lax
|
|
1525
|
+
fq.provenance[0].rows[1].nodes; // contributed YYZ: includes b = yyz
|
|
1526
|
+
```
|
|
1527
|
+
|
|
1528
|
+
This lets you map each element of a `collect` result (or each input to a `sum`/`avg`) back
|
|
1529
|
+
to the exact node / relationship ids that produced it.
|
|
1530
|
+
|
|
1531
|
+
#### Property-Level Lineage
|
|
1532
|
+
|
|
1533
|
+
Each `NodeBinding` and `RelationshipHop` produced under `{ provenance:
|
|
1534
|
+
true }` also carries the matched **property values** alongside the ids:
|
|
1535
|
+
|
|
1536
|
+
```javascript
|
|
1537
|
+
const fq = new FlowQuery(query, null, null, { provenance: true });
|
|
1538
|
+
await fq.run();
|
|
1539
|
+
|
|
1540
|
+
fq.provenance[0].nodes[0];
|
|
1541
|
+
// {
|
|
1542
|
+
// alias: 'a', label: 'City', id: 'nyc',
|
|
1543
|
+
// properties: { name: 'New York', country: 'US' }
|
|
1544
|
+
// }
|
|
1545
|
+
fq.provenance[0].relationships[0].hops[0];
|
|
1546
|
+
// {
|
|
1547
|
+
// left_id: 'nyc', right_id: 'lax', type: 'FLIGHT',
|
|
1548
|
+
// properties: { airline: 'AA' }
|
|
1549
|
+
// }
|
|
1550
|
+
```
|
|
1551
|
+
|
|
1552
|
+
- `NodeBinding.properties` is a shallow copy of the matched record with
|
|
1553
|
+
`id` and `_label` stripped. `RelationshipHop.properties` is a shallow
|
|
1554
|
+
copy of the matched relationship's user-visible properties.
|
|
1555
|
+
|
|
1556
|
+
#### Threading Lineage Through Virtual Sub-Queries
|
|
1557
|
+
|
|
1558
|
+
A `CREATE VIRTUAL (:X) AS { ... }` block wraps an inner FlowQuery that
|
|
1559
|
+
produces the synthesised records exposed under the `:X` label. By
|
|
1560
|
+
default, a downstream `MATCH (x:X)` only sees the synthesised row's
|
|
1561
|
+
`id` - the upstream query that produced it is opaque.
|
|
1562
|
+
|
|
1563
|
+
When `{ provenance: true }` is set, the inner runner's `RowProvenance`
|
|
1564
|
+
is threaded onto every binding whose record came from a virtual. Each
|
|
1565
|
+
`NodeBinding` and each `RelationshipHop` gains an optional `source:
|
|
1566
|
+
RowProvenance` field carrying the inner row's full lineage -
|
|
1567
|
+
recursively, when a virtual matches another virtual:
|
|
1568
|
+
|
|
1569
|
+
```typescript
|
|
1570
|
+
import { Runner } from "flowquery";
|
|
1571
|
+
|
|
1572
|
+
// Virtual graph: derived city = US-only subset of SrcCity.
|
|
1573
|
+
await new Runner(`
|
|
1574
|
+
CREATE VIRTUAL (:SrcCity) AS {
|
|
1575
|
+
UNWIND [
|
|
1576
|
+
{ id: 'nyc', country: 'US' },
|
|
1577
|
+
{ id: 'lhr', country: 'UK' }
|
|
1578
|
+
] AS c
|
|
1579
|
+
RETURN c.id AS id, c.country AS country
|
|
1580
|
+
}
|
|
1581
|
+
`).run();
|
|
1582
|
+
await new Runner(`
|
|
1583
|
+
CREATE VIRTUAL (:DerivedCity) AS {
|
|
1584
|
+
MATCH (s:SrcCity)
|
|
1585
|
+
WHERE s.country = 'US'
|
|
1586
|
+
RETURN s.id AS id
|
|
1587
|
+
}
|
|
1588
|
+
`).run();
|
|
1589
|
+
|
|
1590
|
+
const fq = new Runner(`MATCH (d:DerivedCity) RETURN d.id AS id`, null, null, { provenance: true });
|
|
1591
|
+
await fq.run();
|
|
1592
|
+
|
|
1593
|
+
fq.provenance[0].nodes[0];
|
|
1594
|
+
// {
|
|
1595
|
+
// alias: 'd', label: 'DerivedCity', id: 'nyc',
|
|
1596
|
+
// source: {
|
|
1597
|
+
// nodes: [{ alias: 's', label: 'SrcCity', id: 'nyc' }],
|
|
1598
|
+
// relationships: []
|
|
1599
|
+
// }
|
|
1600
|
+
// }
|
|
1601
|
+
```
|
|
1602
|
+
|
|
1603
|
+
Semantics:
|
|
1604
|
+
|
|
1605
|
+
- The `source` field is **omitted** when the binding's record did not
|
|
1606
|
+
come from a virtual sub-query (in contrast, records from `UNWIND … RETURN`
|
|
1607
|
+
inside the virtual produce a `source` with empty `nodes` and
|
|
1608
|
+
`relationships`, signalling "lineage was threaded but no graph slots
|
|
1609
|
+
were bound at this level").
|
|
1610
|
+
- Sub-query lineage is **recursive**: a virtual that matches another
|
|
1611
|
+
virtual carries nested `source` chains all the way down.
|
|
1612
|
+
- Provenance mode bypasses the static-virtual cache because each
|
|
1613
|
+
invocation must produce fresh records to back the lineage weak-map.
|
|
1614
|
+
Static caching continues to apply when `provenance` is off.
|
|
1615
|
+
|
|
1616
|
+
#### Data Sources and LET Chaining
|
|
1617
|
+
|
|
1618
|
+
`LOAD` operations contribute row-level `data_sources` entries on each
|
|
1619
|
+
emitted `RowSegment`: one `{ source, source_provenance? }` per loaded
|
|
1620
|
+
record. `source` is the URL, file URI, async-function name, or
|
|
1621
|
+
`let://<name>` reference; `source_provenance` is the inner
|
|
1622
|
+
`RowProvenance` of the source row when the `LOAD` was reading from a
|
|
1623
|
+
`LET`-bound dataset whose right-hand side itself produced lineage.
|
|
1624
|
+
|
|
1625
|
+
The same chain surfaces structurally. `info.sources` and the per-label
|
|
1626
|
+
`info.nodes[Label].sources` arrays follow `LOAD FROM <letName>`
|
|
1627
|
+
references and emit a `let://<name>` entry, plus the underlying
|
|
1628
|
+
sources the `LET` sub-query touched (when both definitions are
|
|
1629
|
+
visible to the crawler in the same query).
|
|
1630
|
+
|
|
1631
|
+
```cypher
|
|
1632
|
+
LET cities = { LOAD JSON FROM 'file:///data/cities.json' AS c RETURN c.id AS id, c.name AS name };
|
|
1633
|
+
CREATE VIRTUAL (:City) AS { LOAD JSON FROM cities AS c RETURN c.id AS id, c.name AS name };
|
|
1634
|
+
MATCH (c:City) RETURN c.name AS name
|
|
1635
|
+
```
|
|
1636
|
+
|
|
1637
|
+
```javascript
|
|
1638
|
+
runner.metadata.info.nodes.City.sources;
|
|
1639
|
+
// ['file:///data/cities.json', 'let://cities']
|
|
1640
|
+
|
|
1641
|
+
runner.provenance[0].nodes[0].source.data_sources[0];
|
|
1642
|
+
// {
|
|
1643
|
+
// source: 'let://cities',
|
|
1644
|
+
// source_provenance: {
|
|
1645
|
+
// nodes: [], relationships: [],
|
|
1646
|
+
// data_sources: [{ source: 'file:///data/cities.json' }]
|
|
1647
|
+
// }
|
|
1648
|
+
// }
|
|
1649
|
+
```
|
|
1650
|
+
|
|
1651
|
+
#### Column-Level Lineage: Tracing Each Result Cell to Its Source
|
|
1652
|
+
|
|
1653
|
+
`runner.info.returns` (added to `StatementInfo`) maps every output
|
|
1654
|
+
column to the `alias.property` accesses that compose it. Combined with
|
|
1655
|
+
row provenance it gives you per-cell traceability - value → source
|
|
1656
|
+
binding → node id → source URL - without any runtime AST inspection.
|
|
1657
|
+
|
|
1658
|
+
For the following query:
|
|
1659
|
+
|
|
1660
|
+
```cypher
|
|
1661
|
+
MATCH (c:City)-[f:FLIGHT]->(d:City)
|
|
1662
|
+
WHERE c.country = 'US'
|
|
1663
|
+
RETURN c.name AS origin, d.name AS destination, f.airline AS airline
|
|
1664
|
+
```
|
|
1665
|
+
|
|
1666
|
+
`runner.info.returns` is:
|
|
1667
|
+
|
|
1668
|
+
```javascript
|
|
1669
|
+
{
|
|
1670
|
+
origin: {
|
|
1671
|
+
references: [{ alias: 'c', kind: 'node', labels: ['City'], property: 'name' }],
|
|
1672
|
+
kind: 'property'
|
|
1673
|
+
},
|
|
1674
|
+
destination: {
|
|
1675
|
+
references: [{ alias: 'd', kind: 'node', labels: ['City'], property: 'name' }],
|
|
1676
|
+
kind: 'property'
|
|
1677
|
+
},
|
|
1678
|
+
airline: {
|
|
1679
|
+
references: [{
|
|
1680
|
+
alias: 'f', kind: 'relationship',
|
|
1681
|
+
labels: ['FLIGHT'], property: 'airline'
|
|
1682
|
+
}],
|
|
1683
|
+
kind: 'property'
|
|
1684
|
+
}
|
|
1685
|
+
}
|
|
1686
|
+
```
|
|
1687
|
+
|
|
1688
|
+
`kind` summarises how the column was built:
|
|
1689
|
+
|
|
1690
|
+
| `kind` | Meaning |
|
|
1691
|
+
| -------------- | ------------------------------------------------------------------------- |
|
|
1692
|
+
| `'literal'` | Pure literal expression, no bindings (e.g. `42 AS answer`). |
|
|
1693
|
+
| `'property'` | Direct `alias.property` projection (or pass-through). |
|
|
1694
|
+
| `'expression'` | Computed from one or more `alias.property` accesses. |
|
|
1695
|
+
| `'aggregate'` | Aggregate function (`count`, `sum`, `collect`, …); see `aggregate` field. |
|
|
1696
|
+
|
|
1697
|
+
Notes:
|
|
1698
|
+
|
|
1699
|
+
- The map is keyed by the column's output alias (the part after `AS`),
|
|
1700
|
+
falling back to `expr0`, `expr1`, … for unnamed columns.
|
|
1701
|
+
- `count(c)` and similar aggregates over a bare binding produce
|
|
1702
|
+
`kind: 'aggregate'`, `aggregate: 'count'`, and an empty `references`
|
|
1703
|
+
list (no specific property is read). Use `count(c.id)` to surface a
|
|
1704
|
+
property reference.
|
|
1705
|
+
- Multi-label intersection matches (`MATCH (n:A:B)`) populate every
|
|
1706
|
+
label in `references[i].labels`.
|
|
1707
|
+
|
|
1708
|
+
#### Combining Lineage and Provenance: `traceRow()` and `lineage()`
|
|
1709
|
+
|
|
1710
|
+
`info.returns` (structural) and `runner.provenance` (runtime) are kept
|
|
1711
|
+
as separate streams so each is useful on its own. When you want both
|
|
1712
|
+
joined per cell, the `Runner` exposes two convenience methods.
|
|
1713
|
+
|
|
1714
|
+
`runner.traceRow(rowIndex)` returns one `CellTrace` per output column
|
|
1715
|
+
for a single row, pairing the column's structural lineage with the
|
|
1716
|
+
node / relationship bindings whose alias matches it. The matched
|
|
1717
|
+
property value is extracted for you (including the built-ins `id`,
|
|
1718
|
+
`left_id`, `right_id`, `type`):
|
|
1719
|
+
|
|
1720
|
+
```javascript
|
|
1721
|
+
const runner = new FlowQuery(
|
|
1722
|
+
`
|
|
1723
|
+
MATCH (c:City)-[f:FLIGHT]->(d:City)
|
|
1724
|
+
WHERE c.country = 'US'
|
|
1725
|
+
RETURN c.name AS origin, d.name AS destination, f.airline AS airline
|
|
1726
|
+
`,
|
|
1727
|
+
null,
|
|
1728
|
+
null,
|
|
1729
|
+
{ provenance: true }
|
|
1730
|
+
);
|
|
1731
|
+
await runner.run();
|
|
1732
|
+
|
|
1733
|
+
const trace = runner.traceRow(0);
|
|
1734
|
+
// trace.origin = {
|
|
1735
|
+
// column: 'origin',
|
|
1736
|
+
// value: 'New York',
|
|
1737
|
+
// lineage: { references: [{ alias: 'c', kind: 'node', labels: ['City'], property: 'name' }], kind: 'property' },
|
|
1738
|
+
// bindings: [{
|
|
1739
|
+
// reference: { alias: 'c', kind: 'node', labels: ['City'], property: 'name' },
|
|
1740
|
+
// value: 'New York',
|
|
1741
|
+
// node: { alias: 'c', label: 'City', id: 'nyc', properties: { name: 'New York', country: 'US' }, source: {...} }
|
|
1742
|
+
// }]
|
|
1743
|
+
// }
|
|
1744
|
+
// trace.airline.bindings[0].relationship.hops[0].properties.airline === 'AA'
|
|
1745
|
+
```
|
|
1746
|
+
|
|
1747
|
+
`runner.lineage()` is the one-shot equivalent over the entire result
|
|
1748
|
+
set:
|
|
1749
|
+
|
|
1750
|
+
```javascript
|
|
1751
|
+
const report = runner.lineage();
|
|
1752
|
+
// report.columns: same shape as info.returns (structural per-column).
|
|
1753
|
+
// report.rows[i]: same shape as traceRow(i) (per-cell trace).
|
|
1754
|
+
```
|
|
1755
|
+
|
|
1756
|
+
`bindings` is empty for literal columns, when the runner was
|
|
1757
|
+
constructed without `{ provenance: true }`, or when a row's bindings
|
|
1758
|
+
don't intersect the column's references. Aggregate columns like
|
|
1759
|
+
`collect(c.id)` surface one binding per contributing input row, so
|
|
1760
|
+
`trace.ids.bindings.map(b => b.value)` (for `collect(c.id) AS ids`) aligns with the collected
|
|
1761
|
+
array. `info`, `provenance`, and `metadata` remain available unchanged.
|
|
1762
|
+
|
|
1763
|
+
`traceRow` / `lineage` are purely additive helpers.
|
|
1764
|
+
|
|
1169
1765
|
## Contributing
|
|
1170
1766
|
|
|
1171
1767
|
This project welcomes contributions and suggestions. Most contributions require you to agree to a
|