@lde/pipeline 0.21.0 → 0.22.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -130,7 +130,11 @@ This keeps SPARQL doing the heavy lifting while TypeScript handles the edge case
130
130
 
131
131
  ### Validation
132
132
 
133
- Stages can optionally validate their output quads against a `Validator`. Validation operates on the combined output of all executors per batch, so shapes that span multiple executors' output are validated correctly. Quads are buffered, validated, and then written or discarded based on the `onInvalid` policy. When no validator is configured, quads stream directly with zero overhead.
133
+ Stages can optionally validate their output quads against a `Validator`. Validation operates on the **combined output of all executors per batch**, not on individual quads or per-executor output. A batch produces a complete result set — a self-contained cluster of linked resources that can be meaningfully matched against SHACL shapes. Even with a single executor, each batch is a complete unit; with multiple executors, shapes that reference triples from different executors are validated correctly.
134
+
135
+ Validating individual quads would be meaningless, since a single quad carries no structural context for shape matching. Validating the full pipeline output would also be problematic: because the pipeline streams results in batches, it doesn’t know where resource cluster boundaries fall. Batching the output could split a valid cluster across two batches, causing partial resources to fail validation even though the complete cluster is valid.
136
+
137
+ Quads are buffered, validated, and then written or discarded based on the `onInvalid` policy. When no validator is configured, quads stream directly with zero overhead.
134
138
 
135
139
  ```typescript
136
140
  import { ShaclValidator } from '@lde/pipeline-shacl-validator';
@@ -1,10 +1,16 @@
1
- import { type QueryConstruct } from '@traqula/rules-sparql-1-1';
1
+ import { type Pattern, type QueryConstruct, type QuerySelect } from '@traqula/rules-sparql-1-1';
2
2
  import type { VariableBindings } from './executor.js';
3
+ /**
4
+ * Find the first SubSelect within a list of patterns, looking through
5
+ * intermediate group patterns (the parser wraps `{ SELECT }` in a group).
6
+ */
7
+ export declare function findSubSelect(patterns: Pattern[]): QuerySelect | undefined;
3
8
  /**
4
9
  * Inject a VALUES clause into a parsed CONSTRUCT query for the given binding rows.
5
10
  *
6
11
  * Each row's keys become SPARQL variables; NamedNode values become IRIs in the
7
- * VALUES block. The VALUES clause is prepended to the query's WHERE patterns.
12
+ * VALUES block. The VALUES clause is injected into the innermost subquery so
13
+ * that SPARQL engines can constrain scans early.
8
14
  *
9
15
  * The caller owns parsing and stringifying; this function operates on the AST.
10
16
  */
@@ -1 +1 @@
1
- {"version":3,"file":"values.d.ts","sourceRoot":"","sources":["../../src/sparql/values.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,cAAc,EAEpB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAItD;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,cAAc,CAoBhB"}
1
+ {"version":3,"file":"values.d.ts","sourceRoot":"","sources":["../../src/sparql/values.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,OAAO,EAGZ,KAAK,cAAc,EACnB,KAAK,WAAW,EAEjB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAItD;;;GAGG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,WAAW,GAAG,SAAS,CAW1E;AA0DD;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,cAAc,CAoBhB"}
@@ -1,10 +1,72 @@
1
1
  import { AstFactory, } from '@traqula/rules-sparql-1-1';
2
2
  const F = new AstFactory();
3
+ /**
4
+ * Find the first SubSelect within a list of patterns, looking through
5
+ * intermediate group patterns (the parser wraps `{ SELECT }` in a group).
6
+ */
7
+ export function findSubSelect(patterns) {
8
+ for (const pattern of patterns) {
9
+ if (F.isQuerySelect(pattern)) {
10
+ return pattern;
11
+ }
12
+ if (pattern.subType === 'group') {
13
+ const found = findSubSelect(pattern.patterns);
14
+ if (found)
15
+ return found;
16
+ }
17
+ }
18
+ return undefined;
19
+ }
20
+ /**
21
+ * Single-pass find-and-replace: walk through patterns to locate the SubSelect
22
+ * (looking through group wrappers) and return a new array with it replaced.
23
+ * Returns `undefined` if no SubSelect was found.
24
+ */
25
+ function mapSubSelect(patterns, replacer) {
26
+ for (let index = 0; index < patterns.length; index++) {
27
+ const pattern = patterns[index];
28
+ if (F.isQuerySelect(pattern)) {
29
+ const newPatterns = [...patterns];
30
+ newPatterns[index] = replacer(pattern);
31
+ return newPatterns;
32
+ }
33
+ if (pattern.subType === 'group') {
34
+ const group = pattern;
35
+ const innerResult = mapSubSelect(group.patterns, replacer);
36
+ if (innerResult) {
37
+ const newPatterns = [...patterns];
38
+ newPatterns[index] = F.patternGroup(innerResult, F.gen());
39
+ return newPatterns;
40
+ }
41
+ }
42
+ }
43
+ return undefined;
44
+ }
45
+ /**
46
+ * Recursively walk through nested SubSelect patterns and inject the VALUES
47
+ * clause into the innermost WHERE clause. This ensures that SPARQL engines
48
+ * constrain scans at the deepest level rather than only at the outer scope.
49
+ *
50
+ * For flat queries (no SubSelect), the base case injects directly — identical
51
+ * to the previous behavior.
52
+ */
53
+ function injectIntoInnermost(where, valuesPattern) {
54
+ const mapped = mapSubSelect(where.patterns, (subSelect) => ({
55
+ ...subSelect,
56
+ where: injectIntoInnermost(subSelect.where, valuesPattern),
57
+ }));
58
+ if (!mapped) {
59
+ // Base case: no SubSelect — inject here.
60
+ return F.patternGroup([valuesPattern, ...where.patterns], F.gen());
61
+ }
62
+ return F.patternGroup(mapped, F.gen());
63
+ }
3
64
  /**
4
65
  * Inject a VALUES clause into a parsed CONSTRUCT query for the given binding rows.
5
66
  *
6
67
  * Each row's keys become SPARQL variables; NamedNode values become IRIs in the
7
- * VALUES block. The VALUES clause is prepended to the query's WHERE patterns.
68
+ * VALUES block. The VALUES clause is injected into the innermost subquery so
69
+ * that SPARQL engines can constrain scans early.
8
70
  *
9
71
  * The caller owns parsing and stringifying; this function operates on the AST.
10
72
  */
@@ -18,6 +80,6 @@ export function injectValues(query, bindings) {
18
80
  const valuesPattern = F.patternValues(variables, values, F.gen());
19
81
  return {
20
82
  ...query,
21
- where: F.patternGroup([valuesPattern, ...query.where.patterns], F.gen()),
83
+ where: injectIntoInnermost(query.where, valuesPattern),
22
84
  };
23
85
  }
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAqCjB,eAAe;IAuH7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACrE"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAkDjB,eAAe;IA2I7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACrE"}
package/dist/stage.js CHANGED
@@ -39,11 +39,22 @@ export class Stage {
39
39
  buffer.push(quad);
40
40
  }
41
41
  }
42
- const accepted = await this.validateBuffer(buffer, dataset);
43
- if (accepted.length > 0) {
44
- await writer.write(dataset, (async function* () {
45
- yield* accepted;
46
- })());
42
+ const onInvalid = this.validation.onInvalid ?? 'write';
43
+ if (onInvalid === 'write') {
44
+ await Promise.all([
45
+ writer.write(dataset, (async function* () {
46
+ yield* buffer;
47
+ })()),
48
+ this.validation.validator.validate(buffer, dataset),
49
+ ]);
50
+ }
51
+ else {
52
+ const accepted = await this.validateBuffer(buffer, dataset);
53
+ if (accepted.length > 0) {
54
+ await writer.write(dataset, (async function* () {
55
+ yield* accepted;
56
+ })());
57
+ }
47
58
  }
48
59
  }
49
60
  else {
@@ -74,6 +85,8 @@ export class Stage {
74
85
  let itemsProcessed = 0;
75
86
  let quadsGenerated = 0;
76
87
  let hasResults = false;
88
+ const onInvalid = this.validation?.onInvalid ?? 'write';
89
+ const pendingValidations = [];
77
90
  const dispatch = async () => {
78
91
  const inFlight = new Set();
79
92
  let firstError;
@@ -109,13 +122,25 @@ export class Stage {
109
122
  }
110
123
  }
111
124
  }
112
- let accepted = batchQuads;
113
- if (this.validation && batchQuads.length > 0) {
114
- accepted = await this.validateBuffer(batchQuads, dataset);
125
+ if (this.validation &&
126
+ batchQuads.length > 0 &&
127
+ onInvalid !== 'write') {
128
+ // 'skip' or 'halt': must await validation before deciding to write.
129
+ const accepted = await this.validateBuffer(batchQuads, dataset);
130
+ for (const quad of accepted) {
131
+ await queue.push(quad);
132
+ quadsGenerated++;
133
+ }
115
134
  }
116
- for (const quad of accepted) {
117
- await queue.push(quad);
118
- quadsGenerated++;
135
+ else {
136
+ for (const quad of batchQuads) {
137
+ await queue.push(quad);
138
+ quadsGenerated++;
139
+ }
140
+ if (this.validation && batchQuads.length > 0) {
141
+ // 'write' mode: validate concurrently without blocking the write path.
142
+ pendingValidations.push(this.validation.validator.validate(batchQuads, dataset));
143
+ }
119
144
  }
120
145
  itemsProcessed += bindings.length;
121
146
  options?.onProgress?.(itemsProcessed, quadsGenerated);
@@ -127,6 +152,8 @@ export class Stage {
127
152
  }
128
153
  // Wait for all remaining in-flight tasks to settle.
129
154
  await Promise.all(inFlight);
155
+ // Ensure all background validations complete before report() is called.
156
+ await Promise.all(pendingValidations);
130
157
  if (firstError) {
131
158
  queue.abort(firstError);
132
159
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.21.0",
3
+ "version": "0.22.1",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"
@@ -33,7 +33,7 @@
33
33
  "@traqula/rules-sparql-1-1": "^1.0.3",
34
34
  "fetch-sparql-endpoint": "^7.1.0",
35
35
  "filenamify-url": "^4.0.0",
36
- "n3": "^2.0.1",
36
+ "n3": "^2.0.3",
37
37
  "tslib": "^2.3.0"
38
38
  }
39
39
  }