@lde/pipeline 0.22.0 → 0.23.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -130,7 +130,11 @@ This keeps SPARQL doing the heavy lifting while TypeScript handles the edge case
130
130
 
131
131
  ### Validation
132
132
 
133
- Stages can optionally validate their output quads against a `Validator`. Validation operates on the combined output of all executors per batch, so shapes that span multiple executors' output are validated correctly. Quads are buffered, validated, and then written or discarded based on the `onInvalid` policy. When no validator is configured, quads stream directly with zero overhead.
133
+ Stages can optionally validate their output quads against a `Validator`. Validation operates on the **combined output of all executors per batch**, not on individual quads or per-executor output. A batch produces a complete result set: a self-contained cluster of linked resources that can be meaningfully matched against SHACL shapes. Even with a single executor, each batch is a complete unit; with multiple executors, shapes that reference triples from different executors are validated correctly.
134
+
135
+ Validating individual quads would be meaningless, since a single quad carries no structural context for shape matching. Validating the full pipeline output would also be problematic: because the pipeline streams results in batches, it doesn’t know where resource cluster boundaries fall. Batching the output could split a valid cluster across two batches, causing partial resources to fail validation even though the complete cluster is valid.
136
+
137
+ Quads are buffered, validated, and then written or discarded based on the `onInvalid` policy. When no validator is configured, quads stream directly with zero overhead.
134
138
 
135
139
  ```typescript
136
140
  import { ShaclValidator } from '@lde/pipeline-shacl-validator';
@@ -33,6 +33,11 @@ export interface SparqlConstructExecutorOptions {
33
33
  * @default 300000 (5 minutes)
34
34
  */
35
35
  timeout?: number;
36
+ /**
37
+ * Number of retries for transient HTTP errors (502, 503, 504).
38
+ * @default 3
39
+ */
40
+ retries?: number;
36
41
  /**
37
42
  * Optional custom SparqlEndpointFetcher instance.
38
43
  */
@@ -69,6 +74,7 @@ export declare class SparqlConstructExecutor implements Executor {
69
74
  private readonly rawQuery;
70
75
  private readonly preParsed?;
71
76
  private readonly fetcher;
77
+ private readonly retries;
72
78
  private readonly generator;
73
79
  constructor(options: SparqlConstructExecutorOptions);
74
80
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AASpD;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAkBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAiC/B;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
1
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAUpD;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAmBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAuC/B;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
@@ -3,6 +3,7 @@ import { readFile } from 'node:fs/promises';
3
3
  import { resolve } from 'node:path';
4
4
  import { Parser } from '@traqula/parser-sparql-1-1';
5
5
  import { Generator } from '@traqula/generator-sparql-1-1';
6
+ import pRetry from 'p-retry';
6
7
  import { withDefaultGraph } from './graph.js';
7
8
  import { injectValues } from './values.js';
8
9
  /**
@@ -45,9 +46,11 @@ export class SparqlConstructExecutor {
45
46
  rawQuery;
46
47
  preParsed;
47
48
  fetcher;
49
+ retries;
48
50
  generator = new Generator();
49
51
  constructor(options) {
50
52
  this.rawQuery = options.query;
53
+ this.retries = options.retries ?? 3;
51
54
  if (!options.query.includes('#subjectFilter#')) {
52
55
  const parsed = new Parser().parse(options.query);
53
56
  if (parsed.type !== 'query' || parsed.subType !== 'construct') {
@@ -92,7 +95,10 @@ export class SparqlConstructExecutor {
92
95
  }
93
96
  let query = this.generator.generate(ast);
94
97
  query = query.replaceAll('?dataset', `<${dataset.iri}>`);
95
- return await this.fetcher.fetchTriples(endpoint.toString(), query);
98
+ return await pRetry(() => this.fetcher.fetchTriples(endpoint.toString(), query), {
99
+ retries: this.retries,
100
+ shouldRetry: ({ error }) => isTransientHttpError(error),
101
+ });
96
102
  }
97
103
  /**
98
104
  * Create an executor from a query file.
@@ -111,3 +117,13 @@ export class SparqlConstructExecutor {
111
117
  export async function readQueryFile(filename) {
112
118
  return (await readFile(resolve(filename))).toString();
113
119
  }
120
+ const transientStatusPattern = /HTTP status (\d+)/;
121
+ function isTransientHttpError(error) {
122
+ if (!(error instanceof Error))
123
+ return false;
124
+ const match = error.message.match(transientStatusPattern);
125
+ if (!match)
126
+ return false;
127
+ const status = Number(match[1]);
128
+ return status === 502 || status === 503 || status === 504;
129
+ }
@@ -1,10 +1,16 @@
1
- import { type QueryConstruct } from '@traqula/rules-sparql-1-1';
1
+ import { type Pattern, type QueryConstruct, type QuerySelect } from '@traqula/rules-sparql-1-1';
2
2
  import type { VariableBindings } from './executor.js';
3
+ /**
4
+ * Find the first SubSelect within a list of patterns, looking through
5
+ * intermediate group patterns (the parser wraps `{ SELECT }` in a group).
6
+ */
7
+ export declare function findSubSelect(patterns: Pattern[]): QuerySelect | undefined;
3
8
  /**
4
9
  * Inject a VALUES clause into a parsed CONSTRUCT query for the given binding rows.
5
10
  *
6
11
  * Each row's keys become SPARQL variables; NamedNode values become IRIs in the
7
- * VALUES block. The VALUES clause is prepended to the query's WHERE patterns.
12
+ * VALUES block. The VALUES clause is injected into the innermost subquery so
13
+ * that SPARQL engines can constrain scans early.
8
14
  *
9
15
  * The caller owns parsing and stringifying; this function operates on the AST.
10
16
  */
@@ -1 +1 @@
1
- {"version":3,"file":"values.d.ts","sourceRoot":"","sources":["../../src/sparql/values.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,cAAc,EAEpB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAItD;;;;;;;GAOG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,cAAc,CAoBhB"}
1
+ {"version":3,"file":"values.d.ts","sourceRoot":"","sources":["../../src/sparql/values.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,OAAO,EAGZ,KAAK,cAAc,EACnB,KAAK,WAAW,EAEjB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAItD;;;GAGG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,WAAW,GAAG,SAAS,CAW1E;AA0DD;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,cAAc,CAoBhB"}
@@ -1,10 +1,72 @@
1
1
  import { AstFactory, } from '@traqula/rules-sparql-1-1';
2
2
  const F = new AstFactory();
3
+ /**
4
+ * Find the first SubSelect within a list of patterns, looking through
5
+ * intermediate group patterns (the parser wraps `{ SELECT }` in a group).
6
+ */
7
+ export function findSubSelect(patterns) {
8
+ for (const pattern of patterns) {
9
+ if (F.isQuerySelect(pattern)) {
10
+ return pattern;
11
+ }
12
+ if (pattern.subType === 'group') {
13
+ const found = findSubSelect(pattern.patterns);
14
+ if (found)
15
+ return found;
16
+ }
17
+ }
18
+ return undefined;
19
+ }
20
+ /**
21
+ * Single-pass find-and-replace: walk through patterns to locate the SubSelect
22
+ * (looking through group wrappers) and return a new array with it replaced.
23
+ * Returns `undefined` if no SubSelect was found.
24
+ */
25
+ function mapSubSelect(patterns, replacer) {
26
+ for (let index = 0; index < patterns.length; index++) {
27
+ const pattern = patterns[index];
28
+ if (F.isQuerySelect(pattern)) {
29
+ const newPatterns = [...patterns];
30
+ newPatterns[index] = replacer(pattern);
31
+ return newPatterns;
32
+ }
33
+ if (pattern.subType === 'group') {
34
+ const group = pattern;
35
+ const innerResult = mapSubSelect(group.patterns, replacer);
36
+ if (innerResult) {
37
+ const newPatterns = [...patterns];
38
+ newPatterns[index] = F.patternGroup(innerResult, F.gen());
39
+ return newPatterns;
40
+ }
41
+ }
42
+ }
43
+ return undefined;
44
+ }
45
+ /**
46
+ * Recursively walk through nested SubSelect patterns and inject the VALUES
47
+ * clause into the innermost WHERE clause. This ensures that SPARQL engines
48
+ * constrain scans at the deepest level rather than only at the outer scope.
49
+ *
50
+ * For flat queries (no SubSelect), the base case injects directly — identical
51
+ * to the previous behavior.
52
+ */
53
+ function injectIntoInnermost(where, valuesPattern) {
54
+ const mapped = mapSubSelect(where.patterns, (subSelect) => ({
55
+ ...subSelect,
56
+ where: injectIntoInnermost(subSelect.where, valuesPattern),
57
+ }));
58
+ if (!mapped) {
59
+ // Base case: no SubSelect — inject here.
60
+ return F.patternGroup([valuesPattern, ...where.patterns], F.gen());
61
+ }
62
+ return F.patternGroup(mapped, F.gen());
63
+ }
3
64
  /**
4
65
  * Inject a VALUES clause into a parsed CONSTRUCT query for the given binding rows.
5
66
  *
6
67
  * Each row's keys become SPARQL variables; NamedNode values become IRIs in the
7
- * VALUES block. The VALUES clause is prepended to the query's WHERE patterns.
68
+ * VALUES block. The VALUES clause is injected into the innermost subquery so
69
+ * that SPARQL engines can constrain scans early.
8
70
  *
9
71
  * The caller owns parsing and stringifying; this function operates on the AST.
10
72
  */
@@ -18,6 +80,6 @@ export function injectValues(query, bindings) {
18
80
  const valuesPattern = F.patternValues(variables, values, F.gen());
19
81
  return {
20
82
  ...query,
21
- where: F.patternGroup([valuesPattern, ...query.where.patterns], F.gen()),
83
+ where: injectIntoInnermost(query.where, valuesPattern),
22
84
  };
23
85
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.22.0",
3
+ "version": "0.23.0",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"
@@ -34,6 +34,7 @@
34
34
  "fetch-sparql-endpoint": "^7.1.0",
35
35
  "filenamify-url": "^4.0.0",
36
36
  "n3": "^2.0.3",
37
+ "p-retry": "^7.1.1",
37
38
  "tslib": "^2.3.0"
38
39
  }
39
40
  }