@lde/pipeline 0.22.0 → 0.23.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
|
@@ -130,7 +130,11 @@ This keeps SPARQL doing the heavy lifting while TypeScript handles the edge case
|
|
|
130
130
|
|
|
131
131
|
### Validation
|
|
132
132
|
|
|
133
|
-
Stages can optionally validate their output quads against a `Validator`. Validation operates on the combined output of all executors per batch
|
|
133
|
+
Stages can optionally validate their output quads against a `Validator`. Validation operates on the **combined output of all executors per batch**, not on individual quads or per-executor output. A batch produces a complete result set — a self-contained cluster of linked resources — that can be meaningfully matched against SHACL shapes. Even with a single executor, each batch is a complete unit; with multiple executors, shapes that reference triples from different executors are validated correctly.
|
|
134
|
+
|
|
135
|
+
Validating individual quads would be meaningless, since a single quad carries no structural context for shape matching. Validating the pipeline’s overall output stream would also be problematic: at that level the pipeline no longer knows where resource cluster boundaries fall, so splitting the stream into chunks for validation could cut a valid cluster in two, causing partial resources to fail validation even though the complete cluster is valid.
|
|
136
|
+
|
|
137
|
+
Quads are buffered, validated, and then written or discarded based on the `onInvalid` policy. When no validator is configured, quads stream directly with zero overhead.
|
|
134
138
|
|
|
135
139
|
```typescript
|
|
136
140
|
import { ShaclValidator } from '@lde/pipeline-shacl-validator';
|
|
@@ -33,6 +33,11 @@ export interface SparqlConstructExecutorOptions {
|
|
|
33
33
|
* @default 300000 (5 minutes)
|
|
34
34
|
*/
|
|
35
35
|
timeout?: number;
|
|
36
|
+
/**
|
|
37
|
+
* Number of retries for transient HTTP errors (502, 503, 504).
|
|
38
|
+
* @default 3
|
|
39
|
+
*/
|
|
40
|
+
retries?: number;
|
|
36
41
|
/**
|
|
37
42
|
* Optional custom SparqlEndpointFetcher instance.
|
|
38
43
|
*/
|
|
@@ -69,6 +74,7 @@ export declare class SparqlConstructExecutor implements Executor {
|
|
|
69
74
|
private readonly rawQuery;
|
|
70
75
|
private readonly preParsed?;
|
|
71
76
|
private readonly fetcher;
|
|
77
|
+
private readonly retries;
|
|
72
78
|
private readonly generator;
|
|
73
79
|
constructor(options: SparqlConstructExecutorOptions);
|
|
74
80
|
/**
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAUpD;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAmBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IAuC/B;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
|
package/dist/sparql/executor.js
CHANGED
|
@@ -3,6 +3,7 @@ import { readFile } from 'node:fs/promises';
|
|
|
3
3
|
import { resolve } from 'node:path';
|
|
4
4
|
import { Parser } from '@traqula/parser-sparql-1-1';
|
|
5
5
|
import { Generator } from '@traqula/generator-sparql-1-1';
|
|
6
|
+
import pRetry from 'p-retry';
|
|
6
7
|
import { withDefaultGraph } from './graph.js';
|
|
7
8
|
import { injectValues } from './values.js';
|
|
8
9
|
/**
|
|
@@ -45,9 +46,11 @@ export class SparqlConstructExecutor {
|
|
|
45
46
|
rawQuery;
|
|
46
47
|
preParsed;
|
|
47
48
|
fetcher;
|
|
49
|
+
retries;
|
|
48
50
|
generator = new Generator();
|
|
49
51
|
constructor(options) {
|
|
50
52
|
this.rawQuery = options.query;
|
|
53
|
+
this.retries = options.retries ?? 3;
|
|
51
54
|
if (!options.query.includes('#subjectFilter#')) {
|
|
52
55
|
const parsed = new Parser().parse(options.query);
|
|
53
56
|
if (parsed.type !== 'query' || parsed.subType !== 'construct') {
|
|
@@ -92,7 +95,10 @@ export class SparqlConstructExecutor {
|
|
|
92
95
|
}
|
|
93
96
|
let query = this.generator.generate(ast);
|
|
94
97
|
query = query.replaceAll('?dataset', `<${dataset.iri}>`);
|
|
95
|
-
return await this.fetcher.fetchTriples(endpoint.toString(), query)
|
|
98
|
+
return await pRetry(() => this.fetcher.fetchTriples(endpoint.toString(), query), {
|
|
99
|
+
retries: this.retries,
|
|
100
|
+
shouldRetry: ({ error }) => isTransientHttpError(error),
|
|
101
|
+
});
|
|
96
102
|
}
|
|
97
103
|
/**
|
|
98
104
|
* Create an executor from a query file.
|
|
@@ -111,3 +117,13 @@ export class SparqlConstructExecutor {
|
|
|
111
117
|
export async function readQueryFile(filename) {
|
|
112
118
|
return (await readFile(resolve(filename))).toString();
|
|
113
119
|
}
|
|
120
|
+
const transientStatusPattern = /HTTP status (\d+)/;
|
|
121
|
+
function isTransientHttpError(error) {
|
|
122
|
+
if (!(error instanceof Error))
|
|
123
|
+
return false;
|
|
124
|
+
const match = error.message.match(transientStatusPattern);
|
|
125
|
+
if (!match)
|
|
126
|
+
return false;
|
|
127
|
+
const status = Number(match[1]);
|
|
128
|
+
return status === 502 || status === 503 || status === 504;
|
|
129
|
+
}
|
package/dist/sparql/values.d.ts
CHANGED
|
@@ -1,10 +1,16 @@
|
|
|
1
|
-
import { type QueryConstruct } from '@traqula/rules-sparql-1-1';
|
|
1
|
+
import { type Pattern, type QueryConstruct, type QuerySelect } from '@traqula/rules-sparql-1-1';
|
|
2
2
|
import type { VariableBindings } from './executor.js';
|
|
3
|
+
/**
|
|
4
|
+
* Find the first SubSelect within a list of patterns, looking through
|
|
5
|
+
* intermediate group patterns (the parser wraps `{ SELECT }` in a group).
|
|
6
|
+
*/
|
|
7
|
+
export declare function findSubSelect(patterns: Pattern[]): QuerySelect | undefined;
|
|
3
8
|
/**
|
|
4
9
|
* Inject a VALUES clause into a parsed CONSTRUCT query for the given binding rows.
|
|
5
10
|
*
|
|
6
11
|
* Each row's keys become SPARQL variables; NamedNode values become IRIs in the
|
|
7
|
-
* VALUES block. The VALUES clause is
|
|
12
|
+
* VALUES block. The VALUES clause is injected into the innermost subquery so
|
|
13
|
+
* that SPARQL engines can constrain scans early.
|
|
8
14
|
*
|
|
9
15
|
* The caller owns parsing and stringifying; this function operates on the AST.
|
|
10
16
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"values.d.ts","sourceRoot":"","sources":["../../src/sparql/values.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,cAAc,
|
|
1
|
+
{"version":3,"file":"values.d.ts","sourceRoot":"","sources":["../../src/sparql/values.ts"],"names":[],"mappings":"AAAA,OAAO,EAEL,KAAK,OAAO,EAGZ,KAAK,cAAc,EACnB,KAAK,WAAW,EAEjB,MAAM,2BAA2B,CAAC;AACnC,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAItD;;;GAGG;AACH,wBAAgB,aAAa,CAAC,QAAQ,EAAE,OAAO,EAAE,GAAG,WAAW,GAAG,SAAS,CAW1E;AA0DD;;;;;;;;GAQG;AACH,wBAAgB,YAAY,CAC1B,KAAK,EAAE,cAAc,EACrB,QAAQ,EAAE,gBAAgB,EAAE,GAC3B,cAAc,CAoBhB"}
|
package/dist/sparql/values.js
CHANGED
|
@@ -1,10 +1,72 @@
|
|
|
1
1
|
import { AstFactory, } from '@traqula/rules-sparql-1-1';
|
|
2
2
|
const F = new AstFactory();
|
|
3
|
+
/**
|
|
4
|
+
* Find the first SubSelect within a list of patterns, looking through
|
|
5
|
+
* intermediate group patterns (the parser wraps `{ SELECT }` in a group).
|
|
6
|
+
*/
|
|
7
|
+
export function findSubSelect(patterns) {
|
|
8
|
+
for (const pattern of patterns) {
|
|
9
|
+
if (F.isQuerySelect(pattern)) {
|
|
10
|
+
return pattern;
|
|
11
|
+
}
|
|
12
|
+
if (pattern.subType === 'group') {
|
|
13
|
+
const found = findSubSelect(pattern.patterns);
|
|
14
|
+
if (found)
|
|
15
|
+
return found;
|
|
16
|
+
}
|
|
17
|
+
}
|
|
18
|
+
return undefined;
|
|
19
|
+
}
|
|
20
|
+
/**
|
|
21
|
+
* Single-pass find-and-replace: walk through patterns to locate the SubSelect
|
|
22
|
+
* (looking through group wrappers) and return a new array with it replaced.
|
|
23
|
+
* Returns `undefined` if no SubSelect was found.
|
|
24
|
+
*/
|
|
25
|
+
function mapSubSelect(patterns, replacer) {
|
|
26
|
+
for (let index = 0; index < patterns.length; index++) {
|
|
27
|
+
const pattern = patterns[index];
|
|
28
|
+
if (F.isQuerySelect(pattern)) {
|
|
29
|
+
const newPatterns = [...patterns];
|
|
30
|
+
newPatterns[index] = replacer(pattern);
|
|
31
|
+
return newPatterns;
|
|
32
|
+
}
|
|
33
|
+
if (pattern.subType === 'group') {
|
|
34
|
+
const group = pattern;
|
|
35
|
+
const innerResult = mapSubSelect(group.patterns, replacer);
|
|
36
|
+
if (innerResult) {
|
|
37
|
+
const newPatterns = [...patterns];
|
|
38
|
+
newPatterns[index] = F.patternGroup(innerResult, F.gen());
|
|
39
|
+
return newPatterns;
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
}
|
|
43
|
+
return undefined;
|
|
44
|
+
}
|
|
45
|
+
/**
|
|
46
|
+
* Recursively walk through nested SubSelect patterns and inject the VALUES
|
|
47
|
+
* clause into the innermost WHERE clause. This ensures that SPARQL engines
|
|
48
|
+
* constrain scans at the deepest level rather than only at the outer scope.
|
|
49
|
+
*
|
|
50
|
+
* For flat queries (no SubSelect), the base case injects directly — identical
|
|
51
|
+
* to the previous behavior.
|
|
52
|
+
*/
|
|
53
|
+
function injectIntoInnermost(where, valuesPattern) {
|
|
54
|
+
const mapped = mapSubSelect(where.patterns, (subSelect) => ({
|
|
55
|
+
...subSelect,
|
|
56
|
+
where: injectIntoInnermost(subSelect.where, valuesPattern),
|
|
57
|
+
}));
|
|
58
|
+
if (!mapped) {
|
|
59
|
+
// Base case: no SubSelect — inject here.
|
|
60
|
+
return F.patternGroup([valuesPattern, ...where.patterns], F.gen());
|
|
61
|
+
}
|
|
62
|
+
return F.patternGroup(mapped, F.gen());
|
|
63
|
+
}
|
|
3
64
|
/**
|
|
4
65
|
* Inject a VALUES clause into a parsed CONSTRUCT query for the given binding rows.
|
|
5
66
|
*
|
|
6
67
|
* Each row's keys become SPARQL variables; NamedNode values become IRIs in the
|
|
7
|
-
* VALUES block. The VALUES clause is
|
|
68
|
+
* VALUES block. The VALUES clause is injected into the innermost subquery so
|
|
69
|
+
* that SPARQL engines can constrain scans early.
|
|
8
70
|
*
|
|
9
71
|
* The caller owns parsing and stringifying; this function operates on the AST.
|
|
10
72
|
*/
|
|
@@ -18,6 +80,6 @@ export function injectValues(query, bindings) {
|
|
|
18
80
|
const valuesPattern = F.patternValues(variables, values, F.gen());
|
|
19
81
|
return {
|
|
20
82
|
...query,
|
|
21
|
-
where:
|
|
83
|
+
where: injectIntoInnermost(query.where, valuesPattern),
|
|
22
84
|
};
|
|
23
85
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.22.0",
|
|
3
|
+
"version": "0.23.0",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -34,6 +34,7 @@
|
|
|
34
34
|
"fetch-sparql-endpoint": "^7.1.0",
|
|
35
35
|
"filenamify-url": "^4.0.0",
|
|
36
36
|
"n3": "^2.0.3",
|
|
37
|
+
"p-retry": "^7.1.1",
|
|
37
38
|
"tslib": "^2.3.0"
|
|
38
39
|
}
|
|
39
40
|
}
|