@lde/pipeline 0.31.1 → 0.31.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -0
- package/dist/stage.d.ts +25 -0
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +43 -0
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -86,6 +86,12 @@ new Stage({
|
|
|
86
86
|
|
|
87
87
|
`maxConcurrency` (default: 10) limits the total number of concurrent SPARQL queries. Within each batch, all executors run in parallel; the number of concurrent batches is automatically reduced to `⌊maxConcurrency / executorCount⌋` so the total query pressure stays within the limit. For example, with `maxConcurrency: 10` and two executors per stage, up to 5 batches run concurrently (10 SPARQL queries total).
|
|
88
88
|
|
|
89
|
+
#### Expecting output
|
|
90
|
+
|
|
91
|
+
`expectsOutput` (default: `false`) marks a stage whose query must yield at least one quad. A supported stage that produces none is then treated as a hard failure rather than a legitimately empty result.
|
|
92
|
+
|
|
93
|
+
Set it for scalar aggregates such as `SELECT (COUNT(*) AS ?n)`, which always return exactly one row — so zero output can only mean the endpoint truncated or aborted the response (e.g. a timeout surfaced as an empty `HTTP 200`). The failure flows through like any other hard stage failure, triggering the [reactive dump fallback](#distribution-resolver) when `strategy: 'sparqlWithImportFallback'` is configured. Leave it `false` for stages that may legitimately be empty, such as class or property partitions of a dataset that lacks that structure.
|
|
94
|
+
|
|
89
95
|
### Item Selector
|
|
90
96
|
|
|
91
97
|
Selects resources from the distribution and fans out executor calls per batch of results. Implements the `ItemSelector` interface:
|
package/dist/stage.d.ts
CHANGED
|
@@ -70,6 +70,23 @@ export interface StageOptions {
|
|
|
70
70
|
maxConcurrency?: number;
|
|
71
71
|
/** Child stages that chain off this stage's output. */
|
|
72
72
|
stages?: Stage[];
|
|
73
|
+
/**
|
|
74
|
+
* Treat a supported stage that produces no quads as a hard failure (throws),
|
|
75
|
+
* rather than a legitimately empty result.
|
|
76
|
+
*
|
|
77
|
+
* Set this for stages whose query must yield output — typically a scalar
|
|
78
|
+
* aggregate such as `SELECT (COUNT(*) AS ?n)`, which always returns exactly
|
|
79
|
+
* one row, so zero quads can only mean the endpoint truncated or aborted the
|
|
80
|
+
* response (e.g. a timeout surfaced as an empty `HTTP 200`). The resulting
|
|
81
|
+
* failure flows through the pipeline like any other hard stage failure,
|
|
82
|
+
* triggering the reactive dump fallback when one is configured.
|
|
83
|
+
*
|
|
84
|
+
* Leave it `false` (default) for stages that may legitimately be empty, such
|
|
85
|
+
* as class/property partitions of a dataset that lacks that structure.
|
|
86
|
+
*
|
|
87
|
+
* @default false
|
|
88
|
+
*/
|
|
89
|
+
expectsOutput?: boolean;
|
|
73
90
|
/** Optional validation of the combined quads produced by all executors per batch. */
|
|
74
91
|
validation?: {
|
|
75
92
|
validator: Validator;
|
|
@@ -95,6 +112,8 @@ export interface SelectOptions {
|
|
|
95
112
|
export declare class Stage {
|
|
96
113
|
readonly name: string;
|
|
97
114
|
readonly stages: readonly Stage[];
|
|
115
|
+
/** Whether an empty result is treated as a hard failure. @see {@link StageOptions.expectsOutput} */
|
|
116
|
+
readonly expectsOutput: boolean;
|
|
98
117
|
private readonly executors;
|
|
99
118
|
private readonly itemSelector?;
|
|
100
119
|
private readonly batchSize;
|
|
@@ -104,6 +123,12 @@ export declare class Stage {
|
|
|
104
123
|
/** The validator for this stage, if configured. */
|
|
105
124
|
get validator(): Validator | undefined;
|
|
106
125
|
run(dataset: Dataset, distribution: Distribution, writer: Writer, options?: RunOptions): Promise<NotSupported | void>;
|
|
126
|
+
/**
|
|
127
|
+
* Throw when {@link StageOptions.expectsOutput} is set but the stage produced
|
|
128
|
+
* no quads — a supported-but-empty result that signals a truncated or aborted
|
|
129
|
+
* endpoint response rather than a legitimately empty one.
|
|
130
|
+
*/
|
|
131
|
+
private assertProduced;
|
|
107
132
|
private runWithSelector;
|
|
108
133
|
/**
|
|
109
134
|
* Validate a buffer of quads. Throws on halt, returns the quads to write
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,CAAC,GAAG,IAAI,CAC/B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,GAAG,KACT,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,YAAY,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,aAAa,CAAC,eAAe,CAAC,GAAG,aAAa,CAAC,eAAe,CAAC,EAAE,CAAC;CAC/E;AAED,2EAA2E;AAC3E,MAAM,MAAM,cAAc,GACtB,QAAQ,GACR,gBAAgB,GAChB,CAAC,QAAQ,GAAG,gBAAgB,CAAC,EAAE,CAAC;AAQpC,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,cAAc,CAAC;IAC1B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,CAAC,GAAG,IAAI,CAC/B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,GAAG,KACT,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,YAAY,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,aAAa,CAAC,eAAe,CAAC,GAAG,aAAa,CAAC,eAAe,CAAC,EAAE,CAAC;CAC/E;AAED,2EAA2E;AAC3E,MAAM,MAAM,cAAc,GACtB,QAAQ,GACR,gBAAgB,GAChB,CAAC,QAAQ,GAAG,gBAAgB,CAAC,EAAE,CAAC;AAQpC,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,cAAc,CAAC;IAC1B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB;;;;;;;;;;;;;;;OAeG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,oGAAoG;IACpG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC;IAChC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAWjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;IAqE/B;;;;OAIG;IACH,OAAO,CAAC,cAAc;YAMR,eAAe;IAgK7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;IA6BxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;CAiBxB;AAiDD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
|
package/dist/stage.js
CHANGED
|
@@ -4,6 +4,8 @@ import { AsyncQueue } from './asyncQueue.js';
|
|
|
4
4
|
export class Stage {
|
|
5
5
|
name;
|
|
6
6
|
stages;
|
|
7
|
+
/** Whether an empty result is treated as a hard failure. @see {@link StageOptions.expectsOutput} */
|
|
8
|
+
expectsOutput;
|
|
7
9
|
executors;
|
|
8
10
|
itemSelector;
|
|
9
11
|
batchSize;
|
|
@@ -17,6 +19,7 @@ export class Stage {
|
|
|
17
19
|
this.batchSize = options.batchSize ?? 10;
|
|
18
20
|
this.maxConcurrency = options.maxConcurrency ?? 10;
|
|
19
21
|
this.validation = options.validation;
|
|
22
|
+
this.expectsOutput = options.expectsOutput ?? false;
|
|
20
23
|
}
|
|
21
24
|
/** The validator for this stage, if configured. */
|
|
22
25
|
get validator() {
|
|
@@ -33,6 +36,9 @@ export class Stage {
|
|
|
33
36
|
if (streams instanceof NotSupported) {
|
|
34
37
|
return streams;
|
|
35
38
|
}
|
|
39
|
+
// Quads the executors produced (before any validation filtering); used to
|
|
40
|
+
// enforce `expectsOutput` below.
|
|
41
|
+
let produced = 0;
|
|
36
42
|
if (this.validation) {
|
|
37
43
|
const buffer = [];
|
|
38
44
|
for (const stream of streams) {
|
|
@@ -40,6 +46,7 @@ export class Stage {
|
|
|
40
46
|
buffer.push(quad);
|
|
41
47
|
}
|
|
42
48
|
}
|
|
49
|
+
produced = buffer.length;
|
|
43
50
|
const onInvalid = this.validation.onInvalid ?? 'write';
|
|
44
51
|
if (onInvalid === 'write') {
|
|
45
52
|
await Promise.all([
|
|
@@ -58,9 +65,27 @@ export class Stage {
|
|
|
58
65
|
}
|
|
59
66
|
}
|
|
60
67
|
}
|
|
68
|
+
else if (this.expectsOutput) {
|
|
69
|
+
// Only thread the per-quad counter through when the count is actually
|
|
70
|
+
// needed; the default path stays a plain streaming write with no overhead.
|
|
71
|
+
await writer.write(dataset, countQuads(mergeStreams(streams), (count) => {
|
|
72
|
+
produced = count;
|
|
73
|
+
}));
|
|
74
|
+
}
|
|
61
75
|
else {
|
|
62
76
|
await writer.write(dataset, mergeStreams(streams));
|
|
63
77
|
}
|
|
78
|
+
this.assertProduced(produced);
|
|
79
|
+
}
|
|
80
|
+
/**
|
|
81
|
+
* Throw when {@link StageOptions.expectsOutput} is set but the stage produced
|
|
82
|
+
* no quads — a supported-but-empty result that signals a truncated or aborted
|
|
83
|
+
* endpoint response rather than a legitimately empty one.
|
|
84
|
+
*/
|
|
85
|
+
assertProduced(produced) {
|
|
86
|
+
if (this.expectsOutput && produced === 0) {
|
|
87
|
+
throw new Error(`Stage '${this.name}' expected output but produced none`);
|
|
88
|
+
}
|
|
64
89
|
}
|
|
65
90
|
async runWithSelector(selector, dataset, distribution, writer, options) {
|
|
66
91
|
// Peek the first batch to detect an empty selector before starting the
|
|
@@ -185,6 +210,7 @@ export class Stage {
|
|
|
185
210
|
if (!hasResults) {
|
|
186
211
|
return new NotSupported('All executors returned NotSupported');
|
|
187
212
|
}
|
|
213
|
+
this.assertProduced(quadsGenerated);
|
|
188
214
|
}
|
|
189
215
|
/**
|
|
190
216
|
* Validate a buffer of quads. Throws on halt, returns the quads to write
|
|
@@ -260,3 +286,20 @@ async function* mergeStreams(streams) {
|
|
|
260
286
|
yield* stream;
|
|
261
287
|
}
|
|
262
288
|
}
|
|
289
|
+
/**
|
|
290
|
+
* Pass a quad stream through unchanged while counting it, reporting the total
|
|
291
|
+
* via `onCount` once the stream is exhausted. Lets a streaming write enforce
|
|
292
|
+
* {@link StageOptions.expectsOutput} without buffering.
|
|
293
|
+
*
|
|
294
|
+
* `onCount` fires only when the consumer drains the stream — which the pipeline
|
|
295
|
+
* writers do. A writer that stops early would leave the count short; callers
|
|
296
|
+
* relying on it for `expectsOutput` must consume the stream fully.
|
|
297
|
+
*/
|
|
298
|
+
async function* countQuads(stream, onCount) {
|
|
299
|
+
let count = 0;
|
|
300
|
+
for await (const quad of stream) {
|
|
301
|
+
count++;
|
|
302
|
+
yield quad;
|
|
303
|
+
}
|
|
304
|
+
onCount(count);
|
|
305
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.31.
|
|
3
|
+
"version": "0.31.3",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -26,8 +26,8 @@
|
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.7",
|
|
28
28
|
"@lde/dataset-registry-client": "0.8.4",
|
|
29
|
-
"@lde/distribution-health": "0.2.
|
|
30
|
-
"@lde/distribution-probe": "0.2.
|
|
29
|
+
"@lde/distribution-health": "0.2.1",
|
|
30
|
+
"@lde/distribution-probe": "0.2.1",
|
|
31
31
|
"@lde/sparql-importer": "0.6.5",
|
|
32
32
|
"@lde/sparql-server": "0.4.11",
|
|
33
33
|
"@rdfjs/namespace": "^2.0.1",
|