@lde/pipeline 0.28.2 → 0.28.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -4
- package/dist/stage.d.ts +8 -1
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +17 -12
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -50,20 +50,24 @@ new Stage({
|
|
|
50
50
|
|
|
51
51
|
`batchSize` (default: 10) controls how many variable bindings are passed to each executor call as a `VALUES` clause. It also sets the page size for the item selector's SPARQL requests, so that each paginated request fills exactly one executor batch.
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
Some SPARQL endpoints enforce different result limits for SELECT and CONSTRUCT queries. Since the selector uses SELECT and the executor uses CONSTRUCT, a `LIMIT` clause in the selector query overrides `batchSize` as the page size. Use this when the endpoint caps SELECT results below your desired batch size:
|
|
54
54
|
|
|
55
55
|
```typescript
|
|
56
|
-
// Endpoint caps results at
|
|
56
|
+
// Endpoint caps SELECT results at 500, but each CONSTRUCT can handle 1000 bindings.
|
|
57
57
|
new Stage({
|
|
58
58
|
name: 'per-class',
|
|
59
59
|
itemSelector: new SparqlItemSelector({
|
|
60
|
-
query: 'SELECT DISTINCT ?class WHERE { ?s a ?class } LIMIT
|
|
60
|
+
query: 'SELECT DISTINCT ?class WHERE { ?s a ?class } LIMIT 500',
|
|
61
61
|
}),
|
|
62
62
|
executors: executor,
|
|
63
|
-
batchSize:
|
|
63
|
+
batchSize: 1000, // Two SELECT pages fill one CONSTRUCT batch.
|
|
64
64
|
});
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
+
#### Concurrency
|
|
68
|
+
|
|
69
|
+
`maxConcurrency` (default: 10) limits the total number of concurrent SPARQL queries. Within each batch, all executors run in parallel; the number of concurrent batches is automatically reduced to `⌊maxConcurrency / executorCount⌋` (with a minimum of one batch) so the total query pressure stays within the limit. For example, with `maxConcurrency: 10` and two executors per stage, up to 5 batches run concurrently (10 SPARQL queries total). If a stage has more executors than `maxConcurrency`, one batch still runs at a time, so the number of concurrent queries then equals the executor count.
|
|
70
|
+
|
|
67
71
|
### Item Selector
|
|
68
72
|
|
|
69
73
|
Selects resources from the distribution and fans out executor calls per batch of results. Implements the `ItemSelector` interface:
|
package/dist/stage.d.ts
CHANGED
|
@@ -20,7 +20,14 @@ export interface StageOptions {
|
|
|
20
20
|
* @default 10
|
|
21
21
|
*/
|
|
22
22
|
batchSize?: number;
|
|
23
|
-
/**
|
|
23
|
+
/**
|
|
24
|
+
* Maximum concurrent in-flight SPARQL queries. Within each batch, all
|
|
25
|
+
* executors run in parallel; the number of concurrent batches is
|
|
26
|
+
* automatically reduced to `⌊maxConcurrency / executorCount⌋` so the
|
|
27
|
+
* total query pressure stays within this limit (at least one batch always runs).
|
|
28
|
+
*
|
|
29
|
+
* @default 10
|
|
30
|
+
*/
|
|
24
31
|
maxConcurrency?: number;
|
|
25
32
|
/** Child stages that chain off this stage's output. */
|
|
26
33
|
stages?: Stage[];
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAkDjB,eAAe;IAyJ7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
|
package/dist/stage.js
CHANGED
|
@@ -90,6 +90,10 @@ export class Stage {
|
|
|
90
90
|
const dispatch = async () => {
|
|
91
91
|
const inFlight = new Set();
|
|
92
92
|
let firstError;
|
|
93
|
+
// Divide maxConcurrency by executor count so the total number of concurrent
|
|
94
|
+
// SPARQL queries stays at maxConcurrency (each batch runs all
|
|
95
|
+
// executors in parallel).
|
|
96
|
+
const maxConcurrentBatches = Math.max(1, Math.floor(this.maxConcurrency / this.executors.length));
|
|
93
97
|
const track = (promise) => {
|
|
94
98
|
const p = promise.then(() => {
|
|
95
99
|
inFlight.delete(p);
|
|
@@ -104,24 +108,25 @@ export class Stage {
|
|
|
104
108
|
if (firstError)
|
|
105
109
|
break;
|
|
106
110
|
// Respect maxConcurrency: wait for a slot to open.
|
|
107
|
-
if (inFlight.size >=
|
|
111
|
+
if (inFlight.size >= maxConcurrentBatches) {
|
|
108
112
|
await Promise.race(inFlight);
|
|
109
113
|
if (firstError)
|
|
110
114
|
break;
|
|
111
115
|
}
|
|
112
116
|
track((async () => {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
const result = await executor.execute(dataset, distribution, {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
}
|
|
117
|
+
// Run all executors for this batch in parallel.
|
|
118
|
+
const executorOutputs = await Promise.all(this.executors.map(async (executor) => {
|
|
119
|
+
const result = await executor.execute(dataset, distribution, { bindings });
|
|
120
|
+
if (result instanceof NotSupported)
|
|
121
|
+
return [];
|
|
122
|
+
hasResults = true;
|
|
123
|
+
const quads = [];
|
|
124
|
+
for await (const quad of result) {
|
|
125
|
+
quads.push(quad);
|
|
123
126
|
}
|
|
124
|
-
|
|
127
|
+
return quads;
|
|
128
|
+
}));
|
|
129
|
+
const batchQuads = executorOutputs.flat();
|
|
125
130
|
if (this.validation &&
|
|
126
131
|
batchQuads.length > 0 &&
|
|
127
132
|
onInvalid !== 'write') {
|