@lde/pipeline 0.28.2 → 0.28.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +8 -4
- package/dist/stage.d.ts +7 -1
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +12 -11
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -50,20 +50,24 @@ new Stage({
|
|
|
50
50
|
|
|
51
51
|
`batchSize` (default: 10) controls how many variable bindings are passed to each executor call as a `VALUES` clause. By default it also sets the page size for the item selector's SPARQL requests, so that each paginated request fills exactly one executor batch.
|
|
52
52
|
|
|
53
|
-
|
|
53
|
+
Some SPARQL endpoints enforce different result limits for SELECT and CONSTRUCT queries. Since the selector uses SELECT and the executor uses CONSTRUCT, a `LIMIT` clause in the selector query takes precedence over `batchSize` as the selector's page size, while `batchSize` still determines how many bindings are passed to each executor call. Use this when the endpoint caps SELECT results below your desired batch size:
|
|
54
54
|
|
|
55
55
|
```typescript
|
|
56
|
-
// Endpoint caps results at
|
|
56
|
+
// Endpoint caps SELECT results at 500, but each CONSTRUCT can handle 1000 bindings.
|
|
57
57
|
new Stage({
|
|
58
58
|
name: 'per-class',
|
|
59
59
|
itemSelector: new SparqlItemSelector({
|
|
60
|
-
query: 'SELECT DISTINCT ?class WHERE { ?s a ?class } LIMIT
|
|
60
|
+
query: 'SELECT DISTINCT ?class WHERE { ?s a ?class } LIMIT 500',
|
|
61
61
|
}),
|
|
62
62
|
executors: executor,
|
|
63
|
-
batchSize:
|
|
63
|
+
batchSize: 1000, // Two SELECT pages fill one CONSTRUCT batch.
|
|
64
64
|
});
|
|
65
65
|
```
|
|
66
66
|
|
|
67
|
+
#### Concurrency
|
|
68
|
+
|
|
69
|
+
`maxConcurrency` (default: 10) controls how many batches run in parallel. Within each batch, all executors run in parallel too, so the peak number of concurrent SPARQL queries is `maxConcurrency × executorCount`. For example, with `maxConcurrency: 5` and two executors per stage, up to 10 SPARQL queries may be in flight at the same time. Lower `maxConcurrency` if the endpoint can't handle the load.
|
|
70
|
+
|
|
67
71
|
### Item Selector
|
|
68
72
|
|
|
69
73
|
Selects resources from the distribution and fans out executor calls per batch of results. Implements the `ItemSelector` interface:
|
package/dist/stage.d.ts
CHANGED
|
@@ -20,7 +20,13 @@ export interface StageOptions {
|
|
|
20
20
|
* @default 10
|
|
21
21
|
*/
|
|
22
22
|
batchSize?: number;
|
|
23
|
-
/**
|
|
23
|
+
/**
|
|
24
|
+
* Maximum concurrent in-flight batches. Within each batch, all executors
|
|
25
|
+
* run in parallel, so the peak number of concurrent SPARQL queries is
|
|
26
|
+
* `maxConcurrency × executorCount`.
|
|
27
|
+
*
|
|
28
|
+
* @default 10
|
|
29
|
+
*/
|
|
24
30
|
maxConcurrency?: number;
|
|
25
31
|
/** Child stages that chain off this stage's output. */
|
|
26
32
|
stages?: Stage[];
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAkDjB,eAAe;IAiJ7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
|
package/dist/stage.js
CHANGED
|
@@ -110,18 +110,19 @@ export class Stage {
|
|
|
110
110
|
break;
|
|
111
111
|
}
|
|
112
112
|
track((async () => {
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
const result = await executor.execute(dataset, distribution, {
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
}
|
|
113
|
+
// Run all executors for this batch in parallel.
|
|
114
|
+
const executorOutputs = await Promise.all(this.executors.map(async (executor) => {
|
|
115
|
+
const result = await executor.execute(dataset, distribution, { bindings });
|
|
116
|
+
if (result instanceof NotSupported)
|
|
117
|
+
return [];
|
|
118
|
+
hasResults = true;
|
|
119
|
+
const quads = [];
|
|
120
|
+
for await (const quad of result) {
|
|
121
|
+
quads.push(quad);
|
|
123
122
|
}
|
|
124
|
-
|
|
123
|
+
return quads;
|
|
124
|
+
}));
|
|
125
|
+
const batchQuads = executorOutputs.flat();
|
|
125
126
|
if (this.validation &&
|
|
126
127
|
batchQuads.length > 0 &&
|
|
127
128
|
onInvalid !== 'write') {
|