@lde/pipeline 0.28.2 → 0.28.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -50,20 +50,24 @@ new Stage({
50
50
 
51
51
  `batchSize` (default: 10) controls how many variable bindings are passed to each executor call as a `VALUES` clause. It also sets the page size for the item selector's SPARQL requests, so that each paginated request fills exactly one executor batch.
52
52
 
53
- A `LIMIT` clause in the selector query overrides `batchSize` as the page size — use this when the SPARQL endpoint enforces a hard result limit:
53
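+ Some SPARQL endpoints enforce different result limits for SELECT and CONSTRUCT queries. Because the selector uses SELECT while the executor uses CONSTRUCT, the selector's page size and the executor's batch size may need to differ: a `LIMIT` clause in the selector query overrides `batchSize` as the page size, while `batchSize` still determines how many bindings each executor call receives. Use this when the endpoint caps SELECT results below your desired batch size: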
54
54
 
55
55
  ```typescript
56
- // Endpoint caps results at 1000, but process in batches of 100.
56
+ // Endpoint caps SELECT results at 500, but each CONSTRUCT can handle 1000 bindings.
57
57
  new Stage({
58
58
  name: 'per-class',
59
59
  itemSelector: new SparqlItemSelector({
60
- query: 'SELECT DISTINCT ?class WHERE { ?s a ?class } LIMIT 1000',
60
+ query: 'SELECT DISTINCT ?class WHERE { ?s a ?class } LIMIT 500',
61
61
  }),
62
62
  executors: executor,
63
- batchSize: 100,
63
+ batchSize: 1000, // Two SELECT pages fill one CONSTRUCT batch.
64
64
  });
65
65
  ```
66
66
 
67
+ #### Concurrency
68
+
69
+ `maxConcurrency` (default: 10) controls how many batches run in parallel. Within each batch, all executors run in parallel too, so the peak number of concurrent SPARQL queries is `maxConcurrency × executorCount`. For example, with `maxConcurrency: 5` and two executors per stage, up to 10 SPARQL queries may be in flight at the same time. Lower `maxConcurrency` if the endpoint can't handle the load.
70
+
67
71
  ### Item Selector
68
72
 
69
73
  Selects resources from the distribution and fans out executor calls per batch of results. Implements the `ItemSelector` interface:
package/dist/stage.d.ts CHANGED
@@ -20,7 +20,13 @@ export interface StageOptions {
20
20
  * @default 10
21
21
  */
22
22
  batchSize?: number;
23
- /** Maximum concurrent in-flight executor batches. @default 10 */
23
+ /**
24
+ * Maximum concurrent in-flight batches. Within each batch, all executors
25
+ * run in parallel, so the peak number of concurrent SPARQL queries is
26
+ * `maxConcurrency × executorCount`.
27
+ *
28
+ * @default 10
29
+ */
24
30
  maxConcurrency?: number;
25
31
  /** Child stages that chain off this stage's output. */
26
32
  stages?: Stage[];
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAkDjB,eAAe;IA2I7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;OAMG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAkDjB,eAAe;IAiJ7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
package/dist/stage.js CHANGED
@@ -110,18 +110,19 @@ export class Stage {
110
110
  break;
111
111
  }
112
112
  track((async () => {
113
- const batchQuads = [];
114
- for (const executor of this.executors) {
115
- const result = await executor.execute(dataset, distribution, {
116
- bindings,
117
- });
118
- if (!(result instanceof NotSupported)) {
119
- hasResults = true;
120
- for await (const quad of result) {
121
- batchQuads.push(quad);
122
- }
113
+ // Run all executors for this batch in parallel.
114
+ const executorOutputs = await Promise.all(this.executors.map(async (executor) => {
115
+ const result = await executor.execute(dataset, distribution, { bindings });
116
+ if (result instanceof NotSupported)
117
+ return [];
118
+ hasResults = true;
119
+ const quads = [];
120
+ for await (const quad of result) {
121
+ quads.push(quad);
123
122
  }
124
- }
123
+ return quads;
124
+ }));
125
+ const batchQuads = executorOutputs.flat();
125
126
  if (this.validation &&
126
127
  batchQuads.length > 0 &&
127
128
  onInvalid !== 'write') {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.28.2",
3
+ "version": "0.28.3",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"