@lde/pipeline 0.30.4 → 0.30.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -149,64 +149,48 @@ const executor = new SparqlConstructExecutor({
149
149
 
150
150
  The dedup set is scoped to each `execute()` call, so memory stays bounded to the number of unique quads per batch. A standalone `deduplicateQuads()` function is also exported for use outside the executor.
151
151
 
152
- `Executor` is an interface, so you can implement your own for logic that's hard to express in pure SPARQL — for example, cleaning up messy date notations or converting locale-specific dates to ISO 8601. The decorator pattern lets you wrap a SPARQL executor and post-process its quad stream in TypeScript:
152
+ ### Extending a stage with a quad transform
153
+
154
+ Some logic is hard to express in pure SPARQL — cleaning up messy date notations, converting locale-specific dates to ISO 8601, or sampling an executor’s output and firing follow-up queries. Rather than subclass `Executor`, attach a `QuadTransform` to it as data: a plain function `(quads, context) => quads` that post-processes one executor’s output before the stage merges it with its siblings. This is extension point 1 of [ADR 2](../../docs/decisions/0002-unify-pipeline-extension-on-quad-transforms.md).
155
+
156
+ A transform receives an `ExecutorContext` — the `dataset`, the `distribution` (so it can fire its own SPARQL queries), and the `stage` name. It runs once per executor call, so **write it to accept being called more than once**: a global stage calls it once over the executor’s complete output, but a per-class stage with batching enabled calls it once per batch (one class at `batchSize: 1`). Accumulate within an invocation, not across invocations — or keep the transform per-quad, where the number of calls makes no difference.
153
157
 
154
158
  ```typescript
155
159
  import { DataFactory } from 'n3';
156
- import type { Quad, Literal } from '@rdfjs/types';
157
- import type { Dataset, Distribution } from '@lde/dataset';
158
160
  import {
159
- type Executor,
160
- type ExecuteOptions,
161
- NotSupported,
161
+ Stage,
162
+ SparqlConstructExecutor,
163
+ type QuadTransform,
164
+ type ExecutorContext,
162
165
  } from '@lde/pipeline';
163
166
 
164
- class TransformExecutor implements Executor {
165
- constructor(
166
- private readonly inner: Executor,
167
- private readonly transform: (
168
- quads: AsyncIterable<Quad>,
169
- dataset: Dataset,
170
- ) => AsyncIterable<Quad>,
171
- ) {}
172
-
173
- async execute(
174
- dataset: Dataset,
175
- distribution: Distribution,
176
- options?: ExecuteOptions,
177
- ): Promise<AsyncIterable<Quad> | NotSupported> {
178
- const result = await this.inner.execute(dataset, distribution, options);
179
- if (result instanceof NotSupported) return result;
180
- return this.transform(result, dataset);
167
+ const cleanDates: QuadTransform<ExecutorContext> = async function* (quads) {
168
+ for await (const quad of quads) {
169
+ if (quad.object.termType === 'Literal' && isMessyDate(quad.object)) {
170
+ yield DataFactory.quad(
171
+ quad.subject,
172
+ quad.predicate,
173
+ DataFactory.literal(
174
+ parseDutchDate(quad.object.value),
175
+ DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#date'),
176
+ ),
177
+ );
178
+ } else {
179
+ yield quad;
180
+ }
181
181
  }
182
- }
183
- ```
184
-
185
- Then use it to wrap any SPARQL executor:
182
+ };
186
183
 
187
- ```typescript
188
184
  new Stage({
189
185
  name: 'dates',
190
- executors: new TransformExecutor(
191
- await SparqlConstructExecutor.fromFile('dates.rq'),
192
- async function* (quads) {
193
- for await (const quad of quads) {
194
- if (quad.object.termType === 'Literal' && isMessyDate(quad.object)) {
195
- const cleaned = DataFactory.literal(
196
- parseDutchDate(quad.object.value),
197
- DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#date'),
198
- );
199
- yield DataFactory.quad(quad.subject, quad.predicate, cleaned);
200
- } else {
201
- yield quad;
202
- }
203
- }
204
- },
205
- ),
186
+ executors: {
187
+ executor: await SparqlConstructExecutor.fromFile('dates.rq'),
188
+ transform: cleanDates,
189
+ },
206
190
  });
207
191
  ```
208
192
 
209
- This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `VocabularyExecutor` for a real-world example of this pattern.
193
+ `transform` accepts a single transform or an array applied in order, so a stage can compose several. This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `withVocabularies` for a real-world example of this pattern.
210
194
 
211
195
  #### Adaptive timeouts
212
196
 
@@ -1,3 +1,4 @@
1
+ import { Dataset } from '@lde/dataset';
1
2
  import type { DatasetSelector } from './selector.js';
2
3
  import { Stage } from './stage.js';
3
4
  import type { QuadTransform } from './stage.js';
@@ -9,8 +10,15 @@ import { type TimeoutPolicy } from './sparql/timeoutPolicy.js';
9
10
  /** Plugin that hooks into pipeline lifecycle events. */
10
11
  export interface PipelinePlugin {
11
12
  name: string;
12
- /** Transform the quad stream before writing. */
13
- beforeStageWrite?: QuadTransform;
13
+ /**
14
+ * Transform the merged, post-stage quad stream before writing (extension
15
+ * point 2: pipeline-wide, post-merge). The home of cross-cutting concerns
16
+ * – provenance, namespace normalisation – that apply regardless of which
17
+ * executor produced a quad.
18
+ */
19
+ beforeStageWrite?: QuadTransform<{
20
+ dataset: Dataset;
21
+ }>;
14
22
  }
15
23
  export interface PipelineOptions {
16
24
  datasetSelector: DatasetSelector;
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;gBAEzC,OAAO,EAAE,eAAe;IAkC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAgFd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA2DP,SAAS;CAczB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC;QAAE,OAAO,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;CACxD;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;gBAEzC,OAAO,EAAE,eAAe;IAkC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAgFd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA2DP,SAAS;CAczB"}
package/dist/pipeline.js CHANGED
@@ -66,7 +66,7 @@ class TransformWriter {
66
66
  this.transform = transform;
67
67
  }
68
68
  async write(dataset, quads) {
69
- await this.inner.write(dataset, this.transform(quads, dataset));
69
+ await this.inner.write(dataset, this.transform(quads, { dataset }));
70
70
  }
71
71
  async flush(dataset) {
72
72
  await this.inner.flush?.(dataset);
@@ -96,7 +96,7 @@ export class Pipeline {
96
96
  ?.map((p) => p.beforeStageWrite)
97
97
  .filter((t) => t !== undefined);
98
98
  if (transforms?.length) {
99
- const composed = (quads, dataset) => transforms.reduce((q, fn) => fn(q, dataset), quads);
99
+ const composed = (quads, context) => transforms.reduce((q, fn) => fn(q, context), quads);
100
100
  writer = new TransformWriter(writer, composed);
101
101
  }
102
102
  this.writer = writer;
@@ -1,5 +1,6 @@
1
1
  import type { QuadTransform } from '../stage.js';
2
2
  import type { PipelinePlugin } from '../pipeline.js';
3
+ import type { Dataset } from '@lde/dataset';
3
4
  export interface NamespaceNormalizationOptions {
4
5
  /** Namespace URI prefix to match (e.g. `http://schema.org/`). */
5
6
  from: string;
@@ -14,7 +15,9 @@ export interface NamespaceNormalizationOptions {
14
15
  * `void:vocabulary` quads are left unchanged so consumers can see which
15
16
  * namespace the source dataset actually uses.
16
17
  */
17
- export declare function namespaceNormalizationTransform(options: NamespaceNormalizationOptions): QuadTransform;
18
+ export declare function namespaceNormalizationTransform(options: NamespaceNormalizationOptions): QuadTransform<{
19
+ dataset: Dataset;
20
+ }>;
18
21
  /**
19
22
  * Pipeline plugin that normalizes namespace prefixes in `void:class` and
20
23
  * `void:property` quad objects.
@@ -1 +1 @@
1
- {"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,gBAAgB,CAAC;AASnD,MAAM,WAAW,6BAA6B;IAC5C,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;GAOG;AACH,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,6BAA6B,GACrC,aAAa,CAEf;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,6BAA6B,GACrC,cAAc,CAKhB"}
1
+ {"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAS5C,MAAM,WAAW,6BAA6B;IAC5C,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;GAOG;AACH,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,6BAA6B,GACrC,aAAa,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,CAErC;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,6BAA6B,GACrC,cAAc,CAKhB"}
@@ -1,7 +1,10 @@
1
1
  import type { QuadTransform } from '../stage.js';
2
2
  import type { PipelinePlugin } from '../pipeline.js';
3
+ import type { Dataset } from '@lde/dataset';
3
4
  /** QuadTransform that appends PROV-O provenance quads. */
4
- export declare const provenanceTransform: QuadTransform;
5
+ export declare const provenanceTransform: QuadTransform<{
6
+ dataset: Dataset;
7
+ }>;
5
8
  /** Pipeline plugin that appends PROV-O provenance to every stage's output. */
6
9
  export declare function provenancePlugin(): PipelinePlugin;
7
10
  //# sourceMappingURL=provenance.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAkBrD,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aACgC,CAAC;AAEnE,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
1
+ {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAkB5C,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aAAa,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAGC,CAAC;AAEtE,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
@@ -8,7 +8,7 @@ const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime'
8
8
  const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
9
9
  const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
10
10
  /** QuadTransform that appends PROV-O provenance quads. */
11
- export const provenanceTransform = (quads, dataset) => appendProvenanceQuads(quads, dataset.iri.toString(), new Date());
11
+ export const provenanceTransform = (quads, { dataset }) => appendProvenanceQuads(quads, dataset.iri.toString(), new Date());
12
12
  /** Pipeline plugin that appends PROV-O provenance to every stage's output. */
13
13
  export function provenancePlugin() {
14
14
  return {
@@ -1,11 +1,14 @@
1
1
  import type { QuadTransform } from '../stage.js';
2
2
  import type { PipelinePlugin } from '../pipeline.js';
3
+ import type { Dataset } from '@lde/dataset';
3
4
  export interface SchemaOrgNormalizationOptions {
4
5
  /** When true, normalizes `https://schema.org/` to `http://schema.org/` instead. */
5
6
  reverse?: boolean;
6
7
  }
7
8
  /** QuadTransform that normalizes `http://schema.org/` to `https://schema.org/` in `void:class` and `void:property` objects. */
8
- export declare const schemaOrgNormalizationTransform: QuadTransform;
9
+ export declare const schemaOrgNormalizationTransform: QuadTransform<{
10
+ dataset: Dataset;
11
+ }>;
9
12
  /**
10
13
  * Pipeline plugin that normalizes Schema.org namespace prefixes in `void:class`
11
14
  * and `void:property` quad objects.
@@ -1 +1 @@
1
- {"version":3,"file":"schemaOrgNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/schemaOrgNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,gBAAgB,CAAC;AASnD,MAAM,WAAW,6BAA6B;IAC5C,mFAAmF;IACnF,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,+HAA+H;AAC/H,eAAO,MAAM,+BAA+B,EAAE,aAI1C,CAAC;AAEL;;;;;;;;;GASG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,CAAC,EAAE,6BAA6B,GACtC,cAAc,CAOhB"}
1
+ {"version":3,"file":"schemaOrgNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/schemaOrgNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAS5C,MAAM,WAAW,6BAA6B;IAC5C,mFAAmF;IACnF,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,+HAA+H;AAC/H,eAAO,MAAM,+BAA+B,EAAE,aAAa,CAAC;IAC1D,OAAO,EAAE,OAAO,CAAC;CAClB,CAGC,CAAC;AAEH;;;;;;;;;GASG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,CAAC,EAAE,6BAA6B,GACtC,cAAc,CAOhB"}
package/dist/stage.d.ts CHANGED
@@ -5,11 +5,49 @@ import { NotSupported } from './sparql/executor.js';
5
5
  import type { TimeoutPolicy } from './sparql/timeoutPolicy.js';
6
6
  import type { Validator } from './validator.js';
7
7
  import type { Writer } from './writer/writer.js';
8
- /** Transforms a quad stream, optionally using dataset metadata. */
9
- export type QuadTransform = (quads: AsyncIterable<Quad>, dataset: Dataset) => AsyncIterable<Quad>;
8
+ /**
9
+ * Transforms a quad stream, given the context of its extension point.
10
+ *
11
+ * Every pipeline extension is the same operation – intercept the quad stream,
12
+ * `AsyncIterable<Quad> → AsyncIterable<Quad>` – differing only in *where* it
13
+ * runs and the `Ctx` in scope. See
14
+ * {@link https://github.com/ldelements/lde/blob/main/docs/decisions/0002-unify-pipeline-extension-on-quad-transforms.md | ADR 2}.
15
+ */
16
+ export type QuadTransform<Ctx> = (quads: AsyncIterable<Quad>, context: Ctx) => AsyncIterable<Quad>;
17
+ /**
18
+ * Context handed to a {@link QuadTransform} attached to an executor (extension
19
+ * point 1: per-executor output, pre-merge).
20
+ *
21
+ * `distribution` gives the transform endpoint reach – it may fire its own
22
+ * SPARQL queries – and `stage` carries the stage identity.
23
+ */
24
+ export interface ExecutorContext {
25
+ dataset: Dataset;
26
+ distribution: Distribution;
27
+ stage: string;
28
+ }
29
+ /**
30
+ * An {@link Executor} with zero or more {@link QuadTransform}s attached as data.
31
+ *
32
+ * The stage runner applies the transform(s) in order to **this executor's
33
+ * output** before merging it with sibling executors. The window is one
34
+ * `execute()` call:
35
+ *
36
+ * - for a global stage that is the executor's complete output;
37
+ * - for a per-class stage that is one batch – one class at `batchSize: 1`.
38
+ *
39
+ * Decorating an executor is therefore construction-time data, not a wrapping
40
+ * class: the runner is the only code that delegates to the inner executor.
41
+ */
42
+ export interface AttachedExecutor {
43
+ executor: Executor;
44
+ transform?: QuadTransform<ExecutorContext> | QuadTransform<ExecutorContext>[];
45
+ }
46
+ /** One or more executors, each optionally carrying attached transforms. */
47
+ export type StageExecutors = Executor | AttachedExecutor | (Executor | AttachedExecutor)[];
10
48
  export interface StageOptions {
11
49
  name: string;
12
- executors: Executor | Executor[];
50
+ executors: StageExecutors;
13
51
  itemSelector?: ItemSelector;
14
52
  /**
15
53
  * Maximum number of bindings per executor call.
@@ -73,6 +111,13 @@ export declare class Stage {
73
111
  */
74
112
  private validateBuffer;
75
113
  private executeAll;
114
+ /**
115
+ * Fold an executor's attached transforms over its output stream, in order,
116
+ * supplying the {@link ExecutorContext}. A transform sees one `execute()`
117
+ * call's output (see {@link AttachedExecutor}); `NotSupported` is handled by
118
+ * the caller and never reaches a transform.
119
+ */
120
+ private applyTransforms;
76
121
  }
77
122
  /** Selects items (as variable bindings) for executors to process. Pagination is an implementation detail. */
78
123
  export interface ItemSelector {
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAqDjB,eAAe;IAwJ7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAwBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,CAAC,GAAG,IAAI,CAC/B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,GAAG,KACT,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,YAAY,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,aAAa,CAAC,eAAe,CAAC,GAAG,aAAa,CAAC,eAAe,CAAC,EAAE,CAAC;CAC/E;AAED,2EAA2E;AAC3E,MAAM,MAAM,cAAc,GACtB,QAAQ,GACR,gBAAgB,GAChB,CAAC,QAAQ,GAAG,gBAAgB,CAAC,EAAE,CAAC;AAQpC,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,cAAc,CAAC;IAC1B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAUjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAqDjB,eAAe;IA8J7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;IA6BxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;CAiBxB;AA4BD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
package/dist/stage.js CHANGED
@@ -12,9 +12,7 @@ export class Stage {
12
12
  constructor(options) {
13
13
  this.name = options.name;
14
14
  this.stages = options.stages ?? [];
15
- this.executors = Array.isArray(options.executors)
16
- ? options.executors
17
- : [options.executors];
15
+ this.executors = normalizeExecutors(options.executors);
18
16
  this.itemSelector = options.itemSelector;
19
17
  this.batchSize = options.batchSize ?? 10;
20
18
  this.maxConcurrency = options.maxConcurrency ?? 10;
@@ -118,7 +116,7 @@ export class Stage {
118
116
  }
119
117
  track((async () => {
120
118
  // Run all executors for this batch in parallel.
121
- const executorOutputs = await Promise.all(this.executors.map(async (executor) => {
119
+ const executorOutputs = await Promise.all(this.executors.map(async ({ executor, transforms }) => {
122
120
  const result = await executor.execute(dataset, distribution, {
123
121
  bindings,
124
122
  timeout: options?.timeout,
@@ -126,8 +124,9 @@ export class Stage {
126
124
  if (result instanceof NotSupported)
127
125
  return [];
128
126
  hasResults = true;
127
+ const stream = this.applyTransforms(transforms, result, dataset, distribution);
129
128
  const quads = [];
130
- for await (const quad of result) {
129
+ for await (const quad of stream) {
131
130
  quads.push(quad);
132
131
  }
133
132
  return quads;
@@ -204,7 +203,14 @@ export class Stage {
204
203
  return [];
205
204
  }
206
205
  async executeAll(dataset, distribution, timeout) {
207
- const results = await Promise.all(this.executors.map((executor) => executor.execute(dataset, distribution, { timeout })));
206
+ const results = await Promise.all(this.executors.map(async ({ executor, transforms }) => {
207
+ const result = await executor.execute(dataset, distribution, {
208
+ timeout,
209
+ });
210
+ if (result instanceof NotSupported)
211
+ return result;
212
+ return this.applyTransforms(transforms, result, dataset, distribution);
213
+ }));
208
214
  const streams = [];
209
215
  for (const result of results) {
210
216
  if (!(result instanceof NotSupported)) {
@@ -216,6 +222,38 @@ export class Stage {
216
222
  }
217
223
  return streams;
218
224
  }
225
+ /**
226
+ * Fold an executor's attached transforms over its output stream, in order,
227
+ * supplying the {@link ExecutorContext}. A transform sees one `execute()`
228
+ * call's output (see {@link AttachedExecutor}); `NotSupported` is handled by
229
+ * the caller and never reaches a transform.
230
+ */
231
+ applyTransforms(transforms, stream, dataset, distribution) {
232
+ if (transforms.length === 0)
233
+ return stream;
234
+ const context = {
235
+ dataset,
236
+ distribution,
237
+ stage: this.name,
238
+ };
239
+ return transforms.reduce((quads, transform) => transform(quads, context), stream);
240
+ }
241
+ }
242
+ /** Normalise the {@link StageExecutors} union to executor + transforms pairs. */
243
+ function normalizeExecutors(executors) {
244
+ const list = Array.isArray(executors) ? executors : [executors];
245
+ return list.map((entry) => {
246
+ if ('execute' in entry) {
247
+ return { executor: entry, transforms: [] };
248
+ }
249
+ const { executor, transform } = entry;
250
+ const transforms = transform === undefined
251
+ ? []
252
+ : Array.isArray(transform)
253
+ ? [...transform]
254
+ : [transform];
255
+ return { executor, transforms };
256
+ });
219
257
  }
220
258
  async function* mergeStreams(streams) {
221
259
  for (const stream of streams) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.30.4",
3
+ "version": "0.30.6",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"
@@ -26,7 +26,7 @@
26
26
  "dependencies": {
27
27
  "@lde/dataset": "0.7.4",
28
28
  "@lde/dataset-registry-client": "0.8.0",
29
- "@lde/distribution-probe": "0.1.5",
29
+ "@lde/distribution-probe": "0.1.6",
30
30
  "@lde/sparql-importer": "0.6.2",
31
31
  "@lde/sparql-server": "0.4.11",
32
32
  "@rdfjs/types": "^2.0.1",