@lde/pipeline 0.30.3 → 0.30.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -45
- package/dist/pipeline.d.ts +10 -2
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +2 -2
- package/dist/plugin/namespaceNormalization.d.ts +4 -1
- package/dist/plugin/namespaceNormalization.d.ts.map +1 -1
- package/dist/plugin/provenance.d.ts +4 -1
- package/dist/plugin/provenance.d.ts.map +1 -1
- package/dist/plugin/provenance.js +1 -1
- package/dist/plugin/schemaOrgNormalization.d.ts +4 -1
- package/dist/plugin/schemaOrgNormalization.d.ts.map +1 -1
- package/dist/stage.d.ts +48 -3
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +44 -6
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -149,64 +149,48 @@ const executor = new SparqlConstructExecutor({
|
|
|
149
149
|
|
|
150
150
|
The dedup set is scoped to each `execute()` call, so memory stays bounded to the number of unique quads per batch. A standalone `deduplicateQuads()` function is also exported for use outside the executor.
|
|
151
151
|
|
|
152
|
-
|
|
152
|
+
### Extending a stage with a quad transform
|
|
153
|
+
|
|
154
|
+
Some logic is hard to express in pure SPARQL — cleaning up messy date notations, converting locale-specific dates to ISO 8601, or sampling an executor’s output and firing follow-up queries. Rather than subclass `Executor`, attach a `QuadTransform` to it as data: a plain function `(quads, context) => quads` that post-processes one executor’s output before the stage merges it with its siblings. This is extension point 1 of [ADR 2](../../docs/decisions/0002-unify-pipeline-extension-on-quad-transforms.md).
|
|
155
|
+
|
|
156
|
+
A transform receives an `ExecutorContext` — the `dataset`, the `distribution` (so it can fire its own SPARQL queries), and the `stage` name. It runs once per executor call, so **write it to accept being called more than once**: a global stage calls it once over the executor’s complete output, but a per-class stage with batching enabled calls it once per batch (one class at `batchSize: 1`). Accumulate within an invocation, not across invocations — or keep the transform per-quad, where the number of calls makes no difference.
|
|
153
157
|
|
|
154
158
|
```typescript
|
|
155
159
|
import { DataFactory } from 'n3';
|
|
156
|
-
import type { Quad, Literal } from '@rdfjs/types';
|
|
157
|
-
import type { Dataset, Distribution } from '@lde/dataset';
|
|
158
160
|
import {
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
161
|
+
Stage,
|
|
162
|
+
SparqlConstructExecutor,
|
|
163
|
+
type QuadTransform,
|
|
164
|
+
type ExecutorContext,
|
|
162
165
|
} from '@lde/pipeline';
|
|
163
166
|
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
const result = await this.inner.execute(dataset, distribution, options);
|
|
179
|
-
if (result instanceof NotSupported) return result;
|
|
180
|
-
return this.transform(result, dataset);
|
|
167
|
+
const cleanDates: QuadTransform<ExecutorContext> = async function* (quads) {
|
|
168
|
+
for await (const quad of quads) {
|
|
169
|
+
if (quad.object.termType === 'Literal' && isMessyDate(quad.object)) {
|
|
170
|
+
yield DataFactory.quad(
|
|
171
|
+
quad.subject,
|
|
172
|
+
quad.predicate,
|
|
173
|
+
DataFactory.literal(
|
|
174
|
+
parseDutchDate(quad.object.value),
|
|
175
|
+
DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#date'),
|
|
176
|
+
),
|
|
177
|
+
);
|
|
178
|
+
} else {
|
|
179
|
+
yield quad;
|
|
180
|
+
}
|
|
181
181
|
}
|
|
182
|
-
}
|
|
183
|
-
```
|
|
184
|
-
|
|
185
|
-
Then use it to wrap any SPARQL executor:
|
|
182
|
+
};
|
|
186
183
|
|
|
187
|
-
```typescript
|
|
188
184
|
new Stage({
|
|
189
185
|
name: 'dates',
|
|
190
|
-
executors:
|
|
191
|
-
await SparqlConstructExecutor.fromFile('dates.rq'),
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
if (quad.object.termType === 'Literal' && isMessyDate(quad.object)) {
|
|
195
|
-
const cleaned = DataFactory.literal(
|
|
196
|
-
parseDutchDate(quad.object.value),
|
|
197
|
-
DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#date'),
|
|
198
|
-
);
|
|
199
|
-
yield DataFactory.quad(quad.subject, quad.predicate, cleaned);
|
|
200
|
-
} else {
|
|
201
|
-
yield quad;
|
|
202
|
-
}
|
|
203
|
-
}
|
|
204
|
-
},
|
|
205
|
-
),
|
|
186
|
+
executors: {
|
|
187
|
+
executor: await SparqlConstructExecutor.fromFile('dates.rq'),
|
|
188
|
+
transform: cleanDates,
|
|
189
|
+
},
|
|
206
190
|
});
|
|
207
191
|
```
|
|
208
192
|
|
|
209
|
-
This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `
|
|
193
|
+
`transform` accepts a single transform or an array applied in order, so a stage can compose several. This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `withVocabularies` for a real-world example of this pattern.
|
|
210
194
|
|
|
211
195
|
#### Adaptive timeouts
|
|
212
196
|
|
package/dist/pipeline.d.ts
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { Dataset } from '@lde/dataset';
|
|
1
2
|
import type { DatasetSelector } from './selector.js';
|
|
2
3
|
import { Stage } from './stage.js';
|
|
3
4
|
import type { QuadTransform } from './stage.js';
|
|
@@ -9,8 +10,15 @@ import { type TimeoutPolicy } from './sparql/timeoutPolicy.js';
|
|
|
9
10
|
/** Plugin that hooks into pipeline lifecycle events. */
|
|
10
11
|
export interface PipelinePlugin {
|
|
11
12
|
name: string;
|
|
12
|
-
/**
|
|
13
|
-
|
|
13
|
+
/**
|
|
14
|
+
* Transform the merged, post-stage quad stream before writing (extension
|
|
15
|
+
* point 2: pipeline-wide, post-merge). The home of cross-cutting concerns
|
|
16
|
+
* – provenance, namespace normalisation – that apply regardless of which
|
|
17
|
+
* executor produced a quad.
|
|
18
|
+
*/
|
|
19
|
+
beforeStageWrite?: QuadTransform<{
|
|
20
|
+
dataset: Dataset;
|
|
21
|
+
}>;
|
|
14
22
|
}
|
|
15
23
|
export interface PipelineOptions {
|
|
16
24
|
datasetSelector: DatasetSelector;
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC;QAAE,OAAO,EAAE,OAAO,CAAA;KAAE,CAAC,CAAC;CACxD;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;gBAEzC,OAAO,EAAE,eAAe;IAkC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAgFd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA2DP,SAAS;CAczB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -66,7 +66,7 @@ class TransformWriter {
|
|
|
66
66
|
this.transform = transform;
|
|
67
67
|
}
|
|
68
68
|
async write(dataset, quads) {
|
|
69
|
-
await this.inner.write(dataset, this.transform(quads, dataset));
|
|
69
|
+
await this.inner.write(dataset, this.transform(quads, { dataset }));
|
|
70
70
|
}
|
|
71
71
|
async flush(dataset) {
|
|
72
72
|
await this.inner.flush?.(dataset);
|
|
@@ -96,7 +96,7 @@ export class Pipeline {
|
|
|
96
96
|
?.map((p) => p.beforeStageWrite)
|
|
97
97
|
.filter((t) => t !== undefined);
|
|
98
98
|
if (transforms?.length) {
|
|
99
|
-
const composed = (quads,
|
|
99
|
+
const composed = (quads, context) => transforms.reduce((q, fn) => fn(q, context), quads);
|
|
100
100
|
writer = new TransformWriter(writer, composed);
|
|
101
101
|
}
|
|
102
102
|
this.writer = writer;
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { QuadTransform } from '../stage.js';
|
|
2
2
|
import type { PipelinePlugin } from '../pipeline.js';
|
|
3
|
+
import type { Dataset } from '@lde/dataset';
|
|
3
4
|
export interface NamespaceNormalizationOptions {
|
|
4
5
|
/** Namespace URI prefix to match (e.g. `http://schema.org/`). */
|
|
5
6
|
from: string;
|
|
@@ -14,7 +15,9 @@ export interface NamespaceNormalizationOptions {
|
|
|
14
15
|
* `void:vocabulary` quads are left unchanged so consumers can see which
|
|
15
16
|
* namespace the source dataset actually uses.
|
|
16
17
|
*/
|
|
17
|
-
export declare function namespaceNormalizationTransform(options: NamespaceNormalizationOptions): QuadTransform
|
|
18
|
+
export declare function namespaceNormalizationTransform(options: NamespaceNormalizationOptions): QuadTransform<{
|
|
19
|
+
dataset: Dataset;
|
|
20
|
+
}>;
|
|
18
21
|
/**
|
|
19
22
|
* Pipeline plugin that normalizes namespace prefixes in `void:class` and
|
|
20
23
|
* `void:property` quad objects.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAS5C,MAAM,WAAW,6BAA6B;IAC5C,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;GAOG;AACH,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,6BAA6B,GACrC,aAAa,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,CAErC;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,6BAA6B,GACrC,cAAc,CAKhB"}
|
|
@@ -1,7 +1,10 @@
|
|
|
1
1
|
import type { QuadTransform } from '../stage.js';
|
|
2
2
|
import type { PipelinePlugin } from '../pipeline.js';
|
|
3
|
+
import type { Dataset } from '@lde/dataset';
|
|
3
4
|
/** QuadTransform that appends PROV-O provenance quads. */
|
|
4
|
-
export declare const provenanceTransform: QuadTransform
|
|
5
|
+
export declare const provenanceTransform: QuadTransform<{
|
|
6
|
+
dataset: Dataset;
|
|
7
|
+
}>;
|
|
5
8
|
/** Pipeline plugin that appends PROV-O provenance to every stage's output. */
|
|
6
9
|
export declare function provenancePlugin(): PipelinePlugin;
|
|
7
10
|
//# sourceMappingURL=provenance.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAkB5C,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aAAa,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAGC,CAAC;AAEtE,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
|
|
@@ -8,7 +8,7 @@ const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime'
|
|
|
8
8
|
const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
|
|
9
9
|
const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
|
|
10
10
|
/** QuadTransform that appends PROV-O provenance quads. */
|
|
11
|
-
export const provenanceTransform = (quads, dataset) => appendProvenanceQuads(quads, dataset.iri.toString(), new Date());
|
|
11
|
+
export const provenanceTransform = (quads, { dataset }) => appendProvenanceQuads(quads, dataset.iri.toString(), new Date());
|
|
12
12
|
/** Pipeline plugin that appends PROV-O provenance to every stage's output. */
|
|
13
13
|
export function provenancePlugin() {
|
|
14
14
|
return {
|
|
@@ -1,11 +1,14 @@
|
|
|
1
1
|
import type { QuadTransform } from '../stage.js';
|
|
2
2
|
import type { PipelinePlugin } from '../pipeline.js';
|
|
3
|
+
import type { Dataset } from '@lde/dataset';
|
|
3
4
|
export interface SchemaOrgNormalizationOptions {
|
|
4
5
|
/** When true, normalizes `https://schema.org/` to `http://schema.org/` instead. */
|
|
5
6
|
reverse?: boolean;
|
|
6
7
|
}
|
|
7
8
|
/** QuadTransform that normalizes `http://schema.org/` to `https://schema.org/` in `void:class` and `void:property` objects. */
|
|
8
|
-
export declare const schemaOrgNormalizationTransform: QuadTransform
|
|
9
|
+
export declare const schemaOrgNormalizationTransform: QuadTransform<{
|
|
10
|
+
dataset: Dataset;
|
|
11
|
+
}>;
|
|
9
12
|
/**
|
|
10
13
|
* Pipeline plugin that normalizes Schema.org namespace prefixes in `void:class`
|
|
11
14
|
* and `void:property` quad objects.
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schemaOrgNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/schemaOrgNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"schemaOrgNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/schemaOrgNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAS5C,MAAM,WAAW,6BAA6B;IAC5C,mFAAmF;IACnF,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,+HAA+H;AAC/H,eAAO,MAAM,+BAA+B,EAAE,aAAa,CAAC;IAC1D,OAAO,EAAE,OAAO,CAAC;CAClB,CAGC,CAAC;AAEH;;;;;;;;;GASG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,CAAC,EAAE,6BAA6B,GACtC,cAAc,CAOhB"}
|
package/dist/stage.d.ts
CHANGED
|
@@ -5,11 +5,49 @@ import { NotSupported } from './sparql/executor.js';
|
|
|
5
5
|
import type { TimeoutPolicy } from './sparql/timeoutPolicy.js';
|
|
6
6
|
import type { Validator } from './validator.js';
|
|
7
7
|
import type { Writer } from './writer/writer.js';
|
|
8
|
-
/**
|
|
9
|
-
|
|
8
|
+
/**
|
|
9
|
+
* Transforms a quad stream, given the context of its extension point.
|
|
10
|
+
*
|
|
11
|
+
* Every pipeline extension is the same operation – intercept the quad stream,
|
|
12
|
+
* `AsyncIterable<Quad> → AsyncIterable<Quad>` – differing only in *where* it
|
|
13
|
+
* runs and the `Ctx` in scope. See
|
|
14
|
+
* {@link https://github.com/ldelements/lde/blob/main/docs/decisions/0002-unify-pipeline-extension-on-quad-transforms.md | ADR 2}.
|
|
15
|
+
*/
|
|
16
|
+
export type QuadTransform<Ctx> = (quads: AsyncIterable<Quad>, context: Ctx) => AsyncIterable<Quad>;
|
|
17
|
+
/**
|
|
18
|
+
* Context handed to a {@link QuadTransform} attached to an executor (extension
|
|
19
|
+
* point 1: per-executor output, pre-merge).
|
|
20
|
+
*
|
|
21
|
+
* `distribution` gives the transform endpoint reach – it may fire its own
|
|
22
|
+
* SPARQL queries – and `stage` carries the stage identity.
|
|
23
|
+
*/
|
|
24
|
+
export interface ExecutorContext {
|
|
25
|
+
dataset: Dataset;
|
|
26
|
+
distribution: Distribution;
|
|
27
|
+
stage: string;
|
|
28
|
+
}
|
|
29
|
+
/**
|
|
30
|
+
* An {@link Executor} with zero or more {@link QuadTransform}s attached as data.
|
|
31
|
+
*
|
|
32
|
+
* The stage runner applies the transform(s) in order to **this executor's
|
|
33
|
+
* output** before merging it with sibling executors. The window is one
|
|
34
|
+
* `execute()` call:
|
|
35
|
+
*
|
|
36
|
+
* - for a global stage that is the executor's complete output;
|
|
37
|
+
* - for a per-class stage that is one batch – one class at `batchSize: 1`.
|
|
38
|
+
*
|
|
39
|
+
* Decorating an executor is therefore construction-time data, not a wrapping
|
|
40
|
+
* class: the runner is the only code that delegates to the inner executor.
|
|
41
|
+
*/
|
|
42
|
+
export interface AttachedExecutor {
|
|
43
|
+
executor: Executor;
|
|
44
|
+
transform?: QuadTransform<ExecutorContext> | QuadTransform<ExecutorContext>[];
|
|
45
|
+
}
|
|
46
|
+
/** One or more executors, each optionally carrying attached transforms. */
|
|
47
|
+
export type StageExecutors = Executor | AttachedExecutor | (Executor | AttachedExecutor)[];
|
|
10
48
|
export interface StageOptions {
|
|
11
49
|
name: string;
|
|
12
|
-
executors:
|
|
50
|
+
executors: StageExecutors;
|
|
13
51
|
itemSelector?: ItemSelector;
|
|
14
52
|
/**
|
|
15
53
|
* Maximum number of bindings per executor call.
|
|
@@ -73,6 +111,13 @@ export declare class Stage {
|
|
|
73
111
|
*/
|
|
74
112
|
private validateBuffer;
|
|
75
113
|
private executeAll;
|
|
114
|
+
/**
|
|
115
|
+
* Fold an executor's attached transforms over its output stream, in order,
|
|
116
|
+
* supplying the {@link ExecutorContext}. A transform sees one `execute()`
|
|
117
|
+
* call's output (see {@link AttachedExecutor}); `NotSupported` is handled by
|
|
118
|
+
* the caller and never reaches a transform.
|
|
119
|
+
*/
|
|
120
|
+
private applyTransforms;
|
|
76
121
|
}
|
|
77
122
|
/** Selects items (as variable bindings) for executors to process. Pagination is an implementation detail. */
|
|
78
123
|
export interface ItemSelector {
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,CAAC,GAAG,IAAI,CAC/B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,GAAG,KACT,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,YAAY,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,aAAa,CAAC,eAAe,CAAC,GAAG,aAAa,CAAC,eAAe,CAAC,EAAE,CAAC;CAC/E;AAED,2EAA2E;AAC3E,MAAM,MAAM,cAAc,GACtB,QAAQ,GACR,gBAAgB,GAChB,CAAC,QAAQ,GAAG,gBAAgB,CAAC,EAAE,CAAC;AAQpC,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,cAAc,CAAC;IAC1B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAUjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAqDjB,eAAe;IA8J7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;IA6BxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;CAiBxB;AA4BD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
|
package/dist/stage.js
CHANGED
|
@@ -12,9 +12,7 @@ export class Stage {
|
|
|
12
12
|
constructor(options) {
|
|
13
13
|
this.name = options.name;
|
|
14
14
|
this.stages = options.stages ?? [];
|
|
15
|
-
this.executors =
|
|
16
|
-
? options.executors
|
|
17
|
-
: [options.executors];
|
|
15
|
+
this.executors = normalizeExecutors(options.executors);
|
|
18
16
|
this.itemSelector = options.itemSelector;
|
|
19
17
|
this.batchSize = options.batchSize ?? 10;
|
|
20
18
|
this.maxConcurrency = options.maxConcurrency ?? 10;
|
|
@@ -118,7 +116,7 @@ export class Stage {
|
|
|
118
116
|
}
|
|
119
117
|
track((async () => {
|
|
120
118
|
// Run all executors for this batch in parallel.
|
|
121
|
-
const executorOutputs = await Promise.all(this.executors.map(async (executor) => {
|
|
119
|
+
const executorOutputs = await Promise.all(this.executors.map(async ({ executor, transforms }) => {
|
|
122
120
|
const result = await executor.execute(dataset, distribution, {
|
|
123
121
|
bindings,
|
|
124
122
|
timeout: options?.timeout,
|
|
@@ -126,8 +124,9 @@ export class Stage {
|
|
|
126
124
|
if (result instanceof NotSupported)
|
|
127
125
|
return [];
|
|
128
126
|
hasResults = true;
|
|
127
|
+
const stream = this.applyTransforms(transforms, result, dataset, distribution);
|
|
129
128
|
const quads = [];
|
|
130
|
-
for await (const quad of
|
|
129
|
+
for await (const quad of stream) {
|
|
131
130
|
quads.push(quad);
|
|
132
131
|
}
|
|
133
132
|
return quads;
|
|
@@ -204,7 +203,14 @@ export class Stage {
|
|
|
204
203
|
return [];
|
|
205
204
|
}
|
|
206
205
|
async executeAll(dataset, distribution, timeout) {
|
|
207
|
-
const results = await Promise.all(this.executors.map((
|
|
206
|
+
const results = await Promise.all(this.executors.map(async ({ executor, transforms }) => {
|
|
207
|
+
const result = await executor.execute(dataset, distribution, {
|
|
208
|
+
timeout,
|
|
209
|
+
});
|
|
210
|
+
if (result instanceof NotSupported)
|
|
211
|
+
return result;
|
|
212
|
+
return this.applyTransforms(transforms, result, dataset, distribution);
|
|
213
|
+
}));
|
|
208
214
|
const streams = [];
|
|
209
215
|
for (const result of results) {
|
|
210
216
|
if (!(result instanceof NotSupported)) {
|
|
@@ -216,6 +222,38 @@ export class Stage {
|
|
|
216
222
|
}
|
|
217
223
|
return streams;
|
|
218
224
|
}
|
|
225
|
+
/**
|
|
226
|
+
* Fold an executor's attached transforms over its output stream, in order,
|
|
227
|
+
* supplying the {@link ExecutorContext}. A transform sees one `execute()`
|
|
228
|
+
* call's output (see {@link AttachedExecutor}); `NotSupported` is handled by
|
|
229
|
+
* the caller and never reaches a transform.
|
|
230
|
+
*/
|
|
231
|
+
applyTransforms(transforms, stream, dataset, distribution) {
|
|
232
|
+
if (transforms.length === 0)
|
|
233
|
+
return stream;
|
|
234
|
+
const context = {
|
|
235
|
+
dataset,
|
|
236
|
+
distribution,
|
|
237
|
+
stage: this.name,
|
|
238
|
+
};
|
|
239
|
+
return transforms.reduce((quads, transform) => transform(quads, context), stream);
|
|
240
|
+
}
|
|
241
|
+
}
|
|
242
|
+
/** Normalise the {@link StageExecutors} union to executor + transforms pairs. */
|
|
243
|
+
function normalizeExecutors(executors) {
|
|
244
|
+
const list = Array.isArray(executors) ? executors : [executors];
|
|
245
|
+
return list.map((entry) => {
|
|
246
|
+
if ('execute' in entry) {
|
|
247
|
+
return { executor: entry, transforms: [] };
|
|
248
|
+
}
|
|
249
|
+
const { executor, transform } = entry;
|
|
250
|
+
const transforms = transform === undefined
|
|
251
|
+
? []
|
|
252
|
+
: Array.isArray(transform)
|
|
253
|
+
? [...transform]
|
|
254
|
+
: [transform];
|
|
255
|
+
return { executor, transforms };
|
|
256
|
+
});
|
|
219
257
|
}
|
|
220
258
|
async function* mergeStreams(streams) {
|
|
221
259
|
for (const stream of streams) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.30.
|
|
3
|
+
"version": "0.30.5",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.4",
|
|
28
28
|
"@lde/dataset-registry-client": "0.8.0",
|
|
29
|
-
"@lde/distribution-probe": "0.1.
|
|
29
|
+
"@lde/distribution-probe": "0.1.5",
|
|
30
30
|
"@lde/sparql-importer": "0.6.2",
|
|
31
31
|
"@lde/sparql-server": "0.4.11",
|
|
32
32
|
"@rdfjs/types": "^2.0.1",
|