@lde/pipeline 0.6.23 → 0.6.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/asyncQueue.d.ts ADDED
@@ -0,0 +1,29 @@
+ /**
+  * A bounded async channel: producers `push()` items, a single consumer
+  * iterates with `for await...of`. Backpressure is applied when the buffer
+  * reaches `capacity` — `push()` will block until the consumer pulls.
+  */
+ export declare class AsyncQueue<T> implements AsyncIterable<T> {
+     private buffer;
+     private readonly capacity;
+     private closed;
+     private error;
+     /** Resolvers for a blocked consumer waiting for data or close/abort. */
+     private consumerResolve?;
+     private consumerReject?;
+     /** Resolvers for blocked producers waiting for buffer space. */
+     private producerResolvers;
+     constructor(capacity?: number);
+     /**
+      * Push an item into the queue. Blocks (returns a Promise) when the buffer
+      * is full. Throws if the queue has been closed or aborted.
+      */
+     push(item: T): Promise<void>;
+     /** Signal that no more items will be pushed. */
+     close(): void;
+     /** Signal an error. Unblocks all waiting producers and the consumer. */
+     abort(error: unknown): void;
+     [Symbol.asyncIterator](): AsyncIterator<T, undefined>;
+     private pull;
+ }
+ //# sourceMappingURL=asyncQueue.d.ts.map
package/dist/asyncQueue.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"asyncQueue.d.ts","sourceRoot":"","sources":["../src/asyncQueue.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,qBAAa,UAAU,CAAC,CAAC,CAAE,YAAW,aAAa,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,MAAM,CAAW;IACzB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAsB;IAEnC,wEAAwE;IACxE,OAAO,CAAC,eAAe,CAAC,CAAgD;IACxE,OAAO,CAAC,cAAc,CAAC,CAA4B;IAEnD,gEAAgE;IAChE,OAAO,CAAC,iBAAiB,CAGjB;gBAEI,QAAQ,SAAM;IAI1B;;;OAGG;IACG,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA2BlC,gDAAgD;IAChD,KAAK,IAAI,IAAI;IAab,wEAAwE;IACxE,KAAK,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAoB3B,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa,CAAC,CAAC,EAAE,SAAS,CAAC;IAMrD,OAAO,CAAC,IAAI;CA2Bb"}
package/dist/asyncQueue.js ADDED
@@ -0,0 +1,106 @@
+ /**
+  * A bounded async channel: producers `push()` items, a single consumer
+  * iterates with `for await...of`. Backpressure is applied when the buffer
+  * reaches `capacity` — `push()` will block until the consumer pulls.
+  */
+ export class AsyncQueue {
+     buffer = [];
+     capacity;
+     closed = false;
+     error = undefined;
+     /** Resolvers for a blocked consumer waiting for data or close/abort. */
+     consumerResolve;
+     consumerReject;
+     /** Resolvers for blocked producers waiting for buffer space. */
+     producerResolvers = [];
+     constructor(capacity = 128) {
+         this.capacity = capacity;
+     }
+     /**
+      * Push an item into the queue. Blocks (returns a Promise) when the buffer
+      * is full. Throws if the queue has been closed or aborted.
+      */
+     async push(item) {
+         if (this.error !== undefined) {
+             throw this.error;
+         }
+         if (this.closed) {
+             throw new Error('Cannot push to a closed queue');
+         }
+         // If a consumer is already waiting, deliver directly.
+         if (this.consumerResolve) {
+             const resolve = this.consumerResolve;
+             this.consumerResolve = undefined;
+             this.consumerReject = undefined;
+             resolve({ value: item, done: false });
+             return;
+         }
+         // Wait for space if buffer is at capacity.
+         if (this.buffer.length >= this.capacity) {
+             await new Promise((resolve, reject) => {
+                 this.producerResolvers.push({ resolve, reject });
+             });
+         }
+         this.buffer.push(item);
+     }
+     /** Signal that no more items will be pushed. */
+     close() {
+         if (this.closed)
+             return;
+         this.closed = true;
+         // Wake a waiting consumer with done signal if buffer is empty.
+         if (this.buffer.length === 0 && this.consumerResolve) {
+             const resolve = this.consumerResolve;
+             this.consumerResolve = undefined;
+             this.consumerReject = undefined;
+             resolve({ value: undefined, done: true });
+         }
+     }
+     /** Signal an error. Unblocks all waiting producers and the consumer. */
+     abort(error) {
+         if (this.error !== undefined)
+             return; // first error wins
+         this.error = error;
+         this.closed = true;
+         // Reject all blocked producers.
+         for (const { reject } of this.producerResolvers) {
+             reject(error);
+         }
+         this.producerResolvers = [];
+         // Reject or resolve the consumer depending on buffered items.
+         if (this.consumerReject) {
+             const reject = this.consumerReject;
+             this.consumerResolve = undefined;
+             this.consumerReject = undefined;
+             reject(error);
+         }
+     }
+     [Symbol.asyncIterator]() {
+         return {
+             next: () => this.pull(),
+         };
+     }
+     pull() {
+         // Drain buffer first.
+         if (this.buffer.length > 0) {
+             const item = this.buffer.shift();
+             // Unblock one waiting producer.
+             if (this.producerResolvers.length > 0) {
+                 this.producerResolvers.shift().resolve();
+             }
+             return Promise.resolve({ value: item, done: false });
+         }
+         // Buffer empty — check for error or closed.
+         if (this.error !== undefined) {
+             return Promise.reject(this.error);
+         }
+         if (this.closed) {
+             return Promise.resolve({ value: undefined, done: true });
+         }
+         // Wait for a producer to push or for close/abort.
+         return new Promise((resolve, reject) => {
+             this.consumerResolve = resolve;
+             this.consumerReject = reject;
+         });
+     }
+ }
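
The AsyncQueue above is exported from the package root (see the index.js change below), so a consumer can pair one producing loop with a single `for await...of` reader. A minimal usage sketch, with placeholder string items and an assumed capacity of 2:

import { AsyncQueue } from '@lde/pipeline';

const queue = new AsyncQueue<string>(2); // small capacity to make backpressure visible

// Producer: push() only resolves once the consumer has made room.
const producer = (async () => {
  try {
    for (const item of ['a', 'b', 'c', 'd']) {
      await queue.push(item);
    }
    queue.close(); // ends the consumer's iteration
  } catch (err) {
    queue.abort(err); // propagate a producer failure to the consumer
  }
})();

// Consumer: a single for await...of loop drains the queue.
const consumer = (async () => {
  for await (const item of queue) {
    console.log('received', item);
  }
})();

await Promise.all([producer, consumer]);
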
package/dist/index.d.ts CHANGED
@@ -1,11 +1,10 @@
+ export * from './asyncQueue.js';
  export * from './batch.js';
  export * from './pipeline.js';
+ export * from './progressReporter.js';
  export * from './selector.js';
  export * from './stage.js';
- export * from './step.js';
- export * from './step/sparqlQuery.js';
- export * from './builder.js';
- export * from './config.js';
+ export * from './stageOutputResolver.js';
  export * from './sparql/index.js';
  export * from './distribution/index.js';
  export * from './writer/index.js';
package/dist/index.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
package/dist/index.js CHANGED
@@ -1,11 +1,10 @@
+ export * from './asyncQueue.js';
  export * from './batch.js';
  export * from './pipeline.js';
+ export * from './progressReporter.js';
  export * from './selector.js';
  export * from './stage.js';
- export * from './step.js';
- export * from './step/sparqlQuery.js';
- export * from './builder.js';
- export * from './config.js';
+ export * from './stageOutputResolver.js';
  export * from './sparql/index.js';
  export * from './distribution/index.js';
  export * from './writer/index.js';
package/dist/pipeline.d.ts CHANGED
@@ -1,11 +1,28 @@
- import { Selector } from './selector.js';
- import { Step } from './step.js';
+ import type { Selector } from './selector.js';
+ import { Stage } from './stage.js';
+ import type { Writer } from './writer/writer.js';
+ import { type DistributionResolver } from './distribution/resolver.js';
+ import type { StageOutputResolver } from './stageOutputResolver.js';
+ import type { ProgressReporter } from './progressReporter.js';
+ export interface PipelineOptions {
+     name: string;
+     selector: Selector;
+     stages: Stage[];
+     writer: Writer;
+     distributionResolver: DistributionResolver;
+     stageOutputResolver?: StageOutputResolver;
+     outputDir?: string;
+     outputFormat?: 'turtle' | 'n-triples' | 'n-quads';
+     reporter?: ProgressReporter;
+ }
  export declare class Pipeline {
-     private readonly config;
-     constructor(config: {
-         selector: Selector;
-         steps: Step[];
-     });
+     private readonly options;
+     constructor(options: PipelineOptions);
      run(): Promise<void>;
+     private processDataset;
+     private runStage;
+     private runChain;
+     private runChainedStage;
+     private readFiles;
  }
  //# sourceMappingURL=pipeline.d.ts.map
package/dist/pipeline.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAc,IAAI,EAAE,MAAM,WAAW,CAAC;AAI7C,qBAAa,QAAQ;IACP,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE;QAAE,QAAQ,EAAE,QAAQ,CAAC;QAAC,KAAK,EAAE,IAAI,EAAE,CAAA;KAAE;IAE7D,GAAG;CAiCjB"}
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -1,42 +1,141 @@
- import { NotSupported } from './step.js';
- import { Readable } from 'node:stream';
+ import { createReadStream } from 'node:fs';
+ import { StreamParser } from 'n3';
+ import { FileWriter } from './writer/fileWriter.js';
+ import { NoDistributionAvailable, } from './distribution/resolver.js';
+ import { NotSupported } from './sparql/executor.js';
  export class Pipeline {
-     config;
-     constructor(config) {
-         this.config = config;
+     options;
+     constructor(options) {
+         const hasSubStages = options.stages.some((stage) => stage.stages.length > 0);
+         if (hasSubStages && !options.stageOutputResolver) {
+             throw new Error('stageOutputResolver is required when any stage has sub-stages');
+         }
+         if (hasSubStages && !options.outputDir) {
+             throw new Error('outputDir is required when any stage has sub-stages');
+         }
+         this.options = options;
      }
      async run() {
-         const datasets = await this.config.selector.select();
+         const { selector, reporter, name } = this.options;
+         const start = Date.now();
+         reporter?.pipelineStart(name);
+         const datasets = await selector.select();
          for await (const dataset of datasets) {
-             const distribution = dataset.getSparqlDistribution() ?? undefined;
-             for (const step of this.config.steps) {
-                 const result = await step.execute(dataset, distribution);
-                 if (result instanceof NotSupported) {
-                     console.error(result);
+             await this.processDataset(dataset);
+         }
+         reporter?.pipelineComplete({ duration: Date.now() - start });
+     }
+     async processDataset(dataset) {
+         const { distributionResolver, reporter } = this.options;
+         const datasetIri = dataset.iri.toString();
+         reporter?.datasetStart(datasetIri);
+         const resolved = await distributionResolver.resolve(dataset);
+         if (resolved instanceof NoDistributionAvailable) {
+             reporter?.datasetSkipped(datasetIri, resolved.message);
+             return;
+         }
+         try {
+             for (const stage of this.options.stages) {
+                 if (stage.stages.length > 0) {
+                     await this.runChain(dataset, resolved.distribution, stage);
                  }
-                 else if (result instanceof Readable) {
-                     const promise = new Promise((resolve, reject) => {
-                         result.on('data', (data) => {
-                             // TODO: pipe to writers.
-                             console.log('Data:', data);
-                         });
-                         result.on('error', (error) => {
-                             console.error('rejecting');
-                             reject(error);
-                         });
-                         result.on('end', resolve);
-                     });
-                     await promise;
+                 else {
+                     await this.runStage(dataset, resolved.distribution, stage);
                  }
              }
-             for (const step of this.config.steps) {
-                 if (isFinishable(step)) {
-                     await step.finish();
+         }
+         catch {
+             // Stage error for this dataset; continue to next dataset.
+         }
+         reporter?.datasetComplete(datasetIri);
+     }
+     async runStage(dataset, distribution, stage) {
+         const { writer, reporter } = this.options;
+         reporter?.stageStart(stage.name);
+         const stageStart = Date.now();
+         let elementsProcessed = 0;
+         let quadsGenerated = 0;
+         const result = await stage.run(dataset, distribution, writer, {
+             onProgress: (elements, quads) => {
+                 elementsProcessed = elements;
+                 quadsGenerated = quads;
+                 reporter?.stageProgress({ elementsProcessed, quadsGenerated });
+             },
+         });
+         if (result instanceof NotSupported) {
+             reporter?.stageSkipped(stage.name, result.message);
+         }
+         else {
+             reporter?.stageComplete(stage.name, {
+                 elementsProcessed,
+                 quadsGenerated,
+                 duration: Date.now() - stageStart,
+             });
+         }
+     }
+     async runChain(dataset, distribution, stage) {
+         const { writer, stageOutputResolver, outputDir, outputFormat } = this.options;
+         const outputFiles = [];
+         try {
+             // 1. Run parent stage → FileWriter.
+             const parentWriter = new FileWriter({
+                 outputDir: `${outputDir}/${stage.name}`,
+                 format: outputFormat,
+             });
+             await this.runChainedStage(dataset, distribution, stage, parentWriter);
+             outputFiles.push(parentWriter.getOutputPath(dataset));
+             // 2. Chain through children.
+             let currentDistribution = await stageOutputResolver.resolve(parentWriter.getOutputPath(dataset));
+             for (let i = 0; i < stage.stages.length; i++) {
+                 const child = stage.stages[i];
+                 const childWriter = new FileWriter({
+                     outputDir: `${outputDir}/${child.name}`,
+                     format: outputFormat,
+                 });
+                 await this.runChainedStage(dataset, currentDistribution, child, childWriter);
+                 outputFiles.push(childWriter.getOutputPath(dataset));
+                 if (i < stage.stages.length - 1) {
+                     currentDistribution = await stageOutputResolver.resolve(childWriter.getOutputPath(dataset));
                  }
              }
+             // 3. Concatenate all output files → user writer.
+             await writer.write(dataset, this.readFiles(outputFiles));
+         }
+         finally {
+             await stageOutputResolver.cleanup();
+         }
+     }
+     async runChainedStage(dataset, distribution, stage, stageWriter) {
+         const { reporter } = this.options;
+         reporter?.stageStart(stage.name);
+         const stageStart = Date.now();
+         let elementsProcessed = 0;
+         let quadsGenerated = 0;
+         const result = await stage.run(dataset, distribution, stageWriter, {
+             onProgress: (elements, quads) => {
+                 elementsProcessed = elements;
+                 quadsGenerated = quads;
+                 reporter?.stageProgress({ elementsProcessed, quadsGenerated });
+             },
+         });
+         if (result instanceof NotSupported) {
+             reporter?.stageSkipped(stage.name, result.message);
+             throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
+         }
+         reporter?.stageComplete(stage.name, {
+             elementsProcessed,
+             quadsGenerated,
+             duration: Date.now() - stageStart,
+         });
+     }
+     async *readFiles(paths) {
+         for (const path of paths) {
+             const stream = createReadStream(path);
+             const parser = new StreamParser();
+             stream.pipe(parser);
+             for await (const quad of parser) {
+                 yield quad;
+             }
          }
      }
  }
- const isFinishable = (step) => {
-     return typeof step.finish === 'function';
- };
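
The new `runChain` path only activates for stages that declare child stages; the constructor enforces that `stageOutputResolver` and `outputDir` are then present. A hedged sketch of such a configuration, again using placeholders (`mySelector`, `extractExecutor`, `enrichExecutor`, `myDistributionResolver`, `myStageOutputResolver` and `myWriter` are assumptions, not part of this diff):

import { Pipeline, Stage } from '@lde/pipeline';
// Placeholder implementations, assumed to exist in the consumer's own code.
import {
  mySelector,
  extractExecutor,
  enrichExecutor,
  myDistributionResolver,
  myStageOutputResolver,
  myWriter,
} from './my-setup.js';

const pipeline = new Pipeline({
  name: 'chained-example',
  selector: mySelector,
  stages: [
    new Stage({
      name: 'extract',
      executors: extractExecutor,
      // Children run against the parent stage's file output, in order.
      stages: [new Stage({ name: 'enrich', executors: enrichExecutor })],
    }),
  ],
  writer: myWriter,
  distributionResolver: myDistributionResolver,
  // Required as soon as any stage has sub-stages; the constructor throws otherwise.
  stageOutputResolver: myStageOutputResolver,
  outputDir: 'output',
  outputFormat: 'n-quads',
});

await pipeline.run();

Each stage in the chain writes to `${outputDir}/<stage name>` through its own FileWriter, the stageOutputResolver turns that file into the next stage's distribution, and readFiles() finally streams the concatenated quads from all intermediate files into the user-supplied writer.
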
package/dist/progressReporter.d.ts ADDED
@@ -0,0 +1,21 @@
+ export interface ProgressReporter {
+     pipelineStart(name: string): void;
+     datasetStart(dataset: string): void;
+     stageStart(stage: string): void;
+     stageProgress(update: {
+         elementsProcessed: number;
+         quadsGenerated: number;
+     }): void;
+     stageComplete(stage: string, result: {
+         elementsProcessed: number;
+         quadsGenerated: number;
+         duration: number;
+     }): void;
+     stageSkipped(stage: string, reason: string): void;
+     datasetComplete(dataset: string): void;
+     datasetSkipped(dataset: string, reason: string): void;
+     pipelineComplete(result: {
+         duration: number;
+     }): void;
+ }
+ //# sourceMappingURL=progressReporter.d.ts.map
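
Every reporter call in pipeline.js is optional-chained, so a pipeline can run without one. A minimal console-backed implementation of this interface might look like the sketch below (class name and log format are illustrative only):

import type { ProgressReporter } from '@lde/pipeline';

export class ConsoleReporter implements ProgressReporter {
  pipelineStart(name: string) {
    console.log(`pipeline ${name} started`);
  }
  datasetStart(dataset: string) {
    console.log(`dataset ${dataset} started`);
  }
  stageStart(stage: string) {
    console.log(`stage ${stage} started`);
  }
  stageProgress(update: { elementsProcessed: number; quadsGenerated: number }) {
    console.log(`  ${update.elementsProcessed} elements, ${update.quadsGenerated} quads`);
  }
  stageComplete(
    stage: string,
    result: { elementsProcessed: number; quadsGenerated: number; duration: number },
  ) {
    console.log(`stage ${stage} done: ${result.quadsGenerated} quads in ${result.duration} ms`);
  }
  stageSkipped(stage: string, reason: string) {
    console.warn(`stage ${stage} skipped: ${reason}`);
  }
  datasetComplete(dataset: string) {
    console.log(`dataset ${dataset} done`);
  }
  datasetSkipped(dataset: string, reason: string) {
    console.warn(`dataset ${dataset} skipped: ${reason}`);
  }
  pipelineComplete(result: { duration: number }) {
    console.log(`pipeline finished in ${result.duration} ms`);
  }
}
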
package/dist/progressReporter.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,aAAa,CAAC,MAAM,EAAE;QACpB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CACX,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAClD,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtD,gBAAgB,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACtD"}
package/dist/progressReporter.js ADDED
@@ -0,0 +1 @@
+ export {};
package/dist/sparql/executor.d.ts CHANGED
@@ -2,8 +2,13 @@ import { Dataset, Distribution } from '@lde/dataset';
  import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
  import type { NamedNode, Quad, Stream } from '@rdfjs/types';
  import type { Readable } from 'node:stream';
- import { NotSupported } from '../step.js';
- export { NotSupported } from '../step.js';
+ /**
+  * An executor could not run because the dataset lacks a supported distribution.
+  */
+ export declare class NotSupported {
+     readonly message: string;
+     constructor(message: string);
+ }
  /** A single row of variable bindings (variable name → NamedNode). */
  export type VariableBindings = Record<string, NamedNode>;
  export interface ExecuteOptions {
package/dist/sparql/executor.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI5C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAK1C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAiB;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAcnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAoBtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,OAAO,EAAE,OAAO,GACf,MAAM,CAWR;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAO5C;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAiB;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAcnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAoBtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,OAAO,EAAE,OAAO,GACf,MAAM,CAWR;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
package/dist/sparql/executor.js CHANGED
@@ -4,8 +4,15 @@ import { resolve } from 'node:path';
  import { Generator, Parser } from 'sparqljs';
  import { withDefaultGraph } from './graph.js';
  import { injectValues } from './values.js';
- // Re-export for convenience
- export { NotSupported } from '../step.js';
+ /**
+  * An executor could not run because the dataset lacks a supported distribution.
+  */
+ export class NotSupported {
+     message;
+     constructor(message) {
+         this.message = message;
+     }
+ }
  /**
   * A streaming SPARQL CONSTRUCT executor that parses the query once (in the
   * constructor) and operates on the AST for graph and VALUES injection.
package/dist/stage.d.ts CHANGED
@@ -8,15 +8,24 @@ export interface StageOptions {
      selector?: StageSelector;
      /** Maximum number of bindings per executor call. @default 10 */
      batchSize?: number;
+     /** Maximum concurrent in-flight executor batches. @default 10 */
+     maxConcurrency?: number;
+     /** Child stages that chain off this stage's output. */
+     stages?: Stage[];
+ }
+ export interface RunOptions {
+     onProgress?: (elementsProcessed: number, quadsGenerated: number) => void;
  }
  export declare class Stage {
      readonly name: string;
+     readonly stages: readonly Stage[];
      private readonly executors;
      private readonly selector?;
      private readonly batchSize;
+     private readonly maxConcurrency;
      constructor(options: StageOptions);
-     run(dataset: Dataset, distribution: Distribution, writer: Writer): Promise<NotSupported | void>;
-     private executeWithSelector;
+     run(dataset: Dataset, distribution: Distribution, writer: Writer, options?: RunOptions): Promise<NotSupported | void>;
+     private runWithSelector;
      private executeAll;
  }
  /** Stage-level selector that yields variable bindings for use in executor queries. Pagination is an implementation detail. */
package/dist/stage.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,YAAY;IAS3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAYjB,mBAAmB;YAuBnB,UAAU;CAkBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAajB,eAAe;YA8Gf,UAAU;CAqBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
package/dist/stage.js CHANGED
@@ -1,48 +1,130 @@
  import { NotSupported } from './sparql/executor.js';
  import { batch } from './batch.js';
+ import { AsyncQueue } from './asyncQueue.js';
  export class Stage {
      name;
+     stages;
      executors;
      selector;
      batchSize;
+     maxConcurrency;
      constructor(options) {
          this.name = options.name;
+         this.stages = options.stages ?? [];
          this.executors = Array.isArray(options.executors)
              ? options.executors
              : [options.executors];
          this.selector = options.selector;
          this.batchSize = options.batchSize ?? 10;
+         this.maxConcurrency = options.maxConcurrency ?? 10;
      }
-     async run(dataset, distribution, writer) {
-         const streams = this.selector
-             ? await this.executeWithSelector(dataset, distribution)
-             : await this.executeAll(dataset, distribution);
+     async run(dataset, distribution, writer, options) {
+         if (this.selector) {
+             return this.runWithSelector(dataset, distribution, writer, options);
+         }
+         const streams = await this.executeAll(dataset, distribution);
          if (streams instanceof NotSupported) {
              return streams;
          }
          await writer.write(dataset, mergeStreams(streams));
      }
-     async executeWithSelector(dataset, distribution) {
-         const streams = [];
-         for await (const bindings of batch(this.selector, this.batchSize)) {
-             for (const executor of this.executors) {
-                 const result = await executor.execute(dataset, distribution, {
-                     bindings,
+     async runWithSelector(dataset, distribution, writer, options) {
+         // Peek the first batch to detect an empty selector before starting the
+         // writer (important because e.g. SparqlUpdateWriter does CLEAR GRAPH).
+         const batches = batch(this.selector, this.batchSize);
+         const iter = batches[Symbol.asyncIterator]();
+         const first = await iter.next();
+         if (first.done) {
+             return new NotSupported('All executors returned NotSupported');
+         }
+         // Reconstruct a full iterable including the peeked first batch.
+         const allBatches = (async function* () {
+             yield first.value;
+             // Continue yielding remaining batches from the same iterator.
+             for (;;) {
+                 const next = await iter.next();
+                 if (next.done)
+                     break;
+                 yield next.value;
+             }
+         })();
+         const queue = new AsyncQueue();
+         let elementsProcessed = 0;
+         let quadsGenerated = 0;
+         let hasResults = false;
+         const dispatch = async () => {
+             const inFlight = new Set();
+             let firstError;
+             const track = (promise) => {
+                 const p = promise.then(() => {
+                     inFlight.delete(p);
+                 }, (err) => {
+                     inFlight.delete(p);
+                     firstError ??= err;
                  });
-                 if (!(result instanceof NotSupported)) {
-                     streams.push(result);
+                 inFlight.add(p);
+             };
+             try {
+                 for await (const bindings of allBatches) {
+                     if (firstError)
+                         break;
+                     for (const executor of this.executors) {
+                         if (firstError)
+                             break;
+                         // Respect maxConcurrency: wait for a slot to open.
+                         if (inFlight.size >= this.maxConcurrency) {
+                             await Promise.race(inFlight);
+                             if (firstError)
+                                 break;
+                         }
+                         track((async () => {
+                             const result = await executor.execute(dataset, distribution, {
+                                 bindings,
+                             });
+                             if (!(result instanceof NotSupported)) {
+                                 hasResults = true;
+                                 for await (const quad of result) {
+                                     await queue.push(quad);
+                                     quadsGenerated++;
+                                 }
+                             }
+                             elementsProcessed += bindings.length;
+                             options?.onProgress?.(elementsProcessed, quadsGenerated);
+                         })());
+                     }
                  }
              }
-         }
-         if (streams.length === 0) {
+             catch (err) {
+                 firstError ??= err;
+             }
+             // Wait for all remaining in-flight tasks to settle.
+             await Promise.all(inFlight);
+             if (firstError) {
+                 queue.abort(firstError);
+             }
+             else {
+                 queue.close();
+             }
+         };
+         const dispatchPromise = dispatch();
+         const writePromise = (async () => {
+             try {
+                 await writer.write(dataset, queue);
+             }
+             catch (err) {
+                 queue.abort(err);
+                 throw err;
+             }
+         })();
+         await Promise.all([dispatchPromise, writePromise]);
+         if (!hasResults) {
              return new NotSupported('All executors returned NotSupported');
          }
-         return streams;
      }
      async executeAll(dataset, distribution) {
+         const results = await Promise.all(this.executors.map((executor) => executor.execute(dataset, distribution)));
          const streams = [];
-         for (const executor of this.executors) {
-             const result = await executor.execute(dataset, distribution);
+         for (const result of results) {
              if (!(result instanceof NotSupported)) {
                  streams.push(result);
              }