@lde/pipeline 0.6.24 → 0.6.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,12 +1,10 @@
1
1
  export * from './asyncQueue.js';
2
2
  export * from './batch.js';
3
3
  export * from './pipeline.js';
4
+ export * from './progressReporter.js';
4
5
  export * from './selector.js';
5
6
  export * from './stage.js';
6
- export * from './step.js';
7
- export * from './step/sparqlQuery.js';
8
- export * from './builder.js';
9
- export * from './config.js';
7
+ export * from './stageOutputResolver.js';
10
8
  export * from './sparql/index.js';
11
9
  export * from './distribution/index.js';
12
10
  export * from './writer/index.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
package/dist/index.js CHANGED
@@ -1,12 +1,10 @@
1
1
  export * from './asyncQueue.js';
2
2
  export * from './batch.js';
3
3
  export * from './pipeline.js';
4
+ export * from './progressReporter.js';
4
5
  export * from './selector.js';
5
6
  export * from './stage.js';
6
- export * from './step.js';
7
- export * from './step/sparqlQuery.js';
8
- export * from './builder.js';
9
- export * from './config.js';
7
+ export * from './stageOutputResolver.js';
10
8
  export * from './sparql/index.js';
11
9
  export * from './distribution/index.js';
12
10
  export * from './writer/index.js';
@@ -1,11 +1,28 @@
1
- import { Selector } from './selector.js';
2
- import { Step } from './step.js';
1
+ import type { Selector } from './selector.js';
2
+ import { Stage } from './stage.js';
3
+ import type { Writer } from './writer/writer.js';
4
+ import { type DistributionResolver } from './distribution/resolver.js';
5
+ import type { StageOutputResolver } from './stageOutputResolver.js';
6
+ import type { ProgressReporter } from './progressReporter.js';
7
+ export interface PipelineOptions {
8
+ name: string;
9
+ selector: Selector;
10
+ stages: Stage[];
11
+ writer: Writer;
12
+ distributionResolver: DistributionResolver;
13
+ stageOutputResolver?: StageOutputResolver;
14
+ outputDir?: string;
15
+ outputFormat?: 'turtle' | 'n-triples' | 'n-quads';
16
+ reporter?: ProgressReporter;
17
+ }
3
18
  export declare class Pipeline {
4
- private readonly config;
5
- constructor(config: {
6
- selector: Selector;
7
- steps: Step[];
8
- });
19
+ private readonly options;
20
+ constructor(options: PipelineOptions);
9
21
  run(): Promise<void>;
22
+ private processDataset;
23
+ private runStage;
24
+ private runChain;
25
+ private runChainedStage;
26
+ private readFiles;
10
27
  }
11
28
  //# sourceMappingURL=pipeline.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAc,IAAI,EAAE,MAAM,WAAW,CAAC;AAI7C,qBAAa,QAAQ;IACP,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE;QAAE,QAAQ,EAAE,QAAQ,CAAC;QAAC,KAAK,EAAE,IAAI,EAAE,CAAA;KAAE;IAE7D,GAAG;CAiCjB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -1,42 +1,141 @@
1
- import { NotSupported } from './step.js';
2
- import { Readable } from 'node:stream';
1
+ import { createReadStream } from 'node:fs';
2
+ import { StreamParser } from 'n3';
3
+ import { FileWriter } from './writer/fileWriter.js';
4
+ import { NoDistributionAvailable, } from './distribution/resolver.js';
5
+ import { NotSupported } from './sparql/executor.js';
3
6
  export class Pipeline {
4
- config;
5
- constructor(config) {
6
- this.config = config;
7
+ options;
8
+ constructor(options) {
9
+ const hasSubStages = options.stages.some((stage) => stage.stages.length > 0);
10
+ if (hasSubStages && !options.stageOutputResolver) {
11
+ throw new Error('stageOutputResolver is required when any stage has sub-stages');
12
+ }
13
+ if (hasSubStages && !options.outputDir) {
14
+ throw new Error('outputDir is required when any stage has sub-stages');
15
+ }
16
+ this.options = options;
7
17
  }
8
18
  async run() {
9
- const datasets = await this.config.selector.select();
19
+ const { selector, reporter, name } = this.options;
20
+ const start = Date.now();
21
+ reporter?.pipelineStart(name);
22
+ const datasets = await selector.select();
10
23
  for await (const dataset of datasets) {
11
- const distribution = dataset.getSparqlDistribution() ?? undefined;
12
- for (const step of this.config.steps) {
13
- const result = await step.execute(dataset, distribution);
14
- if (result instanceof NotSupported) {
15
- console.error(result);
24
+ await this.processDataset(dataset);
25
+ }
26
+ reporter?.pipelineComplete({ duration: Date.now() - start });
27
+ }
28
+ async processDataset(dataset) {
29
+ const { distributionResolver, reporter } = this.options;
30
+ const datasetIri = dataset.iri.toString();
31
+ reporter?.datasetStart(datasetIri);
32
+ const resolved = await distributionResolver.resolve(dataset);
33
+ if (resolved instanceof NoDistributionAvailable) {
34
+ reporter?.datasetSkipped(datasetIri, resolved.message);
35
+ return;
36
+ }
37
+ try {
38
+ for (const stage of this.options.stages) {
39
+ if (stage.stages.length > 0) {
40
+ await this.runChain(dataset, resolved.distribution, stage);
16
41
  }
17
- else if (result instanceof Readable) {
18
- const promise = new Promise((resolve, reject) => {
19
- result.on('data', (data) => {
20
- // TODO: pipe to writers.
21
- console.log('Data:', data);
22
- });
23
- result.on('error', (error) => {
24
- console.error('rejecting');
25
- reject(error);
26
- });
27
- result.on('end', resolve);
28
- });
29
- await promise;
42
+ else {
43
+ await this.runStage(dataset, resolved.distribution, stage);
30
44
  }
31
45
  }
32
- for (const step of this.config.steps) {
33
- if (isFinishable(step)) {
34
- await step.finish();
46
+ }
47
+ catch {
48
+ // Stage error for this dataset; continue to next dataset.
49
+ }
50
+ reporter?.datasetComplete(datasetIri);
51
+ }
52
+ async runStage(dataset, distribution, stage) {
53
+ const { writer, reporter } = this.options;
54
+ reporter?.stageStart(stage.name);
55
+ const stageStart = Date.now();
56
+ let elementsProcessed = 0;
57
+ let quadsGenerated = 0;
58
+ const result = await stage.run(dataset, distribution, writer, {
59
+ onProgress: (elements, quads) => {
60
+ elementsProcessed = elements;
61
+ quadsGenerated = quads;
62
+ reporter?.stageProgress({ elementsProcessed, quadsGenerated });
63
+ },
64
+ });
65
+ if (result instanceof NotSupported) {
66
+ reporter?.stageSkipped(stage.name, result.message);
67
+ }
68
+ else {
69
+ reporter?.stageComplete(stage.name, {
70
+ elementsProcessed,
71
+ quadsGenerated,
72
+ duration: Date.now() - stageStart,
73
+ });
74
+ }
75
+ }
76
+ async runChain(dataset, distribution, stage) {
77
+ const { writer, stageOutputResolver, outputDir, outputFormat } = this.options;
78
+ const outputFiles = [];
79
+ try {
80
+ // 1. Run parent stage → FileWriter.
81
+ const parentWriter = new FileWriter({
82
+ outputDir: `${outputDir}/${stage.name}`,
83
+ format: outputFormat,
84
+ });
85
+ await this.runChainedStage(dataset, distribution, stage, parentWriter);
86
+ outputFiles.push(parentWriter.getOutputPath(dataset));
87
+ // 2. Chain through children.
88
+ let currentDistribution = await stageOutputResolver.resolve(parentWriter.getOutputPath(dataset));
89
+ for (let i = 0; i < stage.stages.length; i++) {
90
+ const child = stage.stages[i];
91
+ const childWriter = new FileWriter({
92
+ outputDir: `${outputDir}/${child.name}`,
93
+ format: outputFormat,
94
+ });
95
+ await this.runChainedStage(dataset, currentDistribution, child, childWriter);
96
+ outputFiles.push(childWriter.getOutputPath(dataset));
97
+ if (i < stage.stages.length - 1) {
98
+ currentDistribution = await stageOutputResolver.resolve(childWriter.getOutputPath(dataset));
35
99
  }
36
100
  }
101
+ // 3. Concatenate all output files → user writer.
102
+ await writer.write(dataset, this.readFiles(outputFiles));
103
+ }
104
+ finally {
105
+ await stageOutputResolver.cleanup();
106
+ }
107
+ }
108
+ async runChainedStage(dataset, distribution, stage, stageWriter) {
109
+ const { reporter } = this.options;
110
+ reporter?.stageStart(stage.name);
111
+ const stageStart = Date.now();
112
+ let elementsProcessed = 0;
113
+ let quadsGenerated = 0;
114
+ const result = await stage.run(dataset, distribution, stageWriter, {
115
+ onProgress: (elements, quads) => {
116
+ elementsProcessed = elements;
117
+ quadsGenerated = quads;
118
+ reporter?.stageProgress({ elementsProcessed, quadsGenerated });
119
+ },
120
+ });
121
+ if (result instanceof NotSupported) {
122
+ reporter?.stageSkipped(stage.name, result.message);
123
+ throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
124
+ }
125
+ reporter?.stageComplete(stage.name, {
126
+ elementsProcessed,
127
+ quadsGenerated,
128
+ duration: Date.now() - stageStart,
129
+ });
130
+ }
131
+ async *readFiles(paths) {
132
+ for (const path of paths) {
133
+ const stream = createReadStream(path);
134
+ const parser = new StreamParser();
135
+ stream.pipe(parser);
136
+ for await (const quad of parser) {
137
+ yield quad;
138
+ }
37
139
  }
38
140
  }
39
141
  }
40
- const isFinishable = (step) => {
41
- return typeof step.finish === 'function';
42
- };
@@ -0,0 +1,21 @@
1
+ export interface ProgressReporter {
2
+ pipelineStart(name: string): void;
3
+ datasetStart(dataset: string): void;
4
+ stageStart(stage: string): void;
5
+ stageProgress(update: {
6
+ elementsProcessed: number;
7
+ quadsGenerated: number;
8
+ }): void;
9
+ stageComplete(stage: string, result: {
10
+ elementsProcessed: number;
11
+ quadsGenerated: number;
12
+ duration: number;
13
+ }): void;
14
+ stageSkipped(stage: string, reason: string): void;
15
+ datasetComplete(dataset: string): void;
16
+ datasetSkipped(dataset: string, reason: string): void;
17
+ pipelineComplete(result: {
18
+ duration: number;
19
+ }): void;
20
+ }
21
+ //# sourceMappingURL=progressReporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,aAAa,CAAC,MAAM,EAAE;QACpB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CACX,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAClD,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtD,gBAAgB,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACtD"}
@@ -0,0 +1 @@
1
+ export {};
@@ -2,8 +2,13 @@ import { Dataset, Distribution } from '@lde/dataset';
2
2
  import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
3
3
  import type { NamedNode, Quad, Stream } from '@rdfjs/types';
4
4
  import type { Readable } from 'node:stream';
5
- import { NotSupported } from '../step.js';
6
- export { NotSupported } from '../step.js';
5
+ /**
6
+ * An executor could not run because the dataset lacks a supported distribution.
7
+ */
8
+ export declare class NotSupported {
9
+ readonly message: string;
10
+ constructor(message: string);
11
+ }
7
12
  /** A single row of variable bindings (variable name → NamedNode). */
8
13
  export type VariableBindings = Record<string, NamedNode>;
9
14
  export interface ExecuteOptions {
@@ -40,12 +45,16 @@ export interface SparqlConstructExecutorOptions {
40
45
  fetcher?: SparqlEndpointFetcher;
41
46
  }
42
47
  /**
43
- * A streaming SPARQL CONSTRUCT executor that parses the query once (in the
44
- * constructor) and operates on the AST for graph and VALUES injection.
48
+ * A streaming SPARQL CONSTRUCT executor.
49
+ *
50
+ * Queries **without** `#subjectFilter#` are parsed once in the constructor
51
+ * (fast path). Queries that contain the template are stored as raw strings
52
+ * and parsed at {@link execute} time after substitution.
45
53
  *
46
54
  * Template substitution (applied in order):
47
- * 1. `FROM <graph>` set via `withDefaultGraph` if the distribution has a named graph
48
- * 2. `?dataset` — replaced with the dataset IRI (string substitution on the serialised query)
55
+ * 1. `#subjectFilter#` — replaced with `distribution.subjectFilter` (deferred to execute)
56
+ * 2. `FROM <graph>` set via `withDefaultGraph` if the distribution has a named graph
57
+ * 3. `?dataset` — replaced with the dataset IRI (string substitution on the serialised query)
49
58
  *
50
59
  * @example
51
60
  * ```typescript
@@ -63,7 +72,8 @@ export interface SparqlConstructExecutorOptions {
63
72
  * ```
64
73
  */
65
74
  export declare class SparqlConstructExecutor implements Executor {
66
- private readonly query;
75
+ private readonly rawQuery;
76
+ private readonly preParsed?;
67
77
  private readonly fetcher;
68
78
  private readonly generator;
69
79
  constructor(options: SparqlConstructExecutorOptions);
@@ -84,14 +94,6 @@ export declare class SparqlConstructExecutor implements Executor {
84
94
  */
85
95
  static fromFile(filename: string, options?: Omit<SparqlConstructExecutorOptions, 'query'>): Promise<SparqlConstructExecutor>;
86
96
  }
87
- /**
88
- * Substitute template variables in a SPARQL query.
89
- *
90
- * - `#subjectFilter#` — replaced with the distribution's subject filter
91
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
92
- * - `?dataset` — replaced with the dataset IRI
93
- */
94
- export declare function substituteQueryTemplates(query: string, distribution: Distribution | null, dataset: Dataset): string;
95
97
  /**
96
98
  * Read a SPARQL query from a file.
97
99
  */
@@ -1 +1 @@
1
- {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI5C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAK1C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAiB;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAcnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAoBtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,OAAO,EAAE,OAAO,GACf,MAAM,CAWR;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
1
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAO5C;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAkBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAiCtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
@@ -4,15 +4,26 @@ import { resolve } from 'node:path';
4
4
  import { Generator, Parser } from 'sparqljs';
5
5
  import { withDefaultGraph } from './graph.js';
6
6
  import { injectValues } from './values.js';
7
- // Re-export for convenience
8
- export { NotSupported } from '../step.js';
9
7
  /**
10
- * A streaming SPARQL CONSTRUCT executor that parses the query once (in the
11
- * constructor) and operates on the AST for graph and VALUES injection.
8
+ * An executor could not run because the dataset lacks a supported distribution.
9
+ */
10
+ export class NotSupported {
11
+ message;
12
+ constructor(message) {
13
+ this.message = message;
14
+ }
15
+ }
16
+ /**
17
+ * A streaming SPARQL CONSTRUCT executor.
18
+ *
19
+ * Queries **without** `#subjectFilter#` are parsed once in the constructor
20
+ * (fast path). Queries that contain the template are stored as raw strings
21
+ * and parsed at {@link execute} time after substitution.
12
22
  *
13
23
  * Template substitution (applied in order):
14
- * 1. `FROM <graph>` set via `withDefaultGraph` if the distribution has a named graph
15
- * 2. `?dataset` — replaced with the dataset IRI (string substitution on the serialised query)
24
+ * 1. `#subjectFilter#` — replaced with `distribution.subjectFilter` (deferred to execute)
25
+ * 2. `FROM <graph>` set via `withDefaultGraph` if the distribution has a named graph
26
+ * 3. `?dataset` — replaced with the dataset IRI (string substitution on the serialised query)
16
27
  *
17
28
  * @example
18
29
  * ```typescript
@@ -30,16 +41,19 @@ export { NotSupported } from '../step.js';
30
41
  * ```
31
42
  */
32
43
  export class SparqlConstructExecutor {
33
- query;
44
+ rawQuery;
45
+ preParsed;
34
46
  fetcher;
35
47
  generator = new Generator();
36
48
  constructor(options) {
37
- const parser = new Parser();
38
- const parsed = parser.parse(options.query);
39
- if (parsed.type !== 'query' || parsed.queryType !== 'CONSTRUCT') {
40
- throw new Error('Query must be a CONSTRUCT query');
49
+ this.rawQuery = options.query;
50
+ if (!options.query.includes('#subjectFilter#')) {
51
+ const parsed = new Parser().parse(options.query);
52
+ if (parsed.type !== 'query' || parsed.queryType !== 'CONSTRUCT') {
53
+ throw new Error('Query must be a CONSTRUCT query');
54
+ }
55
+ this.preParsed = parsed;
41
56
  }
42
- this.query = parsed;
43
57
  this.fetcher =
44
58
  options.fetcher ??
45
59
  new SparqlEndpointFetcher({
@@ -56,7 +70,18 @@ export class SparqlConstructExecutor {
56
70
  */
57
71
  async execute(dataset, distribution, options) {
58
72
  const endpoint = distribution.accessUrl;
59
- let ast = structuredClone(this.query);
73
+ let ast;
74
+ if (this.preParsed) {
75
+ ast = structuredClone(this.preParsed);
76
+ }
77
+ else {
78
+ const substituted = this.rawQuery.replace('#subjectFilter#', distribution.subjectFilter ?? '');
79
+ const parsed = new Parser().parse(substituted);
80
+ if (parsed.type !== 'query' || parsed.queryType !== 'CONSTRUCT') {
81
+ throw new Error('Query must be a CONSTRUCT query');
82
+ }
83
+ ast = parsed;
84
+ }
60
85
  if (distribution.namedGraph) {
61
86
  withDefaultGraph(ast, distribution.namedGraph);
62
87
  }
@@ -79,23 +104,6 @@ export class SparqlConstructExecutor {
79
104
  return new SparqlConstructExecutor({ ...options, query });
80
105
  }
81
106
  }
82
- /**
83
- * Substitute template variables in a SPARQL query.
84
- *
85
- * - `#subjectFilter#` — replaced with the distribution's subject filter
86
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
87
- * - `?dataset` — replaced with the dataset IRI
88
- */
89
- export function substituteQueryTemplates(query, distribution, dataset) {
90
- const subjectFilter = distribution?.subjectFilter ?? '';
91
- const namedGraph = distribution?.namedGraph
92
- ? `FROM <${distribution.namedGraph}>`
93
- : '';
94
- return query
95
- .replace('#subjectFilter#', subjectFilter)
96
- .replaceAll('?dataset', `<${dataset.iri}>`)
97
- .replace('#namedGraph#', namedGraph);
98
- }
99
107
  /**
100
108
  * Read a SPARQL query from a file.
101
109
  */
@@ -1,4 +1,4 @@
1
- export { SparqlConstructExecutor, substituteQueryTemplates, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type QuadStream, type VariableBindings, } from './executor.js';
1
+ export { SparqlConstructExecutor, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type QuadStream, type VariableBindings, } from './executor.js';
2
2
  export { SparqlSelector, type SparqlSelectorOptions } from './selector.js';
3
3
  export { injectValues } from './values.js';
4
4
  export { withDefaultGraph } from './graph.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,wBAAwB,EACxB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,cAAc,EAAE,KAAK,qBAAqB,EAAE,MAAM,eAAe,CAAC;AAE3E,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,cAAc,EAAE,KAAK,qBAAqB,EAAE,MAAM,eAAe,CAAC;AAE3E,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
@@ -1,4 +1,4 @@
1
- export { SparqlConstructExecutor, substituteQueryTemplates, NotSupported, readQueryFile, } from './executor.js';
1
+ export { SparqlConstructExecutor, NotSupported, readQueryFile, } from './executor.js';
2
2
  export { SparqlSelector } from './selector.js';
3
3
  export { injectValues } from './values.js';
4
4
  export { withDefaultGraph } from './graph.js';
package/dist/stage.d.ts CHANGED
@@ -2,22 +2,27 @@ import { Dataset, Distribution } from '@lde/dataset';
2
2
  import type { Executor, VariableBindings } from './sparql/executor.js';
3
3
  import { NotSupported } from './sparql/executor.js';
4
4
  import type { Writer } from './writer/writer.js';
5
+ /** A selector, or a factory that receives the runtime distribution. */
6
+ export type StageSelectorInput = StageSelector | ((distribution: Distribution) => StageSelector);
5
7
  export interface StageOptions {
6
8
  name: string;
7
9
  executors: Executor | Executor[];
8
- selector?: StageSelector;
10
+ selector?: StageSelectorInput;
9
11
  /** Maximum number of bindings per executor call. @default 10 */
10
12
  batchSize?: number;
11
13
  /** Maximum concurrent in-flight executor batches. @default 10 */
12
14
  maxConcurrency?: number;
15
+ /** Child stages that chain off this stage's output. */
16
+ stages?: Stage[];
13
17
  }
14
18
  export interface RunOptions {
15
19
  onProgress?: (elementsProcessed: number, quadsGenerated: number) => void;
16
20
  }
17
21
  export declare class Stage {
18
22
  readonly name: string;
23
+ readonly stages: readonly Stage[];
19
24
  private readonly executors;
20
- private readonly selector?;
25
+ private readonly selectorInput?;
21
26
  private readonly batchSize;
22
27
  private readonly maxConcurrency;
23
28
  constructor(options: StageOptions);
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAU3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAajB,eAAe;YA8Gf,UAAU;CAqBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,uEAAuE;AACvE,MAAM,MAAM,kBAAkB,GAC1B,aAAa,GACb,CAAC,CAAC,YAAY,EAAE,YAAY,KAAK,aAAa,CAAC,CAAC;AAEpD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAC9B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAqB;IACpD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAuBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
package/dist/stage.js CHANGED
@@ -3,22 +3,27 @@ import { batch } from './batch.js';
3
3
  import { AsyncQueue } from './asyncQueue.js';
4
4
  export class Stage {
5
5
  name;
6
+ stages;
6
7
  executors;
7
- selector;
8
+ selectorInput;
8
9
  batchSize;
9
10
  maxConcurrency;
10
11
  constructor(options) {
11
12
  this.name = options.name;
13
+ this.stages = options.stages ?? [];
12
14
  this.executors = Array.isArray(options.executors)
13
15
  ? options.executors
14
16
  : [options.executors];
15
- this.selector = options.selector;
17
+ this.selectorInput = options.selector;
16
18
  this.batchSize = options.batchSize ?? 10;
17
19
  this.maxConcurrency = options.maxConcurrency ?? 10;
18
20
  }
19
21
  async run(dataset, distribution, writer, options) {
20
- if (this.selector) {
21
- return this.runWithSelector(dataset, distribution, writer, options);
22
+ if (this.selectorInput) {
23
+ const selector = typeof this.selectorInput === 'function'
24
+ ? this.selectorInput(distribution)
25
+ : this.selectorInput;
26
+ return this.runWithSelector(selector, dataset, distribution, writer, options);
22
27
  }
23
28
  const streams = await this.executeAll(dataset, distribution);
24
29
  if (streams instanceof NotSupported) {
@@ -26,10 +31,10 @@ export class Stage {
26
31
  }
27
32
  await writer.write(dataset, mergeStreams(streams));
28
33
  }
29
- async runWithSelector(dataset, distribution, writer, options) {
34
+ async runWithSelector(selector, dataset, distribution, writer, options) {
30
35
  // Peek the first batch to detect an empty selector before starting the
31
36
  // writer (important because e.g. SparqlUpdateWriter does CLEAR GRAPH).
32
- const batches = batch(this.selector, this.batchSize);
37
+ const batches = batch(selector, this.batchSize);
33
38
  const iter = batches[Symbol.asyncIterator]();
34
39
  const first = await iter.next();
35
40
  if (first.done) {
@@ -0,0 +1,6 @@
1
+ import { Distribution } from '@lde/dataset';
2
+ export interface StageOutputResolver {
3
+ resolve(outputPath: string): Promise<Distribution>;
4
+ cleanup(): Promise<void>;
5
+ }
6
+ //# sourceMappingURL=stageOutputResolver.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stageOutputResolver.d.ts","sourceRoot":"","sources":["../src/stageOutputResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IACnD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B"}
@@ -0,0 +1 @@
1
+ export {};
@@ -14,10 +14,11 @@ export interface FileWriterOptions {
14
14
  }
15
15
  export declare class FileWriter implements Writer {
16
16
  private readonly outputDir;
17
- private readonly format;
17
+ readonly format: 'turtle' | 'n-triples' | 'n-quads';
18
18
  constructor(options: FileWriterOptions);
19
19
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
20
- private getFilename;
20
+ getOutputPath(dataset: Dataset): string;
21
+ getFilename(dataset: Dataset): string;
21
22
  private getExtension;
22
23
  }
23
24
  //# sourceMappingURL=fileWriter.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAaD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqC;gBAEhD,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBxE,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,YAAY;CAUrB"}
1
+ {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAaD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;gBAExC,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBxE,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,YAAY;CAUrB"}
@@ -43,6 +43,9 @@ export class FileWriter {
43
43
  });
44
44
  });
45
45
  }
46
+ getOutputPath(dataset) {
47
+ return join(this.outputDir, this.getFilename(dataset));
48
+ }
46
49
  getFilename(dataset) {
47
50
  const extension = this.getExtension();
48
51
  const baseName = filenamifyUrl(dataset.iri.toString(), {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.6.24",
3
+ "version": "0.6.26",
4
4
  "repository": {
5
5
  "url": "https://github.com/ldengine/lde",
6
6
  "directory": "packages/pipeline"
@@ -23,12 +23,10 @@
23
23
  "!**/*.tsbuildinfo"
24
24
  ],
25
25
  "dependencies": {
26
- "@lde/dataset": "0.6.8",
27
- "@lde/dataset-registry-client": "0.6.15",
28
- "@lde/sparql-importer": "0.2.8",
29
- "@lde/sparql-server": "0.4.8",
26
+ "@lde/dataset": "0.6.9",
27
+ "@lde/dataset-registry-client": "0.6.16",
28
+ "@lde/sparql-importer": "0.2.9",
30
29
  "@rdfjs/types": "^2.0.1",
31
- "c12": "^3.0.2",
32
30
  "fetch-sparql-endpoint": "^7.1.0",
33
31
  "filenamify-url": "^3.0.0",
34
32
  "n3": "^1.17.0",