@lde/pipeline 0.6.24 → 0.6.25

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,12 +1,10 @@
1
1
  export * from './asyncQueue.js';
2
2
  export * from './batch.js';
3
3
  export * from './pipeline.js';
4
+ export * from './progressReporter.js';
4
5
  export * from './selector.js';
5
6
  export * from './stage.js';
6
- export * from './step.js';
7
- export * from './step/sparqlQuery.js';
8
- export * from './builder.js';
9
- export * from './config.js';
7
+ export * from './stageOutputResolver.js';
10
8
  export * from './sparql/index.js';
11
9
  export * from './distribution/index.js';
12
10
  export * from './writer/index.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
package/dist/index.js CHANGED
@@ -1,12 +1,10 @@
1
1
  export * from './asyncQueue.js';
2
2
  export * from './batch.js';
3
3
  export * from './pipeline.js';
4
+ export * from './progressReporter.js';
4
5
  export * from './selector.js';
5
6
  export * from './stage.js';
6
- export * from './step.js';
7
- export * from './step/sparqlQuery.js';
8
- export * from './builder.js';
9
- export * from './config.js';
7
+ export * from './stageOutputResolver.js';
10
8
  export * from './sparql/index.js';
11
9
  export * from './distribution/index.js';
12
10
  export * from './writer/index.js';
@@ -1,11 +1,28 @@
1
- import { Selector } from './selector.js';
2
- import { Step } from './step.js';
1
+ import type { Selector } from './selector.js';
2
+ import { Stage } from './stage.js';
3
+ import type { Writer } from './writer/writer.js';
4
+ import { type DistributionResolver } from './distribution/resolver.js';
5
+ import type { StageOutputResolver } from './stageOutputResolver.js';
6
+ import type { ProgressReporter } from './progressReporter.js';
7
+ export interface PipelineOptions {
8
+ name: string;
9
+ selector: Selector;
10
+ stages: Stage[];
11
+ writer: Writer;
12
+ distributionResolver: DistributionResolver;
13
+ stageOutputResolver?: StageOutputResolver;
14
+ outputDir?: string;
15
+ outputFormat?: 'turtle' | 'n-triples' | 'n-quads';
16
+ reporter?: ProgressReporter;
17
+ }
3
18
  export declare class Pipeline {
4
- private readonly config;
5
- constructor(config: {
6
- selector: Selector;
7
- steps: Step[];
8
- });
19
+ private readonly options;
20
+ constructor(options: PipelineOptions);
9
21
  run(): Promise<void>;
22
+ private processDataset;
23
+ private runStage;
24
+ private runChain;
25
+ private runChainedStage;
26
+ private readFiles;
10
27
  }
11
28
  //# sourceMappingURL=pipeline.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAc,IAAI,EAAE,MAAM,WAAW,CAAC;AAI7C,qBAAa,QAAQ;IACP,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE;QAAE,QAAQ,EAAE,QAAQ,CAAC;QAAC,KAAK,EAAE,IAAI,EAAE,CAAA;KAAE;IAE7D,GAAG;CAiCjB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -1,42 +1,141 @@
1
- import { NotSupported } from './step.js';
2
- import { Readable } from 'node:stream';
1
+ import { createReadStream } from 'node:fs';
2
+ import { StreamParser } from 'n3';
3
+ import { FileWriter } from './writer/fileWriter.js';
4
+ import { NoDistributionAvailable, } from './distribution/resolver.js';
5
+ import { NotSupported } from './sparql/executor.js';
3
6
  export class Pipeline {
4
- config;
5
- constructor(config) {
6
- this.config = config;
7
+ options;
8
+ constructor(options) {
9
+ const hasSubStages = options.stages.some((stage) => stage.stages.length > 0);
10
+ if (hasSubStages && !options.stageOutputResolver) {
11
+ throw new Error('stageOutputResolver is required when any stage has sub-stages');
12
+ }
13
+ if (hasSubStages && !options.outputDir) {
14
+ throw new Error('outputDir is required when any stage has sub-stages');
15
+ }
16
+ this.options = options;
7
17
  }
8
18
  async run() {
9
- const datasets = await this.config.selector.select();
19
+ const { selector, reporter, name } = this.options;
20
+ const start = Date.now();
21
+ reporter?.pipelineStart(name);
22
+ const datasets = await selector.select();
10
23
  for await (const dataset of datasets) {
11
- const distribution = dataset.getSparqlDistribution() ?? undefined;
12
- for (const step of this.config.steps) {
13
- const result = await step.execute(dataset, distribution);
14
- if (result instanceof NotSupported) {
15
- console.error(result);
24
+ await this.processDataset(dataset);
25
+ }
26
+ reporter?.pipelineComplete({ duration: Date.now() - start });
27
+ }
28
+ async processDataset(dataset) {
29
+ const { distributionResolver, reporter } = this.options;
30
+ const datasetIri = dataset.iri.toString();
31
+ reporter?.datasetStart(datasetIri);
32
+ const resolved = await distributionResolver.resolve(dataset);
33
+ if (resolved instanceof NoDistributionAvailable) {
34
+ reporter?.datasetSkipped(datasetIri, resolved.message);
35
+ return;
36
+ }
37
+ try {
38
+ for (const stage of this.options.stages) {
39
+ if (stage.stages.length > 0) {
40
+ await this.runChain(dataset, resolved.distribution, stage);
16
41
  }
17
- else if (result instanceof Readable) {
18
- const promise = new Promise((resolve, reject) => {
19
- result.on('data', (data) => {
20
- // TODO: pipe to writers.
21
- console.log('Data:', data);
22
- });
23
- result.on('error', (error) => {
24
- console.error('rejecting');
25
- reject(error);
26
- });
27
- result.on('end', resolve);
28
- });
29
- await promise;
42
+ else {
43
+ await this.runStage(dataset, resolved.distribution, stage);
30
44
  }
31
45
  }
32
- for (const step of this.config.steps) {
33
- if (isFinishable(step)) {
34
- await step.finish();
46
+ }
47
+ catch {
48
+ // Stage error for this dataset; continue to next dataset.
49
+ }
50
+ reporter?.datasetComplete(datasetIri);
51
+ }
52
+ async runStage(dataset, distribution, stage) {
53
+ const { writer, reporter } = this.options;
54
+ reporter?.stageStart(stage.name);
55
+ const stageStart = Date.now();
56
+ let elementsProcessed = 0;
57
+ let quadsGenerated = 0;
58
+ const result = await stage.run(dataset, distribution, writer, {
59
+ onProgress: (elements, quads) => {
60
+ elementsProcessed = elements;
61
+ quadsGenerated = quads;
62
+ reporter?.stageProgress({ elementsProcessed, quadsGenerated });
63
+ },
64
+ });
65
+ if (result instanceof NotSupported) {
66
+ reporter?.stageSkipped(stage.name, result.message);
67
+ }
68
+ else {
69
+ reporter?.stageComplete(stage.name, {
70
+ elementsProcessed,
71
+ quadsGenerated,
72
+ duration: Date.now() - stageStart,
73
+ });
74
+ }
75
+ }
76
+ async runChain(dataset, distribution, stage) {
77
+ const { writer, stageOutputResolver, outputDir, outputFormat } = this.options;
78
+ const outputFiles = [];
79
+ try {
80
+ // 1. Run parent stage → FileWriter.
81
+ const parentWriter = new FileWriter({
82
+ outputDir: `${outputDir}/${stage.name}`,
83
+ format: outputFormat,
84
+ });
85
+ await this.runChainedStage(dataset, distribution, stage, parentWriter);
86
+ outputFiles.push(parentWriter.getOutputPath(dataset));
87
+ // 2. Chain through children.
88
+ let currentDistribution = await stageOutputResolver.resolve(parentWriter.getOutputPath(dataset));
89
+ for (let i = 0; i < stage.stages.length; i++) {
90
+ const child = stage.stages[i];
91
+ const childWriter = new FileWriter({
92
+ outputDir: `${outputDir}/${child.name}`,
93
+ format: outputFormat,
94
+ });
95
+ await this.runChainedStage(dataset, currentDistribution, child, childWriter);
96
+ outputFiles.push(childWriter.getOutputPath(dataset));
97
+ if (i < stage.stages.length - 1) {
98
+ currentDistribution = await stageOutputResolver.resolve(childWriter.getOutputPath(dataset));
35
99
  }
36
100
  }
101
+ // 3. Concatenate all output files → user writer.
102
+ await writer.write(dataset, this.readFiles(outputFiles));
103
+ }
104
+ finally {
105
+ await stageOutputResolver.cleanup();
106
+ }
107
+ }
108
+ async runChainedStage(dataset, distribution, stage, stageWriter) {
109
+ const { reporter } = this.options;
110
+ reporter?.stageStart(stage.name);
111
+ const stageStart = Date.now();
112
+ let elementsProcessed = 0;
113
+ let quadsGenerated = 0;
114
+ const result = await stage.run(dataset, distribution, stageWriter, {
115
+ onProgress: (elements, quads) => {
116
+ elementsProcessed = elements;
117
+ quadsGenerated = quads;
118
+ reporter?.stageProgress({ elementsProcessed, quadsGenerated });
119
+ },
120
+ });
121
+ if (result instanceof NotSupported) {
122
+ reporter?.stageSkipped(stage.name, result.message);
123
+ throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
124
+ }
125
+ reporter?.stageComplete(stage.name, {
126
+ elementsProcessed,
127
+ quadsGenerated,
128
+ duration: Date.now() - stageStart,
129
+ });
130
+ }
131
+ async *readFiles(paths) {
132
+ for (const path of paths) {
133
+ const stream = createReadStream(path);
134
+ const parser = new StreamParser();
135
+ stream.pipe(parser);
136
+ for await (const quad of parser) {
137
+ yield quad;
138
+ }
37
139
  }
38
140
  }
39
141
  }
40
- const isFinishable = (step) => {
41
- return typeof step.finish === 'function';
42
- };
@@ -0,0 +1,21 @@
1
+ export interface ProgressReporter {
2
+ pipelineStart(name: string): void;
3
+ datasetStart(dataset: string): void;
4
+ stageStart(stage: string): void;
5
+ stageProgress(update: {
6
+ elementsProcessed: number;
7
+ quadsGenerated: number;
8
+ }): void;
9
+ stageComplete(stage: string, result: {
10
+ elementsProcessed: number;
11
+ quadsGenerated: number;
12
+ duration: number;
13
+ }): void;
14
+ stageSkipped(stage: string, reason: string): void;
15
+ datasetComplete(dataset: string): void;
16
+ datasetSkipped(dataset: string, reason: string): void;
17
+ pipelineComplete(result: {
18
+ duration: number;
19
+ }): void;
20
+ }
21
+ //# sourceMappingURL=progressReporter.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,aAAa,CAAC,MAAM,EAAE;QACpB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CACX,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAClD,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtD,gBAAgB,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACtD"}
@@ -0,0 +1 @@
1
+ export {};
@@ -2,8 +2,13 @@ import { Dataset, Distribution } from '@lde/dataset';
2
2
  import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
3
3
  import type { NamedNode, Quad, Stream } from '@rdfjs/types';
4
4
  import type { Readable } from 'node:stream';
5
- import { NotSupported } from '../step.js';
6
- export { NotSupported } from '../step.js';
5
+ /**
6
+ * An executor could not run because the dataset lacks a supported distribution.
7
+ */
8
+ export declare class NotSupported {
9
+ readonly message: string;
10
+ constructor(message: string);
11
+ }
7
12
  /** A single row of variable bindings (variable name → NamedNode). */
8
13
  export type VariableBindings = Record<string, NamedNode>;
9
14
  export interface ExecuteOptions {
@@ -1 +1 @@
1
- {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAI5C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAK1C,OAAO,EAAE,YAAY,EAAE,MAAM,YAAY,CAAC;AAE1C,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAiB;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAcnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAoBtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,OAAO,EAAE,OAAO,GACf,MAAM,CAWR;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
1
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAO5C;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;GAsBG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAiB;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAcnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAoBtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;;;;;GAMG;AACH,wBAAgB,wBAAwB,CACtC,KAAK,EAAE,MAAM,EACb,YAAY,EAAE,YAAY,GAAG,IAAI,EACjC,OAAO,EAAE,OAAO,GACf,MAAM,CAWR;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
@@ -4,8 +4,15 @@ import { resolve } from 'node:path';
4
4
  import { Generator, Parser } from 'sparqljs';
5
5
  import { withDefaultGraph } from './graph.js';
6
6
  import { injectValues } from './values.js';
7
- // Re-export for convenience
8
- export { NotSupported } from '../step.js';
7
+ /**
8
+ * An executor could not run because the dataset lacks a supported distribution.
9
+ */
10
+ export class NotSupported {
11
+ message;
12
+ constructor(message) {
13
+ this.message = message;
14
+ }
15
+ }
9
16
  /**
10
17
  * A streaming SPARQL CONSTRUCT executor that parses the query once (in the
11
18
  * constructor) and operates on the AST for graph and VALUES injection.
package/dist/stage.d.ts CHANGED
@@ -10,12 +10,15 @@ export interface StageOptions {
10
10
  batchSize?: number;
11
11
  /** Maximum concurrent in-flight executor batches. @default 10 */
12
12
  maxConcurrency?: number;
13
+ /** Child stages that chain off this stage's output. */
14
+ stages?: Stage[];
13
15
  }
14
16
  export interface RunOptions {
15
17
  onProgress?: (elementsProcessed: number, quadsGenerated: number) => void;
16
18
  }
17
19
  export declare class Stage {
18
20
  readonly name: string;
21
+ readonly stages: readonly Stage[];
19
22
  private readonly executors;
20
23
  private readonly selector?;
21
24
  private readonly batchSize;
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAU3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAajB,eAAe;YA8Gf,UAAU;CAqBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAajB,eAAe;YA8Gf,UAAU;CAqBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
package/dist/stage.js CHANGED
@@ -3,12 +3,14 @@ import { batch } from './batch.js';
3
3
  import { AsyncQueue } from './asyncQueue.js';
4
4
  export class Stage {
5
5
  name;
6
+ stages;
6
7
  executors;
7
8
  selector;
8
9
  batchSize;
9
10
  maxConcurrency;
10
11
  constructor(options) {
11
12
  this.name = options.name;
13
+ this.stages = options.stages ?? [];
12
14
  this.executors = Array.isArray(options.executors)
13
15
  ? options.executors
14
16
  : [options.executors];
@@ -0,0 +1,6 @@
1
+ import { Distribution } from '@lde/dataset';
2
+ export interface StageOutputResolver {
3
+ resolve(outputPath: string): Promise<Distribution>;
4
+ cleanup(): Promise<void>;
5
+ }
6
+ //# sourceMappingURL=stageOutputResolver.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stageOutputResolver.d.ts","sourceRoot":"","sources":["../src/stageOutputResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE5C,MAAM,WAAW,mBAAmB;IAClC,OAAO,CAAC,UAAU,EAAE,MAAM,GAAG,OAAO,CAAC,YAAY,CAAC,CAAC;IACnD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B"}
@@ -0,0 +1 @@
1
+ export {};
@@ -14,10 +14,11 @@ export interface FileWriterOptions {
14
14
  }
15
15
  export declare class FileWriter implements Writer {
16
16
  private readonly outputDir;
17
- private readonly format;
17
+ readonly format: 'turtle' | 'n-triples' | 'n-quads';
18
18
  constructor(options: FileWriterOptions);
19
19
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
20
- private getFilename;
20
+ getOutputPath(dataset: Dataset): string;
21
+ getFilename(dataset: Dataset): string;
21
22
  private getExtension;
22
23
  }
23
24
  //# sourceMappingURL=fileWriter.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAaD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqC;gBAEhD,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBxE,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,YAAY;CAUrB"}
1
+ {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAaD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;gBAExC,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBxE,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,YAAY;CAUrB"}
@@ -43,6 +43,9 @@ export class FileWriter {
43
43
  });
44
44
  });
45
45
  }
46
+ getOutputPath(dataset) {
47
+ return join(this.outputDir, this.getFilename(dataset));
48
+ }
46
49
  getFilename(dataset) {
47
50
  const extension = this.getExtension();
48
51
  const baseName = filenamifyUrl(dataset.iri.toString(), {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.6.24",
3
+ "version": "0.6.25",
4
4
  "repository": {
5
5
  "url": "https://github.com/ldengine/lde",
6
6
  "directory": "packages/pipeline"
@@ -23,12 +23,10 @@
23
23
  "!**/*.tsbuildinfo"
24
24
  ],
25
25
  "dependencies": {
26
- "@lde/dataset": "0.6.8",
27
- "@lde/dataset-registry-client": "0.6.15",
28
- "@lde/sparql-importer": "0.2.8",
29
- "@lde/sparql-server": "0.4.8",
26
+ "@lde/dataset": "0.6.9",
27
+ "@lde/dataset-registry-client": "0.6.16",
28
+ "@lde/sparql-importer": "0.2.9",
30
29
  "@rdfjs/types": "^2.0.1",
31
- "c12": "^3.0.2",
32
30
  "fetch-sparql-endpoint": "^7.1.0",
33
31
  "filenamify-url": "^3.0.0",
34
32
  "n3": "^1.17.0",
package/dist/builder.d.ts DELETED
@@ -1,120 +0,0 @@
1
- import { Selector, ManualDatasetSelection, RegistrySelector } from './selector.js';
2
- import { Step } from './step.js';
3
- /**
4
- * Configuration for QLever SPARQL server.
5
- */
6
- export interface QleverConfig {
7
- /**
8
- * Execution mode: 'docker' for containerized, 'native' for local binary.
9
- */
10
- mode: 'docker' | 'native';
11
- /**
12
- * Docker image to use (for docker mode).
13
- * @default 'adfreiburg/qlever'
14
- */
15
- image?: string;
16
- /**
17
- * Port for the SPARQL endpoint.
18
- * @default 7001
19
- */
20
- port?: number;
21
- /**
22
- * Working directory for imports.
23
- */
24
- workingDir?: string;
25
- }
26
- /**
27
- * Writer configuration.
28
- */
29
- export interface WriterConfig {
30
- type: 'file' | 'sparql';
31
- outputDir?: string;
32
- endpoint?: URL;
33
- /**
34
- * Value for the Authorization header sent with SPARQL UPDATE requests, e.g.
35
- * `"Basic dXNlcjpwYXNz"`, `"Bearer my-token"`, or `"GDB eyJ…"`.
36
- */
37
- auth?: string;
38
- }
39
- /**
40
- * Complete pipeline configuration.
41
- */
42
- export interface PipelineConfig {
43
- selector: Selector;
44
- steps: Step[];
45
- writers?: WriterConfig[];
46
- qlever?: QleverConfig;
47
- }
48
- /**
49
- * Fluent builder for creating pipeline configurations.
50
- *
51
- * @example
52
- * ```typescript
53
- * const config = PipelineBuilder.create()
54
- * .withSelector(registry('https://example.com/sparql'))
55
- * .addStep(sparqlQuery('queries/triples.rq'))
56
- * .addWriter(fileWriter({ outputDir: 'output' }))
57
- * .build();
58
- * ```
59
- */
60
- export declare class PipelineBuilder {
61
- private selector?;
62
- private steps;
63
- private writers;
64
- private qleverConfig?;
65
- /**
66
- * Create a new PipelineBuilder instance.
67
- */
68
- static create(): PipelineBuilder;
69
- /**
70
- * Set the dataset selector.
71
- */
72
- withSelector(selector: Selector): this;
73
- /**
74
- * Configure QLever for local SPARQL imports.
75
- */
76
- withQlever(config: QleverConfig): this;
77
- /**
78
- * Add a single step to the pipeline.
79
- */
80
- addStep(step: Step): this;
81
- /**
82
- * Add multiple steps to the pipeline.
83
- */
84
- addSteps(...steps: Step[]): this;
85
- /**
86
- * Add a writer for pipeline output.
87
- */
88
- addWriter(writer: WriterConfig): this;
89
- /**
90
- * Build the final pipeline configuration.
91
- * @throws Error if selector is not set
92
- */
93
- build(): PipelineConfig;
94
- }
95
- /**
96
- * Create a selector that queries a Dataset Registry.
97
- *
98
- * @param endpoint SPARQL endpoint URL of the registry
99
- */
100
- export declare function registry(endpoint: string | URL): RegistrySelector;
101
- /**
102
- * Create a selector for manually specified datasets.
103
- *
104
- * @param datasets Array of dataset IRIs
105
- */
106
- export declare function manual(...datasetIris: URL[]): ManualDatasetSelection;
107
- /**
108
- * Create a file writer configuration.
109
- */
110
- export declare function fileWriter(options: {
111
- outputDir: string;
112
- }): WriterConfig;
113
- /**
114
- * Create a SPARQL UPDATE writer configuration.
115
- */
116
- export declare function sparqlWriter(options: {
117
- endpoint: URL;
118
- auth?: string;
119
- }): WriterConfig;
120
- //# sourceMappingURL=builder.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"builder.d.ts","sourceRoot":"","sources":["../src/builder.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,IAAI,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC1B;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,GAAG,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;IACzB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,CAAW;IAC5B,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,YAAY,CAAC,CAAe;IAEpC;;OAEG;IACH,MAAM,CAAC,MAAM,IAAI,eAAe;IAIhC;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKtC;;OAEG;IACH,UAAU,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKtC;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;IAKzB;;OAEG;IACH,QAAQ,CAAC,GAAG,KAAK,EAAE,IAAI,EAAE,GAAG,IAAI;IAKhC;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKrC;;;OAGG;IACH,KAAK,IAAI,cAAc;CAYxB;AAID;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,GAAG,GAAG,gBAAgB,CAMjE;AAED;;;;GAIG;AACH,wBAAgB,MAAM,CAAC,GAAG,WAAW,EAAE,GAAG,EAAE,GAAG,sBAAsB,CAKpE;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GAAG,YAAY,CAKvE;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE;IACpC,QAAQ,EAAE,GAAG,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,YAAY,CAMf"}
package/dist/builder.js DELETED
@@ -1,116 +0,0 @@
1
- import { Dataset } from '@lde/dataset';
2
- import { ManualDatasetSelection, RegistrySelector, } from './selector.js';
3
- import { Client } from '@lde/dataset-registry-client';
4
- /**
5
- * Fluent builder for creating pipeline configurations.
6
- *
7
- * @example
8
- * ```typescript
9
- * const config = PipelineBuilder.create()
10
- * .withSelector(registry('https://example.com/sparql'))
11
- * .addStep(sparqlQuery('queries/triples.rq'))
12
- * .addWriter(fileWriter({ outputDir: 'output' }))
13
- * .build();
14
- * ```
15
- */
16
- export class PipelineBuilder {
17
- selector;
18
- steps = [];
19
- writers = [];
20
- qleverConfig;
21
- /**
22
- * Create a new PipelineBuilder instance.
23
- */
24
- static create() {
25
- return new PipelineBuilder();
26
- }
27
- /**
28
- * Set the dataset selector.
29
- */
30
- withSelector(selector) {
31
- this.selector = selector;
32
- return this;
33
- }
34
- /**
35
- * Configure QLever for local SPARQL imports.
36
- */
37
- withQlever(config) {
38
- this.qleverConfig = config;
39
- return this;
40
- }
41
- /**
42
- * Add a single step to the pipeline.
43
- */
44
- addStep(step) {
45
- this.steps.push(step);
46
- return this;
47
- }
48
- /**
49
- * Add multiple steps to the pipeline.
50
- */
51
- addSteps(...steps) {
52
- this.steps.push(...steps);
53
- return this;
54
- }
55
- /**
56
- * Add a writer for pipeline output.
57
- */
58
- addWriter(writer) {
59
- this.writers.push(writer);
60
- return this;
61
- }
62
- /**
63
- * Build the final pipeline configuration.
64
- * @throws Error if selector is not set
65
- */
66
- build() {
67
- if (!this.selector) {
68
- throw new Error('Selector is required. Use withSelector() to set it.');
69
- }
70
- return {
71
- selector: this.selector,
72
- steps: this.steps,
73
- writers: this.writers.length > 0 ? this.writers : undefined,
74
- qlever: this.qleverConfig,
75
- };
76
- }
77
- }
78
- // Helper functions for fluent construction.
79
- /**
80
- * Create a selector that queries a Dataset Registry.
81
- *
82
- * @param endpoint SPARQL endpoint URL of the registry
83
- */
84
- export function registry(endpoint) {
85
- return new RegistrySelector({
86
- registry: new Client(typeof endpoint === 'string' ? new URL(endpoint) : endpoint),
87
- });
88
- }
89
- /**
90
- * Create a selector for manually specified datasets.
91
- *
92
- * @param datasets Array of dataset IRIs
93
- */
94
- export function manual(...datasetIris) {
95
- const datasets = datasetIris.map((iri) => new Dataset({ iri, distributions: [] }));
96
- return new ManualDatasetSelection(datasets);
97
- }
98
- /**
99
- * Create a file writer configuration.
100
- */
101
- export function fileWriter(options) {
102
- return {
103
- type: 'file',
104
- outputDir: options.outputDir,
105
- };
106
- }
107
- /**
108
- * Create a SPARQL UPDATE writer configuration.
109
- */
110
- export function sparqlWriter(options) {
111
- return {
112
- type: 'sparql',
113
- endpoint: options.endpoint,
114
- auth: options.auth,
115
- };
116
- }
package/dist/config.d.ts DELETED
@@ -1,71 +0,0 @@
1
- import { PipelineConfig, QleverConfig } from './builder.js';
2
- /**
3
- * Raw configuration schema from YAML/JSON files.
4
- */
5
- export interface RawPipelineConfig {
6
- selector?: {
7
- type: 'registry' | 'manual';
8
- endpoint?: string;
9
- datasets?: string[];
10
- };
11
- qlever?: QleverConfig;
12
- steps?: Array<{
13
- type: 'sparql-query';
14
- query: string;
15
- }>;
16
- writers?: Array<{
17
- type: 'file' | 'sparql';
18
- outputDir?: string;
19
- endpoint?: string;
20
- auth?: string;
21
- }>;
22
- }
23
- /**
24
- * Options for loading pipeline configuration.
25
- */
26
- export interface LoadConfigOptions {
27
- /**
28
- * Configuration file name (without extension).
29
- * @default 'pipeline.config'
30
- */
31
- name?: string;
32
- /**
33
- * Working directory to search for config files.
34
- * @default process.cwd()
35
- */
36
- cwd?: string;
37
- }
38
- /**
39
- * Define a pipeline configuration with TypeScript type checking.
40
- *
41
- * @example
42
- * ```typescript
43
- * // pipeline.config.ts
44
- * import { defineConfig } from '@lde/pipeline';
45
- *
46
- * export default defineConfig({
47
- * selector: { type: 'registry', endpoint: 'https://example.com/sparql' },
48
- * steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
49
- * });
50
- * ```
51
- */
52
- export declare function defineConfig(config: RawPipelineConfig): RawPipelineConfig;
53
- /**
54
- * Load pipeline configuration from files.
55
- *
56
- * Searches for configuration files in the following order:
57
- * - pipeline.config.ts
58
- * - pipeline.config.js
59
- * - pipeline.config.yaml
60
- * - pipeline.config.yml
61
- * - pipeline.config.json
62
- *
63
- * @param options Load options
64
- * @returns Resolved pipeline configuration
65
- */
66
- export declare function loadPipelineConfig(options?: LoadConfigOptions): Promise<PipelineConfig>;
67
- /**
68
- * Normalize raw configuration into a typed PipelineConfig.
69
- */
70
- export declare function normalizeConfig(raw: RawPipelineConfig): PipelineConfig;
71
- //# sourceMappingURL=config.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AACA,OAAO,EACL,cAAc,EACd,YAAY,EAIb,MAAM,cAAc,CAAC;AAKtB;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE;QACT,IAAI,EAAE,UAAU,GAAG,QAAQ,CAAC;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;IACF,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,cAAc,CAAC;QACrB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;QACxB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,IAAI,CAAC,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,iBAAiB,GAAG,iBAAiB,CAEzE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,cAAc,CAAC,CAWzB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,iBAAiB,GAAG,cAAc,CAOtE"}
package/dist/config.js DELETED
@@ -1,114 +0,0 @@
1
- import { loadConfig } from 'c12';
2
- import { registry, manual, } from './builder.js';
3
- import { SparqlQuery } from './step/sparqlQuery.js';
4
- /**
5
- * Define a pipeline configuration with TypeScript type checking.
6
- *
7
- * @example
8
- * ```typescript
9
- * // pipeline.config.ts
10
- * import { defineConfig } from '@lde/pipeline';
11
- *
12
- * export default defineConfig({
13
- * selector: { type: 'registry', endpoint: 'https://example.com/sparql' },
14
- * steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
15
- * });
16
- * ```
17
- */
18
- export function defineConfig(config) {
19
- return config;
20
- }
21
- /**
22
- * Load pipeline configuration from files.
23
- *
24
- * Searches for configuration files in the following order:
25
- * - pipeline.config.ts
26
- * - pipeline.config.js
27
- * - pipeline.config.yaml
28
- * - pipeline.config.yml
29
- * - pipeline.config.json
30
- *
31
- * @param options Load options
32
- * @returns Resolved pipeline configuration
33
- */
34
- export async function loadPipelineConfig(options) {
35
- const { config } = await loadConfig({
36
- name: options?.name ?? 'pipeline.config',
37
- cwd: options?.cwd,
38
- });
39
- if (!config) {
40
- throw new Error('No pipeline configuration found');
41
- }
42
- return normalizeConfig(config);
43
- }
44
- /**
45
- * Normalize raw configuration into a typed PipelineConfig.
46
- */
47
- export function normalizeConfig(raw) {
48
- return {
49
- selector: normalizeSelector(raw.selector),
50
- steps: normalizeSteps(raw.steps),
51
- writers: normalizeWriters(raw.writers),
52
- qlever: raw.qlever,
53
- };
54
- }
55
- function normalizeSelector(raw) {
56
- if (!raw) {
57
- throw new Error('Selector configuration is required');
58
- }
59
- switch (raw.type) {
60
- case 'registry':
61
- if (!raw.endpoint) {
62
- throw new Error('Registry selector requires endpoint');
63
- }
64
- return registry(raw.endpoint);
65
- case 'manual':
66
- if (!raw.datasets || raw.datasets.length === 0) {
67
- throw new Error('Manual selector requires datasets');
68
- }
69
- return manual(...raw.datasets.map((d) => new URL(d)));
70
- default:
71
- throw new Error(`Unknown selector type: ${raw.type}`);
72
- }
73
- }
74
- function normalizeSteps(raw) {
75
- if (!raw) {
76
- return [];
77
- }
78
- return raw.map((step) => {
79
- switch (step.type) {
80
- case 'sparql-query':
81
- return new SparqlQuery({
82
- identifier: step.query,
83
- query: step.query, // Will be loaded from file by SparqlQuery.fromFile if path
84
- });
85
- default:
86
- throw new Error(`Unknown step type: ${step.type}`);
87
- }
88
- });
89
- }
90
- function normalizeWriters(raw) {
91
- if (!raw || raw.length === 0) {
92
- return undefined;
93
- }
94
- return raw.map((writer) => {
95
- switch (writer.type) {
96
- case 'file':
97
- if (!writer.outputDir) {
98
- throw new Error('File writer requires outputDir');
99
- }
100
- return { type: 'file', outputDir: writer.outputDir };
101
- case 'sparql':
102
- if (!writer.endpoint) {
103
- throw new Error('SPARQL writer requires endpoint');
104
- }
105
- return {
106
- type: 'sparql',
107
- endpoint: new URL(writer.endpoint),
108
- auth: writer.auth,
109
- };
110
- default:
111
- throw new Error(`Unknown writer type: ${writer.type}`);
112
- }
113
- });
114
- }
package/dist/import.d.ts DELETED
@@ -1,30 +0,0 @@
1
- import { Dataset } from '@lde/dataset';
2
- import { Failure, Finishable, NotSupported, SingleStep, Success } from './step.js';
3
- import { Importer } from '@lde/sparql-importer';
4
- import { SparqlServer } from '@lde/sparql-server';
5
- /**
6
- * A pipeline step that imports a database using an {@link Importer} and makes
7
- * the import available at a local SPARQL endpoint.
8
- */
9
- export declare class Import implements SingleStep, Finishable {
10
- readonly identifier = "import";
11
- private readonly importer;
12
- private readonly server;
13
- private readonly forceImport;
14
- /**
15
- * Create a Pipeline ImportStep.
16
- *
17
- * @param {object} args
18
- * @param args.importer A concrete importer that will import the distribution if needed.
19
- * @param args.server SPARQL server that will be started to serve the imported data.
20
- * @param args.forceImport Whether to force an import even if the dataset already has a SPARQL distribution.
21
- */
22
- constructor({ importer, server, forceImport, }: {
23
- importer: Importer;
24
- server: SparqlServer;
25
- forceImport?: boolean;
26
- });
27
- execute(dataset: Dataset): Promise<NotSupported | Failure | Success>;
28
- finish(): Promise<void>;
29
- }
30
- //# sourceMappingURL=import.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"import.d.ts","sourceRoot":"","sources":["../src/import.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,OAAO,EACP,UAAU,EACV,YAAY,EACZ,UAAU,EACV,OAAO,EACR,MAAM,WAAW,CAAC;AACnB,OAAO,EACL,QAAQ,EAGT,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD;;;GAGG;AACH,qBAAa,MAAO,YAAW,UAAU,EAAE,UAAU;IACnD,SAAgB,UAAU,YAAY;IACtC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAW;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAe;IACtC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IAEtC;;;;;;;OAOG;gBACS,EACV,QAAQ,EACR,MAAM,EACN,WAAW,GACZ,EAAE;QACD,QAAQ,EAAE,QAAQ,CAAC;QACnB,MAAM,EAAE,YAAY,CAAC;QACrB,WAAW,CAAC,EAAE,OAAO,CAAC;KACvB;IAMY,OAAO,CAClB,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,YAAY,GAAG,OAAO,GAAG,OAAO,CAAC;IAuB/B,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAGrC"}
package/dist/import.js DELETED
@@ -1,44 +0,0 @@
1
- import { Distribution } from '@lde/dataset';
2
- import { Failure, NotSupported, Success, } from './step.js';
3
- import { ImportFailed, NotSupported as ImporterNotSupported, } from '@lde/sparql-importer';
4
- /**
5
- * A pipeline step that imports a database using an {@link Importer} and makes
6
- * the import available at a local SPARQL endpoint.
7
- */
8
- export class Import {
9
- identifier = 'import';
10
- importer;
11
- server;
12
- forceImport;
13
- /**
14
- * Create a Pipeline ImportStep.
15
- *
16
- * @param {object} args
17
- * @param args.importer A concrete importer that will import the distribution if needed.
18
- * @param args.server SPARQL server that will be started to serve the imported data.
19
- * @param args.forceImport Whether to force an import even if the dataset already has a SPARQL distribution.
20
- */
21
- constructor({ importer, server, forceImport, }) {
22
- this.importer = importer;
23
- this.server = server;
24
- this.forceImport = forceImport ?? false;
25
- }
26
- async execute(dataset) {
27
- if (dataset.getSparqlDistribution()?.isValid && !this.forceImport) {
28
- return new NotSupported('A valid SPARQL distribution is available so no import needed');
29
- }
30
- const result = await this.importer.import(dataset);
31
- if (result instanceof ImporterNotSupported) {
32
- return new NotSupported('No download distribution available');
33
- }
34
- if (result instanceof ImportFailed) {
35
- return new Failure(result.distribution, result.error);
36
- }
37
- await this.server.start();
38
- dataset.distributions.push(Distribution.sparql(this.server.queryEndpoint));
39
- return new Success(dataset, result.distribution);
40
- }
41
- async finish() {
42
- await this.server.stop();
43
- }
44
- }
@@ -1,35 +0,0 @@
1
- import { DataEmittingStep } from './../step.js';
2
- import { Dataset, Distribution } from '@lde/dataset';
3
- import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
4
- /**
5
- * Arguments for the SparqlQuery step.
6
- *
7
- * @param identifier Unique identifier for the step.
8
- * @param query: SPARQL CONSTRUCT query to execute.
9
- * @param fetcher Optional SPARQL endpoint fetcher; defaults to SparqlEndpointFetcher.
10
- */
11
- export interface Args {
12
- identifier: string;
13
- query: string;
14
- fetcher?: SparqlEndpointFetcher;
15
- }
16
- /**
17
- * Executes a SPARQL CONSTRUCT query and emits the resulting quads.
18
- *
19
- * This step wraps the SparqlConstructExecutor to provide the DataEmittingStep interface
20
- * for use in pipelines. Supports legacy template substitution (`#namedGraph#`,
21
- * `#subjectFilter#`, `?dataset`); for new code prefer the AST-based executor directly.
22
- */
23
- export declare class SparqlQuery implements DataEmittingStep {
24
- readonly identifier: string;
25
- private readonly query;
26
- private readonly fetcher?;
27
- constructor({ identifier, query, fetcher }: Args);
28
- execute(dataset: Dataset, distribution: Distribution): Promise<import("../sparql/executor.js").QuadStream>;
29
- static fromFile(filename: string): Promise<SparqlQuery>;
30
- }
31
- /**
32
- * @deprecated Use readQueryFile from '@lde/pipeline/sparql' instead.
33
- */
34
- export declare function fromFile(filename: string): Promise<string>;
35
- //# sourceMappingURL=sparqlQuery.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"sparqlQuery.d.ts","sourceRoot":"","sources":["../../src/step/sparqlQuery.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAO9D;;;;;;GAMG;AACH,MAAM,WAAW,IAAI;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;GAMG;AACH,qBAAa,WAAY,YAAW,gBAAgB;IAClD,SAAgB,UAAU,SAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC,CAAwB;gBAErC,EAAE,UAAU,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI;IAM1C,OAAO,CAAC,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,YAAY;WAatC,QAAQ,CAAC,QAAQ,EAAE,MAAM;CAM9C;AAED;;GAEG;AACH,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,mBAE9C"}
@@ -1,38 +0,0 @@
1
- import { SparqlConstructExecutor, substituteQueryTemplates, readQueryFile, } from '../sparql/index.js';
2
- /**
3
- * Executes a SPARQL CONSTRUCT query and emits the resulting quads.
4
- *
5
- * This step wraps the SparqlConstructExecutor to provide the DataEmittingStep interface
6
- * for use in pipelines. Supports legacy template substitution (`#namedGraph#`,
7
- * `#subjectFilter#`, `?dataset`); for new code prefer the AST-based executor directly.
8
- */
9
- export class SparqlQuery {
10
- identifier;
11
- query;
12
- fetcher;
13
- constructor({ identifier, query, fetcher }) {
14
- this.identifier = identifier;
15
- this.query = query;
16
- this.fetcher = fetcher;
17
- }
18
- async execute(dataset, distribution) {
19
- const substituted = substituteQueryTemplates(this.query, distribution, dataset);
20
- const executor = new SparqlConstructExecutor({
21
- query: substituted,
22
- fetcher: this.fetcher,
23
- });
24
- return await executor.execute(dataset, distribution);
25
- }
26
- static async fromFile(filename) {
27
- return new this({
28
- identifier: filename,
29
- query: await readQueryFile(filename),
30
- });
31
- }
32
- }
33
- /**
34
- * @deprecated Use readQueryFile from '@lde/pipeline/sparql' instead.
35
- */
36
- export async function fromFile(filename) {
37
- return readQueryFile(filename);
38
- }
package/dist/step.d.ts DELETED
@@ -1,55 +0,0 @@
1
- import { Dataset, Distribution } from '@lde/dataset';
2
- import type { Stream } from '@rdfjs/types';
3
- interface AbstractStep {
4
- readonly identifier: string;
5
- }
6
- export type Step = DataEmittingStep | SingleStep;
7
- /**
8
- * A pipeline step that returns a data-emitting stream of RDF quads.
9
- * Failure is expressed by emitting an error event; success by the end event.
10
- */
11
- export interface DataEmittingStep extends AbstractStep {
12
- execute(dataset: Dataset, distribution: Distribution): Promise<Stream | NotSupported>;
13
- }
14
- /**
15
- * A pipeline step that executes an operation without emitting data.
16
- */
17
- export interface SingleStep extends AbstractStep {
18
- execute(dataset: Dataset, distribution?: Distribution): Promise<NotSupported | Failure | Success>;
19
- }
20
- export interface Finishable {
21
- finish(): Promise<void>;
22
- }
23
- /**
24
- * A pipeline step failed to run.
25
- *
26
- * @param distribution The distribution that was processed.
27
- * @param message Optional error message.
28
- */
29
- export declare class Failure {
30
- readonly distribution: Distribution;
31
- readonly message?: string | undefined;
32
- constructor(distribution: Distribution, message?: string | undefined);
33
- }
34
- /**
35
- * A pipeline ran successfully.
36
- *
37
- * @param dataset: The dataset, with possible modifications, that was processed.
38
- * @param distribution The distribution that was processed.
39
- */
40
- export declare class Success {
41
- readonly dataset: Dataset;
42
- readonly distribution: Distribution;
43
- constructor(dataset: Dataset, distribution: Distribution);
44
- }
45
- /**
46
- * A pipeline step could not be run because the dataset lacks a distribution supported by the step.
47
- *
48
- * @param message: A message explaining why the step is not supported.
49
- */
50
- export declare class NotSupported {
51
- readonly message: string;
52
- constructor(message: string);
53
- }
54
- export {};
55
- //# sourceMappingURL=step.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"step.d.ts","sourceRoot":"","sources":["../src/step.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE3C,UAAU,YAAY;IACpB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,IAAI,GAAG,gBAAgB,GAAG,UAAU,CAAC;AAEjD;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,MAAM,GAAG,YAAY,CAAC,CAAC;CACnC;AAED;;GAEG;AACH,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,CAAC,EAAE,YAAY,GAC1B,OAAO,CAAC,YAAY,GAAG,OAAO,GAAG,OAAO,CAAC,CAAC;CAC9C;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAED;;;;;GAKG;AACH,qBAAa,OAAO;aAEA,YAAY,EAAE,YAAY;aAC1B,OAAO,CAAC,EAAE,MAAM;gBADhB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED;;;;;GAKG;AACH,qBAAa,OAAO;aAEA,OAAO,EAAE,OAAO;aAChB,YAAY,EAAE,YAAY;gBAD1B,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY;CAE7C;AAED;;;;GAIG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C"}
package/dist/step.js DELETED
@@ -1,39 +0,0 @@
1
- /**
2
- * A pipeline step failed to run.
3
- *
4
- * @param distribution The distribution that was processed.
5
- * @param message Optional error message.
6
- */
7
- export class Failure {
8
- distribution;
9
- message;
10
- constructor(distribution, message) {
11
- this.distribution = distribution;
12
- this.message = message;
13
- }
14
- }
15
- /**
16
- * A pipeline ran successfully.
17
- *
18
- * @param dataset: The dataset, with possible modifications, that was processed.
19
- * @param distribution The distribution that was processed.
20
- */
21
- export class Success {
22
- dataset;
23
- distribution;
24
- constructor(dataset, distribution) {
25
- this.dataset = dataset;
26
- this.distribution = distribution;
27
- }
28
- }
29
- /**
30
- * A pipeline step could not be run because the dataset lacks a distribution supported by the step.
31
- *
32
- * @param message: A message explaining why the step is not supported.
33
- */
34
- export class NotSupported {
35
- message;
36
- constructor(message) {
37
- this.message = message;
38
- }
39
- }