@lde/pipeline 0.6.28 → 0.6.29

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -86,9 +86,7 @@ import {
86
86
  } from '@lde/pipeline';
87
87
 
88
88
  const pipeline = new Pipeline({
89
- name: 'example',
90
89
  datasetSelector: new ManualDatasetSelection([dataset]),
91
- distributionResolver: new SparqlDistributionResolver(),
92
90
  stages: [
93
91
  new Stage({
94
92
  name: 'per-class',
@@ -101,7 +99,7 @@ const pipeline = new Pipeline({
101
99
  }),
102
100
  }),
103
101
  ],
104
- writer: new SparqlUpdateWriter({
102
+ writers: new SparqlUpdateWriter({
105
103
  endpoint: new URL('http://localhost:7200/repositories/lde/statements'),
106
104
  }),
107
105
  });
@@ -5,18 +5,26 @@ import { type DistributionResolver } from './distribution/resolver.js';
5
5
  import type { StageOutputResolver } from './stageOutputResolver.js';
6
6
  import type { ProgressReporter } from './progressReporter.js';
7
7
  export interface PipelineOptions {
8
- name: string;
9
8
  datasetSelector: DatasetSelector;
10
9
  stages: Stage[];
11
- writer: Writer;
12
- distributionResolver: DistributionResolver;
13
- stageOutputResolver?: StageOutputResolver;
14
- outputDir?: string;
15
- outputFormat?: 'turtle' | 'n-triples' | 'n-quads';
10
+ writers: Writer | Writer[];
11
+ name?: string;
12
+ distributionResolver?: DistributionResolver;
13
+ chaining?: {
14
+ stageOutputResolver: StageOutputResolver;
15
+ outputDir: string;
16
+ outputFormat?: 'turtle' | 'n-triples' | 'n-quads';
17
+ };
16
18
  reporter?: ProgressReporter;
17
19
  }
18
20
  export declare class Pipeline {
19
- private readonly options;
21
+ private readonly name;
22
+ private readonly datasetSelector;
23
+ private readonly stages;
24
+ private readonly writer;
25
+ private readonly distributionResolver;
26
+ private readonly chaining?;
27
+ private readonly reporter?;
20
28
  constructor(options: PipelineOptions);
21
29
  run(): Promise<void>;
22
30
  private processDataset;
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAGpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;KACnD,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAmBD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAoB9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA0Bd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -2,40 +2,67 @@ import { createReadStream } from 'node:fs';
2
2
  import { StreamParser } from 'n3';
3
3
  import { FileWriter } from './writer/fileWriter.js';
4
4
  import { NoDistributionAvailable, } from './distribution/resolver.js';
5
+ import { SparqlDistributionResolver } from './distribution/index.js';
5
6
  import { NotSupported } from './sparql/executor.js';
7
+ class FanOutWriter {
8
+ writers;
9
+ constructor(writers) {
10
+ this.writers = writers;
11
+ }
12
+ async write(dataset, quads) {
13
+ const collected = [];
14
+ for await (const quad of quads)
15
+ collected.push(quad);
16
+ for (const w of this.writers) {
17
+ await w.write(dataset, (async function* () {
18
+ yield* collected;
19
+ })());
20
+ }
21
+ }
22
+ }
6
23
  export class Pipeline {
7
- options;
24
+ name;
25
+ datasetSelector;
26
+ stages;
27
+ writer;
28
+ distributionResolver;
29
+ chaining;
30
+ reporter;
8
31
  constructor(options) {
9
32
  const hasSubStages = options.stages.some((stage) => stage.stages.length > 0);
10
- if (hasSubStages && !options.stageOutputResolver) {
11
- throw new Error('stageOutputResolver is required when any stage has sub-stages');
12
- }
13
- if (hasSubStages && !options.outputDir) {
14
- throw new Error('outputDir is required when any stage has sub-stages');
33
+ if (hasSubStages && !options.chaining) {
34
+ throw new Error('chaining is required when any stage has sub-stages');
15
35
  }
16
- this.options = options;
36
+ this.name = options.name ?? '';
37
+ this.datasetSelector = options.datasetSelector;
38
+ this.stages = options.stages;
39
+ this.writer = Array.isArray(options.writers)
40
+ ? new FanOutWriter(options.writers)
41
+ : options.writers;
42
+ this.distributionResolver =
43
+ options.distributionResolver ?? new SparqlDistributionResolver();
44
+ this.chaining = options.chaining;
45
+ this.reporter = options.reporter;
17
46
  }
18
47
  async run() {
19
- const { datasetSelector, reporter, name } = this.options;
20
48
  const start = Date.now();
21
- reporter?.pipelineStart(name);
22
- const datasets = await datasetSelector.select();
49
+ this.reporter?.pipelineStart(this.name);
50
+ const datasets = await this.datasetSelector.select();
23
51
  for await (const dataset of datasets) {
24
52
  await this.processDataset(dataset);
25
53
  }
26
- reporter?.pipelineComplete({ duration: Date.now() - start });
54
+ this.reporter?.pipelineComplete({ duration: Date.now() - start });
27
55
  }
28
56
  async processDataset(dataset) {
29
- const { distributionResolver, reporter } = this.options;
30
57
  const datasetIri = dataset.iri.toString();
31
- reporter?.datasetStart(datasetIri);
32
- const resolved = await distributionResolver.resolve(dataset);
58
+ this.reporter?.datasetStart(datasetIri);
59
+ const resolved = await this.distributionResolver.resolve(dataset);
33
60
  if (resolved instanceof NoDistributionAvailable) {
34
- reporter?.datasetSkipped(datasetIri, resolved.message);
61
+ this.reporter?.datasetSkipped(datasetIri, resolved.message);
35
62
  return;
36
63
  }
37
64
  try {
38
- for (const stage of this.options.stages) {
65
+ for (const stage of this.stages) {
39
66
  if (stage.stages.length > 0) {
40
67
  await this.runChain(dataset, resolved.distribution, stage);
41
68
  }
@@ -47,26 +74,25 @@ export class Pipeline {
47
74
  catch {
48
75
  // Stage error for this dataset; continue to next dataset.
49
76
  }
50
- reporter?.datasetComplete(datasetIri);
77
+ this.reporter?.datasetComplete(datasetIri);
51
78
  }
52
79
  async runStage(dataset, distribution, stage) {
53
- const { writer, reporter } = this.options;
54
- reporter?.stageStart(stage.name);
80
+ this.reporter?.stageStart(stage.name);
55
81
  const stageStart = Date.now();
56
82
  let elementsProcessed = 0;
57
83
  let quadsGenerated = 0;
58
- const result = await stage.run(dataset, distribution, writer, {
84
+ const result = await stage.run(dataset, distribution, this.writer, {
59
85
  onProgress: (elements, quads) => {
60
86
  elementsProcessed = elements;
61
87
  quadsGenerated = quads;
62
- reporter?.stageProgress({ elementsProcessed, quadsGenerated });
88
+ this.reporter?.stageProgress({ elementsProcessed, quadsGenerated });
63
89
  },
64
90
  });
65
91
  if (result instanceof NotSupported) {
66
- reporter?.stageSkipped(stage.name, result.message);
92
+ this.reporter?.stageSkipped(stage.name, result.message);
67
93
  }
68
94
  else {
69
- reporter?.stageComplete(stage.name, {
95
+ this.reporter?.stageComplete(stage.name, {
70
96
  elementsProcessed,
71
97
  quadsGenerated,
72
98
  duration: Date.now() - stageStart,
@@ -74,7 +100,7 @@ export class Pipeline {
74
100
  }
75
101
  }
76
102
  async runChain(dataset, distribution, stage) {
77
- const { writer, stageOutputResolver, outputDir, outputFormat } = this.options;
103
+ const { stageOutputResolver, outputDir, outputFormat } = this.chaining;
78
104
  const outputFiles = [];
79
105
  try {
80
106
  // 1. Run parent stage → FileWriter.
@@ -99,15 +125,14 @@ export class Pipeline {
99
125
  }
100
126
  }
101
127
  // 3. Concatenate all output files → user writer.
102
- await writer.write(dataset, this.readFiles(outputFiles));
128
+ await this.writer.write(dataset, this.readFiles(outputFiles));
103
129
  }
104
130
  finally {
105
131
  await stageOutputResolver.cleanup();
106
132
  }
107
133
  }
108
134
  async runChainedStage(dataset, distribution, stage, stageWriter) {
109
- const { reporter } = this.options;
110
- reporter?.stageStart(stage.name);
135
+ this.reporter?.stageStart(stage.name);
111
136
  const stageStart = Date.now();
112
137
  let elementsProcessed = 0;
113
138
  let quadsGenerated = 0;
@@ -115,14 +140,14 @@ export class Pipeline {
115
140
  onProgress: (elements, quads) => {
116
141
  elementsProcessed = elements;
117
142
  quadsGenerated = quads;
118
- reporter?.stageProgress({ elementsProcessed, quadsGenerated });
143
+ this.reporter?.stageProgress({ elementsProcessed, quadsGenerated });
119
144
  },
120
145
  });
121
146
  if (result instanceof NotSupported) {
122
- reporter?.stageSkipped(stage.name, result.message);
147
+ this.reporter?.stageSkipped(stage.name, result.message);
123
148
  throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
124
149
  }
125
- reporter?.stageComplete(stage.name, {
150
+ this.reporter?.stageComplete(stage.name, {
126
151
  elementsProcessed,
127
152
  quadsGenerated,
128
153
  duration: Date.now() - stageStart,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.6.28",
3
+ "version": "0.6.29",
4
4
  "repository": {
5
5
  "url": "https://github.com/ldengine/lde",
6
6
  "directory": "packages/pipeline"