@lde/pipeline 0.12.0 → 0.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;CACtB;AAED;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,OAAO,CACX,GAAG,IAAI,EAAE,UAAU,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,GACnD,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IA+BpD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1
+ {"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;CACtB;AAED;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,OAAO,CACX,GAAG,IAAI,EAAE,UAAU,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,GACnD,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IAiCpD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
@@ -20,12 +20,13 @@ export class ImportResolver {
20
20
  if (result instanceof ResolvedDistribution)
21
21
  return result;
22
22
  const [dataset] = args;
23
+ const importStart = Date.now();
23
24
  const importResult = await this.options.importer.import(dataset);
24
25
  if (importResult instanceof ImportSuccessful) {
25
26
  await this.options.server.start();
26
27
  const distribution = Distribution.sparql(this.options.server.queryEndpoint, importResult.identifier);
27
28
  distribution.subjectFilter = importResult.distribution.subjectFilter;
28
- return new ResolvedDistribution(distribution, result.probeResults, importResult.distribution);
29
+ return new ResolvedDistribution(distribution, result.probeResults, importResult.distribution, Date.now() - importStart);
29
30
  }
30
31
  return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', result.probeResults, importResult instanceof ImportFailed ? importResult : undefined);
31
32
  }
@@ -5,7 +5,8 @@ export declare class ResolvedDistribution {
5
5
  readonly distribution: Distribution;
6
6
  readonly probeResults: ProbeResultType[];
7
7
  readonly importedFrom?: Distribution | undefined;
8
- constructor(distribution: Distribution, probeResults: ProbeResultType[], importedFrom?: Distribution | undefined);
8
+ readonly importDuration?: number | undefined;
9
+ constructor(distribution: Distribution, probeResults: ProbeResultType[], importedFrom?: Distribution | undefined, importDuration?: number | undefined);
9
10
  }
10
11
  export declare class NoDistributionAvailable {
11
12
  readonly dataset: Dataset;
@@ -1 +1 @@
1
- {"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAA4B,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAE5E,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAF3B,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;GAQG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAIjD,OAAO,CACX,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CA2B3D"}
1
+ {"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAA4B,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAE5E,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;IACpC,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM;gBAHvB,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA,EAC3B,cAAc,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;GAQG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAIjD,OAAO,CACX,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CA2B3D"}
@@ -3,10 +3,12 @@ export class ResolvedDistribution {
3
3
  distribution;
4
4
  probeResults;
5
5
  importedFrom;
6
- constructor(distribution, probeResults, importedFrom) {
6
+ importDuration;
7
+ constructor(distribution, probeResults, importedFrom, importDuration) {
7
8
  this.distribution = distribution;
8
9
  this.probeResults = probeResults;
9
10
  this.importedFrom = importedFrom;
11
+ this.importDuration = importDuration;
10
12
  }
11
13
  }
12
14
  export class NoDistributionAvailable {
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AA8BD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA6Cd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAsCD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA6Cd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -20,6 +20,10 @@ class FanOutWriter {
20
20
  })());
21
21
  }
22
22
  }
23
+ async flush(dataset) {
24
+ for (const w of this.writers)
25
+ await w.flush?.(dataset);
26
+ }
23
27
  }
24
28
  class TransformWriter {
25
29
  inner;
@@ -31,6 +35,9 @@ class TransformWriter {
31
35
  async write(dataset, quads) {
32
36
  await this.inner.write(dataset, this.transform(quads, dataset));
33
37
  }
38
+ async flush(dataset) {
39
+ await this.inner.flush?.(dataset);
40
+ }
34
41
  }
35
42
  export class Pipeline {
36
43
  name;
@@ -66,27 +73,22 @@ export class Pipeline {
66
73
  }
67
74
  async run() {
68
75
  const start = Date.now();
69
- this.reporter?.pipelineStart(this.name);
76
+ this.reporter?.pipelineStart?.(this.name);
70
77
  const datasets = await this.datasetSelector.select();
71
78
  for await (const dataset of datasets) {
72
79
  await this.processDataset(dataset);
73
80
  }
74
- this.reporter?.pipelineComplete({ duration: Date.now() - start });
81
+ this.reporter?.pipelineComplete?.({ duration: Date.now() - start });
75
82
  }
76
83
  async processDataset(dataset) {
77
- const datasetIri = dataset.iri.toString();
78
- this.reporter?.datasetStart(datasetIri);
84
+ this.reporter?.datasetStart?.(dataset);
79
85
  const resolved = await this.distributionResolver.resolve(dataset);
80
- this.reporter?.distributionsAnalyzed(datasetIri, mapProbeResults(resolved.probeResults));
86
+ this.reporter?.distributionsAnalyzed?.(dataset, mapProbeResults(dataset, resolved.probeResults));
81
87
  if (resolved instanceof NoDistributionAvailable) {
82
- this.reporter?.datasetSkipped(datasetIri, resolved.message);
88
+ this.reporter?.datasetSkipped?.(dataset, resolved.message);
83
89
  return;
84
90
  }
85
- this.reporter?.distributionSelected(datasetIri, {
86
- accessUrl: resolved.distribution.accessUrl.toString(),
87
- namedGraph: resolved.distribution.namedGraph,
88
- importedFrom: resolved.importedFrom?.accessUrl?.toString(),
89
- });
91
+ this.reporter?.distributionSelected?.(dataset, resolved.distribution, resolved.importedFrom, resolved.importDuration);
90
92
  try {
91
93
  for (const stage of this.stages) {
92
94
  try {
@@ -98,17 +100,18 @@ export class Pipeline {
98
100
  }
99
101
  }
100
102
  catch (error) {
101
- this.reporter?.stageFailed(stage.name, error instanceof Error ? error : new Error(String(error)));
103
+ this.reporter?.stageFailed?.(stage.name, error instanceof Error ? error : new Error(String(error)));
102
104
  }
103
105
  }
104
106
  }
105
107
  finally {
106
108
  await this.distributionResolver.cleanup?.();
107
109
  }
108
- this.reporter?.datasetComplete(datasetIri);
110
+ await this.writer.flush?.(dataset);
111
+ this.reporter?.datasetComplete?.(dataset);
109
112
  }
110
113
  async runStage(dataset, distribution, stage) {
111
- this.reporter?.stageStart(stage.name);
114
+ this.reporter?.stageStart?.(stage.name);
112
115
  const stageStart = Date.now();
113
116
  let elementsProcessed = 0;
114
117
  let quadsGenerated = 0;
@@ -116,14 +119,14 @@ export class Pipeline {
116
119
  onProgress: (elements, quads) => {
117
120
  elementsProcessed = elements;
118
121
  quadsGenerated = quads;
119
- this.reporter?.stageProgress({ elementsProcessed, quadsGenerated });
122
+ this.reporter?.stageProgress?.({ elementsProcessed, quadsGenerated });
120
123
  },
121
124
  });
122
125
  if (result instanceof NotSupported) {
123
- this.reporter?.stageSkipped(stage.name, result.message);
126
+ this.reporter?.stageSkipped?.(stage.name, result.message);
124
127
  }
125
128
  else {
126
- this.reporter?.stageComplete(stage.name, {
129
+ this.reporter?.stageComplete?.(stage.name, {
127
130
  elementsProcessed,
128
131
  quadsGenerated,
129
132
  duration: Date.now() - stageStart,
@@ -163,7 +166,7 @@ export class Pipeline {
163
166
  }
164
167
  }
165
168
  async runChainedStage(dataset, distribution, stage, stageWriter) {
166
- this.reporter?.stageStart(stage.name);
169
+ this.reporter?.stageStart?.(stage.name);
167
170
  const stageStart = Date.now();
168
171
  let elementsProcessed = 0;
169
172
  let quadsGenerated = 0;
@@ -171,14 +174,14 @@ export class Pipeline {
171
174
  onProgress: (elements, quads) => {
172
175
  elementsProcessed = elements;
173
176
  quadsGenerated = quads;
174
- this.reporter?.stageProgress({ elementsProcessed, quadsGenerated });
177
+ this.reporter?.stageProgress?.({ elementsProcessed, quadsGenerated });
175
178
  },
176
179
  });
177
180
  if (result instanceof NotSupported) {
178
- this.reporter?.stageSkipped(stage.name, result.message);
181
+ this.reporter?.stageSkipped?.(stage.name, result.message);
179
182
  throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
180
183
  }
181
- this.reporter?.stageComplete(stage.name, {
184
+ this.reporter?.stageComplete?.(stage.name, {
182
185
  elementsProcessed,
183
186
  quadsGenerated,
184
187
  duration: Date.now() - stageStart,
@@ -195,18 +198,18 @@ export class Pipeline {
195
198
  }
196
199
  }
197
200
  }
198
- function mapProbeResults(probeResults) {
199
- return probeResults.map((result) => {
201
+ function mapProbeResults(dataset, probeResults) {
202
+ return probeResults.map((result, index) => {
200
203
  if (result instanceof NetworkError) {
201
204
  return {
202
- accessUrl: result.url,
205
+ distribution: dataset.distributions[index],
203
206
  type: 'network-error',
204
207
  available: false,
205
208
  error: result.message,
206
209
  };
207
210
  }
208
211
  return {
209
- accessUrl: result.url,
212
+ distribution: dataset.distributions[index],
210
213
  type: result instanceof SparqlProbeResult
211
214
  ? 'sparql'
212
215
  : 'data-dump',
@@ -1,33 +1,32 @@
1
+ import type { Dataset, Distribution } from '@lde/dataset';
2
+ export interface DistributionAnalysisResult {
3
+ distribution: Distribution;
4
+ type: 'sparql' | 'data-dump' | 'network-error';
5
+ available: boolean;
6
+ statusCode?: number;
7
+ error?: string;
8
+ }
1
9
  export interface ProgressReporter {
2
- pipelineStart(name: string): void;
3
- datasetStart(dataset: string): void;
4
- distributionsAnalyzed(dataset: string, results: Array<{
5
- accessUrl: string;
6
- type: 'sparql' | 'data-dump' | 'network-error';
7
- available: boolean;
8
- statusCode?: number;
9
- error?: string;
10
- }>): void;
11
- distributionSelected(dataset: string, distribution: {
12
- accessUrl: string;
13
- namedGraph?: string;
14
- importedFrom?: string;
15
- }): void;
16
- stageStart(stage: string): void;
17
- stageProgress(update: {
10
+ pipelineStart?(name: string): void;
11
+ datasetsSelected?(count: number): void;
12
+ datasetStart?(dataset: Dataset): void;
13
+ distributionsAnalyzed?(dataset: Dataset, results: DistributionAnalysisResult[]): void;
14
+ distributionSelected?(dataset: Dataset, distribution: Distribution, importedFrom?: Distribution, importDuration?: number): void;
15
+ stageStart?(stage: string): void;
16
+ stageProgress?(update: {
18
17
  elementsProcessed: number;
19
18
  quadsGenerated: number;
20
19
  }): void;
21
- stageComplete(stage: string, result: {
20
+ stageComplete?(stage: string, result: {
22
21
  elementsProcessed: number;
23
22
  quadsGenerated: number;
24
23
  duration: number;
25
24
  }): void;
26
- stageFailed(stage: string, error: Error): void;
27
- stageSkipped(stage: string, reason: string): void;
28
- datasetComplete(dataset: string): void;
29
- datasetSkipped(dataset: string, reason: string): void;
30
- pipelineComplete(result: {
25
+ stageFailed?(stage: string, error: Error): void;
26
+ stageSkipped?(stage: string, reason: string): void;
27
+ datasetComplete?(dataset: Dataset): void;
28
+ datasetSkipped?(dataset: Dataset, reason: string): void;
29
+ pipelineComplete?(result: {
31
30
  duration: number;
32
31
  }): void;
33
32
  }
@@ -1 +1 @@
1
- {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IAClC,YAAY,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACpC,qBAAqB,CACnB,OAAO,EAAE,MAAM,EACf,OAAO,EAAE,KAAK,CAAC;QACb,SAAS,EAAE,MAAM,CAAC;QAClB,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;QAC/C,SAAS,EAAE,OAAO,CAAC;QACnB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,KAAK,CAAC,EAAE,MAAM,CAAC;KAChB,CAAC,GACD,IAAI,CAAC;IACR,oBAAoB,CAClB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE;QACZ,SAAS,EAAE,MAAM,CAAC;QAClB,UAAU,CAAC,EAAE,MAAM,CAAC;QACpB,YAAY,CAAC,EAAE,MAAM,CAAC;KACvB,GACA,IAAI,CAAC;IACR,UAAU,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAChC,aAAa,CAAC,MAAM,EAAE;QACpB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CACX,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAC/C,YAAY,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IAClD,eAAe,CAAC,OAAO,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,cAAc,CAAC,OAAO,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACtD,gBAAgB,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACtD"}
1
+ {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE1D,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,qBAAqB,CAAC,CACpB,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,0BAA0B,EAAE,GACpC,IAAI,CAAC;IACR,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,GACtB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,eAAe,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACzC,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACvD"}
@@ -16,16 +16,25 @@ export interface FileWriterOptions {
16
16
  * @default '-'
17
17
  */
18
18
  replacementCharacter?: string;
19
+ /**
20
+ * Turtle prefix declarations. Keys are prefix names, values are namespace IRIs.
21
+ * Only used when format is 'turtle'.
22
+ */
23
+ prefixes?: Record<string, string>;
19
24
  }
20
25
  export declare class FileWriter implements Writer {
21
26
  private readonly outputDir;
22
27
  readonly format: 'turtle' | 'n-triples' | 'n-quads';
23
28
  private readonly replacementCharacter;
24
- private readonly writtenFiles;
29
+ private readonly prefixes?;
30
+ private readonly activeWriters;
25
31
  constructor(options: FileWriterOptions);
26
32
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
33
+ flush(dataset: Dataset): Promise<void>;
27
34
  getOutputPath(dataset: Dataset): string;
28
35
  getFilename(dataset: Dataset): string;
36
+ private getFilePath;
37
+ private getOrCreateWriter;
29
38
  private getExtension;
30
39
  }
31
40
  //# sourceMappingURL=fileWriter.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;CAC/B;AAqBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAqB;gBAEtC,OAAO,EAAE,iBAAiB;IAMhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA4BxE,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,YAAY;CAUrB"}
1
+ {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAOhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAclE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAc5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IAoB/B,OAAO,CAAC,YAAY;CAUrB"}
@@ -8,13 +8,9 @@ import { Writer as N3Writer } from 'n3';
8
8
  *
9
9
  * Files are named based on the dataset IRI using filenamify-url.
10
10
  *
11
- * The first {@link write} call for a given dataset creates (or overwrites) the file.
12
- * Subsequent calls for the same dataset append to it, so that multiple pipeline stages
13
- * can each contribute quads to a single output file.
14
- *
15
- * **Note:** With `format: 'turtle'` each append will repeat the prefix declarations
16
- * at the start of each chunk. The default `format: 'n-triples'` produces clean
17
- * line-oriented output without repeated headers.
11
+ * A single N3Writer is kept open per dataset across all {@link write} calls,
12
+ * so Turtle prefix declarations are written once and triples can be grouped
13
+ * by subject. Call {@link flush} after all stages complete to finalize the file.
18
14
  */
19
15
  const formatMap = {
20
16
  turtle: 'Turtle',
@@ -25,11 +21,13 @@ export class FileWriter {
25
21
  outputDir;
26
22
  format;
27
23
  replacementCharacter;
28
- writtenFiles = new Set();
24
+ prefixes;
25
+ activeWriters = new Map();
29
26
  constructor(options) {
30
27
  this.outputDir = options.outputDir;
31
28
  this.format = options.format ?? 'n-triples';
32
29
  this.replacementCharacter = options.replacementCharacter ?? '-';
30
+ this.prefixes = options.prefixes;
33
31
  }
34
32
  async write(dataset, quads) {
35
33
  // Peek at the first quad to avoid creating empty files.
@@ -37,18 +35,20 @@ export class FileWriter {
37
35
  const first = await iterator.next();
38
36
  if (first.done)
39
37
  return;
40
- const filePath = join(this.outputDir, this.getFilename(dataset));
41
- await mkdir(dirname(filePath), { recursive: true });
42
- const flags = this.writtenFiles.has(filePath) ? 'a' : 'w';
43
- this.writtenFiles.add(filePath);
44
- const stream = createWriteStream(filePath, { flags });
45
- const writer = new N3Writer(stream, { format: formatMap[this.format] });
46
- writer.addQuad(first.value);
38
+ const { n3Writer } = await this.getOrCreateWriter(dataset);
39
+ n3Writer.addQuad(first.value);
47
40
  for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
48
- writer.addQuad(quad);
41
+ n3Writer.addQuad(quad);
49
42
  }
43
+ }
44
+ async flush(dataset) {
45
+ const key = this.getFilePath(dataset);
46
+ const entry = this.activeWriters.get(key);
47
+ if (!entry)
48
+ return;
49
+ this.activeWriters.delete(key);
50
50
  await new Promise((resolve, reject) => {
51
- writer.end((error) => {
51
+ entry.n3Writer.end((error) => {
52
52
  if (error)
53
53
  reject(error);
54
54
  else
@@ -57,7 +57,7 @@ export class FileWriter {
57
57
  });
58
58
  }
59
59
  getOutputPath(dataset) {
60
- return join(this.outputDir, this.getFilename(dataset));
60
+ return this.getFilePath(dataset);
61
61
  }
62
62
  getFilename(dataset) {
63
63
  const extension = this.getExtension();
@@ -66,6 +66,24 @@ export class FileWriter {
66
66
  });
67
67
  return `${baseName}.${extension}`;
68
68
  }
69
+ getFilePath(dataset) {
70
+ return join(this.outputDir, this.getFilename(dataset));
71
+ }
72
+ async getOrCreateWriter(dataset) {
73
+ const key = this.getFilePath(dataset);
74
+ const existing = this.activeWriters.get(key);
75
+ if (existing)
76
+ return existing;
77
+ await mkdir(dirname(key), { recursive: true });
78
+ const stream = createWriteStream(key, { flags: 'w' });
79
+ const n3Writer = new N3Writer(stream, {
80
+ format: formatMap[this.format],
81
+ prefixes: this.prefixes,
82
+ });
83
+ const entry = { n3Writer, stream };
84
+ this.activeWriters.set(key, entry);
85
+ return entry;
86
+ }
69
87
  getExtension() {
70
88
  switch (this.format) {
71
89
  case 'turtle':
@@ -11,5 +11,13 @@ export interface Writer {
11
11
  * @param quads The RDF quads to write
12
12
  */
13
13
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
14
+ /**
15
+ * Finalize writing for a dataset. Called after all stages complete.
16
+ *
17
+ * Writers that buffer output across multiple {@link write} calls (e.g. to
18
+ * share Turtle prefix declarations) should implement this to flush remaining
19
+ * data and release resources.
20
+ */
21
+ flush?(dataset: Dataset): Promise<void>;
14
22
  }
15
23
  //# sourceMappingURL=writer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACpE"}
1
+ {"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnE;;;;;;OAMG;IACH,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC"}
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.12.0",
3
+ "version": "0.14.0",
4
4
  "repository": {
5
- "url": "https://github.com/ldengine/lde",
5
+ "url": "git+https://github.com/ldengine/lde.git",
6
6
  "directory": "packages/pipeline"
7
7
  },
8
8
  "type": "module",