@lde/pipeline 0.12.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/distribution/importResolver.d.ts.map +1 -1
- package/dist/distribution/importResolver.js +2 -1
- package/dist/distribution/resolver.d.ts +2 -1
- package/dist/distribution/resolver.d.ts.map +1 -1
- package/dist/distribution/resolver.js +3 -1
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +28 -25
- package/dist/progressReporter.d.ts +21 -22
- package/dist/progressReporter.d.ts.map +1 -1
- package/dist/writer/fileWriter.d.ts +10 -1
- package/dist/writer/fileWriter.d.ts.map +1 -1
- package/dist/writer/fileWriter.js +36 -18
- package/dist/writer/writer.d.ts +8 -0
- package/dist/writer/writer.d.ts.map +1 -1
- package/package.json +2 -2
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;CACtB;AAED;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,OAAO,CACX,GAAG,IAAI,EAAE,UAAU,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,GACnD,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;CACtB;AAED;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,OAAO,CACX,GAAG,IAAI,EAAE,UAAU,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,GACnD,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IAiCpD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
|
|
@@ -20,12 +20,13 @@ export class ImportResolver {
|
|
|
20
20
|
if (result instanceof ResolvedDistribution)
|
|
21
21
|
return result;
|
|
22
22
|
const [dataset] = args;
|
|
23
|
+
const importStart = Date.now();
|
|
23
24
|
const importResult = await this.options.importer.import(dataset);
|
|
24
25
|
if (importResult instanceof ImportSuccessful) {
|
|
25
26
|
await this.options.server.start();
|
|
26
27
|
const distribution = Distribution.sparql(this.options.server.queryEndpoint, importResult.identifier);
|
|
27
28
|
distribution.subjectFilter = importResult.distribution.subjectFilter;
|
|
28
|
-
return new ResolvedDistribution(distribution, result.probeResults, importResult.distribution);
|
|
29
|
+
return new ResolvedDistribution(distribution, result.probeResults, importResult.distribution, Date.now() - importStart);
|
|
29
30
|
}
|
|
30
31
|
return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', result.probeResults, importResult instanceof ImportFailed ? importResult : undefined);
|
|
31
32
|
}
|
|
@@ -5,7 +5,8 @@ export declare class ResolvedDistribution {
|
|
|
5
5
|
readonly distribution: Distribution;
|
|
6
6
|
readonly probeResults: ProbeResultType[];
|
|
7
7
|
readonly importedFrom?: Distribution | undefined;
|
|
8
|
-
|
|
8
|
+
readonly importDuration?: number | undefined;
|
|
9
|
+
constructor(distribution: Distribution, probeResults: ProbeResultType[], importedFrom?: Distribution | undefined, importDuration?: number | undefined);
|
|
9
10
|
}
|
|
10
11
|
export declare class NoDistributionAvailable {
|
|
11
12
|
readonly dataset: Dataset;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAA4B,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAE5E,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;
|
|
1
|
+
{"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAA4B,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAE5E,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;IACpC,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM;gBAHvB,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA,EAC3B,cAAc,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;GAQG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAIjD,OAAO,CACX,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CA2B3D"}
|
|
@@ -3,10 +3,12 @@ export class ResolvedDistribution {
|
|
|
3
3
|
distribution;
|
|
4
4
|
probeResults;
|
|
5
5
|
importedFrom;
|
|
6
|
-
|
|
6
|
+
importDuration;
|
|
7
|
+
constructor(distribution, probeResults, importedFrom, importDuration) {
|
|
7
8
|
this.distribution = distribution;
|
|
8
9
|
this.probeResults = probeResults;
|
|
9
10
|
this.importedFrom = importedFrom;
|
|
11
|
+
this.importDuration = importDuration;
|
|
10
12
|
}
|
|
11
13
|
}
|
|
12
14
|
export class NoDistributionAvailable {
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAsCD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA6Cd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -20,6 +20,10 @@ class FanOutWriter {
|
|
|
20
20
|
})());
|
|
21
21
|
}
|
|
22
22
|
}
|
|
23
|
+
async flush(dataset) {
|
|
24
|
+
for (const w of this.writers)
|
|
25
|
+
await w.flush?.(dataset);
|
|
26
|
+
}
|
|
23
27
|
}
|
|
24
28
|
class TransformWriter {
|
|
25
29
|
inner;
|
|
@@ -31,6 +35,9 @@ class TransformWriter {
|
|
|
31
35
|
async write(dataset, quads) {
|
|
32
36
|
await this.inner.write(dataset, this.transform(quads, dataset));
|
|
33
37
|
}
|
|
38
|
+
async flush(dataset) {
|
|
39
|
+
await this.inner.flush?.(dataset);
|
|
40
|
+
}
|
|
34
41
|
}
|
|
35
42
|
export class Pipeline {
|
|
36
43
|
name;
|
|
@@ -66,27 +73,22 @@ export class Pipeline {
|
|
|
66
73
|
}
|
|
67
74
|
async run() {
|
|
68
75
|
const start = Date.now();
|
|
69
|
-
this.reporter?.pipelineStart(this.name);
|
|
76
|
+
this.reporter?.pipelineStart?.(this.name);
|
|
70
77
|
const datasets = await this.datasetSelector.select();
|
|
71
78
|
for await (const dataset of datasets) {
|
|
72
79
|
await this.processDataset(dataset);
|
|
73
80
|
}
|
|
74
|
-
this.reporter?.pipelineComplete({ duration: Date.now() - start });
|
|
81
|
+
this.reporter?.pipelineComplete?.({ duration: Date.now() - start });
|
|
75
82
|
}
|
|
76
83
|
async processDataset(dataset) {
|
|
77
|
-
|
|
78
|
-
this.reporter?.datasetStart(datasetIri);
|
|
84
|
+
this.reporter?.datasetStart?.(dataset);
|
|
79
85
|
const resolved = await this.distributionResolver.resolve(dataset);
|
|
80
|
-
this.reporter?.distributionsAnalyzed(
|
|
86
|
+
this.reporter?.distributionsAnalyzed?.(dataset, mapProbeResults(dataset, resolved.probeResults));
|
|
81
87
|
if (resolved instanceof NoDistributionAvailable) {
|
|
82
|
-
this.reporter?.datasetSkipped(
|
|
88
|
+
this.reporter?.datasetSkipped?.(dataset, resolved.message);
|
|
83
89
|
return;
|
|
84
90
|
}
|
|
85
|
-
this.reporter?.distributionSelected(
|
|
86
|
-
accessUrl: resolved.distribution.accessUrl.toString(),
|
|
87
|
-
namedGraph: resolved.distribution.namedGraph,
|
|
88
|
-
importedFrom: resolved.importedFrom?.accessUrl?.toString(),
|
|
89
|
-
});
|
|
91
|
+
this.reporter?.distributionSelected?.(dataset, resolved.distribution, resolved.importedFrom, resolved.importDuration);
|
|
90
92
|
try {
|
|
91
93
|
for (const stage of this.stages) {
|
|
92
94
|
try {
|
|
@@ -98,17 +100,18 @@ export class Pipeline {
|
|
|
98
100
|
}
|
|
99
101
|
}
|
|
100
102
|
catch (error) {
|
|
101
|
-
this.reporter?.stageFailed(stage.name, error instanceof Error ? error : new Error(String(error)));
|
|
103
|
+
this.reporter?.stageFailed?.(stage.name, error instanceof Error ? error : new Error(String(error)));
|
|
102
104
|
}
|
|
103
105
|
}
|
|
104
106
|
}
|
|
105
107
|
finally {
|
|
106
108
|
await this.distributionResolver.cleanup?.();
|
|
107
109
|
}
|
|
108
|
-
this.
|
|
110
|
+
await this.writer.flush?.(dataset);
|
|
111
|
+
this.reporter?.datasetComplete?.(dataset);
|
|
109
112
|
}
|
|
110
113
|
async runStage(dataset, distribution, stage) {
|
|
111
|
-
this.reporter?.stageStart(stage.name);
|
|
114
|
+
this.reporter?.stageStart?.(stage.name);
|
|
112
115
|
const stageStart = Date.now();
|
|
113
116
|
let elementsProcessed = 0;
|
|
114
117
|
let quadsGenerated = 0;
|
|
@@ -116,14 +119,14 @@ export class Pipeline {
|
|
|
116
119
|
onProgress: (elements, quads) => {
|
|
117
120
|
elementsProcessed = elements;
|
|
118
121
|
quadsGenerated = quads;
|
|
119
|
-
this.reporter?.stageProgress({ elementsProcessed, quadsGenerated });
|
|
122
|
+
this.reporter?.stageProgress?.({ elementsProcessed, quadsGenerated });
|
|
120
123
|
},
|
|
121
124
|
});
|
|
122
125
|
if (result instanceof NotSupported) {
|
|
123
|
-
this.reporter?.stageSkipped(stage.name, result.message);
|
|
126
|
+
this.reporter?.stageSkipped?.(stage.name, result.message);
|
|
124
127
|
}
|
|
125
128
|
else {
|
|
126
|
-
this.reporter?.stageComplete(stage.name, {
|
|
129
|
+
this.reporter?.stageComplete?.(stage.name, {
|
|
127
130
|
elementsProcessed,
|
|
128
131
|
quadsGenerated,
|
|
129
132
|
duration: Date.now() - stageStart,
|
|
@@ -163,7 +166,7 @@ export class Pipeline {
|
|
|
163
166
|
}
|
|
164
167
|
}
|
|
165
168
|
async runChainedStage(dataset, distribution, stage, stageWriter) {
|
|
166
|
-
this.reporter?.stageStart(stage.name);
|
|
169
|
+
this.reporter?.stageStart?.(stage.name);
|
|
167
170
|
const stageStart = Date.now();
|
|
168
171
|
let elementsProcessed = 0;
|
|
169
172
|
let quadsGenerated = 0;
|
|
@@ -171,14 +174,14 @@ export class Pipeline {
|
|
|
171
174
|
onProgress: (elements, quads) => {
|
|
172
175
|
elementsProcessed = elements;
|
|
173
176
|
quadsGenerated = quads;
|
|
174
|
-
this.reporter?.stageProgress({ elementsProcessed, quadsGenerated });
|
|
177
|
+
this.reporter?.stageProgress?.({ elementsProcessed, quadsGenerated });
|
|
175
178
|
},
|
|
176
179
|
});
|
|
177
180
|
if (result instanceof NotSupported) {
|
|
178
|
-
this.reporter?.stageSkipped(stage.name, result.message);
|
|
181
|
+
this.reporter?.stageSkipped?.(stage.name, result.message);
|
|
179
182
|
throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
|
|
180
183
|
}
|
|
181
|
-
this.reporter?.stageComplete(stage.name, {
|
|
184
|
+
this.reporter?.stageComplete?.(stage.name, {
|
|
182
185
|
elementsProcessed,
|
|
183
186
|
quadsGenerated,
|
|
184
187
|
duration: Date.now() - stageStart,
|
|
@@ -195,18 +198,18 @@ export class Pipeline {
|
|
|
195
198
|
}
|
|
196
199
|
}
|
|
197
200
|
}
|
|
198
|
-
function mapProbeResults(probeResults) {
|
|
199
|
-
return probeResults.map((result) => {
|
|
201
|
+
function mapProbeResults(dataset, probeResults) {
|
|
202
|
+
return probeResults.map((result, index) => {
|
|
200
203
|
if (result instanceof NetworkError) {
|
|
201
204
|
return {
|
|
202
|
-
|
|
205
|
+
distribution: dataset.distributions[index],
|
|
203
206
|
type: 'network-error',
|
|
204
207
|
available: false,
|
|
205
208
|
error: result.message,
|
|
206
209
|
};
|
|
207
210
|
}
|
|
208
211
|
return {
|
|
209
|
-
|
|
212
|
+
distribution: dataset.distributions[index],
|
|
210
213
|
type: result instanceof SparqlProbeResult
|
|
211
214
|
? 'sparql'
|
|
212
215
|
: 'data-dump',
|
|
@@ -1,33 +1,32 @@
|
|
|
1
|
+
import type { Dataset, Distribution } from '@lde/dataset';
|
|
2
|
+
export interface DistributionAnalysisResult {
|
|
3
|
+
distribution: Distribution;
|
|
4
|
+
type: 'sparql' | 'data-dump' | 'network-error';
|
|
5
|
+
available: boolean;
|
|
6
|
+
statusCode?: number;
|
|
7
|
+
error?: string;
|
|
8
|
+
}
|
|
1
9
|
export interface ProgressReporter {
|
|
2
|
-
pipelineStart(name: string): void;
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
error?: string;
|
|
10
|
-
}>): void;
|
|
11
|
-
distributionSelected(dataset: string, distribution: {
|
|
12
|
-
accessUrl: string;
|
|
13
|
-
namedGraph?: string;
|
|
14
|
-
importedFrom?: string;
|
|
15
|
-
}): void;
|
|
16
|
-
stageStart(stage: string): void;
|
|
17
|
-
stageProgress(update: {
|
|
10
|
+
pipelineStart?(name: string): void;
|
|
11
|
+
datasetsSelected?(count: number): void;
|
|
12
|
+
datasetStart?(dataset: Dataset): void;
|
|
13
|
+
distributionsAnalyzed?(dataset: Dataset, results: DistributionAnalysisResult[]): void;
|
|
14
|
+
distributionSelected?(dataset: Dataset, distribution: Distribution, importedFrom?: Distribution, importDuration?: number): void;
|
|
15
|
+
stageStart?(stage: string): void;
|
|
16
|
+
stageProgress?(update: {
|
|
18
17
|
elementsProcessed: number;
|
|
19
18
|
quadsGenerated: number;
|
|
20
19
|
}): void;
|
|
21
|
-
stageComplete(stage: string, result: {
|
|
20
|
+
stageComplete?(stage: string, result: {
|
|
22
21
|
elementsProcessed: number;
|
|
23
22
|
quadsGenerated: number;
|
|
24
23
|
duration: number;
|
|
25
24
|
}): void;
|
|
26
|
-
stageFailed(stage: string, error: Error): void;
|
|
27
|
-
stageSkipped(stage: string, reason: string): void;
|
|
28
|
-
datasetComplete(dataset:
|
|
29
|
-
datasetSkipped(dataset:
|
|
30
|
-
pipelineComplete(result: {
|
|
25
|
+
stageFailed?(stage: string, error: Error): void;
|
|
26
|
+
stageSkipped?(stage: string, reason: string): void;
|
|
27
|
+
datasetComplete?(dataset: Dataset): void;
|
|
28
|
+
datasetSkipped?(dataset: Dataset, reason: string): void;
|
|
29
|
+
pipelineComplete?(result: {
|
|
31
30
|
duration: number;
|
|
32
31
|
}): void;
|
|
33
32
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,
|
|
1
|
+
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE1D,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,qBAAqB,CAAC,CACpB,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,0BAA0B,EAAE,GACpC,IAAI,CAAC;IACR,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,GACtB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,eAAe,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACzC,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACvD"}
|
|
@@ -16,16 +16,25 @@ export interface FileWriterOptions {
|
|
|
16
16
|
* @default '-'
|
|
17
17
|
*/
|
|
18
18
|
replacementCharacter?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Turtle prefix declarations. Keys are prefix names, values are namespace IRIs.
|
|
21
|
+
* Only used when format is 'turtle'.
|
|
22
|
+
*/
|
|
23
|
+
prefixes?: Record<string, string>;
|
|
19
24
|
}
|
|
20
25
|
export declare class FileWriter implements Writer {
|
|
21
26
|
private readonly outputDir;
|
|
22
27
|
readonly format: 'turtle' | 'n-triples' | 'n-quads';
|
|
23
28
|
private readonly replacementCharacter;
|
|
24
|
-
private readonly
|
|
29
|
+
private readonly prefixes?;
|
|
30
|
+
private readonly activeWriters;
|
|
25
31
|
constructor(options: FileWriterOptions);
|
|
26
32
|
write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
|
|
33
|
+
flush(dataset: Dataset): Promise<void>;
|
|
27
34
|
getOutputPath(dataset: Dataset): string;
|
|
28
35
|
getFilename(dataset: Dataset): string;
|
|
36
|
+
private getFilePath;
|
|
37
|
+
private getOrCreateWriter;
|
|
29
38
|
private getExtension;
|
|
30
39
|
}
|
|
31
40
|
//# sourceMappingURL=fileWriter.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAOhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAclE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAc5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IAoB/B,OAAO,CAAC,YAAY;CAUrB"}
|
|
@@ -8,13 +8,9 @@ import { Writer as N3Writer } from 'n3';
|
|
|
8
8
|
*
|
|
9
9
|
* Files are named based on the dataset IRI using filenamify-url.
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
* **Note:** With `format: 'turtle'` each append will repeat the prefix declarations
|
|
16
|
-
* at the start of each chunk. The default `format: 'n-triples'` produces clean
|
|
17
|
-
* line-oriented output without repeated headers.
|
|
11
|
+
* A single N3Writer is kept open per dataset across all {@link write} calls,
|
|
12
|
+
* so Turtle prefix declarations are written once and triples can be grouped
|
|
13
|
+
* by subject. Call {@link flush} after all stages complete to finalize the file.
|
|
18
14
|
*/
|
|
19
15
|
const formatMap = {
|
|
20
16
|
turtle: 'Turtle',
|
|
@@ -25,11 +21,13 @@ export class FileWriter {
|
|
|
25
21
|
outputDir;
|
|
26
22
|
format;
|
|
27
23
|
replacementCharacter;
|
|
28
|
-
|
|
24
|
+
prefixes;
|
|
25
|
+
activeWriters = new Map();
|
|
29
26
|
constructor(options) {
|
|
30
27
|
this.outputDir = options.outputDir;
|
|
31
28
|
this.format = options.format ?? 'n-triples';
|
|
32
29
|
this.replacementCharacter = options.replacementCharacter ?? '-';
|
|
30
|
+
this.prefixes = options.prefixes;
|
|
33
31
|
}
|
|
34
32
|
async write(dataset, quads) {
|
|
35
33
|
// Peek at the first quad to avoid creating empty files.
|
|
@@ -37,18 +35,20 @@ export class FileWriter {
|
|
|
37
35
|
const first = await iterator.next();
|
|
38
36
|
if (first.done)
|
|
39
37
|
return;
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
const flags = this.writtenFiles.has(filePath) ? 'a' : 'w';
|
|
43
|
-
this.writtenFiles.add(filePath);
|
|
44
|
-
const stream = createWriteStream(filePath, { flags });
|
|
45
|
-
const writer = new N3Writer(stream, { format: formatMap[this.format] });
|
|
46
|
-
writer.addQuad(first.value);
|
|
38
|
+
const { n3Writer } = await this.getOrCreateWriter(dataset);
|
|
39
|
+
n3Writer.addQuad(first.value);
|
|
47
40
|
for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
|
|
48
|
-
|
|
41
|
+
n3Writer.addQuad(quad);
|
|
49
42
|
}
|
|
43
|
+
}
|
|
44
|
+
async flush(dataset) {
|
|
45
|
+
const key = this.getFilePath(dataset);
|
|
46
|
+
const entry = this.activeWriters.get(key);
|
|
47
|
+
if (!entry)
|
|
48
|
+
return;
|
|
49
|
+
this.activeWriters.delete(key);
|
|
50
50
|
await new Promise((resolve, reject) => {
|
|
51
|
-
|
|
51
|
+
entry.n3Writer.end((error) => {
|
|
52
52
|
if (error)
|
|
53
53
|
reject(error);
|
|
54
54
|
else
|
|
@@ -57,7 +57,7 @@ export class FileWriter {
|
|
|
57
57
|
});
|
|
58
58
|
}
|
|
59
59
|
getOutputPath(dataset) {
|
|
60
|
-
return
|
|
60
|
+
return this.getFilePath(dataset);
|
|
61
61
|
}
|
|
62
62
|
getFilename(dataset) {
|
|
63
63
|
const extension = this.getExtension();
|
|
@@ -66,6 +66,24 @@ export class FileWriter {
|
|
|
66
66
|
});
|
|
67
67
|
return `${baseName}.${extension}`;
|
|
68
68
|
}
|
|
69
|
+
getFilePath(dataset) {
|
|
70
|
+
return join(this.outputDir, this.getFilename(dataset));
|
|
71
|
+
}
|
|
72
|
+
async getOrCreateWriter(dataset) {
|
|
73
|
+
const key = this.getFilePath(dataset);
|
|
74
|
+
const existing = this.activeWriters.get(key);
|
|
75
|
+
if (existing)
|
|
76
|
+
return existing;
|
|
77
|
+
await mkdir(dirname(key), { recursive: true });
|
|
78
|
+
const stream = createWriteStream(key, { flags: 'w' });
|
|
79
|
+
const n3Writer = new N3Writer(stream, {
|
|
80
|
+
format: formatMap[this.format],
|
|
81
|
+
prefixes: this.prefixes,
|
|
82
|
+
});
|
|
83
|
+
const entry = { n3Writer, stream };
|
|
84
|
+
this.activeWriters.set(key, entry);
|
|
85
|
+
return entry;
|
|
86
|
+
}
|
|
69
87
|
getExtension() {
|
|
70
88
|
switch (this.format) {
|
|
71
89
|
case 'turtle':
|
package/dist/writer/writer.d.ts
CHANGED
|
@@ -11,5 +11,13 @@ export interface Writer {
|
|
|
11
11
|
* @param quads The RDF quads to write
|
|
12
12
|
*/
|
|
13
13
|
write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
|
|
14
|
+
/**
|
|
15
|
+
* Finalize writing for a dataset. Called after all stages complete.
|
|
16
|
+
*
|
|
17
|
+
* Writers that buffer output across multiple {@link write} calls (e.g. to
|
|
18
|
+
* share Turtle prefix declarations) should implement this to flush remaining
|
|
19
|
+
* data and release resources.
|
|
20
|
+
*/
|
|
21
|
+
flush?(dataset: Dataset): Promise<void>;
|
|
14
22
|
}
|
|
15
23
|
//# sourceMappingURL=writer.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnE;;;;;;OAMG;IACH,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC"}
|
package/package.json
CHANGED