@lde/pipeline 0.13.0 → 0.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +8 -0
- package/dist/writer/fileWriter.d.ts +10 -1
- package/dist/writer/fileWriter.d.ts.map +1 -1
- package/dist/writer/fileWriter.js +36 -18
- package/dist/writer/writer.d.ts +8 -0
- package/dist/writer/writer.d.ts.map +1 -1
- package/package.json +2 -2
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAsCD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA6Cd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -20,6 +20,10 @@ class FanOutWriter {
|
|
|
20
20
|
})());
|
|
21
21
|
}
|
|
22
22
|
}
|
|
23
|
+
async flush(dataset) {
|
|
24
|
+
for (const w of this.writers)
|
|
25
|
+
await w.flush?.(dataset);
|
|
26
|
+
}
|
|
23
27
|
}
|
|
24
28
|
class TransformWriter {
|
|
25
29
|
inner;
|
|
@@ -31,6 +35,9 @@ class TransformWriter {
|
|
|
31
35
|
async write(dataset, quads) {
|
|
32
36
|
await this.inner.write(dataset, this.transform(quads, dataset));
|
|
33
37
|
}
|
|
38
|
+
async flush(dataset) {
|
|
39
|
+
await this.inner.flush?.(dataset);
|
|
40
|
+
}
|
|
34
41
|
}
|
|
35
42
|
export class Pipeline {
|
|
36
43
|
name;
|
|
@@ -100,6 +107,7 @@ export class Pipeline {
|
|
|
100
107
|
finally {
|
|
101
108
|
await this.distributionResolver.cleanup?.();
|
|
102
109
|
}
|
|
110
|
+
await this.writer.flush?.(dataset);
|
|
103
111
|
this.reporter?.datasetComplete?.(dataset);
|
|
104
112
|
}
|
|
105
113
|
async runStage(dataset, distribution, stage) {
|
|
package/dist/writer/fileWriter.d.ts
CHANGED
|
@@ -16,16 +16,25 @@ export interface FileWriterOptions {
|
|
|
16
16
|
* @default '-'
|
|
17
17
|
*/
|
|
18
18
|
replacementCharacter?: string;
|
|
19
|
+
/**
|
|
20
|
+
* Turtle prefix declarations. Keys are prefix names, values are namespace IRIs.
|
|
21
|
+
* Only used when format is 'turtle'.
|
|
22
|
+
*/
|
|
23
|
+
prefixes?: Record<string, string>;
|
|
19
24
|
}
|
|
20
25
|
export declare class FileWriter implements Writer {
|
|
21
26
|
private readonly outputDir;
|
|
22
27
|
readonly format: 'turtle' | 'n-triples' | 'n-quads';
|
|
23
28
|
private readonly replacementCharacter;
|
|
24
|
-
private readonly
|
|
29
|
+
private readonly prefixes?;
|
|
30
|
+
private readonly activeWriters;
|
|
25
31
|
constructor(options: FileWriterOptions);
|
|
26
32
|
write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
|
|
33
|
+
flush(dataset: Dataset): Promise<void>;
|
|
27
34
|
getOutputPath(dataset: Dataset): string;
|
|
28
35
|
getFilename(dataset: Dataset): string;
|
|
36
|
+
private getFilePath;
|
|
37
|
+
private getOrCreateWriter;
|
|
29
38
|
private getExtension;
|
|
30
39
|
}
|
|
31
40
|
//# sourceMappingURL=fileWriter.d.ts.map
|
|
package/dist/writer/fileWriter.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAOhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAclE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAc5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IAoB/B,OAAO,CAAC,YAAY;CAUrB"}
|
|
package/dist/writer/fileWriter.js
CHANGED
|
@@ -8,13 +8,9 @@ import { Writer as N3Writer } from 'n3';
|
|
|
8
8
|
*
|
|
9
9
|
* Files are named based on the dataset IRI using filenamify-url.
|
|
10
10
|
*
|
|
11
|
-
*
|
|
12
|
-
*
|
|
13
|
-
*
|
|
14
|
-
*
|
|
15
|
-
* **Note:** With `format: 'turtle'` each append will repeat the prefix declarations
|
|
16
|
-
* at the start of each chunk. The default `format: 'n-triples'` produces clean
|
|
17
|
-
* line-oriented output without repeated headers.
|
|
11
|
+
* A single N3Writer is kept open per dataset across all {@link write} calls,
|
|
12
|
+
* so Turtle prefix declarations are written once and triples can be grouped
|
|
13
|
+
* by subject. Call {@link flush} after all stages complete to finalize the file.
|
|
18
14
|
*/
|
|
19
15
|
const formatMap = {
|
|
20
16
|
turtle: 'Turtle',
|
|
@@ -25,11 +21,13 @@ export class FileWriter {
|
|
|
25
21
|
outputDir;
|
|
26
22
|
format;
|
|
27
23
|
replacementCharacter;
|
|
28
|
-
|
|
24
|
+
prefixes;
|
|
25
|
+
activeWriters = new Map();
|
|
29
26
|
constructor(options) {
|
|
30
27
|
this.outputDir = options.outputDir;
|
|
31
28
|
this.format = options.format ?? 'n-triples';
|
|
32
29
|
this.replacementCharacter = options.replacementCharacter ?? '-';
|
|
30
|
+
this.prefixes = options.prefixes;
|
|
33
31
|
}
|
|
34
32
|
async write(dataset, quads) {
|
|
35
33
|
// Peek at the first quad to avoid creating empty files.
|
|
@@ -37,18 +35,20 @@ export class FileWriter {
|
|
|
37
35
|
const first = await iterator.next();
|
|
38
36
|
if (first.done)
|
|
39
37
|
return;
|
|
40
|
-
const
|
|
41
|
-
|
|
42
|
-
const flags = this.writtenFiles.has(filePath) ? 'a' : 'w';
|
|
43
|
-
this.writtenFiles.add(filePath);
|
|
44
|
-
const stream = createWriteStream(filePath, { flags });
|
|
45
|
-
const writer = new N3Writer(stream, { format: formatMap[this.format] });
|
|
46
|
-
writer.addQuad(first.value);
|
|
38
|
+
const { n3Writer } = await this.getOrCreateWriter(dataset);
|
|
39
|
+
n3Writer.addQuad(first.value);
|
|
47
40
|
for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
|
|
48
|
-
|
|
41
|
+
n3Writer.addQuad(quad);
|
|
49
42
|
}
|
|
43
|
+
}
|
|
44
|
+
async flush(dataset) {
|
|
45
|
+
const key = this.getFilePath(dataset);
|
|
46
|
+
const entry = this.activeWriters.get(key);
|
|
47
|
+
if (!entry)
|
|
48
|
+
return;
|
|
49
|
+
this.activeWriters.delete(key);
|
|
50
50
|
await new Promise((resolve, reject) => {
|
|
51
|
-
|
|
51
|
+
entry.n3Writer.end((error) => {
|
|
52
52
|
if (error)
|
|
53
53
|
reject(error);
|
|
54
54
|
else
|
|
@@ -57,7 +57,7 @@ export class FileWriter {
|
|
|
57
57
|
});
|
|
58
58
|
}
|
|
59
59
|
getOutputPath(dataset) {
|
|
60
|
-
return
|
|
60
|
+
return this.getFilePath(dataset);
|
|
61
61
|
}
|
|
62
62
|
getFilename(dataset) {
|
|
63
63
|
const extension = this.getExtension();
|
|
@@ -66,6 +66,24 @@ export class FileWriter {
|
|
|
66
66
|
});
|
|
67
67
|
return `${baseName}.${extension}`;
|
|
68
68
|
}
|
|
69
|
+
getFilePath(dataset) {
|
|
70
|
+
return join(this.outputDir, this.getFilename(dataset));
|
|
71
|
+
}
|
|
72
|
+
async getOrCreateWriter(dataset) {
|
|
73
|
+
const key = this.getFilePath(dataset);
|
|
74
|
+
const existing = this.activeWriters.get(key);
|
|
75
|
+
if (existing)
|
|
76
|
+
return existing;
|
|
77
|
+
await mkdir(dirname(key), { recursive: true });
|
|
78
|
+
const stream = createWriteStream(key, { flags: 'w' });
|
|
79
|
+
const n3Writer = new N3Writer(stream, {
|
|
80
|
+
format: formatMap[this.format],
|
|
81
|
+
prefixes: this.prefixes,
|
|
82
|
+
});
|
|
83
|
+
const entry = { n3Writer, stream };
|
|
84
|
+
this.activeWriters.set(key, entry);
|
|
85
|
+
return entry;
|
|
86
|
+
}
|
|
69
87
|
getExtension() {
|
|
70
88
|
switch (this.format) {
|
|
71
89
|
case 'turtle':
|
package/dist/writer/writer.d.ts
CHANGED
|
@@ -11,5 +11,13 @@ export interface Writer {
|
|
|
11
11
|
* @param quads The RDF quads to write
|
|
12
12
|
*/
|
|
13
13
|
write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
|
|
14
|
+
/**
|
|
15
|
+
* Finalize writing for a dataset. Called after all stages complete.
|
|
16
|
+
*
|
|
17
|
+
* Writers that buffer output across multiple {@link write} calls (e.g. to
|
|
18
|
+
* share Turtle prefix declarations) should implement this to flush remaining
|
|
19
|
+
* data and release resources.
|
|
20
|
+
*/
|
|
21
|
+
flush?(dataset: Dataset): Promise<void>;
|
|
14
22
|
}
|
|
15
23
|
//# sourceMappingURL=writer.d.ts.map
|
|
package/dist/writer/writer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnE;;;;;;OAMG;IACH,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC"}
|
package/package.json
CHANGED