@lde/pipeline 0.30.8 → 0.30.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
|
@@ -21,12 +21,23 @@ export interface FileWriterOptions {
|
|
|
21
21
|
* Only used when format is 'turtle'.
|
|
22
22
|
*/
|
|
23
23
|
prefixes?: Record<string, string>;
|
|
24
|
+
/**
|
|
25
|
+
* Derive the named-graph IRI each quad is written into. Only meaningful for
|
|
26
|
+
* format `'n-quads'`; ignored for `'turtle'` and `'n-triples'`, which have no
|
|
27
|
+
* graph slot. When set, every quad is re-emitted with this graph term,
|
|
28
|
+
* regardless of the quad's own graph — mirroring
|
|
29
|
+
* {@link SparqlUpdateWriter}'s `graphIri`, so the same callback produces the
|
|
30
|
+
* same named-graph structure whether you write to a SPARQL store or to files.
|
|
31
|
+
* Defaults to undefined (quads written as-is, i.e. the default graph).
|
|
32
|
+
*/
|
|
33
|
+
graphIri?: (dataset: Dataset) => URL;
|
|
24
34
|
}
|
|
25
35
|
export declare class FileWriter implements Writer {
|
|
26
36
|
private readonly outputDir;
|
|
27
37
|
readonly format: 'turtle' | 'n-triples' | 'n-quads';
|
|
28
38
|
private readonly replacementCharacter;
|
|
29
39
|
private readonly prefixes?;
|
|
40
|
+
private readonly graphIri?;
|
|
30
41
|
private readonly activeWriters;
|
|
31
42
|
constructor(options: FileWriterOptions);
|
|
32
43
|
write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;
|
|
1
|
+
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClC;;;;;;;;OAQG;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,GAAG,CAAC;CACtC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA4B;IACtD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAQhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAiClE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IA8B5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IA4B/B,OAAO,CAAC,YAAY;CAUrB"}
|
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
import { createWriteStream } from 'node:fs';
|
|
2
|
-
import { mkdir } from 'node:fs/promises';
|
|
2
|
+
import { mkdir, rename, rm } from 'node:fs/promises';
|
|
3
3
|
import { join, dirname } from 'node:path';
|
|
4
4
|
import filenamifyUrl from 'filenamify-url';
|
|
5
|
-
import { Writer as N3Writer } from 'n3';
|
|
5
|
+
import { DataFactory, Writer as N3Writer } from 'n3';
|
|
6
6
|
/**
|
|
7
7
|
* Streams RDF quads to files on disk using N3 Writer.
|
|
8
8
|
*
|
|
@@ -22,12 +22,14 @@ export class FileWriter {
|
|
|
22
22
|
format;
|
|
23
23
|
replacementCharacter;
|
|
24
24
|
prefixes;
|
|
25
|
+
graphIri;
|
|
25
26
|
activeWriters = new Map();
|
|
26
27
|
constructor(options) {
|
|
27
28
|
this.outputDir = options.outputDir;
|
|
28
29
|
this.format = options.format ?? 'n-triples';
|
|
29
30
|
this.replacementCharacter = options.replacementCharacter ?? '-';
|
|
30
31
|
this.prefixes = options.prefixes;
|
|
32
|
+
this.graphIri = options.graphIri;
|
|
31
33
|
}
|
|
32
34
|
async write(dataset, quads) {
|
|
33
35
|
// Peek at the first quad to avoid creating empty files.
|
|
@@ -36,9 +38,18 @@ export class FileWriter {
|
|
|
36
38
|
if (first.done)
|
|
37
39
|
return;
|
|
38
40
|
const { n3Writer } = await this.getOrCreateWriter(dataset);
|
|
39
|
-
|
|
41
|
+
// Re-emit each quad into the configured named graph (n-quads only). The
|
|
42
|
+
// pipeline's quads carry no graph context, so the graph is supplied here
|
|
43
|
+
// exactly as SparqlUpdateWriter supplies it via INSERT DATA { GRAPH … }.
|
|
44
|
+
const graphNode = this.format === 'n-quads' && this.graphIri
|
|
45
|
+
? DataFactory.namedNode(this.graphIri(dataset).toString())
|
|
46
|
+
: undefined;
|
|
47
|
+
const addQuad = (quad) => n3Writer.addQuad(graphNode
|
|
48
|
+
? DataFactory.quad(quad.subject, quad.predicate, quad.object, graphNode)
|
|
49
|
+
: quad);
|
|
50
|
+
addQuad(first.value);
|
|
40
51
|
for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
|
|
41
|
-
|
|
52
|
+
addQuad(quad);
|
|
42
53
|
}
|
|
43
54
|
}
|
|
44
55
|
async flush(dataset) {
|
|
@@ -47,18 +58,29 @@ export class FileWriter {
|
|
|
47
58
|
if (!entry)
|
|
48
59
|
return;
|
|
49
60
|
this.activeWriters.delete(key);
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
if (
|
|
57
|
-
reject(
|
|
58
|
-
|
|
59
|
-
|
|
61
|
+
// Quads are streamed to a sibling temp file; only on a clean flush is it
|
|
62
|
+
// atomically renamed onto the final path. A crash therefore leaves at most
|
|
63
|
+
// a stale `*.tmp` — never a truncated final file — so a downstream index
|
|
64
|
+
// rebuild that globs the final extension never reads a half-written file.
|
|
65
|
+
try {
|
|
66
|
+
await new Promise((resolve, reject) => {
|
|
67
|
+
if (entry.stream.errored) {
|
|
68
|
+
reject(entry.stream.errored);
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
71
|
+
entry.n3Writer.end((error) => {
|
|
72
|
+
if (error)
|
|
73
|
+
reject(error);
|
|
74
|
+
else
|
|
75
|
+
resolve();
|
|
76
|
+
});
|
|
60
77
|
});
|
|
61
|
-
}
|
|
78
|
+
}
|
|
79
|
+
catch (error) {
|
|
80
|
+
await rm(entry.tempPath, { force: true, recursive: true });
|
|
81
|
+
throw error;
|
|
82
|
+
}
|
|
83
|
+
await rename(entry.tempPath, key);
|
|
62
84
|
}
|
|
63
85
|
getOutputPath(dataset) {
|
|
64
86
|
return this.getFilePath(dataset);
|
|
@@ -79,7 +101,11 @@ export class FileWriter {
|
|
|
79
101
|
if (existing)
|
|
80
102
|
return existing;
|
|
81
103
|
await mkdir(dirname(key), { recursive: true });
|
|
82
|
-
|
|
104
|
+
// Write to a sibling temp file (same directory, so the flush rename stays on
|
|
105
|
+
// one filesystem and is atomic). The `.tmp` suffix keeps it out of any glob
|
|
106
|
+
// on the final extension.
|
|
107
|
+
const tempPath = `${key}.tmp`;
|
|
108
|
+
const stream = createWriteStream(tempPath, { flags: 'w' });
|
|
83
109
|
stream.on('error', (error) => {
|
|
84
110
|
// Surface stream errors when flushing; prevents 'unhandled error' crashes.
|
|
85
111
|
stream.destroy(error);
|
|
@@ -88,7 +114,7 @@ export class FileWriter {
|
|
|
88
114
|
format: formatMap[this.format],
|
|
89
115
|
prefixes: this.prefixes,
|
|
90
116
|
});
|
|
91
|
-
const entry = { n3Writer, stream };
|
|
117
|
+
const entry = { n3Writer, stream, tempPath };
|
|
92
118
|
this.activeWriters.set(key, entry);
|
|
93
119
|
return entry;
|
|
94
120
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.30.8",
|
|
3
|
+
"version": "0.30.10",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -24,10 +24,10 @@
|
|
|
24
24
|
"!**/*.tsbuildinfo"
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
|
-
"@lde/dataset": "0.7.
|
|
28
|
-
"@lde/dataset-registry-client": "0.8.
|
|
29
|
-
"@lde/distribution-probe": "0.1.
|
|
30
|
-
"@lde/sparql-importer": "0.6.
|
|
27
|
+
"@lde/dataset": "0.7.5",
|
|
28
|
+
"@lde/dataset-registry-client": "0.8.1",
|
|
29
|
+
"@lde/distribution-probe": "0.1.8",
|
|
30
|
+
"@lde/sparql-importer": "0.6.3",
|
|
31
31
|
"@lde/sparql-server": "0.4.11",
|
|
32
32
|
"@rdfjs/types": "^2.0.1",
|
|
33
33
|
"@traqula/generator-sparql-1-1": "^1.1.1",
|