@lde/pipeline 0.30.9 → 0.30.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,12 +21,23 @@ export interface FileWriterOptions {
21
21
  * Only used when format is 'turtle'.
22
22
  */
23
23
  prefixes?: Record<string, string>;
24
+ /**
25
+ * Derive the named-graph IRI each quad is written into. Only meaningful for
26
+ * format `'n-quads'`; ignored for `'turtle'` and `'n-triples'`, which have no
27
+ * graph slot. When set, every quad is re-emitted with this graph term,
28
+ * regardless of the quad's own graph — mirroring
29
+ * {@link SparqlUpdateWriter}'s `graphIri`, so the same callback produces the
30
+ * same named-graph structure whether you write to a SPARQL store or to files.
31
+ * Defaults to undefined (quads are written as-is, keeping whatever graph each quad already carries — normally the default graph).
32
+ */
33
+ graphIri?: (dataset: Dataset) => URL;
24
34
  }
25
35
  export declare class FileWriter implements Writer {
26
36
  private readonly outputDir;
27
37
  readonly format: 'turtle' | 'n-triples' | 'n-quads';
28
38
  private readonly replacementCharacter;
29
39
  private readonly prefixes?;
40
+ private readonly graphIri?;
30
41
  private readonly activeWriters;
31
42
  constructor(options: FileWriterOptions);
32
43
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
@@ -1 +1 @@
1
- {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;CACnC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAOhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAclE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAkB5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IAwB/B,OAAO,CAAC,YAAY;CAUrB"}
1
+ {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClC;;;;;;;;OAQG;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,GAAG,CAAC;CACtC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA4B;IACtD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAQhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAiClE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IA8B5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IA4B/B,OAAO,CAAC,YAAY;CAUrB"}
@@ -1,8 +1,8 @@
1
1
  import { createWriteStream } from 'node:fs';
2
- import { mkdir } from 'node:fs/promises';
2
+ import { mkdir, rename, rm } from 'node:fs/promises';
3
3
  import { join, dirname } from 'node:path';
4
4
  import filenamifyUrl from 'filenamify-url';
5
- import { Writer as N3Writer } from 'n3';
5
+ import { DataFactory, Writer as N3Writer } from 'n3';
6
6
  /**
7
7
  * Streams RDF quads to files on disk using N3 Writer.
8
8
  *
@@ -22,12 +22,14 @@ export class FileWriter {
22
22
  format;
23
23
  replacementCharacter;
24
24
  prefixes;
25
+ graphIri;
25
26
  activeWriters = new Map();
26
27
  constructor(options) {
27
28
  this.outputDir = options.outputDir;
28
29
  this.format = options.format ?? 'n-triples';
29
30
  this.replacementCharacter = options.replacementCharacter ?? '-';
30
31
  this.prefixes = options.prefixes;
32
+ this.graphIri = options.graphIri;
31
33
  }
32
34
  async write(dataset, quads) {
33
35
  // Peek at the first quad to avoid creating empty files.
@@ -36,9 +38,18 @@ export class FileWriter {
36
38
  if (first.done)
37
39
  return;
38
40
  const { n3Writer } = await this.getOrCreateWriter(dataset);
39
- n3Writer.addQuad(first.value);
41
+ // Re-emit each quad into the configured named graph (n-quads only). The
42
+ // pipeline's quads carry no graph context, so the graph is supplied here
43
+ // exactly as SparqlUpdateWriter supplies it via INSERT DATA { GRAPH … }.
44
+ const graphNode = this.format === 'n-quads' && this.graphIri
45
+ ? DataFactory.namedNode(this.graphIri(dataset).toString())
46
+ : undefined;
47
+ const addQuad = (quad) => n3Writer.addQuad(graphNode
48
+ ? DataFactory.quad(quad.subject, quad.predicate, quad.object, graphNode)
49
+ : quad);
50
+ addQuad(first.value);
40
51
  for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
41
- n3Writer.addQuad(quad);
52
+ addQuad(quad);
42
53
  }
43
54
  }
44
55
  async flush(dataset) {
@@ -47,18 +58,29 @@ export class FileWriter {
47
58
  if (!entry)
48
59
  return;
49
60
  this.activeWriters.delete(key);
50
- await new Promise((resolve, reject) => {
51
- if (entry.stream.errored) {
52
- reject(entry.stream.errored);
53
- return;
54
- }
55
- entry.n3Writer.end((error) => {
56
- if (error)
57
- reject(error);
58
- else
59
- resolve();
61
+ // Quads are streamed to a sibling temp file; only on a clean flush is it
62
+ // atomically renamed onto the final path. A crash therefore leaves at most
63
+ // a stale `*.tmp` — never a truncated final file — so a downstream index
64
+ // rebuild that globs the final extension never reads a half-written file.
65
+ try {
66
+ await new Promise((resolve, reject) => {
67
+ if (entry.stream.errored) {
68
+ reject(entry.stream.errored);
69
+ return;
70
+ }
71
+ entry.n3Writer.end((error) => {
72
+ if (error)
73
+ reject(error);
74
+ else
75
+ resolve();
76
+ });
60
77
  });
61
- });
78
+ }
79
+ catch (error) {
80
+ await rm(entry.tempPath, { force: true, recursive: true });
81
+ throw error;
82
+ }
83
+ await rename(entry.tempPath, key);
62
84
  }
63
85
  getOutputPath(dataset) {
64
86
  return this.getFilePath(dataset);
@@ -79,7 +101,11 @@ export class FileWriter {
79
101
  if (existing)
80
102
  return existing;
81
103
  await mkdir(dirname(key), { recursive: true });
82
- const stream = createWriteStream(key, { flags: 'w' });
104
+ // Write to a sibling temp file (same directory, so the flush rename stays on
105
+ // one filesystem and is atomic). The `.tmp` suffix keeps it out of any glob
106
+ // on the final extension.
107
+ const tempPath = `${key}.tmp`;
108
+ const stream = createWriteStream(tempPath, { flags: 'w' });
83
109
  stream.on('error', (error) => {
84
110
  // Surface stream errors when flushing; prevents 'unhandled error' crashes.
85
111
  stream.destroy(error);
@@ -88,7 +114,7 @@ export class FileWriter {
88
114
  format: formatMap[this.format],
89
115
  prefixes: this.prefixes,
90
116
  });
91
- const entry = { n3Writer, stream };
117
+ const entry = { n3Writer, stream, tempPath };
92
118
  this.activeWriters.set(key, entry);
93
119
  return entry;
94
120
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.30.9",
3
+ "version": "0.30.10",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"