@lde/pipeline 0.7.0 → 0.7.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/writer/fileWriter.d.ts +1 -0
- package/dist/writer/fileWriter.d.ts.map +1 -1
- package/dist/writer/fileWriter.js +13 -1
- package/dist/writer/sparqlUpdateWriter.d.ts +4 -2
- package/dist/writer/sparqlUpdateWriter.d.ts.map +1 -1
- package/dist/writer/sparqlUpdateWriter.js +8 -3
- package/package.json +1 -1
|
@@ -15,6 +15,7 @@ export interface FileWriterOptions {
|
|
|
15
15
|
export declare class FileWriter implements Writer {
|
|
16
16
|
private readonly outputDir;
|
|
17
17
|
readonly format: 'turtle' | 'n-triples' | 'n-quads';
|
|
18
|
+
private readonly writtenFiles;
|
|
18
19
|
constructor(options: FileWriterOptions);
|
|
19
20
|
write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
|
|
20
21
|
getOutputPath(dataset: Dataset): string;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;
|
|
1
|
+
{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAsBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAqB;gBAEtC,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA4BxE,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,YAAY;CAUrB"}
|
|
@@ -7,6 +7,15 @@ import { Writer as N3Writer } from 'n3';
|
|
|
7
7
|
* Streams RDF quads to files on disk using N3 Writer.
|
|
8
8
|
*
|
|
9
9
|
* Files are named based on the dataset IRI using filenamify-url.
|
|
10
|
+
*
|
|
11
|
+
* The first {@link write} call for a given dataset creates (or overwrites) the file.
|
|
12
|
+
* Subsequent calls for the same dataset append to it, so that multiple pipeline stages
|
|
13
|
+
* can each contribute quads to a single output file.
|
|
14
|
+
*
|
|
15
|
+
* **Note:** With `format: 'turtle'` (the default) each append will repeat the prefix
|
|
16
|
+
* declarations at the start of each chunk. For multi-stage pipelines, prefer
|
|
17
|
+
* `format: 'n-triples'` or `format: 'n-quads'`, which produce clean line-oriented
|
|
18
|
+
* output without repeated headers.
|
|
10
19
|
*/
|
|
11
20
|
const formatMap = {
|
|
12
21
|
turtle: 'Turtle',
|
|
@@ -16,6 +25,7 @@ const formatMap = {
|
|
|
16
25
|
export class FileWriter {
|
|
17
26
|
outputDir;
|
|
18
27
|
format;
|
|
28
|
+
writtenFiles = new Set();
|
|
19
29
|
constructor(options) {
|
|
20
30
|
this.outputDir = options.outputDir;
|
|
21
31
|
this.format = options.format ?? 'turtle';
|
|
@@ -28,7 +38,9 @@ export class FileWriter {
|
|
|
28
38
|
return;
|
|
29
39
|
const filePath = join(this.outputDir, this.getFilename(dataset));
|
|
30
40
|
await mkdir(dirname(filePath), { recursive: true });
|
|
31
|
-
const
|
|
41
|
+
const flags = this.writtenFiles.has(filePath) ? 'a' : 'w';
|
|
42
|
+
this.writtenFiles.add(filePath);
|
|
43
|
+
const stream = createWriteStream(filePath, { flags });
|
|
32
44
|
const writer = new N3Writer(stream, { format: formatMap[this.format] });
|
|
33
45
|
writer.addQuad(first.value);
|
|
34
46
|
for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
|
|
@@ -26,14 +26,16 @@ export interface SparqlWriterOptions {
|
|
|
26
26
|
/**
|
|
27
27
|
* Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
|
|
28
28
|
*
|
|
29
|
-
* Clears the named graph before writing, then streams quads in batches
|
|
30
|
-
* to avoid accumulating the entire dataset in memory.
|
|
29
|
+
* Clears the named graph before the first write per dataset per instance, then
|
|
30
|
+
* streams quads in batches to avoid accumulating the entire dataset in memory.
|
|
31
|
+
* Subsequent calls to {@link write} for the same dataset append rather than replace.
|
|
31
32
|
*/
|
|
32
33
|
export declare class SparqlUpdateWriter implements Writer {
|
|
33
34
|
private readonly endpoint;
|
|
34
35
|
private readonly auth?;
|
|
35
36
|
private readonly fetch;
|
|
36
37
|
private readonly batchSize;
|
|
38
|
+
private readonly clearedGraphs;
|
|
37
39
|
constructor(options: SparqlWriterOptions);
|
|
38
40
|
write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
|
|
39
41
|
private clearGraph;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED
|
|
1
|
+
{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;GAMG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqB;gBAEvC,OAAO,EAAE,mBAAmB;IAOlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAa1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAqB5B"}
|
|
@@ -3,14 +3,16 @@ import { serializeQuads } from './serialize.js';
|
|
|
3
3
|
/**
|
|
4
4
|
* Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
|
|
5
5
|
*
|
|
6
|
-
* Clears the named graph before writing, then streams quads in batches
|
|
7
|
-
* to avoid accumulating the entire dataset in memory.
|
|
6
|
+
* Clears the named graph before the first write per dataset per instance, then
|
|
7
|
+
* streams quads in batches to avoid accumulating the entire dataset in memory.
|
|
8
|
+
* Subsequent calls to {@link write} for the same dataset append rather than replace.
|
|
8
9
|
*/
|
|
9
10
|
export class SparqlUpdateWriter {
|
|
10
11
|
endpoint;
|
|
11
12
|
auth;
|
|
12
13
|
fetch;
|
|
13
14
|
batchSize;
|
|
15
|
+
clearedGraphs = new Set();
|
|
14
16
|
constructor(options) {
|
|
15
17
|
this.endpoint = options.endpoint;
|
|
16
18
|
this.auth = options.auth;
|
|
@@ -19,7 +21,10 @@ export class SparqlUpdateWriter {
|
|
|
19
21
|
}
|
|
20
22
|
async write(dataset, quads) {
|
|
21
23
|
const graphUri = dataset.iri.toString();
|
|
22
|
-
|
|
24
|
+
if (!this.clearedGraphs.has(graphUri)) {
|
|
25
|
+
await this.clearGraph(graphUri);
|
|
26
|
+
this.clearedGraphs.add(graphUri);
|
|
27
|
+
}
|
|
23
28
|
for await (const chunk of batch(quads, this.batchSize)) {
|
|
24
29
|
await this.insertBatch(graphUri, chunk);
|
|
25
30
|
}
|