@lde/pipeline 0.6.19 → 0.6.21
- package/dist/stage.d.ts +3 -2
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +11 -5
- package/dist/writer/fileWriter.d.ts +2 -2
- package/dist/writer/fileWriter.d.ts.map +1 -1
- package/dist/writer/fileWriter.js +24 -12
- package/dist/writer/sparqlUpdateWriter.d.ts +6 -3
- package/dist/writer/sparqlUpdateWriter.d.ts.map +1 -1
- package/dist/writer/sparqlUpdateWriter.js +13 -11
- package/dist/writer/writer.d.ts +3 -3
- package/dist/writer/writer.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/stage.d.ts
CHANGED
@@ -1,7 +1,7 @@
 import { Dataset, Distribution } from '@lde/dataset';
-import type { Quad } from '@rdfjs/types';
 import type { Executor, VariableBindings } from './sparql/executor.js';
 import { NotSupported } from './sparql/executor.js';
+import type { Writer } from './writer/writer.js';
 export interface StageOptions {
     name: string;
     executors: Executor | Executor[];
@@ -15,7 +15,8 @@ export declare class Stage {
     private readonly selector?;
     private readonly batchSize;
     constructor(options: StageOptions);
-    run(dataset: Dataset, distribution: Distribution): Promise<
+    run(dataset: Dataset, distribution: Distribution, writer: Writer): Promise<NotSupported | void>;
+    private executeWithSelector;
     private executeAll;
 }
 /** Stage-level selector that yields variable bindings for use in executor queries. Pagination is an implementation detail. */
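Note: the breaking API change in this release is the third parameter on Stage.run: a stage no longer decides where its output goes; the caller passes a Writer. A minimal sketch of an adapted caller, assuming Stage and FileWriter are re-exported from the package root (dataset, distribution, and stage are placeholders configured elsewhere):

import { Stage, FileWriter } from '@lde/pipeline'; // assumed root exports
import type { Dataset, Distribution } from '@lde/dataset';

declare const dataset: Dataset;           // placeholder
declare const distribution: Distribution; // placeholder
declare const stage: Stage;               // configured elsewhere

// 0.6.19: await stage.run(dataset, distribution);
// 0.6.21: the caller supplies the Writer explicitly.
const writer = new FileWriter({ outputDir: './output' });
const result = await stage.run(dataset, distribution, writer);
if (result !== undefined) {
    console.warn(result); // NotSupported: all executors declined
}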
package/dist/stage.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
+{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,YAAY;IAS3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAYjB,mBAAmB;YAuBnB,UAAU;CAkBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
package/dist/stage.js
CHANGED
@@ -13,10 +13,16 @@ export class Stage {
         this.selector = options.selector;
         this.batchSize = options.batchSize ?? 10;
     }
-    async run(dataset, distribution) {
-
-
+    async run(dataset, distribution, writer) {
+        const streams = this.selector
+            ? await this.executeWithSelector(dataset, distribution)
+            : await this.executeAll(dataset, distribution);
+        if (streams instanceof NotSupported) {
+            return streams;
         }
+        await writer.write(dataset, mergeStreams(streams));
+    }
+    async executeWithSelector(dataset, distribution) {
         const streams = [];
         for await (const bindings of batch(this.selector, this.batchSize)) {
             for (const executor of this.executors) {
@@ -31,7 +37,7 @@ export class Stage {
         if (streams.length === 0) {
             return new NotSupported('All executors returned NotSupported');
         }
-        return
+        return streams;
     }
     async executeAll(dataset, distribution) {
         const streams = [];
@@ -44,7 +50,7 @@
         if (streams.length === 0) {
             return new NotSupported('All executors returned NotSupported');
         }
-        return
+        return streams;
     }
 }
 async function* mergeStreams(streams) {
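Note: the batch helper imported from ../batch.js is not shown in this diff. Given its two call sites (batch(this.selector, this.batchSize) here and batch(quads, this.batchSize) in sparqlUpdateWriter.js), a plausible shape is an async generator that groups an async iterable into arrays of at most size items. A sketch, not the package's actual implementation:

// Groups an async iterable into arrays of at most `size` items.
async function* batch<T>(items: AsyncIterable<T>, size: number): AsyncIterable<T[]> {
    let chunk: T[] = [];
    for await (const item of items) {
        chunk.push(item);
        if (chunk.length === size) {
            yield chunk;
            chunk = [];
        }
    }
    if (chunk.length > 0) {
        yield chunk; // trailing, possibly smaller batch
    }
}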
package/dist/writer/fileWriter.d.ts
CHANGED
@@ -1,5 +1,5 @@
 import { Dataset } from '@lde/dataset';
-import type {
+import type { Quad } from '@rdfjs/types';
 import { Writer } from './writer.js';
 export interface FileWriterOptions {
     /**
@@ -16,7 +16,7 @@ export declare class FileWriter implements Writer {
     private readonly outputDir;
     private readonly format;
     constructor(options: FileWriterOptions);
-    write(dataset: Dataset,
+    write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
     private getFilename;
     private getExtension;
 }
package/dist/writer/fileWriter.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,
+{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAaD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqC;gBAEhD,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBxE,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,YAAY;CAUrB"}
package/dist/writer/fileWriter.js
CHANGED
@@ -1,9 +1,10 @@
-import {
+import { createWriteStream } from 'node:fs';
+import { mkdir } from 'node:fs/promises';
 import { join, dirname } from 'node:path';
 import filenamifyUrl from 'filenamify-url';
-import {
+import { Writer as N3Writer } from 'n3';
 /**
- *
+ * Streams RDF quads to files on disk using N3 Writer.
  *
  * Files are named based on the dataset IRI using filenamify-url.
  */
@@ -19,17 +20,28 @@ export class FileWriter {
         this.outputDir = options.outputDir;
         this.format = options.format ?? 'turtle';
     }
-    async write(dataset,
-
-
+    async write(dataset, quads) {
+        // Peek at the first quad to avoid creating empty files.
+        const iterator = quads[Symbol.asyncIterator]();
+        const first = await iterator.next();
+        if (first.done)
             return;
-
-        const filename = this.getFilename(dataset);
-        const filePath = join(this.outputDir, filename);
-        // Ensure the output directory exists.
+        const filePath = join(this.outputDir, this.getFilename(dataset));
         await mkdir(dirname(filePath), { recursive: true });
-        const
-
+        const stream = createWriteStream(filePath);
+        const writer = new N3Writer(stream, { format: formatMap[this.format] });
+        writer.addQuad(first.value);
+        for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
+            writer.addQuad(quad);
+        }
+        await new Promise((resolve, reject) => {
+            writer.end((error) => {
+                if (error)
+                    reject(error);
+                else
+                    resolve();
+            });
+        });
     }
     getFilename(dataset) {
         const extension = this.getExtension();
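Note: the rewritten write() pulls the first quad manually so that no file is created for an empty stream, then rewraps the live iterator so the later for await resumes where the peek left off. The same pattern in isolation, as a generic sketch (not package code):

// "Peek first, then continue": split an async iterable into its first
// element and the remainder, without buffering anything.
async function firstAndRest<T>(
    items: AsyncIterable<T>,
): Promise<{ first: T; rest: AsyncIterable<T> } | undefined> {
    const iterator = items[Symbol.asyncIterator]();
    const first = await iterator.next();
    if (first.done) {
        return undefined; // empty input: the caller can skip all side effects
    }
    // Reusing the same iterator lets `for await` pick up after the peek.
    return { first: first.value, rest: { [Symbol.asyncIterator]: () => iterator } };
}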
package/dist/writer/sparqlUpdateWriter.d.ts
CHANGED
@@ -1,5 +1,5 @@
 import { Dataset } from '@lde/dataset';
-import type {
+import type { Quad } from '@rdfjs/types';
 import { Writer } from './writer.js';
 export interface SparqlWriterOptions {
     /**
@@ -21,14 +21,17 @@ export interface SparqlWriterOptions {
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- *
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export declare class SparqlUpdateWriter implements Writer {
     private readonly endpoint;
     private readonly fetch;
     private readonly batchSize;
     constructor(options: SparqlWriterOptions);
-    write(dataset: Dataset,
+    write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
+    private clearGraph;
     private insertBatch;
+    private executeUpdate;
 }
 //# sourceMappingURL=sparqlUpdateWriter.d.ts.map
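Note: judging by the defaults visible in the implementation below (fetch falls back to globalThis.fetch, batchSize to 10000) and the endpoint.toString() call, construction presumably looks like the following sketch. The URL type for endpoint and the root export are assumptions:

import { SparqlUpdateWriter } from '@lde/pipeline'; // assumed root export

const writer = new SparqlUpdateWriter({
    endpoint: new URL('https://example.org/sparql'), // stringified via toString()
    batchSize: 5000, // optional; defaults to 10000
});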
package/dist/writer/sparqlUpdateWriter.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,
+{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAMlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAS1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAgB5B"}
package/dist/writer/sparqlUpdateWriter.js
CHANGED
@@ -1,8 +1,10 @@
+import { batch } from '../batch.js';
 import { serializeQuads } from './serialize.js';
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- *
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export class SparqlUpdateWriter {
     endpoint;
@@ -13,21 +15,21 @@ export class SparqlUpdateWriter {
         this.fetch = options.fetch ?? globalThis.fetch;
         this.batchSize = options.batchSize ?? 10000;
     }
-    async write(dataset,
+    async write(dataset, quads) {
         const graphUri = dataset.iri.toString();
-
-
-
-        }
-        // Process in batches to avoid hitting endpoint size limits.
-        for (let i = 0; i < quads.length; i += this.batchSize) {
-            const batch = quads.slice(i, i + this.batchSize);
-            await this.insertBatch(graphUri, batch);
+        await this.clearGraph(graphUri);
+        for await (const chunk of batch(quads, this.batchSize)) {
+            await this.insertBatch(graphUri, chunk);
         }
     }
+    async clearGraph(graphUri) {
+        await this.executeUpdate(`CLEAR GRAPH <${graphUri}>`);
+    }
     async insertBatch(graphUri, quads) {
         const turtleData = await serializeQuads(quads, 'N-Triples');
-
+        await this.executeUpdate(`INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`);
+    }
+    async executeUpdate(query) {
         const response = await this.fetch(this.endpoint.toString(), {
             method: 'POST',
             headers: {
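Note: this diff truncates executeUpdate after headers: {. A standard SPARQL 1.1 Update direct POST looks like the sketch below; whether the package sets exactly this Content-Type is not visible here:

// Generic SPARQL 1.1 Update POST (sketch; the package's actual headers
// are cut off in this diff).
async function executeUpdateSketch(endpoint: URL, query: string): Promise<void> {
    const response = await fetch(endpoint.toString(), {
        method: 'POST',
        headers: { 'Content-Type': 'application/sparql-update' },
        body: query,
    });
    if (!response.ok) {
        throw new Error(`SPARQL update failed: ${response.status}`);
    }
}

// Per write(), the updates issued are, in order:
//   CLEAR GRAPH <dataset-iri>
//   INSERT DATA { GRAPH <dataset-iri> { ...one batch as N-Triples... } }   (once per batch)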
package/dist/writer/writer.d.ts
CHANGED
@@ -1,5 +1,5 @@
 import { Dataset } from '@lde/dataset';
-import type {
+import type { Quad } from '@rdfjs/types';
 /**
  * Interface for writing RDF data to a destination.
  */
@@ -8,8 +8,8 @@ export interface Writer {
      * Write RDF data for a dataset to the destination.
      *
      * @param dataset The dataset metadata
-     * @param
+     * @param quads The RDF quads to write
      */
-    write(dataset: Dataset,
+    write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
 }
 //# sourceMappingURL=writer.d.ts.map
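Note: any custom destination only needs to implement the updated Writer interface. A minimal consumer that counts quads without buffering them (illustration only; the Writer re-export from the package root is assumed):

import type { Dataset } from '@lde/dataset';
import type { Quad } from '@rdfjs/types';
import type { Writer } from '@lde/pipeline'; // assumed root export

class CountingWriter implements Writer {
    count = 0;
    async write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void> {
        for await (const _quad of quads) {
            this.count++; // consume the stream without accumulating quads
        }
        console.log(`${dataset.iri}: ${this.count} quads`);
    }
}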
package/dist/writer/writer.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,
+{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACpE"}