@lde/pipeline 0.6.19 → 0.6.20
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/stage.d.ts +3 -2
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +11 -5
- package/dist/writer/fileWriter.d.ts +2 -2
- package/dist/writer/fileWriter.d.ts.map +1 -1
- package/dist/writer/fileWriter.js +24 -12
- package/dist/writer/sparqlUpdateWriter.d.ts +2 -2
- package/dist/writer/sparqlUpdateWriter.d.ts.map +1 -1
- package/dist/writer/sparqlUpdateWriter.js +8 -5
- package/dist/writer/writer.d.ts +3 -3
- package/dist/writer/writer.d.ts.map +1 -1
- package/package.json +1 -1
package/dist/stage.d.ts
CHANGED

@@ -1,7 +1,7 @@
 import { Dataset, Distribution } from '@lde/dataset';
-import type { Quad } from '@rdfjs/types';
 import type { Executor, VariableBindings } from './sparql/executor.js';
 import { NotSupported } from './sparql/executor.js';
+import type { Writer } from './writer/writer.js';
 export interface StageOptions {
     name: string;
     executors: Executor | Executor[];
@@ -15,7 +15,8 @@ export declare class Stage {
     private readonly selector?;
     private readonly batchSize;
     constructor(options: StageOptions);
-    run(dataset: Dataset, distribution: Distribution): Promise<NotSupported | void>;
+    run(dataset: Dataset, distribution: Distribution, writer: Writer): Promise<NotSupported | void>;
+    private executeWithSelector;
     private executeAll;
 }
 /** Stage-level selector that yields variable bindings for use in executor queries. Pagination is an implementation detail. */
package/dist/stage.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
+{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,YAAY;IAS3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,GACb,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAYjB,mBAAmB;YAuBnB,UAAU;CAkBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
package/dist/stage.js
CHANGED

@@ -13,10 +13,16 @@ export class Stage {
         this.selector = options.selector;
         this.batchSize = options.batchSize ?? 10;
     }
-    async run(dataset, distribution) {
-
-
+    async run(dataset, distribution, writer) {
+        const streams = this.selector
+            ? await this.executeWithSelector(dataset, distribution)
+            : await this.executeAll(dataset, distribution);
+        if (streams instanceof NotSupported) {
+            return streams;
         }
+        await writer.write(dataset, mergeStreams(streams));
+    }
+    async executeWithSelector(dataset, distribution) {
         const streams = [];
         for await (const bindings of batch(this.selector, this.batchSize)) {
             for (const executor of this.executors) {
@@ -31,7 +37,7 @@ export class Stage {
         if (streams.length === 0) {
             return new NotSupported('All executors returned NotSupported');
         }
-        return
+        return streams;
     }
     async executeAll(dataset, distribution) {
         const streams = [];
@@ -44,7 +50,7 @@ export class Stage {
         if (streams.length === 0) {
             return new NotSupported('All executors returned NotSupported');
         }
-        return
+        return streams;
     }
 }
 async function* mergeStreams(streams) {
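Taken together, the stage.js changes invert the data flow: a stage no longer decides where quads go, it hands the merged executor streams to whatever Writer the caller supplies, and an unsupported job comes back as a NotSupported value rather than a rejection. A minimal usage sketch, assuming Stage, FileWriter, NotSupported, and Executor are re-exported from the package root (the diff does not show the root exports) and that NotSupported carries an Error-style message:

```typescript
// Sketch only; import paths and re-exports are assumptions, not shown in the diff.
import { Stage, FileWriter, NotSupported, type Executor } from '@lde/pipeline';
import type { Dataset, Distribution } from '@lde/dataset';

// Placeholders standing in for real pipeline inputs.
declare const myExecutor: Executor;
declare const dataset: Dataset;
declare const distribution: Distribution;

const stage = new Stage({
  name: 'extract',
  executors: [myExecutor],
  batchSize: 10, // same as the constructor default shown above
});
const writer = new FileWriter({ outputDir: './output', format: 'turtle' });

// New in 0.6.20: the writer is an argument to run(), and NotSupported
// is returned as a value instead of being thrown.
const result = await stage.run(dataset, distribution, writer);
if (result instanceof NotSupported) {
  console.warn(`Stage skipped: ${result.message}`); // assumes an Error-like message
}
```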
package/dist/writer/fileWriter.d.ts
CHANGED

@@ -1,5 +1,5 @@
 import { Dataset } from '@lde/dataset';
-import type {
+import type { Quad } from '@rdfjs/types';
 import { Writer } from './writer.js';
 export interface FileWriterOptions {
     /**
@@ -16,7 +16,7 @@ export declare class FileWriter implements Writer {
     private readonly outputDir;
     private readonly format;
     constructor(options: FileWriterOptions);
-    write(dataset: Dataset,
+    write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
     private getFilename;
     private getExtension;
 }
package/dist/writer/fileWriter.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,
+{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAaD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAqC;gBAEhD,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBxE,OAAO,CAAC,WAAW;IAQnB,OAAO,CAAC,YAAY;CAUrB"}
package/dist/writer/fileWriter.js
CHANGED

@@ -1,9 +1,10 @@
-import {
+import { createWriteStream } from 'node:fs';
+import { mkdir } from 'node:fs/promises';
 import { join, dirname } from 'node:path';
 import filenamifyUrl from 'filenamify-url';
-import {
+import { Writer as N3Writer } from 'n3';
 /**
- *
+ * Streams RDF quads to files on disk using N3 Writer.
  *
  * Files are named based on the dataset IRI using filenamify-url.
  */
@@ -19,17 +20,28 @@ export class FileWriter {
         this.outputDir = options.outputDir;
         this.format = options.format ?? 'turtle';
     }
-    async write(dataset,
-
-
+    async write(dataset, quads) {
+        // Peek at the first quad to avoid creating empty files.
+        const iterator = quads[Symbol.asyncIterator]();
+        const first = await iterator.next();
+        if (first.done)
             return;
-
-        const filename = this.getFilename(dataset);
-        const filePath = join(this.outputDir, filename);
-        // Ensure the output directory exists.
+        const filePath = join(this.outputDir, this.getFilename(dataset));
         await mkdir(dirname(filePath), { recursive: true });
-        const
-
+        const stream = createWriteStream(filePath);
+        const writer = new N3Writer(stream, { format: formatMap[this.format] });
+        writer.addQuad(first.value);
+        for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
+            writer.addQuad(quad);
+        }
+        await new Promise((resolve, reject) => {
+            writer.end((error) => {
+                if (error)
+                    reject(error);
+                else
+                    resolve();
+            });
+        });
     }
     getFilename(dataset) {
         const extension = this.getExtension();
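The new write implementation peeks at the first quad so an empty stream produces no file, then resumes the same half-consumed iterator through an ad-hoc `{ [Symbol.asyncIterator]: () => iterator }` wrapper so `for await` can pick up where the peek left off. The pattern generalizes; a standalone sketch of the same idea (this helper is not part of the package):

```typescript
// Hypothetical helper: pull the first item off an async iterable without
// losing it, so callers can special-case the empty stream.
async function peekFirst<T>(
  source: AsyncIterable<T>,
): Promise<{ first: T; rest: AsyncIterable<T> } | undefined> {
  const iterator = source[Symbol.asyncIterator]();
  const result = await iterator.next();
  if (result.done) return undefined;
  return {
    first: result.value,
    // Re-wrap the already-started iterator so `for await` continues it.
    rest: { [Symbol.asyncIterator]: () => iterator },
  };
}
```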
package/dist/writer/sparqlUpdateWriter.d.ts
CHANGED

@@ -1,5 +1,5 @@
 import { Dataset } from '@lde/dataset';
-import type {
+import type { Quad } from '@rdfjs/types';
 import { Writer } from './writer.js';
 export interface SparqlWriterOptions {
     /**
@@ -28,7 +28,7 @@ export declare class SparqlUpdateWriter implements Writer {
     private readonly fetch;
     private readonly batchSize;
     constructor(options: SparqlWriterOptions);
-    write(dataset: Dataset,
+    write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
     private insertBatch;
 }
 //# sourceMappingURL=sparqlUpdateWriter.d.ts.map
package/dist/writer/sparqlUpdateWriter.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,
+{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;GAIG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAMlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAkB1D,WAAW;CAmB1B"}
package/dist/writer/sparqlUpdateWriter.js
CHANGED

@@ -13,15 +13,18 @@ export class SparqlUpdateWriter {
         this.fetch = options.fetch ?? globalThis.fetch;
         this.batchSize = options.batchSize ?? 10000;
     }
-    async write(dataset,
+    async write(dataset, quads) {
         const graphUri = dataset.iri.toString();
-        const
-
+        const collected = [];
+        for await (const quad of quads) {
+            collected.push(quad);
+        }
+        if (collected.length === 0) {
             return;
         }
         // Process in batches to avoid hitting endpoint size limits.
-        for (let i = 0; i <
-        const batch =
+        for (let i = 0; i < collected.length; i += this.batchSize) {
+            const batch = collected.slice(i, i + this.batchSize);
             await this.insertBatch(graphUri, batch);
         }
     }
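The private insertBatch is not shown in this diff, but collecting the quads into an array and slicing per batchSize implies one SPARQL Update request per slice of at most 10000 quads. A sketch of what such a request generally looks like (illustrative only: the endpoint handling, serialization, and error policy here are assumptions, not this package's code):

```typescript
import { Writer as N3Writer } from 'n3';
import type { Quad } from '@rdfjs/types';

// Illustrative stand-in for the private insertBatch: serialize one slice
// of quads and POST it as a SPARQL 1.1 INSERT DATA update.
async function insertBatchSketch(
  endpoint: string,
  graphUri: string,
  batch: Quad[],
): Promise<void> {
  // n3's Writer can serialize an array of quads synchronously.
  const triples = new N3Writer({ format: 'N-Triples' }).quadsToString(batch);
  const body = `INSERT DATA { GRAPH <${graphUri}> { ${triples} } }`;
  const response = await fetch(endpoint, {
    method: 'POST',
    headers: { 'Content-Type': 'application/sparql-update' },
    body,
  });
  if (!response.ok) {
    throw new Error(`SPARQL update failed with HTTP ${response.status}`);
  }
}
```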
package/dist/writer/writer.d.ts
CHANGED

@@ -1,5 +1,5 @@
 import { Dataset } from '@lde/dataset';
-import type {
+import type { Quad } from '@rdfjs/types';
 /**
  * Interface for writing RDF data to a destination.
  */
@@ -8,8 +8,8 @@ export interface Writer {
      * Write RDF data for a dataset to the destination.
      *
      * @param dataset The dataset metadata
-     * @param
+     * @param quads The RDF quads to write
      */
-    write(dataset: Dataset,
+    write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
 }
 //# sourceMappingURL=writer.d.ts.map
package/dist/writer/writer.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,
+{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACpE"}