@lde/pipeline 0.6.20 → 0.6.22
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md
CHANGED
@@ -10,13 +10,37 @@ Framework for building RDF data processing pipelines with SPARQL.
 - **SparqlConstructExecutor** — streaming SPARQL CONSTRUCT with template substitution and variable bindings
 - **Distribution analysis** — probe and analyze dataset distributions
 
-##
+## Components
 
-
-
-
-
-
+A **Pipeline** consists of:
+
+- one **[Dataset Selector](#dataset-selector)**
+- one **[Distribution Resolver](#distribution-resolver)** that resolves the input dataset to a usable SPARQL distribution
+- one or more **Stages**, each consisting of:
+  - an optional **Selector** that filters resources
+  - one or more **Executors** that generate triples for each selected resource
+
+### Dataset Selector
+
+Selects datasets, either specified manually by the user or discovered dynamically by querying a DCAT Dataset Registry.
+
+### Distribution Resolver
+
+Resolves each selected dataset to a usable distribution.
+
+#### SPARQL Distribution Resolver
+
+If a working SPARQL endpoint is already available for the dataset, that endpoint is used.
+If not, and a valid RDF data dump is available, the dump is imported into a local SPARQL server.
+
+#### Other Distribution Resolvers
+
+### Bindings Selector
+
+Selects resources from the dataset in order to fan out queries per result in the executor.
+Binding variables are left free in the executor query and are replaced with a `VALUES { ... }` clause.
+
+### Executor
 
 ## Usage
 
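The component structure described in the new README section maps naturally onto a small configuration object. The sketch below is illustrative only (the interface and property names are invented, not the package's actual API); it shows how the pieces fit together and how a Bindings Selector result might be fanned out into an executor query via a VALUES clause.

// Illustrative shapes only; not the actual @lde/pipeline API.
interface Dataset { iri: URL; }
interface DatasetSelector { select(): AsyncIterable<Dataset>; }
interface DistributionResolver {
  resolve(dataset: Dataset): Promise<{ endpoint: URL }>;
}

interface Stage {
  selector?: string;   // optional SPARQL SELECT that picks resources
  executors: string[]; // SPARQL CONSTRUCT queries run per selected resource
}

interface Pipeline {
  datasetSelector: DatasetSelector;
  distributionResolver: DistributionResolver;
  stages: Stage[];
}

// The Bindings Selector picks resources; each result row is then injected
// into the executor query, binding the free ?resource variable through a
// VALUES clause, e.g.: VALUES ?resource { <http://example.org/alice> }
const stage: Stage = {
  selector: 'SELECT ?resource WHERE { ?resource a <http://schema.org/Person> }',
  executors: [
    `CONSTRUCT { ?resource <http://schema.org/name> ?name }
     WHERE { ?resource <http://schema.org/name> ?name }`,
  ],
};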
package/dist/index.d.ts
CHANGED
package/dist/index.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC"}
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
package/dist/index.js
CHANGED
package/dist/writer/sparqlUpdateWriter.d.ts
CHANGED
@@ -21,7 +21,8 @@ export interface SparqlWriterOptions {
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- *
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export declare class SparqlUpdateWriter implements Writer {
     private readonly endpoint;
@@ -29,6 +30,8 @@ export declare class SparqlUpdateWriter implements Writer {
     private readonly batchSize;
     constructor(options: SparqlWriterOptions);
     write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
+    private clearGraph;
     private insertBatch;
+    private executeUpdate;
 }
 //# sourceMappingURL=sparqlUpdateWriter.d.ts.map
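For orientation, a minimal usage sketch of the class declared above. The option names are inferred from the private members shown (endpoint, batchSize) and from this.fetch in the implementation further down; treat them as assumptions rather than documented API.

import { SparqlUpdateWriter } from '@lde/pipeline';

// Hypothetical inputs; the real Dataset and Quad types come from the
// package's own dependencies.
declare const dataset: { iri: URL };
declare const quads: AsyncIterable<unknown>;

const writer = new SparqlUpdateWriter({
  endpoint: new URL('http://localhost:3030/ds/update'), // assumed option name
  batchSize: 1000, // assumed option name: quads per INSERT DATA request
});

// Clears the graph named by dataset.iri, then streams the quads in batches.
await writer.write(dataset, quads);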
package/dist/writer/sparqlUpdateWriter.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;
+{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAMlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAS1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAgB5B"}
package/dist/writer/sparqlUpdateWriter.js
CHANGED
@@ -1,8 +1,10 @@
+import { batch } from '../batch.js';
 import { serializeQuads } from './serialize.js';
 /**
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
  *
- *
+ * Clears the named graph before writing, then streams quads in batches
+ * to avoid accumulating the entire dataset in memory.
  */
 export class SparqlUpdateWriter {
     endpoint;
@@ -15,22 +17,19 @@ export class SparqlUpdateWriter {
     }
     async write(dataset, quads) {
        const graphUri = dataset.iri.toString();
-        const collected = [];
-        for await (const quad of quads) {
-            collected.push(quad);
-        }
-        if (collected.length === 0) {
-            return;
-        }
-        // Process in batches to avoid hitting endpoint size limits.
-        for (let i = 0; i < collected.length; i += this.batchSize) {
-            const batch = collected.slice(i, i + this.batchSize);
-            await this.insertBatch(graphUri, batch);
+        await this.clearGraph(graphUri);
+        for await (const chunk of batch(quads, this.batchSize)) {
+            await this.insertBatch(graphUri, chunk);
         }
     }
+    async clearGraph(graphUri) {
+        await this.executeUpdate(`CLEAR GRAPH <${graphUri}>`);
+    }
     async insertBatch(graphUri, quads) {
         const turtleData = await serializeQuads(quads, 'N-Triples');
-        const query = `INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`;
+        await this.executeUpdate(`INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`);
+    }
+    async executeUpdate(query) {
         const response = await this.fetch(this.endpoint.toString(), {
             method: 'POST',
             headers: {
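The new implementation imports batch from '../batch.js', whose body is not part of this diff. A plausible sketch, assuming it is a plain chunking async generator: it groups an AsyncIterable into arrays of at most size items, which is what lets write() stream quads without collecting them all first.

// Sketch of a chunking helper consistent with the call site above;
// the actual ../batch.js may differ.
async function* batch<T>(source: AsyncIterable<T>, size: number): AsyncGenerator<T[]> {
  let chunk: T[] = [];
  for await (const item of source) {
    chunk.push(item);
    if (chunk.length === size) {
      yield chunk; // emit a full chunk and start a new one
      chunk = [];
    }
  }
  if (chunk.length > 0) {
    yield chunk; // flush the final, possibly smaller chunk
  }
}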
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lde/pipeline",
-  "version": "0.6.20",
+  "version": "0.6.22",
   "repository": {
     "url": "https://github.com/ldengine/lde",
     "directory": "packages/pipeline"
@@ -13,18 +13,6 @@
       "import": "./dist/index.js",
       "development": "./src/index.ts",
       "default": "./dist/index.js"
-    },
-    "./writer": {
-      "types": "./dist/writer/index.d.ts",
-      "import": "./dist/writer/index.js",
-      "development": "./src/writer/index.ts",
-      "default": "./dist/writer/index.js"
-    },
-    "./analyzer": {
-      "types": "./dist/analyzer.d.ts",
-      "import": "./dist/analyzer.js",
-      "development": "./src/analyzer.ts",
-      "default": "./dist/analyzer.js"
     }
   },
   "main": "./dist/index.js",
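With the "./writer" and "./analyzer" subpath exports removed, imports presumably move to the package root; the extra re-export line added to index.d.ts.map above is consistent with that reading, though this diff does not show src/index.ts itself. A hedged before/after:

// 0.6.20 (subpath export, removed in this release):
// import { SparqlUpdateWriter } from '@lde/pipeline/writer';

// 0.6.22 (assuming the writer is now re-exported from the root index):
import { SparqlUpdateWriter } from '@lde/pipeline';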