@rdfc/sparql-ingest-processor-ts 2.0.3 → 2.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -15
- package/lib/SPARQLIngest.d.ts +1 -3
- package/lib/SPARQLIngest.js +37 -36
- package/lib/SPARQLQueries.d.ts +3 -3
- package/lib/SPARQLQueries.js +77 -91
- package/lib/Utils.d.ts +6 -2
- package/lib/Utils.js +18 -4
- package/package.json +4 -4
- package/processors.ttl +2 -7
- package/lib/LogUtil.d.ts +0 -9
- package/lib/LogUtil.js +0 -54
package/README.md
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml) [](https://npmjs.com/package/@rdfc/sparql-ingest-processor-ts)
|
|
4
4
|
|
|
5
|
-
TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) into a SPARQL endpoint.
|
|
5
|
+
TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) or in general, a stream of RDF quads into a SPARQL endpoint.
|
|
6
6
|
|
|
7
|
-
This processor takes a stream of RDF records, transforms them into [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
|
|
8
|
-
It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content.
|
|
7
|
+
This processor takes a stream of RDF records, transforms them into corresponding [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
|
|
8
|
+
It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content. It also supports direct quad ingestion via the SPARQL Graph Store Protocol.
|
|
9
9
|
|
|
10
10
|
---
|
|
11
11
|
|
|
@@ -46,8 +46,7 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
46
46
|
<ingester> a rdfc:SPARQLIngest;
|
|
47
47
|
rdfc:memberStream <in>;
|
|
48
48
|
rdfc:ingestConfig [
|
|
49
|
-
rdfc:
|
|
50
|
-
rdfc:memberShape "http://ex.org/Shape1", "http://ex.org/Shape2";
|
|
49
|
+
rdfc:memberShape "http://ex.org/Shape";
|
|
51
50
|
rdfc:changeSemantics [
|
|
52
51
|
rdfc:changeTypePath "http://ex.org/changeType";
|
|
53
52
|
rdfc:createValue "http://ex.org/Create";
|
|
@@ -60,7 +59,14 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
60
59
|
rdfc:transactionEndPath "http://ex.org/transactionEnd"
|
|
61
60
|
];
|
|
62
61
|
rdfc:graphStoreUrl "http://example.org/sparql";
|
|
63
|
-
rdfc:forVirtuoso false
|
|
62
|
+
rdfc:forVirtuoso false;
|
|
63
|
+
rdfc:accessToken "myAccessToken";
|
|
64
|
+
rdfc:measurePerformance [
|
|
65
|
+
rdfc:name "myPerformanceMeasurement";
|
|
66
|
+
rdfc:outputPath "/path/to/output.json";
|
|
67
|
+
rdfc:failureIsFatal true;
|
|
68
|
+
rdfc:queryTimeout 30000
|
|
69
|
+
]
|
|
64
70
|
];
|
|
65
71
|
rdfc:sparqlWriter <out>.
|
|
66
72
|
```
|
|
@@ -77,13 +83,12 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
77
83
|
---
|
|
78
84
|
|
|
79
85
|
### Parameters of `rdfc:IngestConfig`:
|
|
80
|
-
- `rdfc:
|
|
81
|
-
- `rdfc:memberShape` (**string**, optional, repeatable): SHACL shape identifiers used to guide query construction when payloads are incomplete.
|
|
86
|
+
- `rdfc:memberShape` (**string**, optional): SHACL shape used to guide query construction when payloads are incomplete.
|
|
82
87
|
- `rdfc:changeSemantics` (**rdfc:ChangeSemantics**, optional): Configures mapping between change types (create/update/delete) and SPARQL operations.
|
|
83
|
-
- `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph
|
|
88
|
+
- `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph.
|
|
84
89
|
- `rdfc:transactionConfig` (**rdfc:TransactionConfig**, optional): Groups records by transaction ID for atomic updates.
|
|
85
90
|
- `rdfc:graphStoreUrl` (**string**, optional): SPARQL Graph Store endpoint URL.
|
|
86
|
-
- `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling.
|
|
91
|
+
- `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling to avoid query size limits.
|
|
87
92
|
- `rdfc:accessToken` (**string**, optional): Access token for authenticated graph stores.
|
|
88
93
|
- `rdfc:measurePerformance` (**rdfc:PerformanceConfig**, optional): Enables performance measurement of SPARQL queries.
|
|
89
94
|
|
|
@@ -117,7 +122,6 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
117
122
|
<ingester> a rdfc:SPARQLIngest;
|
|
118
123
|
rdfc:memberStream <in>;
|
|
119
124
|
rdfc:ingestConfig [
|
|
120
|
-
rdfc:memberIsGraph true;
|
|
121
125
|
rdfc:targetNamedGraph "http://example.org/targetGraph";
|
|
122
126
|
rdfc:graphStoreUrl "http://example.org/sparql"
|
|
123
127
|
];
|
|
@@ -128,8 +132,7 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
128
132
|
|
|
129
133
|
## Notes
|
|
130
134
|
|
|
131
|
-
- Delete operations can be handled differently depending on how complete the
|
|
132
|
-
-
|
|
133
|
-
-
|
|
134
|
-
- SHACL shapes (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
|
|
135
|
+
- Delete operations can be handled differently depending on how complete the input record is.
|
|
136
|
+
- Transactions can buffer multiple input records and commit them together using `rdfc:transactionConfig`.
|
|
137
|
+
- A SHACL shape (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
|
|
135
138
|
|
package/lib/SPARQLIngest.d.ts
CHANGED
|
@@ -26,8 +26,7 @@ export declare enum OperationMode {
|
|
|
26
26
|
export type IngestConfig = {
|
|
27
27
|
operationMode?: OperationMode;
|
|
28
28
|
memberBatchSize?: number;
|
|
29
|
-
|
|
30
|
-
memberShapes?: string[];
|
|
29
|
+
memberShape?: string;
|
|
31
30
|
changeSemantics?: ChangeSemantics;
|
|
32
31
|
targetNamedGraph?: string;
|
|
33
32
|
transactionConfig?: TransactionConfig;
|
|
@@ -57,7 +56,6 @@ export declare class SPARQLIngest extends Processor<SPARQLIngestArgs> {
|
|
|
57
56
|
transform(this: SPARQLIngestArgs & this): Promise<void>;
|
|
58
57
|
produce(this: SPARQLIngestArgs & this): Promise<void>;
|
|
59
58
|
verifyTransaction(stores: RdfStore[], transactionIdPath: string, transactionId: Term): void;
|
|
60
|
-
getNamedGraphIfAny(memberIRI: Term, memberIsGraph: boolean | undefined, targetNamedGraph?: string): string | undefined;
|
|
61
59
|
createTransactionQueries(transactionMembers: TransactionMember[], config: IngestConfig): string;
|
|
62
60
|
}
|
|
63
61
|
export {};
|
package/lib/SPARQLIngest.js
CHANGED
|
@@ -38,7 +38,15 @@ export class SPARQLIngest extends Processor {
|
|
|
38
38
|
const quads = new Parser().parse(rawQuads);
|
|
39
39
|
this.logger.verbose(`Parsed ${quads.length} quads from received member data`);
|
|
40
40
|
const store = RdfStore.createDefault();
|
|
41
|
-
quads.forEach(q =>
|
|
41
|
+
quads.forEach(q => {
|
|
42
|
+
if (q.graph.equals(df.defaultGraph()) && this.config.targetNamedGraph) {
|
|
43
|
+
store.addQuad(df.quad(q.subject, q.predicate, q.object, df.namedNode(this.config.targetNamedGraph)));
|
|
44
|
+
}
|
|
45
|
+
else {
|
|
46
|
+
store.addQuad(q);
|
|
47
|
+
}
|
|
48
|
+
});
|
|
49
|
+
sanitizeQuads(store);
|
|
42
50
|
let query;
|
|
43
51
|
const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
|
|
44
52
|
if (memberIRI) {
|
|
@@ -92,21 +100,18 @@ export class SPARQLIngest extends Processor {
|
|
|
92
100
|
this.transactionMembers = [];
|
|
93
101
|
}
|
|
94
102
|
else {
|
|
95
|
-
const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
|
|
96
103
|
const ctv = store.getQuads(null, df.namedNode(this.config.changeSemantics.changeTypePath))[0];
|
|
97
|
-
store.removeQuad(ctv);
|
|
98
|
-
sanitizeQuads(store);
|
|
99
104
|
if (ctv.object.value === this.config.changeSemantics.createValue) {
|
|
100
105
|
this.logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
|
|
101
|
-
query = CREATE(store, this.config.forVirtuoso
|
|
106
|
+
query = CREATE(store, this.config.forVirtuoso);
|
|
102
107
|
}
|
|
103
108
|
else if (ctv.object.value === this.config.changeSemantics.updateValue) {
|
|
104
109
|
this.logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
105
|
-
query = UPDATE(store, this.config.forVirtuoso
|
|
110
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
106
111
|
}
|
|
107
112
|
else if (ctv.object.value === this.config.changeSemantics.deleteValue) {
|
|
108
113
|
this.logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
109
|
-
query =
|
|
114
|
+
query = DELETE(store, memberIRI.value, this.config.memberShape);
|
|
110
115
|
}
|
|
111
116
|
else {
|
|
112
117
|
this.logger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
|
|
@@ -117,42 +122,40 @@ export class SPARQLIngest extends Processor {
|
|
|
117
122
|
else {
|
|
118
123
|
if (this.transactionMembers.length > 0) {
|
|
119
124
|
this.transactionMembers.forEach(ts => {
|
|
120
|
-
ts.store.getQuads(
|
|
125
|
+
ts.store.getQuads().forEach(q => store.addQuad(q));
|
|
121
126
|
});
|
|
122
127
|
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
|
|
123
|
-
query = UPDATE(store, this.config.forVirtuoso
|
|
128
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
124
129
|
}
|
|
125
130
|
else {
|
|
126
131
|
if (this.config.operationMode === OperationMode.REPLICATION) {
|
|
127
|
-
this.memberBatch.push(...store.getQuads(
|
|
132
|
+
this.memberBatch.push(...store.getQuads());
|
|
128
133
|
this.batchCount++;
|
|
129
134
|
if (this.batchCount < this.config.memberBatchSize) {
|
|
130
135
|
continue;
|
|
131
136
|
}
|
|
132
137
|
}
|
|
133
138
|
else {
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
query = UPDATE(store, this.config.forVirtuoso, ng);
|
|
139
|
+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for member ${memberIRI.value}`);
|
|
140
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
137
141
|
}
|
|
138
142
|
}
|
|
139
143
|
}
|
|
140
144
|
}
|
|
141
145
|
else {
|
|
142
146
|
if (this.config.operationMode === OperationMode.REPLICATION) {
|
|
143
|
-
this.memberBatch.push(...store.getQuads(
|
|
147
|
+
this.memberBatch.push(...store.getQuads());
|
|
144
148
|
this.batchCount++;
|
|
145
149
|
if (this.batchCount < this.config.memberBatchSize) {
|
|
146
150
|
continue;
|
|
147
151
|
}
|
|
148
152
|
}
|
|
149
153
|
else {
|
|
150
|
-
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL
|
|
151
|
-
query = UPDATE(store, this.config.forVirtuoso
|
|
154
|
+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for received quads (${store.size})`);
|
|
155
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
152
156
|
}
|
|
153
157
|
}
|
|
154
158
|
if (query && query.length > 0) {
|
|
155
|
-
this.logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
|
|
156
159
|
if (this.config.graphStoreUrl) {
|
|
157
160
|
try {
|
|
158
161
|
const t0 = Date.now();
|
|
@@ -176,7 +179,14 @@ export class SPARQLIngest extends Processor {
|
|
|
176
179
|
}
|
|
177
180
|
}
|
|
178
181
|
if (this.sparqlWriter) {
|
|
179
|
-
|
|
182
|
+
if (this.config.forVirtuoso) {
|
|
183
|
+
for (const q of query) {
|
|
184
|
+
await this.sparqlWriter.string(q);
|
|
185
|
+
}
|
|
186
|
+
}
|
|
187
|
+
else {
|
|
188
|
+
await this.sparqlWriter.string(query.join("\n"));
|
|
189
|
+
}
|
|
180
190
|
}
|
|
181
191
|
}
|
|
182
192
|
else {
|
|
@@ -256,16 +266,6 @@ export class SPARQLIngest extends Processor {
|
|
|
256
266
|
}
|
|
257
267
|
}
|
|
258
268
|
}
|
|
259
|
-
getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
|
|
260
|
-
let ng;
|
|
261
|
-
if (memberIsGraph) {
|
|
262
|
-
ng = memberIRI.value;
|
|
263
|
-
}
|
|
264
|
-
else if (targetNamedGraph) {
|
|
265
|
-
ng = targetNamedGraph;
|
|
266
|
-
}
|
|
267
|
-
return ng;
|
|
268
|
-
}
|
|
269
269
|
createTransactionQueries(transactionMembers, config) {
|
|
270
270
|
this.createTransactionQueriesLogger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
|
|
271
271
|
+ ` members of transaction ${transactionMembers[0].transactionId}`);
|
|
@@ -276,15 +276,14 @@ export class SPARQLIngest extends Processor {
|
|
|
276
276
|
const transactionQueryBuilder = [];
|
|
277
277
|
for (const tsm of transactionMembers) {
|
|
278
278
|
const ctv = tsm.store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
|
|
279
|
-
tsm.store.removeQuad(ctv);
|
|
280
279
|
if (ctv.object.value === config.changeSemantics.createValue) {
|
|
281
|
-
tsm.store.getQuads(
|
|
280
|
+
tsm.store.getQuads().forEach(q => createStore.addQuad(q));
|
|
282
281
|
}
|
|
283
282
|
else if (ctv.object.value === config.changeSemantics.updateValue) {
|
|
284
|
-
tsm.store.getQuads(
|
|
283
|
+
tsm.store.getQuads().forEach(q => updateStore.addQuad(q));
|
|
285
284
|
}
|
|
286
285
|
else if (ctv.object.value === config.changeSemantics.deleteValue) {
|
|
287
|
-
tsm.store.getQuads(
|
|
286
|
+
tsm.store.getQuads().forEach(q => deleteStore.addQuad(q));
|
|
288
287
|
deleteMembers.push(tsm.memberId);
|
|
289
288
|
}
|
|
290
289
|
else {
|
|
@@ -293,14 +292,16 @@ export class SPARQLIngest extends Processor {
|
|
|
293
292
|
}
|
|
294
293
|
}
|
|
295
294
|
if (createStore.size > 0) {
|
|
296
|
-
transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso
|
|
295
|
+
transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso).join("\n"));
|
|
297
296
|
}
|
|
298
297
|
if (updateStore.size > 0) {
|
|
299
|
-
transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso
|
|
298
|
+
transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso).join("\n"));
|
|
300
299
|
}
|
|
301
300
|
if (deleteStore.size > 0) {
|
|
302
|
-
|
|
301
|
+
deleteMembers.forEach(dm => {
|
|
302
|
+
transactionQueryBuilder.push(DELETE(deleteStore, dm, config.memberShape).join("\n"));
|
|
303
|
+
});
|
|
303
304
|
}
|
|
304
|
-
return transactionQueryBuilder.join("
|
|
305
|
+
return transactionQueryBuilder.join("\n");
|
|
305
306
|
}
|
|
306
307
|
}
|
package/lib/SPARQLQueries.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { RdfStore } from "rdf-stores";
|
|
2
|
-
export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean
|
|
3
|
-
export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean
|
|
4
|
-
export declare const DELETE: (store: RdfStore,
|
|
2
|
+
export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
|
|
3
|
+
export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
|
|
4
|
+
export declare const DELETE: (store: RdfStore, memberIRI: string, memberShape?: string) => string[];
|
package/lib/SPARQLQueries.js
CHANGED
|
@@ -2,73 +2,81 @@ import { RDF, SHACL } from "@treecg/types";
|
|
|
2
2
|
import { Writer as N3Writer, Parser } from "n3";
|
|
3
3
|
import { RdfStore } from "rdf-stores";
|
|
4
4
|
import { DataFactory } from "rdf-data-factory";
|
|
5
|
-
import { getObjects, getSubjects,
|
|
5
|
+
import { getObjects, getSubjects, splitStoreOnSize, splitStorePerNamedGraph } from "./Utils.js";
|
|
6
6
|
const df = new DataFactory();
|
|
7
|
-
export const CREATE = (store, forVirtuoso
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
7
|
+
export const CREATE = (store, forVirtuoso) => {
|
|
8
|
+
const queries = [];
|
|
9
|
+
const storesPerGraph = splitStorePerNamedGraph(store);
|
|
10
|
+
for (const { graph, store } of storesPerGraph) {
|
|
11
|
+
const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
|
|
12
|
+
subStores.forEach((s, i) => {
|
|
13
|
+
queries.push(`
|
|
14
|
+
INSERT DATA {
|
|
15
|
+
${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
|
|
16
|
+
${new N3Writer().quadsToString(s.getQuads().map(q => {
|
|
17
|
+
return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
|
|
18
|
+
}))}
|
|
19
|
+
${graph.equals(df.defaultGraph()) ? "" : `}`}
|
|
20
|
+
};
|
|
21
|
+
`);
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
return queries;
|
|
19
25
|
};
|
|
20
|
-
export const UPDATE = (store, forVirtuoso
|
|
21
|
-
const
|
|
22
|
-
const
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
|
|
26
|
+
export const UPDATE = (store, forVirtuoso) => {
|
|
27
|
+
const queries = [];
|
|
28
|
+
const storesPerGraph = splitStorePerNamedGraph(store);
|
|
29
|
+
for (const { graph, store } of storesPerGraph) {
|
|
30
|
+
const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
|
|
31
|
+
const formattedQuery = formatQuery(store);
|
|
32
|
+
const deleteInsertQuery = [`
|
|
33
|
+
${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
|
|
26
34
|
DELETE {
|
|
27
35
|
${formattedQuery[0]}
|
|
28
36
|
}
|
|
29
37
|
WHERE {
|
|
30
38
|
${formattedQuery[0]}
|
|
31
39
|
};
|
|
32
|
-
`
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
`];
|
|
41
|
+
subStores.forEach((s, i) => {
|
|
42
|
+
deleteInsertQuery.push(`
|
|
43
|
+
INSERT DATA {
|
|
44
|
+
${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
|
|
45
|
+
${new N3Writer().quadsToString(s.getQuads().map(q => {
|
|
46
|
+
return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
|
|
47
|
+
}))}
|
|
48
|
+
${graph.equals(df.defaultGraph()) ? "" : `}`}
|
|
49
|
+
};
|
|
50
|
+
`);
|
|
51
|
+
});
|
|
52
|
+
queries.push(...deleteInsertQuery);
|
|
53
|
+
}
|
|
44
54
|
return queries;
|
|
45
55
|
};
|
|
46
|
-
export const DELETE = (store,
|
|
47
|
-
const
|
|
48
|
-
const
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
56
|
+
export const DELETE = (store, memberIRI, memberShape) => {
|
|
57
|
+
const queries = [];
|
|
58
|
+
const storesPerGraph = splitStorePerNamedGraph(store);
|
|
59
|
+
for (const { graph, store } of storesPerGraph) {
|
|
60
|
+
const formatted = formatQuery(store, memberIRI, memberShape);
|
|
61
|
+
const deleteBuilder = formatted.length > 1 ? formatted[1] : formatted[0];
|
|
62
|
+
const whereBuilder = formatted[0];
|
|
63
|
+
queries.push(`
|
|
64
|
+
${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
|
|
65
|
+
DELETE {
|
|
66
|
+
${deleteBuilder}
|
|
67
|
+
} WHERE {
|
|
68
|
+
${whereBuilder}
|
|
69
|
+
};
|
|
70
|
+
`);
|
|
55
71
|
}
|
|
56
|
-
return
|
|
57
|
-
${namedGraph ? `WITH <${namedGraph}>` : ""}
|
|
58
|
-
DELETE {
|
|
59
|
-
${deleteBuilder.join("\n")}
|
|
60
|
-
} WHERE {
|
|
61
|
-
${whereBuilder.join("\n")}
|
|
62
|
-
}
|
|
63
|
-
`;
|
|
72
|
+
return queries;
|
|
64
73
|
};
|
|
65
|
-
function formatQuery(memberStore, memberIRI,
|
|
74
|
+
function formatQuery(memberStore, memberIRI, memberShape, indexStart = 0) {
|
|
66
75
|
const subjectSet = new Set();
|
|
67
76
|
const blankNodeMap = new Map();
|
|
68
77
|
const queryBuilder = [];
|
|
69
|
-
const formattedQueries = [];
|
|
70
78
|
let i = indexStart;
|
|
71
|
-
if (!
|
|
79
|
+
if (!memberShape) {
|
|
72
80
|
for (const quad of memberStore.getQuads()) {
|
|
73
81
|
if (!subjectSet.has(quad.subject.value)) {
|
|
74
82
|
subjectSet.add(quad.subject.value);
|
|
@@ -82,7 +90,8 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
|
|
|
82
90
|
if (quad.object.termType === "BlankNode") {
|
|
83
91
|
blankNodeMap.set(quad.object.value, `?bn_ref_${i}`);
|
|
84
92
|
}
|
|
85
|
-
queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal"
|
|
93
|
+
queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal"
|
|
94
|
+
? `"${quad.object.value}"^^<${quad.object.datatype.value}>`
|
|
86
95
|
: quad.object.termType === "BlankNode" ? `${blankNodeMap.get(quad.object.value)} `
|
|
87
96
|
: `<${quad.object.value}>`}.`);
|
|
88
97
|
queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} ?p_${i} ?o_${i}.`);
|
|
@@ -91,51 +100,28 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
|
|
|
91
100
|
i++;
|
|
92
101
|
}
|
|
93
102
|
}
|
|
94
|
-
|
|
103
|
+
return [queryBuilder.join("\n")];
|
|
95
104
|
}
|
|
96
105
|
else {
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
const shapeStore = RdfStore.createDefault();
|
|
100
|
-
new Parser().parse(msh).forEach(quad => shapeStore.addQuad(quad));
|
|
101
|
-
shapeIndex.set(extractMainTargetClass(shapeStore).value, shapeStore);
|
|
102
|
-
});
|
|
106
|
+
const shapeStore = RdfStore.createDefault();
|
|
107
|
+
new Parser().parse(memberShape).forEach(quad => shapeStore.addQuad(quad));
|
|
103
108
|
queryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
}
|
|
115
|
-
formattedQueries.push(queryBuilder.join("\n"));
|
|
116
|
-
}
|
|
117
|
-
else {
|
|
118
|
-
const deleteQueryBuilder = [];
|
|
119
|
-
deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
|
|
109
|
+
const deleteQueryBuilder = [];
|
|
110
|
+
deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
|
|
111
|
+
i++;
|
|
112
|
+
const propShapes = getObjects(shapeStore, null, SHACL.terms.property, null);
|
|
113
|
+
queryBuilder.push(" OPTIONAL { ");
|
|
114
|
+
for (const propSh of propShapes) {
|
|
115
|
+
const pred = getObjects(shapeStore, propSh, SHACL.terms.path, null)[0];
|
|
116
|
+
queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
117
|
+
deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
118
|
+
queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
119
|
+
deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
120
120
|
i++;
|
|
121
|
-
shapeIndex.forEach(mshStore => {
|
|
122
|
-
const propShapes = getObjects(mshStore, null, SHACL.terms.property, null);
|
|
123
|
-
queryBuilder.push(" OPTIONAL { ");
|
|
124
|
-
for (const propSh of propShapes) {
|
|
125
|
-
const pred = getObjects(mshStore, propSh, SHACL.terms.path, null)[0];
|
|
126
|
-
queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
127
|
-
deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
128
|
-
queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
129
|
-
deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
130
|
-
i++;
|
|
131
|
-
}
|
|
132
|
-
queryBuilder.push(" }");
|
|
133
|
-
});
|
|
134
|
-
formattedQueries.push(queryBuilder.join("\n"));
|
|
135
|
-
formattedQueries.push(deleteQueryBuilder.join("\n"));
|
|
136
121
|
}
|
|
122
|
+
queryBuilder.push(" }");
|
|
123
|
+
return [queryBuilder.join("\n"), deleteQueryBuilder.join("\n")];
|
|
137
124
|
}
|
|
138
|
-
return formattedQueries;
|
|
139
125
|
}
|
|
140
126
|
function extractMainTargetClass(store) {
|
|
141
127
|
const nodeShapes = getSubjects(store, RDF.terms.type, SHACL.terms.NodeShape, null);
|
package/lib/Utils.d.ts
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
import { RdfStore } from "rdf-stores";
|
|
2
|
-
import type { Term, Quad_Subject, Quad_Object, Quad } from "@rdfjs/types";
|
|
2
|
+
import type { Term, Quad_Subject, Quad_Object, Quad, Quad_Graph } from "@rdfjs/types";
|
|
3
3
|
import type { IngestConfig } from "./SPARQLIngest.js";
|
|
4
4
|
import { Logger } from "winston";
|
|
5
5
|
export declare function getSubjects(store: RdfStore, predicate: Term | null, object: Term | null, graph?: Term | null): Quad_Subject[];
|
|
6
6
|
export declare function getObjects(store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null): Quad_Object[];
|
|
7
|
-
export declare function
|
|
7
|
+
export declare function splitStorePerNamedGraph(store: RdfStore): {
|
|
8
|
+
graph: Quad_Graph;
|
|
9
|
+
store: RdfStore;
|
|
10
|
+
}[];
|
|
11
|
+
export declare function splitStoreOnSize(store: RdfStore, threshold: number): RdfStore[];
|
|
8
12
|
export declare function sanitizeQuads(store: RdfStore): void;
|
|
9
13
|
export declare function doSPARQLRequest(query: string[] | Quad[], config: IngestConfig, logger: Logger): Promise<void>;
|
package/lib/Utils.js
CHANGED
|
@@ -14,7 +14,20 @@ export function getObjects(store, subject, predicate, graph) {
|
|
|
14
14
|
return quad.object;
|
|
15
15
|
});
|
|
16
16
|
}
|
|
17
|
-
export function
|
|
17
|
+
export function splitStorePerNamedGraph(store) {
|
|
18
|
+
const stores = [];
|
|
19
|
+
const namedGraphs = new Set();
|
|
20
|
+
store.getQuads(null, null, null, null)
|
|
21
|
+
.forEach(q => namedGraphs.add(q.graph));
|
|
22
|
+
namedGraphs.forEach(ng => {
|
|
23
|
+
const subStore = RdfStore.createDefault();
|
|
24
|
+
const quads = store.getQuads(null, null, null, ng);
|
|
25
|
+
quads.forEach(q => subStore.addQuad(q));
|
|
26
|
+
stores.push({ graph: ng, store: subStore });
|
|
27
|
+
});
|
|
28
|
+
return stores;
|
|
29
|
+
}
|
|
30
|
+
export function splitStoreOnSize(store, threshold) {
|
|
18
31
|
const stores = [];
|
|
19
32
|
if (store.size < threshold) {
|
|
20
33
|
stores.push(store);
|
|
@@ -74,7 +87,8 @@ export async function doSPARQLRequest(query, config, logger) {
|
|
|
74
87
|
if (config.accessToken) {
|
|
75
88
|
url.searchParams.append("access-token", config.accessToken);
|
|
76
89
|
}
|
|
77
|
-
logger.
|
|
90
|
+
logger.verbose(`[doSPARQLRequest] Executing SPARQL Graph Store request (POST) with ${quads.length} quads.`);
|
|
91
|
+
logger.debug(`[doSPARQLRequest] POSTing the following RDF quads:\n${serialized}`);
|
|
78
92
|
const res = await fetch(url.toString(), {
|
|
79
93
|
method: "POST",
|
|
80
94
|
headers: {
|
|
@@ -92,7 +106,7 @@ export async function doSPARQLRequest(query, config, logger) {
|
|
|
92
106
|
return;
|
|
93
107
|
}
|
|
94
108
|
let queries = [];
|
|
95
|
-
const jointQuery = query.join("
|
|
109
|
+
const jointQuery = query.join(";\n");
|
|
96
110
|
if (config.forVirtuoso && Buffer.byteLength(jointQuery, 'utf8') > 1e6) {
|
|
97
111
|
queries = query;
|
|
98
112
|
}
|
|
@@ -100,7 +114,7 @@ export async function doSPARQLRequest(query, config, logger) {
|
|
|
100
114
|
queries.push(jointQuery);
|
|
101
115
|
}
|
|
102
116
|
for (const q of queries) {
|
|
103
|
-
logger.debug(`Executing SPARQL query: \n${q}`);
|
|
117
|
+
logger.debug(`[doSPARQLRequest] Executing SPARQL query: \n${q}`);
|
|
104
118
|
const res = await fetch(config.graphStoreUrl, {
|
|
105
119
|
method: "POST",
|
|
106
120
|
headers: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@rdfc/sparql-ingest-processor-ts",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.1.1",
|
|
4
4
|
"description": "SPARQL Update function to be within RDF-Connect pipelines",
|
|
5
5
|
"author": "Julián Rojas",
|
|
6
6
|
"contributors": [
|
|
@@ -28,10 +28,10 @@
|
|
|
28
28
|
},
|
|
29
29
|
"dependencies": {
|
|
30
30
|
"@treecg/types": "^0.4.6",
|
|
31
|
-
"n3": "^
|
|
31
|
+
"n3": "^1.26.0",
|
|
32
32
|
"rdf-data-factory": "^2.0.2",
|
|
33
33
|
"rdf-stores": "^2.1.1",
|
|
34
|
-
"undici": "^7.
|
|
34
|
+
"undici": "^7.21.0",
|
|
35
35
|
"winston": "^3.19.0"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"ts-patch": "^3.3.0",
|
|
51
51
|
"tsc-alias": "^1.8.16",
|
|
52
52
|
"typescript": "^5.9.3",
|
|
53
|
-
"vite-tsconfig-paths": "^6.0
|
|
53
|
+
"vite-tsconfig-paths": "^6.1.0",
|
|
54
54
|
"vitest": "^4.0.18"
|
|
55
55
|
}
|
|
56
56
|
}
|
package/processors.ttl
CHANGED
|
@@ -43,16 +43,11 @@ rdfc:SPARQLIngest rdfc:jsImplementationOf rdfc:Processor;
|
|
|
43
43
|
sh:datatype xsd:integer;
|
|
44
44
|
sh:name "memberBatchSize";
|
|
45
45
|
sh:maxCount 1;
|
|
46
|
-
], [
|
|
47
|
-
sh:path rdfc:memberIsGraph;
|
|
48
|
-
sh:datatype xsd:boolean;
|
|
49
|
-
sh:name "memberIsGraph";
|
|
50
|
-
sh:maxCount 1;
|
|
51
46
|
], [
|
|
52
47
|
sh:path rdfc:memberShape;
|
|
53
48
|
sh:datatype xsd:string;
|
|
54
|
-
sh:name "
|
|
55
|
-
sh:
|
|
49
|
+
sh:name "memberShape";
|
|
50
|
+
sh:maxCount 1;
|
|
56
51
|
], [
|
|
57
52
|
sh:path rdfc:changeSemantics;
|
|
58
53
|
sh:class rdfc:ChangeSemantics;
|
package/lib/LogUtil.d.ts
DELETED
package/lib/LogUtil.js
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import winston, { format } from "winston";
|
|
2
|
-
const PROCESSOR_NAME = "sparql-ingest";
|
|
3
|
-
const consoleTransport = new winston.transports.Console({
|
|
4
|
-
stderrLevels: [
|
|
5
|
-
"error",
|
|
6
|
-
"warn",
|
|
7
|
-
"info",
|
|
8
|
-
"http",
|
|
9
|
-
"verbose",
|
|
10
|
-
"debug",
|
|
11
|
-
"silly"
|
|
12
|
-
]
|
|
13
|
-
});
|
|
14
|
-
if (typeof process !== "undefined") {
|
|
15
|
-
consoleTransport.level =
|
|
16
|
-
process.env.LOG_LEVEL ||
|
|
17
|
-
(process.env.DEBUG?.includes(PROCESSOR_NAME) ||
|
|
18
|
-
process.env.DEBUG === "*"
|
|
19
|
-
? "debug"
|
|
20
|
-
: "info");
|
|
21
|
-
}
|
|
22
|
-
const classLoggers = new WeakMap();
|
|
23
|
-
const stringLoggers = new Map();
|
|
24
|
-
export function getLoggerFor(loggable) {
|
|
25
|
-
let logger;
|
|
26
|
-
if (typeof loggable === "string") {
|
|
27
|
-
if (stringLoggers.has(loggable)) {
|
|
28
|
-
logger = stringLoggers.get(loggable);
|
|
29
|
-
}
|
|
30
|
-
else {
|
|
31
|
-
logger = createLogger(loggable);
|
|
32
|
-
stringLoggers.set(loggable, logger);
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
else {
|
|
36
|
-
const { constructor } = loggable;
|
|
37
|
-
if (classLoggers.has(constructor)) {
|
|
38
|
-
logger = classLoggers.get(constructor);
|
|
39
|
-
}
|
|
40
|
-
else {
|
|
41
|
-
logger = createLogger(constructor.name);
|
|
42
|
-
classLoggers.set(constructor, logger);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
return logger;
|
|
46
|
-
}
|
|
47
|
-
function createLogger(label) {
|
|
48
|
-
return winston.createLogger({
|
|
49
|
-
format: format.combine(format.label({ label }), format.colorize(), format.timestamp(), format.metadata({
|
|
50
|
-
fillExcept: ["level", "timestamp", "label", "message"],
|
|
51
|
-
}), format.printf(({ level: levelInner, message, label: labelInner, timestamp, }) => `${timestamp} {${PROCESSOR_NAME}} [${labelInner}] ${levelInner}: ${message}`)),
|
|
52
|
-
transports: [consoleTransport],
|
|
53
|
-
});
|
|
54
|
-
}
|