@rdfc/sparql-ingest-processor-ts 2.0.3 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +18 -15
- package/lib/SPARQLIngest.d.ts +1 -3
- package/lib/SPARQLIngest.js +41 -36
- package/lib/SPARQLQueries.d.ts +3 -3
- package/lib/SPARQLQueries.js +77 -91
- package/lib/Utils.d.ts +6 -2
- package/lib/Utils.js +15 -3
- package/package.json +3 -3
- package/processors.ttl +2 -7
- package/lib/LogUtil.d.ts +0 -9
- package/lib/LogUtil.js +0 -54
package/README.md
CHANGED
|
@@ -2,10 +2,10 @@
|
|
|
2
2
|
|
|
3
3
|
[](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml) [](https://npmjs.com/package/@rdfc/sparql-ingest-processor-ts)
|
|
4
4
|
|
|
5
|
-
TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) into a SPARQL endpoint.
|
|
5
|
+
TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) or in general, a stream of RDF quads into a SPARQL endpoint.
|
|
6
6
|
|
|
7
|
-
This processor takes a stream of RDF records, transforms them into [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
|
|
8
|
-
It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content.
|
|
7
|
+
This processor takes a stream of RDF records, transforms them into corresponding [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
|
|
8
|
+
It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content. It also supports direct quad ingestion via the SPARQL Graph Store Protocol.
|
|
9
9
|
|
|
10
10
|
---
|
|
11
11
|
|
|
@@ -46,8 +46,7 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
46
46
|
<ingester> a rdfc:SPARQLIngest;
|
|
47
47
|
rdfc:memberStream <in>;
|
|
48
48
|
rdfc:ingestConfig [
|
|
49
|
-
rdfc:
|
|
50
|
-
rdfc:memberShape "http://ex.org/Shape1", "http://ex.org/Shape2";
|
|
49
|
+
rdfc:memberShape "http://ex.org/Shape";
|
|
51
50
|
rdfc:changeSemantics [
|
|
52
51
|
rdfc:changeTypePath "http://ex.org/changeType";
|
|
53
52
|
rdfc:createValue "http://ex.org/Create";
|
|
@@ -60,7 +59,14 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
60
59
|
rdfc:transactionEndPath "http://ex.org/transactionEnd"
|
|
61
60
|
];
|
|
62
61
|
rdfc:graphStoreUrl "http://example.org/sparql";
|
|
63
|
-
rdfc:forVirtuoso false
|
|
62
|
+
rdfc:forVirtuoso false;
|
|
63
|
+
rdfc:accessToken "myAccessToken";
|
|
64
|
+
rdfc:measurePerformance [
|
|
65
|
+
rdfc:name "myPerformanceMeasurement";
|
|
66
|
+
rdfc:outputPath "/path/to/output.json";
|
|
67
|
+
rdfc:failureIsFatal true;
|
|
68
|
+
rdfc:queryTimeout 30000
|
|
69
|
+
]
|
|
64
70
|
];
|
|
65
71
|
rdfc:sparqlWriter <out>.
|
|
66
72
|
```
|
|
@@ -77,13 +83,12 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
77
83
|
---
|
|
78
84
|
|
|
79
85
|
### Parameters of `rdfc:IngestConfig`:
|
|
80
|
-
- `rdfc:
|
|
81
|
-
- `rdfc:memberShape` (**string**, optional, repeatable): SHACL shape identifiers used to guide query construction when payloads are incomplete.
|
|
86
|
+
- `rdfc:memberShape` (**string**, optional): SHACL shape used to guide query construction when payloads are incomplete.
|
|
82
87
|
- `rdfc:changeSemantics` (**rdfc:ChangeSemantics**, optional): Configures mapping between change types (create/update/delete) and SPARQL operations.
|
|
83
|
-
- `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph
|
|
88
|
+
- `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph.
|
|
84
89
|
- `rdfc:transactionConfig` (**rdfc:TransactionConfig**, optional): Groups records by transaction ID for atomic updates.
|
|
85
90
|
- `rdfc:graphStoreUrl` (**string**, optional): SPARQL Graph Store endpoint URL.
|
|
86
|
-
- `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling.
|
|
91
|
+
- `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling to avoid query size limits.
|
|
87
92
|
- `rdfc:accessToken` (**string**, optional): Access token for authenticated graph stores.
|
|
88
93
|
- `rdfc:measurePerformance` (**rdfc:PerformanceConfig**, optional): Enables performance measurement of SPARQL queries.
|
|
89
94
|
|
|
@@ -117,7 +122,6 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
117
122
|
<ingester> a rdfc:SPARQLIngest;
|
|
118
123
|
rdfc:memberStream <in>;
|
|
119
124
|
rdfc:ingestConfig [
|
|
120
|
-
rdfc:memberIsGraph true;
|
|
121
125
|
rdfc:targetNamedGraph "http://example.org/targetGraph";
|
|
122
126
|
rdfc:graphStoreUrl "http://example.org/sparql"
|
|
123
127
|
];
|
|
@@ -128,8 +132,7 @@ npm install @rdfc/sparql-ingest-processor-ts
|
|
|
128
132
|
|
|
129
133
|
## Notes
|
|
130
134
|
|
|
131
|
-
- Delete operations can be handled differently depending on how complete the
|
|
132
|
-
-
|
|
133
|
-
-
|
|
134
|
-
- SHACL shapes (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
|
|
135
|
+
- Delete operations can be handled differently depending on how complete the input record is.
|
|
136
|
+
- Transactions can buffer multiple input records and commit them together using `rdfc:transactionConfig`.
|
|
137
|
+
- A SHACL shape (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
|
|
135
138
|
|
package/lib/SPARQLIngest.d.ts
CHANGED
|
@@ -26,8 +26,7 @@ export declare enum OperationMode {
|
|
|
26
26
|
export type IngestConfig = {
|
|
27
27
|
operationMode?: OperationMode;
|
|
28
28
|
memberBatchSize?: number;
|
|
29
|
-
|
|
30
|
-
memberShapes?: string[];
|
|
29
|
+
memberShape?: string;
|
|
31
30
|
changeSemantics?: ChangeSemantics;
|
|
32
31
|
targetNamedGraph?: string;
|
|
33
32
|
transactionConfig?: TransactionConfig;
|
|
@@ -57,7 +56,6 @@ export declare class SPARQLIngest extends Processor<SPARQLIngestArgs> {
|
|
|
57
56
|
transform(this: SPARQLIngestArgs & this): Promise<void>;
|
|
58
57
|
produce(this: SPARQLIngestArgs & this): Promise<void>;
|
|
59
58
|
verifyTransaction(stores: RdfStore[], transactionIdPath: string, transactionId: Term): void;
|
|
60
|
-
getNamedGraphIfAny(memberIRI: Term, memberIsGraph: boolean | undefined, targetNamedGraph?: string): string | undefined;
|
|
61
59
|
createTransactionQueries(transactionMembers: TransactionMember[], config: IngestConfig): string;
|
|
62
60
|
}
|
|
63
61
|
export {};
|
package/lib/SPARQLIngest.js
CHANGED
|
@@ -34,11 +34,17 @@ export class SPARQLIngest extends Processor {
|
|
|
34
34
|
}
|
|
35
35
|
async transform() {
|
|
36
36
|
for await (const rawQuads of this.memberStream.strings()) {
|
|
37
|
-
this.logger.debug(`Raw member data received: \n${rawQuads}`);
|
|
38
37
|
const quads = new Parser().parse(rawQuads);
|
|
39
38
|
this.logger.verbose(`Parsed ${quads.length} quads from received member data`);
|
|
40
39
|
const store = RdfStore.createDefault();
|
|
41
|
-
quads.forEach(q =>
|
|
40
|
+
quads.forEach(q => {
|
|
41
|
+
if (q.graph.equals(df.defaultGraph()) && this.config.targetNamedGraph) {
|
|
42
|
+
store.addQuad(df.quad(q.subject, q.predicate, q.object, df.namedNode(this.config.targetNamedGraph)));
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
store.addQuad(q);
|
|
46
|
+
}
|
|
47
|
+
});
|
|
42
48
|
let query;
|
|
43
49
|
const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
|
|
44
50
|
if (memberIRI) {
|
|
@@ -92,21 +98,19 @@ export class SPARQLIngest extends Processor {
|
|
|
92
98
|
this.transactionMembers = [];
|
|
93
99
|
}
|
|
94
100
|
else {
|
|
95
|
-
const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
|
|
96
101
|
const ctv = store.getQuads(null, df.namedNode(this.config.changeSemantics.changeTypePath))[0];
|
|
97
|
-
store.removeQuad(ctv);
|
|
98
102
|
sanitizeQuads(store);
|
|
99
103
|
if (ctv.object.value === this.config.changeSemantics.createValue) {
|
|
100
104
|
this.logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
|
|
101
|
-
query = CREATE(store, this.config.forVirtuoso
|
|
105
|
+
query = CREATE(store, this.config.forVirtuoso);
|
|
102
106
|
}
|
|
103
107
|
else if (ctv.object.value === this.config.changeSemantics.updateValue) {
|
|
104
108
|
this.logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
105
|
-
query = UPDATE(store, this.config.forVirtuoso
|
|
109
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
106
110
|
}
|
|
107
111
|
else if (ctv.object.value === this.config.changeSemantics.deleteValue) {
|
|
108
112
|
this.logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
109
|
-
query =
|
|
113
|
+
query = DELETE(store, memberIRI.value, this.config.memberShape);
|
|
110
114
|
}
|
|
111
115
|
else {
|
|
112
116
|
this.logger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
|
|
@@ -117,42 +121,45 @@ export class SPARQLIngest extends Processor {
|
|
|
117
121
|
else {
|
|
118
122
|
if (this.transactionMembers.length > 0) {
|
|
119
123
|
this.transactionMembers.forEach(ts => {
|
|
120
|
-
ts.store.getQuads(
|
|
124
|
+
ts.store.getQuads().forEach(q => store.addQuad(q));
|
|
121
125
|
});
|
|
122
126
|
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
|
|
123
|
-
query = UPDATE(store, this.config.forVirtuoso
|
|
127
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
124
128
|
}
|
|
125
129
|
else {
|
|
126
130
|
if (this.config.operationMode === OperationMode.REPLICATION) {
|
|
127
|
-
this.memberBatch.push(...store.getQuads(
|
|
131
|
+
this.memberBatch.push(...store.getQuads().map(q => {
|
|
132
|
+
if (q.graph.equals(df.defaultGraph()) && this.config.targetNamedGraph) {
|
|
133
|
+
q.graph = df.namedNode(this.config.targetNamedGraph);
|
|
134
|
+
}
|
|
135
|
+
return q;
|
|
136
|
+
}));
|
|
128
137
|
this.batchCount++;
|
|
129
138
|
if (this.batchCount < this.config.memberBatchSize) {
|
|
130
139
|
continue;
|
|
131
140
|
}
|
|
132
141
|
}
|
|
133
142
|
else {
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
query = UPDATE(store, this.config.forVirtuoso, ng);
|
|
143
|
+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for member ${memberIRI.value}`);
|
|
144
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
137
145
|
}
|
|
138
146
|
}
|
|
139
147
|
}
|
|
140
148
|
}
|
|
141
149
|
else {
|
|
142
150
|
if (this.config.operationMode === OperationMode.REPLICATION) {
|
|
143
|
-
this.memberBatch.push(...store.getQuads(
|
|
151
|
+
this.memberBatch.push(...store.getQuads());
|
|
144
152
|
this.batchCount++;
|
|
145
153
|
if (this.batchCount < this.config.memberBatchSize) {
|
|
146
154
|
continue;
|
|
147
155
|
}
|
|
148
156
|
}
|
|
149
157
|
else {
|
|
150
|
-
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL
|
|
151
|
-
query = UPDATE(store, this.config.forVirtuoso
|
|
158
|
+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for received quads (${store.size})`);
|
|
159
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
152
160
|
}
|
|
153
161
|
}
|
|
154
162
|
if (query && query.length > 0) {
|
|
155
|
-
this.logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
|
|
156
163
|
if (this.config.graphStoreUrl) {
|
|
157
164
|
try {
|
|
158
165
|
const t0 = Date.now();
|
|
@@ -176,7 +183,14 @@ export class SPARQLIngest extends Processor {
|
|
|
176
183
|
}
|
|
177
184
|
}
|
|
178
185
|
if (this.sparqlWriter) {
|
|
179
|
-
|
|
186
|
+
if (this.config.forVirtuoso) {
|
|
187
|
+
for (const q of query) {
|
|
188
|
+
await this.sparqlWriter.string(q);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
191
|
+
else {
|
|
192
|
+
await this.sparqlWriter.string(query.join("\n"));
|
|
193
|
+
}
|
|
180
194
|
}
|
|
181
195
|
}
|
|
182
196
|
else {
|
|
@@ -256,16 +270,6 @@ export class SPARQLIngest extends Processor {
|
|
|
256
270
|
}
|
|
257
271
|
}
|
|
258
272
|
}
|
|
259
|
-
getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
|
|
260
|
-
let ng;
|
|
261
|
-
if (memberIsGraph) {
|
|
262
|
-
ng = memberIRI.value;
|
|
263
|
-
}
|
|
264
|
-
else if (targetNamedGraph) {
|
|
265
|
-
ng = targetNamedGraph;
|
|
266
|
-
}
|
|
267
|
-
return ng;
|
|
268
|
-
}
|
|
269
273
|
createTransactionQueries(transactionMembers, config) {
|
|
270
274
|
this.createTransactionQueriesLogger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
|
|
271
275
|
+ ` members of transaction ${transactionMembers[0].transactionId}`);
|
|
@@ -276,15 +280,14 @@ export class SPARQLIngest extends Processor {
|
|
|
276
280
|
const transactionQueryBuilder = [];
|
|
277
281
|
for (const tsm of transactionMembers) {
|
|
278
282
|
const ctv = tsm.store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
|
|
279
|
-
tsm.store.removeQuad(ctv);
|
|
280
283
|
if (ctv.object.value === config.changeSemantics.createValue) {
|
|
281
|
-
tsm.store.getQuads(
|
|
284
|
+
tsm.store.getQuads().forEach(q => createStore.addQuad(q));
|
|
282
285
|
}
|
|
283
286
|
else if (ctv.object.value === config.changeSemantics.updateValue) {
|
|
284
|
-
tsm.store.getQuads(
|
|
287
|
+
tsm.store.getQuads().forEach(q => updateStore.addQuad(q));
|
|
285
288
|
}
|
|
286
289
|
else if (ctv.object.value === config.changeSemantics.deleteValue) {
|
|
287
|
-
tsm.store.getQuads(
|
|
290
|
+
tsm.store.getQuads().forEach(q => deleteStore.addQuad(q));
|
|
288
291
|
deleteMembers.push(tsm.memberId);
|
|
289
292
|
}
|
|
290
293
|
else {
|
|
@@ -293,14 +296,16 @@ export class SPARQLIngest extends Processor {
|
|
|
293
296
|
}
|
|
294
297
|
}
|
|
295
298
|
if (createStore.size > 0) {
|
|
296
|
-
transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso
|
|
299
|
+
transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso).join("\n"));
|
|
297
300
|
}
|
|
298
301
|
if (updateStore.size > 0) {
|
|
299
|
-
transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso
|
|
302
|
+
transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso).join("\n"));
|
|
300
303
|
}
|
|
301
304
|
if (deleteStore.size > 0) {
|
|
302
|
-
|
|
305
|
+
deleteMembers.forEach(dm => {
|
|
306
|
+
transactionQueryBuilder.push(DELETE(deleteStore, dm, config.memberShape).join("\n"));
|
|
307
|
+
});
|
|
303
308
|
}
|
|
304
|
-
return transactionQueryBuilder.join("
|
|
309
|
+
return transactionQueryBuilder.join("\n");
|
|
305
310
|
}
|
|
306
311
|
}
|
package/lib/SPARQLQueries.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
import { RdfStore } from "rdf-stores";
|
|
2
|
-
export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean
|
|
3
|
-
export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean
|
|
4
|
-
export declare const DELETE: (store: RdfStore,
|
|
2
|
+
export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
|
|
3
|
+
export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
|
|
4
|
+
export declare const DELETE: (store: RdfStore, memberIRI: string, memberShape?: string) => string[];
|
package/lib/SPARQLQueries.js
CHANGED
|
@@ -2,73 +2,81 @@ import { RDF, SHACL } from "@treecg/types";
|
|
|
2
2
|
import { Writer as N3Writer, Parser } from "n3";
|
|
3
3
|
import { RdfStore } from "rdf-stores";
|
|
4
4
|
import { DataFactory } from "rdf-data-factory";
|
|
5
|
-
import { getObjects, getSubjects,
|
|
5
|
+
import { getObjects, getSubjects, splitStoreOnSize, splitStorePerNamedGraph } from "./Utils.js";
|
|
6
6
|
const df = new DataFactory();
|
|
7
|
-
export const CREATE = (store, forVirtuoso
|
|
8
|
-
const
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
7
|
+
export const CREATE = (store, forVirtuoso) => {
|
|
8
|
+
const queries = [];
|
|
9
|
+
const storesPerGraph = splitStorePerNamedGraph(store);
|
|
10
|
+
for (const { graph, store } of storesPerGraph) {
|
|
11
|
+
const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
|
|
12
|
+
subStores.forEach((s, i) => {
|
|
13
|
+
queries.push(`
|
|
14
|
+
INSERT DATA {
|
|
15
|
+
${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
|
|
16
|
+
${new N3Writer().quadsToString(s.getQuads().map(q => {
|
|
17
|
+
return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
|
|
18
|
+
}))}
|
|
19
|
+
${graph.equals(df.defaultGraph()) ? "" : `}`}
|
|
20
|
+
};
|
|
21
|
+
`);
|
|
22
|
+
});
|
|
23
|
+
}
|
|
24
|
+
return queries;
|
|
19
25
|
};
|
|
20
|
-
export const UPDATE = (store, forVirtuoso
|
|
21
|
-
const
|
|
22
|
-
const
|
|
23
|
-
const
|
|
24
|
-
|
|
25
|
-
|
|
26
|
+
export const UPDATE = (store, forVirtuoso) => {
|
|
27
|
+
const queries = [];
|
|
28
|
+
const storesPerGraph = splitStorePerNamedGraph(store);
|
|
29
|
+
for (const { graph, store } of storesPerGraph) {
|
|
30
|
+
const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
|
|
31
|
+
const formattedQuery = formatQuery(store);
|
|
32
|
+
const deleteInsertQuery = [`
|
|
33
|
+
${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
|
|
26
34
|
DELETE {
|
|
27
35
|
${formattedQuery[0]}
|
|
28
36
|
}
|
|
29
37
|
WHERE {
|
|
30
38
|
${formattedQuery[0]}
|
|
31
39
|
};
|
|
32
|
-
`
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
40
|
+
`];
|
|
41
|
+
subStores.forEach((s, i) => {
|
|
42
|
+
deleteInsertQuery.push(`
|
|
43
|
+
INSERT DATA {
|
|
44
|
+
${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
|
|
45
|
+
${new N3Writer().quadsToString(s.getQuads().map(q => {
|
|
46
|
+
return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
|
|
47
|
+
}))}
|
|
48
|
+
${graph.equals(df.defaultGraph()) ? "" : `}`}
|
|
49
|
+
};
|
|
50
|
+
`);
|
|
51
|
+
});
|
|
52
|
+
queries.push(...deleteInsertQuery);
|
|
53
|
+
}
|
|
44
54
|
return queries;
|
|
45
55
|
};
|
|
46
|
-
export const DELETE = (store,
|
|
47
|
-
const
|
|
48
|
-
const
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
56
|
+
export const DELETE = (store, memberIRI, memberShape) => {
|
|
57
|
+
const queries = [];
|
|
58
|
+
const storesPerGraph = splitStorePerNamedGraph(store);
|
|
59
|
+
for (const { graph, store } of storesPerGraph) {
|
|
60
|
+
const formatted = formatQuery(store, memberIRI, memberShape);
|
|
61
|
+
const deleteBuilder = formatted.length > 1 ? formatted[1] : formatted[0];
|
|
62
|
+
const whereBuilder = formatted[0];
|
|
63
|
+
queries.push(`
|
|
64
|
+
${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
|
|
65
|
+
DELETE {
|
|
66
|
+
${deleteBuilder}
|
|
67
|
+
} WHERE {
|
|
68
|
+
${whereBuilder}
|
|
69
|
+
};
|
|
70
|
+
`);
|
|
55
71
|
}
|
|
56
|
-
return
|
|
57
|
-
${namedGraph ? `WITH <${namedGraph}>` : ""}
|
|
58
|
-
DELETE {
|
|
59
|
-
${deleteBuilder.join("\n")}
|
|
60
|
-
} WHERE {
|
|
61
|
-
${whereBuilder.join("\n")}
|
|
62
|
-
}
|
|
63
|
-
`;
|
|
72
|
+
return queries;
|
|
64
73
|
};
|
|
65
|
-
function formatQuery(memberStore, memberIRI,
|
|
74
|
+
function formatQuery(memberStore, memberIRI, memberShape, indexStart = 0) {
|
|
66
75
|
const subjectSet = new Set();
|
|
67
76
|
const blankNodeMap = new Map();
|
|
68
77
|
const queryBuilder = [];
|
|
69
|
-
const formattedQueries = [];
|
|
70
78
|
let i = indexStart;
|
|
71
|
-
if (!
|
|
79
|
+
if (!memberShape) {
|
|
72
80
|
for (const quad of memberStore.getQuads()) {
|
|
73
81
|
if (!subjectSet.has(quad.subject.value)) {
|
|
74
82
|
subjectSet.add(quad.subject.value);
|
|
@@ -82,7 +90,8 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
|
|
|
82
90
|
if (quad.object.termType === "BlankNode") {
|
|
83
91
|
blankNodeMap.set(quad.object.value, `?bn_ref_${i}`);
|
|
84
92
|
}
|
|
85
|
-
queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal"
|
|
93
|
+
queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal"
|
|
94
|
+
? `"${quad.object.value}"^^<${quad.object.datatype.value}>`
|
|
86
95
|
: quad.object.termType === "BlankNode" ? `${blankNodeMap.get(quad.object.value)} `
|
|
87
96
|
: `<${quad.object.value}>`}.`);
|
|
88
97
|
queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} ?p_${i} ?o_${i}.`);
|
|
@@ -91,51 +100,28 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
|
|
|
91
100
|
i++;
|
|
92
101
|
}
|
|
93
102
|
}
|
|
94
|
-
|
|
103
|
+
return [queryBuilder.join("\n")];
|
|
95
104
|
}
|
|
96
105
|
else {
|
|
97
|
-
const
|
|
98
|
-
|
|
99
|
-
const shapeStore = RdfStore.createDefault();
|
|
100
|
-
new Parser().parse(msh).forEach(quad => shapeStore.addQuad(quad));
|
|
101
|
-
shapeIndex.set(extractMainTargetClass(shapeStore).value, shapeStore);
|
|
102
|
-
});
|
|
106
|
+
const shapeStore = RdfStore.createDefault();
|
|
107
|
+
new Parser().parse(memberShape).forEach(quad => shapeStore.addQuad(quad));
|
|
103
108
|
queryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
|
|
104
|
-
const
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
}
|
|
115
|
-
formattedQueries.push(queryBuilder.join("\n"));
|
|
116
|
-
}
|
|
117
|
-
else {
|
|
118
|
-
const deleteQueryBuilder = [];
|
|
119
|
-
deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
|
|
109
|
+
const deleteQueryBuilder = [];
|
|
110
|
+
deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
|
|
111
|
+
i++;
|
|
112
|
+
const propShapes = getObjects(shapeStore, null, SHACL.terms.property, null);
|
|
113
|
+
queryBuilder.push(" OPTIONAL { ");
|
|
114
|
+
for (const propSh of propShapes) {
|
|
115
|
+
const pred = getObjects(shapeStore, propSh, SHACL.terms.path, null)[0];
|
|
116
|
+
queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
117
|
+
deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
118
|
+
queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
119
|
+
deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
120
120
|
i++;
|
|
121
|
-
shapeIndex.forEach(mshStore => {
|
|
122
|
-
const propShapes = getObjects(mshStore, null, SHACL.terms.property, null);
|
|
123
|
-
queryBuilder.push(" OPTIONAL { ");
|
|
124
|
-
for (const propSh of propShapes) {
|
|
125
|
-
const pred = getObjects(mshStore, propSh, SHACL.terms.path, null)[0];
|
|
126
|
-
queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
127
|
-
deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
|
|
128
|
-
queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
129
|
-
deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
|
|
130
|
-
i++;
|
|
131
|
-
}
|
|
132
|
-
queryBuilder.push(" }");
|
|
133
|
-
});
|
|
134
|
-
formattedQueries.push(queryBuilder.join("\n"));
|
|
135
|
-
formattedQueries.push(deleteQueryBuilder.join("\n"));
|
|
136
121
|
}
|
|
122
|
+
queryBuilder.push(" }");
|
|
123
|
+
return [queryBuilder.join("\n"), deleteQueryBuilder.join("\n")];
|
|
137
124
|
}
|
|
138
|
-
return formattedQueries;
|
|
139
125
|
}
|
|
140
126
|
function extractMainTargetClass(store) {
|
|
141
127
|
const nodeShapes = getSubjects(store, RDF.terms.type, SHACL.terms.NodeShape, null);
|
package/lib/Utils.d.ts
CHANGED
|
@@ -1,9 +1,13 @@
|
|
|
1
1
|
import { RdfStore } from "rdf-stores";
|
|
2
|
-
import type { Term, Quad_Subject, Quad_Object, Quad } from "@rdfjs/types";
|
|
2
|
+
import type { Term, Quad_Subject, Quad_Object, Quad, Quad_Graph } from "@rdfjs/types";
|
|
3
3
|
import type { IngestConfig } from "./SPARQLIngest.js";
|
|
4
4
|
import { Logger } from "winston";
|
|
5
5
|
export declare function getSubjects(store: RdfStore, predicate: Term | null, object: Term | null, graph?: Term | null): Quad_Subject[];
|
|
6
6
|
export declare function getObjects(store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null): Quad_Object[];
|
|
7
|
-
export declare function
|
|
7
|
+
export declare function splitStorePerNamedGraph(store: RdfStore): {
|
|
8
|
+
graph: Quad_Graph;
|
|
9
|
+
store: RdfStore;
|
|
10
|
+
}[];
|
|
11
|
+
export declare function splitStoreOnSize(store: RdfStore, threshold: number): RdfStore[];
|
|
8
12
|
export declare function sanitizeQuads(store: RdfStore): void;
|
|
9
13
|
export declare function doSPARQLRequest(query: string[] | Quad[], config: IngestConfig, logger: Logger): Promise<void>;
|
package/lib/Utils.js
CHANGED
|
@@ -14,7 +14,20 @@ export function getObjects(store, subject, predicate, graph) {
|
|
|
14
14
|
return quad.object;
|
|
15
15
|
});
|
|
16
16
|
}
|
|
17
|
-
export function
|
|
17
|
+
export function splitStorePerNamedGraph(store) {
|
|
18
|
+
const stores = [];
|
|
19
|
+
const namedGraphs = new Set();
|
|
20
|
+
store.getQuads(null, null, null, null)
|
|
21
|
+
.forEach(q => namedGraphs.add(q.graph));
|
|
22
|
+
namedGraphs.forEach(ng => {
|
|
23
|
+
const subStore = RdfStore.createDefault();
|
|
24
|
+
const quads = store.getQuads(null, null, null, ng);
|
|
25
|
+
quads.forEach(q => subStore.addQuad(q));
|
|
26
|
+
stores.push({ graph: ng, store: subStore });
|
|
27
|
+
});
|
|
28
|
+
return stores;
|
|
29
|
+
}
|
|
30
|
+
export function splitStoreOnSize(store, threshold) {
|
|
18
31
|
const stores = [];
|
|
19
32
|
if (store.size < threshold) {
|
|
20
33
|
stores.push(store);
|
|
@@ -92,7 +105,7 @@ export async function doSPARQLRequest(query, config, logger) {
|
|
|
92
105
|
return;
|
|
93
106
|
}
|
|
94
107
|
let queries = [];
|
|
95
|
-
const jointQuery = query.join("
|
|
108
|
+
const jointQuery = query.join(";\n");
|
|
96
109
|
if (config.forVirtuoso && Buffer.byteLength(jointQuery, 'utf8') > 1e6) {
|
|
97
110
|
queries = query;
|
|
98
111
|
}
|
|
@@ -100,7 +113,6 @@ export async function doSPARQLRequest(query, config, logger) {
|
|
|
100
113
|
queries.push(jointQuery);
|
|
101
114
|
}
|
|
102
115
|
for (const q of queries) {
|
|
103
|
-
logger.debug(`Executing SPARQL query: \n${q}`);
|
|
104
116
|
const res = await fetch(config.graphStoreUrl, {
|
|
105
117
|
method: "POST",
|
|
106
118
|
headers: {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@rdfc/sparql-ingest-processor-ts",
|
|
3
|
-
"version": "2.0
|
|
3
|
+
"version": "2.1.0",
|
|
4
4
|
"description": "SPARQL Update function to be within RDF-Connect pipelines",
|
|
5
5
|
"author": "Julián Rojas",
|
|
6
6
|
"contributors": [
|
|
@@ -31,7 +31,7 @@
|
|
|
31
31
|
"n3": "^2.0.1",
|
|
32
32
|
"rdf-data-factory": "^2.0.2",
|
|
33
33
|
"rdf-stores": "^2.1.1",
|
|
34
|
-
"undici": "^7.
|
|
34
|
+
"undici": "^7.21.0",
|
|
35
35
|
"winston": "^3.19.0"
|
|
36
36
|
},
|
|
37
37
|
"devDependencies": {
|
|
@@ -50,7 +50,7 @@
|
|
|
50
50
|
"ts-patch": "^3.3.0",
|
|
51
51
|
"tsc-alias": "^1.8.16",
|
|
52
52
|
"typescript": "^5.9.3",
|
|
53
|
-
"vite-tsconfig-paths": "^6.0
|
|
53
|
+
"vite-tsconfig-paths": "^6.1.0",
|
|
54
54
|
"vitest": "^4.0.18"
|
|
55
55
|
}
|
|
56
56
|
}
|
package/processors.ttl
CHANGED
|
@@ -43,16 +43,11 @@ rdfc:SPARQLIngest rdfc:jsImplementationOf rdfc:Processor;
|
|
|
43
43
|
sh:datatype xsd:integer;
|
|
44
44
|
sh:name "memberBatchSize";
|
|
45
45
|
sh:maxCount 1;
|
|
46
|
-
], [
|
|
47
|
-
sh:path rdfc:memberIsGraph;
|
|
48
|
-
sh:datatype xsd:boolean;
|
|
49
|
-
sh:name "memberIsGraph";
|
|
50
|
-
sh:maxCount 1;
|
|
51
46
|
], [
|
|
52
47
|
sh:path rdfc:memberShape;
|
|
53
48
|
sh:datatype xsd:string;
|
|
54
|
-
sh:name "
|
|
55
|
-
sh:
|
|
49
|
+
sh:name "memberShape";
|
|
50
|
+
sh:maxCount 1;
|
|
56
51
|
], [
|
|
57
52
|
sh:path rdfc:changeSemantics;
|
|
58
53
|
sh:class rdfc:ChangeSemantics;
|
package/lib/LogUtil.d.ts
DELETED
package/lib/LogUtil.js
DELETED
|
@@ -1,54 +0,0 @@
|
|
|
1
|
-
import winston, { format } from "winston";
|
|
2
|
-
const PROCESSOR_NAME = "sparql-ingest";
|
|
3
|
-
const consoleTransport = new winston.transports.Console({
|
|
4
|
-
stderrLevels: [
|
|
5
|
-
"error",
|
|
6
|
-
"warn",
|
|
7
|
-
"info",
|
|
8
|
-
"http",
|
|
9
|
-
"verbose",
|
|
10
|
-
"debug",
|
|
11
|
-
"silly"
|
|
12
|
-
]
|
|
13
|
-
});
|
|
14
|
-
if (typeof process !== "undefined") {
|
|
15
|
-
consoleTransport.level =
|
|
16
|
-
process.env.LOG_LEVEL ||
|
|
17
|
-
(process.env.DEBUG?.includes(PROCESSOR_NAME) ||
|
|
18
|
-
process.env.DEBUG === "*"
|
|
19
|
-
? "debug"
|
|
20
|
-
: "info");
|
|
21
|
-
}
|
|
22
|
-
const classLoggers = new WeakMap();
|
|
23
|
-
const stringLoggers = new Map();
|
|
24
|
-
export function getLoggerFor(loggable) {
|
|
25
|
-
let logger;
|
|
26
|
-
if (typeof loggable === "string") {
|
|
27
|
-
if (stringLoggers.has(loggable)) {
|
|
28
|
-
logger = stringLoggers.get(loggable);
|
|
29
|
-
}
|
|
30
|
-
else {
|
|
31
|
-
logger = createLogger(loggable);
|
|
32
|
-
stringLoggers.set(loggable, logger);
|
|
33
|
-
}
|
|
34
|
-
}
|
|
35
|
-
else {
|
|
36
|
-
const { constructor } = loggable;
|
|
37
|
-
if (classLoggers.has(constructor)) {
|
|
38
|
-
logger = classLoggers.get(constructor);
|
|
39
|
-
}
|
|
40
|
-
else {
|
|
41
|
-
logger = createLogger(constructor.name);
|
|
42
|
-
classLoggers.set(constructor, logger);
|
|
43
|
-
}
|
|
44
|
-
}
|
|
45
|
-
return logger;
|
|
46
|
-
}
|
|
47
|
-
function createLogger(label) {
|
|
48
|
-
return winston.createLogger({
|
|
49
|
-
format: format.combine(format.label({ label }), format.colorize(), format.timestamp(), format.metadata({
|
|
50
|
-
fillExcept: ["level", "timestamp", "label", "message"],
|
|
51
|
-
}), format.printf(({ level: levelInner, message, label: labelInner, timestamp, }) => `${timestamp} {${PROCESSOR_NAME}} [${labelInner}] ${levelInner}: ${message}`)),
|
|
52
|
-
transports: [consoleTransport],
|
|
53
|
-
});
|
|
54
|
-
}
|