@rdfc/sparql-ingest-processor-ts 0.5.1 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +119 -25
- package/lib/SPARQLIngest.d.ts +22 -3
- package/lib/SPARQLIngest.js +171 -156
- package/lib/Utils.d.ts +2 -1
- package/lib/Utils.js +1 -3
- package/lib/tsconfig.tsbuildinfo +1 -1
- package/package.json +13 -7
- package/processors.ttl +46 -63
package/README.md
CHANGED
|
@@ -1,41 +1,135 @@
|
|
|
1
1
|
# sparql-ingest-processor-ts
|
|
2
2
|
|
|
3
|
-
[](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml) [](https://npmjs.com/package/@rdfc/sparql-ingest-processor-ts)
|
|
4
4
|
|
|
5
|
-
|
|
5
|
+
TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) into a SPARQL endpoint.
|
|
6
6
|
|
|
7
|
-
|
|
7
|
+
This processor takes a stream of RDF records, transforms them into [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
|
|
8
|
+
It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content.
|
|
8
9
|
|
|
9
|
-
|
|
10
|
+
---
|
|
10
11
|
|
|
11
|
-
|
|
12
|
+
## Usage
|
|
13
|
+
|
|
14
|
+
### Installation
|
|
15
|
+
|
|
16
|
+
```bash
|
|
17
|
+
npm install
|
|
18
|
+
npm run build
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
Or install from NPM:
|
|
22
|
+
|
|
23
|
+
```bash
|
|
24
|
+
npm install @rdfc/sparql-ingest-processor-ts
|
|
25
|
+
```
|
|
26
|
+
|
|
27
|
+
---
|
|
28
|
+
|
|
29
|
+
### Pipeline Configuration Example
|
|
12
30
|
|
|
13
31
|
```turtle
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
32
|
+
@prefix rdfc: <https://w3id.org/rdf-connect#>.
|
|
33
|
+
@prefix owl: <http://www.w3.org/2002/07/owl#>.
|
|
34
|
+
|
|
35
|
+
### Import the processor definitions
|
|
36
|
+
<> owl:imports <./node_modules/@rdfc/sparql-ingest-processor-ts/processors.ttl>.
|
|
37
|
+
|
|
38
|
+
### Define the channels your processor needs
|
|
39
|
+
<in> a rdfc:Reader.
|
|
40
|
+
<out> a rdfc:Writer.
|
|
41
|
+
|
|
42
|
+
### Attach the processor to the pipeline under the NodeRunner
|
|
43
|
+
# Add the `rdfc:processor <ingester>` statement under the `rdfc:consistsOf` statement of the `rdfc:NodeRunner`
|
|
44
|
+
|
|
45
|
+
### Define and configure the processor
|
|
46
|
+
<ingester> a rdfc:SPARQLIngest;
|
|
47
|
+
rdfc:memberStream <in>;
|
|
48
|
+
rdfc:ingestConfig [
|
|
49
|
+
rdfc:memberIsGraph false;
|
|
50
|
+
rdfc:memberShape "http://ex.org/Shape1", "http://ex.org/Shape2";
|
|
51
|
+
rdfc:changeSemantics [
|
|
52
|
+
rdfc:changeTypePath "http://ex.org/changeType";
|
|
53
|
+
rdfc:createValue "http://ex.org/Create";
|
|
54
|
+
rdfc:updateValue "http://ex.org/Update";
|
|
55
|
+
rdfc:deleteValue "http://ex.org/Delete"
|
|
24
56
|
];
|
|
25
|
-
|
|
26
|
-
|
|
57
|
+
rdfc:targetNamedGraph "http://ex.org/myGraph";
|
|
58
|
+
rdfc:transactionConfig [
|
|
59
|
+
rdfc:transactionIdPath "http://ex.org/transactionId";
|
|
60
|
+
rdfc:transactionEndPath "http://ex.org/transactionEnd"
|
|
61
|
+
];
|
|
62
|
+
rdfc:graphStoreUrl "http://example.org/sparql";
|
|
63
|
+
rdfc:forVirtuoso false
|
|
27
64
|
];
|
|
28
|
-
|
|
65
|
+
rdfc:sparqlWriter <out>.
|
|
29
66
|
```
|
|
30
67
|
|
|
31
|
-
|
|
68
|
+
---
|
|
69
|
+
|
|
70
|
+
## Configuration
|
|
71
|
+
|
|
72
|
+
### Parameters of `rdfc:SPARQLIngest`:
|
|
73
|
+
- `rdfc:memberStream` (**rdfc:Reader**, required): Input SDS record stream.
|
|
74
|
+
- `rdfc:ingestConfig` (**rdfc:IngestConfig**, required): Configuration for ingest behavior.
|
|
75
|
+
- `rdfc:sparqlWriter` (**rdfc:Writer**, optional): Output stream of generated SPARQL queries.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### Parameters of `rdfc:IngestConfig`:
|
|
80
|
+
- `rdfc:memberIsGraph` (**boolean**, required): Whether each SDS record represents a named graph.
|
|
81
|
+
- `rdfc:memberShape` (**string**, optional, repeatable): SHACL shape identifiers used to guide query construction when payloads are incomplete.
|
|
82
|
+
- `rdfc:changeSemantics` (**rdfc:ChangeSemantics**, optional): Configures mapping between change types (create/update/delete) and SPARQL operations.
|
|
83
|
+
- `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph (ignored if `memberIsGraph = true`).
|
|
84
|
+
- `rdfc:transactionConfig` (**rdfc:TransactionConfig**, optional): Groups records by transaction ID for atomic updates.
|
|
85
|
+
- `rdfc:graphStoreUrl` (**string**, optional): SPARQL Graph Store endpoint URL.
|
|
86
|
+
- `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling.
|
|
87
|
+
- `rdfc:accessToken` (**string**, optional): Access token for authenticated graph stores.
|
|
88
|
+
- `rdfc:measurePerformance` (**rdfc:PerformanceConfig**, optional): Enables performance measurement of SPARQL queries.
|
|
89
|
+
|
|
90
|
+
---
|
|
91
|
+
|
|
92
|
+
### Parameters of `rdfc:ChangeSemantics`:
|
|
93
|
+
- `rdfc:changeTypePath` (**string**, required): Predicate identifying the type of change in SDS records.
|
|
94
|
+
- `rdfc:createValue` (**string**, required): Value representing a create operation.
|
|
95
|
+
- `rdfc:updateValue` (**string**, required): Value representing an update operation.
|
|
96
|
+
- `rdfc:deleteValue` (**string**, required): Value representing a delete operation.
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
### Parameters of `rdfc:TransactionConfig`:
|
|
101
|
+
- `rdfc:transactionIdPath` (**string**, required): Predicate identifying the transaction ID.
|
|
102
|
+
- `rdfc:transactionEndPath` (**string**, required): Predicate marking the last record in a transaction.
|
|
103
|
+
|
|
104
|
+
---
|
|
105
|
+
|
|
106
|
+
### Parameters of `rdfc:PerformanceConfig`:
|
|
107
|
+
- `rdfc:name` (**string**, required): Name of the performance measurement run.
|
|
108
|
+
- `rdfc:outputPath` (**string**, required): File path where performance logs will be written.
|
|
109
|
+
- `rdfc:failureIsFatal` (**boolean**, optional): If true, aborts on performance measurement failure.
|
|
110
|
+
- `rdfc:queryTimeout` (**integer**, optional): Maximum query execution time in milliseconds.
|
|
111
|
+
|
|
112
|
+
---
|
|
113
|
+
|
|
114
|
+
## Example
|
|
115
|
+
|
|
116
|
+
```turtle
|
|
117
|
+
<ingester> a rdfc:SPARQLIngest;
|
|
118
|
+
rdfc:memberStream <in>;
|
|
119
|
+
rdfc:ingestConfig [
|
|
120
|
+
rdfc:memberIsGraph true;
|
|
121
|
+
rdfc:targetNamedGraph "http://example.org/targetGraph";
|
|
122
|
+
rdfc:graphStoreUrl "http://example.org/sparql"
|
|
123
|
+
];
|
|
124
|
+
rdfc:sparqlWriter <out>.
|
|
125
|
+
```
|
|
32
126
|
|
|
33
|
-
|
|
34
|
-
2. The payload only contains the type of the payload's main entity (or member) via `rdf:type`. In this case one or more SHACL shapes can be configured via the `js:memberShape` property. The processor will identify the corresponding shape of an input SDS record (via the shape's target class) and the proper query pattern will be generated.
|
|
35
|
-
3. The payload does not contain the type of the payload's main entity (or member). In this case, it is not possible to identify the corresponding SHACL shape; therefore, a query reflecting all shapes via `OPTIONAL` clauses will be generated.
|
|
127
|
+
---
|
|
36
128
|
|
|
37
|
-
|
|
129
|
+
## Notes
|
|
38
130
|
|
|
39
|
-
|
|
131
|
+
- Delete operations can be handled differently depending on how complete the SDS record payload is.
|
|
132
|
+
- When `memberIsGraph = true`, queries are wrapped with `GRAPH` and `WITH` clauses.
|
|
133
|
+
- Transactions can buffer multiple SDS records and commit them together using `rdfc:transactionConfig`.
|
|
134
|
+
- SHACL shapes (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
|
|
40
135
|
|
|
41
|
-
Lastly, the main entity (member) of an SDS record payload may contain a transaction ID when the member is part of a larger group of members that must all be updated together in the targeted triple store. The property carrying this ID can be indicated to the processor via the `js:transactionIdPath` configuration property. The processor will then buffer all records containing the same transaction ID and execute the corresponding SPARQL Update query for all members at once.
|
package/lib/SPARQLIngest.d.ts
CHANGED
|
@@ -1,5 +1,7 @@
|
|
|
1
|
-
import
|
|
1
|
+
import { Processor, Reader, Writer } from "@rdfc/js-runner";
|
|
2
2
|
import { RdfStore } from "rdf-stores";
|
|
3
|
+
import type { Term } from "@rdfjs/types";
|
|
4
|
+
import { Logger } from "winston";
|
|
3
5
|
export type ChangeSemantics = {
|
|
4
6
|
changeTypePath: string;
|
|
5
7
|
createValue: string;
|
|
@@ -17,7 +19,7 @@ export type PerformanceConfig = {
|
|
|
17
19
|
failureIsFatal?: boolean;
|
|
18
20
|
};
|
|
19
21
|
export type IngestConfig = {
|
|
20
|
-
memberIsGraph
|
|
22
|
+
memberIsGraph?: boolean;
|
|
21
23
|
memberShapes?: string[];
|
|
22
24
|
changeSemantics?: ChangeSemantics;
|
|
23
25
|
targetNamedGraph?: string;
|
|
@@ -32,4 +34,21 @@ export type TransactionMember = {
|
|
|
32
34
|
transactionId: string;
|
|
33
35
|
store: RdfStore;
|
|
34
36
|
};
|
|
35
|
-
|
|
37
|
+
type SPARQLIngestArgs = {
|
|
38
|
+
memberStream: Reader;
|
|
39
|
+
config: IngestConfig;
|
|
40
|
+
sparqlWriter?: Writer;
|
|
41
|
+
};
|
|
42
|
+
export declare class SPARQLIngest extends Processor<SPARQLIngestArgs> {
|
|
43
|
+
protected transactionMembers: TransactionMember[];
|
|
44
|
+
protected requestsPerformance: number[];
|
|
45
|
+
protected createTransactionQueriesLogger: Logger;
|
|
46
|
+
protected doSPARQLRequestLogger: Logger;
|
|
47
|
+
init(this: SPARQLIngestArgs & this): Promise<void>;
|
|
48
|
+
transform(this: SPARQLIngestArgs & this): Promise<void>;
|
|
49
|
+
produce(this: SPARQLIngestArgs & this): Promise<void>;
|
|
50
|
+
verifyTransaction(stores: RdfStore[], transactionIdPath: string, transactionId: Term): void;
|
|
51
|
+
getNamedGraphIfAny(memberIRI: Term, memberIsGraph: boolean | undefined, targetNamedGraph?: string): string | undefined;
|
|
52
|
+
createTransactionQueries(transactionMembers: TransactionMember[], config: IngestConfig): string;
|
|
53
|
+
}
|
|
54
|
+
export {};
|
package/lib/SPARQLIngest.js
CHANGED
|
@@ -1,206 +1,221 @@
|
|
|
1
|
+
import { extendLogger, Processor } from "@rdfc/js-runner";
|
|
1
2
|
import { SDS } from "@treecg/types";
|
|
2
3
|
import { DataFactory } from "rdf-data-factory";
|
|
3
4
|
import { RdfStore } from "rdf-stores";
|
|
4
5
|
import { Parser } from "n3";
|
|
5
6
|
import { writeFile } from "fs/promises";
|
|
6
|
-
import { CREATE,
|
|
7
|
-
import { doSPARQLRequest,
|
|
8
|
-
import { getLoggerFor } from "./LogUtil.js";
|
|
7
|
+
import { CREATE, DELETE, UPDATE } from "./SPARQLQueries.js";
|
|
8
|
+
import { doSPARQLRequest, getObjects, sanitizeQuads } from "./Utils.js";
|
|
9
9
|
const df = new DataFactory();
|
|
10
|
-
export
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
10
|
+
export class SPARQLIngest extends Processor {
|
|
11
|
+
transactionMembers = [];
|
|
12
|
+
requestsPerformance = [];
|
|
13
|
+
createTransactionQueriesLogger;
|
|
14
|
+
doSPARQLRequestLogger;
|
|
15
|
+
async init() {
|
|
16
|
+
this.createTransactionQueriesLogger = extendLogger(this.logger, "createTransactionQueries");
|
|
17
|
+
this.doSPARQLRequestLogger = extendLogger(this.logger, "doSPARQLRequest");
|
|
18
|
+
}
|
|
19
|
+
async transform() {
|
|
20
|
+
for await (const rawQuads of this.memberStream.strings()) {
|
|
21
|
+
this.logger.debug(`Raw member data received: \n${rawQuads}`);
|
|
22
|
+
const quads = new Parser().parse(rawQuads);
|
|
23
|
+
this.logger.verbose(`Parsed ${quads.length} quads from received member data`);
|
|
24
|
+
const store = RdfStore.createDefault();
|
|
25
|
+
quads.forEach(q => store.addQuad(q));
|
|
26
|
+
let query;
|
|
27
|
+
const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
|
|
28
|
+
if (memberIRI) {
|
|
29
|
+
this.logger.verbose(`Member IRI found in SDS metadata: ${memberIRI.value}`);
|
|
30
|
+
const sdsQuads = store.getQuads(null, null, null, SDS.terms.custom("DataDescription"));
|
|
31
|
+
sdsQuads.forEach(q => store.removeQuad(q));
|
|
32
|
+
if (this.config.transactionConfig) {
|
|
33
|
+
const transactionId = getObjects(store, null, df.namedNode(this.config.transactionConfig.transactionIdPath), null)[0];
|
|
34
|
+
if (transactionId) {
|
|
35
|
+
store.removeQuad(df.quad(memberIRI, df.namedNode(this.config.transactionConfig.transactionIdPath), transactionId));
|
|
36
|
+
const isLastOfTransaction = getObjects(store, null, df.namedNode(this.config.transactionConfig.transactionEndPath), null)[0];
|
|
37
|
+
if (isLastOfTransaction) {
|
|
38
|
+
this.logger.info(`Last member of ${transactionId.value} received!`);
|
|
39
|
+
this.verifyTransaction(this.transactionMembers.map(ts => ts.store), this.config.transactionConfig.transactionIdPath, transactionId);
|
|
40
|
+
store.removeQuad(df.quad(memberIRI, df.namedNode(this.config.transactionConfig.transactionEndPath), isLastOfTransaction));
|
|
41
|
+
this.transactionMembers.push({
|
|
42
|
+
memberId: memberIRI.value,
|
|
43
|
+
transactionId: transactionId.value,
|
|
44
|
+
store
|
|
45
|
+
});
|
|
46
|
+
}
|
|
47
|
+
else if (this.transactionMembers.length > 0) {
|
|
48
|
+
this.verifyTransaction(this.transactionMembers.map(ts => ts.store), this.config.transactionConfig.transactionIdPath, transactionId);
|
|
49
|
+
this.transactionMembers.push({
|
|
50
|
+
memberId: memberIRI.value,
|
|
51
|
+
transactionId: transactionId.value,
|
|
52
|
+
store
|
|
53
|
+
});
|
|
54
|
+
return;
|
|
55
|
+
}
|
|
56
|
+
else {
|
|
57
|
+
this.logger.info(`New transaction ${transactionId.value} started!`);
|
|
58
|
+
if (this.transactionMembers.length > 0) {
|
|
59
|
+
this.logger.error(`Received new transaction ${transactionId.value}, `
|
|
60
|
+
+ `but older transaction ${this.transactionMembers[0].transactionId} hasn't been finalized `);
|
|
61
|
+
throw new Error(`Received new transaction ${transactionId.value}, `
|
|
62
|
+
+ `but older transaction ${this.transactionMembers[0].transactionId} hasn't been finalized `);
|
|
63
|
+
}
|
|
64
|
+
this.transactionMembers.push({
|
|
65
|
+
memberId: memberIRI.value,
|
|
66
|
+
transactionId: transactionId.value,
|
|
67
|
+
store
|
|
68
|
+
});
|
|
69
|
+
return;
|
|
70
|
+
}
|
|
39
71
|
}
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
store
|
|
46
|
-
});
|
|
47
|
-
return;
|
|
72
|
+
}
|
|
73
|
+
if (this.config.changeSemantics) {
|
|
74
|
+
if (this.transactionMembers.length > 0) {
|
|
75
|
+
query = [this.createTransactionQueries(this.transactionMembers, this.config)];
|
|
76
|
+
this.transactionMembers = [];
|
|
48
77
|
}
|
|
49
78
|
else {
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
79
|
+
const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
|
|
80
|
+
const ctv = store.getQuads(null, df.namedNode(this.config.changeSemantics.changeTypePath))[0];
|
|
81
|
+
store.removeQuad(ctv);
|
|
82
|
+
sanitizeQuads(store);
|
|
83
|
+
if (ctv.object.value === this.config.changeSemantics.createValue) {
|
|
84
|
+
this.logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
|
|
85
|
+
query = CREATE(store, this.config.forVirtuoso, ng);
|
|
86
|
+
}
|
|
87
|
+
else if (ctv.object.value === this.config.changeSemantics.updateValue) {
|
|
88
|
+
this.logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
89
|
+
query = UPDATE(store, this.config.forVirtuoso, ng);
|
|
90
|
+
}
|
|
91
|
+
else if (ctv.object.value === this.config.changeSemantics.deleteValue) {
|
|
92
|
+
this.logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
93
|
+
query = [DELETE(store, [memberIRI.value], this.config.memberShapes, ng)];
|
|
94
|
+
}
|
|
95
|
+
else {
|
|
96
|
+
this.logger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
|
|
97
|
+
throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
|
|
98
|
+
}
|
|
60
99
|
}
|
|
61
100
|
}
|
|
62
|
-
}
|
|
63
|
-
let query;
|
|
64
|
-
if (config.changeSemantics) {
|
|
65
|
-
if (transactionMembers.length > 0) {
|
|
66
|
-
query = [createTransactionQueries(transactionMembers, config)];
|
|
67
|
-
transactionMembers = [];
|
|
68
|
-
}
|
|
69
101
|
else {
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
query = CREATE(store, config.forVirtuoso, ng);
|
|
77
|
-
}
|
|
78
|
-
else if (ctv.object.value === config.changeSemantics.updateValue) {
|
|
79
|
-
logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
80
|
-
query = UPDATE(store, config.forVirtuoso, ng);
|
|
81
|
-
}
|
|
82
|
-
else if (ctv.object.value === config.changeSemantics.deleteValue) {
|
|
83
|
-
logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
|
|
84
|
-
query = [DELETE(store, [memberIRI.value], config.memberShapes, ng)];
|
|
102
|
+
if (this.transactionMembers.length > 0) {
|
|
103
|
+
this.transactionMembers.forEach(ts => {
|
|
104
|
+
ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
|
|
105
|
+
});
|
|
106
|
+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
|
|
107
|
+
query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
|
|
85
108
|
}
|
|
86
109
|
else {
|
|
87
|
-
|
|
110
|
+
const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
|
|
111
|
+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
|
|
112
|
+
query = UPDATE(store, this.config.forVirtuoso, ng);
|
|
88
113
|
}
|
|
89
114
|
}
|
|
90
115
|
}
|
|
91
116
|
else {
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
|
|
95
|
-
});
|
|
96
|
-
logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
|
|
97
|
-
query = UPDATE(store, config.forVirtuoso, config.targetNamedGraph);
|
|
98
|
-
}
|
|
99
|
-
else {
|
|
100
|
-
const ng = getNamedGraphIfAny(memberIRI, config.memberIsGraph, config.targetNamedGraph);
|
|
101
|
-
logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
|
|
102
|
-
query = UPDATE(store, config.forVirtuoso, ng);
|
|
103
|
-
}
|
|
117
|
+
this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for received triples (${store.size})`);
|
|
118
|
+
query = UPDATE(store, this.config.forVirtuoso);
|
|
104
119
|
}
|
|
105
120
|
if (query && query.length > 0) {
|
|
106
|
-
logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
|
|
107
|
-
if (config.graphStoreUrl) {
|
|
121
|
+
this.logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
|
|
122
|
+
if (this.config.graphStoreUrl) {
|
|
108
123
|
try {
|
|
109
124
|
const t0 = Date.now();
|
|
110
|
-
await doSPARQLRequest(query, config);
|
|
125
|
+
await doSPARQLRequest(query, this.config, this.doSPARQLRequestLogger);
|
|
111
126
|
const reqTime = Date.now() - t0;
|
|
112
|
-
if (config.measurePerformance) {
|
|
113
|
-
requestsPerformance.push(reqTime);
|
|
127
|
+
if (this.config.measurePerformance) {
|
|
128
|
+
this.requestsPerformance.push(reqTime);
|
|
114
129
|
}
|
|
115
|
-
logger.info(`Executed query on remote SPARQL server ${config.graphStoreUrl} (took ${reqTime} ms)`);
|
|
130
|
+
this.logger.info(`Executed query on remote SPARQL server ${this.config.graphStoreUrl} (took ${reqTime} ms)`);
|
|
116
131
|
}
|
|
117
132
|
catch (error) {
|
|
118
|
-
if (!config.measurePerformance || config.measurePerformance.failureIsFatal) {
|
|
133
|
+
if (!this.config.measurePerformance || this.config.measurePerformance.failureIsFatal) {
|
|
134
|
+
this.logger.error(`Error executing query on remote SPARQL server ${this.config.graphStoreUrl}: ${error}`);
|
|
119
135
|
throw error;
|
|
120
136
|
}
|
|
121
137
|
else {
|
|
122
|
-
if (config.measurePerformance) {
|
|
123
|
-
requestsPerformance.push(-1);
|
|
138
|
+
if (this.config.measurePerformance) {
|
|
139
|
+
this.requestsPerformance.push(-1);
|
|
124
140
|
}
|
|
125
141
|
}
|
|
126
142
|
}
|
|
127
143
|
}
|
|
128
|
-
if (sparqlWriter) {
|
|
129
|
-
await sparqlWriter.
|
|
144
|
+
if (this.sparqlWriter) {
|
|
145
|
+
await this.sparqlWriter.string(query.join("\n"));
|
|
130
146
|
}
|
|
131
147
|
}
|
|
132
148
|
else {
|
|
133
|
-
logger.warn(`No query generated for member ${memberIRI.value}`);
|
|
149
|
+
this.logger.warn(`No query generated for member ${memberIRI.value}`);
|
|
134
150
|
}
|
|
135
151
|
}
|
|
136
|
-
|
|
137
|
-
|
|
152
|
+
if (this.sparqlWriter) {
|
|
153
|
+
this.logger.info("Closing SPARQL writer");
|
|
138
154
|
}
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
if (sparqlWriter) {
|
|
142
|
-
await sparqlWriter.end();
|
|
155
|
+
if (this.config.measurePerformance) {
|
|
156
|
+
await writeFile(`${this.config.measurePerformance.outputPath}/${this.config.measurePerformance.name}.json`, JSON.stringify(this.requestsPerformance), "utf-8");
|
|
143
157
|
}
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
158
|
+
}
|
|
159
|
+
async produce() {
|
|
160
|
+
}
|
|
161
|
+
verifyTransaction(stores, transactionIdPath, transactionId) {
|
|
162
|
+
for (const store of stores) {
|
|
163
|
+
const tIds = getObjects(store, null, df.namedNode(transactionIdPath), null);
|
|
164
|
+
for (const tid of tIds) {
|
|
165
|
+
if (!tid.equals(transactionId)) {
|
|
166
|
+
this.logger.error(`[sparqlIngest] Received non-matching transaction ID ${transactionId.value} `
|
|
167
|
+
+ `with previous transaction: ${tid.value}`);
|
|
168
|
+
throw new Error(`[sparqlIngest] Received non-matching transaction ID ${transactionId.value} `
|
|
169
|
+
+ `with previous transaction: ${tid.value}`);
|
|
170
|
+
}
|
|
156
171
|
}
|
|
157
172
|
}
|
|
158
173
|
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
174
|
+
getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
|
|
175
|
+
let ng;
|
|
176
|
+
if (memberIsGraph) {
|
|
177
|
+
ng = memberIRI.value;
|
|
178
|
+
}
|
|
179
|
+
else if (targetNamedGraph) {
|
|
180
|
+
ng = targetNamedGraph;
|
|
181
|
+
}
|
|
182
|
+
return ng;
|
|
167
183
|
}
|
|
168
|
-
|
|
169
|
-
}
|
|
170
|
-
|
|
171
|
-
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
+
createTransactionQueries(transactionMembers, config) {
|
|
185
|
+
this.createTransactionQueriesLogger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
|
|
186
|
+
+ ` members of transaction ${transactionMembers[0].transactionId}`);
|
|
187
|
+
const createStore = RdfStore.createDefault();
|
|
188
|
+
const updateStore = RdfStore.createDefault();
|
|
189
|
+
const deleteStore = RdfStore.createDefault();
|
|
190
|
+
const deleteMembers = [];
|
|
191
|
+
const transactionQueryBuilder = [];
|
|
192
|
+
for (const tsm of transactionMembers) {
|
|
193
|
+
const ctv = tsm.store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
|
|
194
|
+
tsm.store.removeQuad(ctv);
|
|
195
|
+
if (ctv.object.value === config.changeSemantics.createValue) {
|
|
196
|
+
tsm.store.getQuads(null, null, null, null).forEach(q => createStore.addQuad(q));
|
|
197
|
+
}
|
|
198
|
+
else if (ctv.object.value === config.changeSemantics.updateValue) {
|
|
199
|
+
tsm.store.getQuads(null, null, null, null).forEach(q => updateStore.addQuad(q));
|
|
200
|
+
}
|
|
201
|
+
else if (ctv.object.value === config.changeSemantics.deleteValue) {
|
|
202
|
+
tsm.store.getQuads(null, null, null, null).forEach(q => deleteStore.addQuad(q));
|
|
203
|
+
deleteMembers.push(tsm.memberId);
|
|
204
|
+
}
|
|
205
|
+
else {
|
|
206
|
+
this.createTransactionQueriesLogger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
|
|
207
|
+
throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
|
|
208
|
+
}
|
|
184
209
|
}
|
|
185
|
-
|
|
186
|
-
|
|
210
|
+
if (createStore.size > 0) {
|
|
211
|
+
transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
|
|
187
212
|
}
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
deleteMembers.push(tsm.memberId);
|
|
213
|
+
if (updateStore.size > 0) {
|
|
214
|
+
transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
|
|
191
215
|
}
|
|
192
|
-
|
|
193
|
-
|
|
216
|
+
if (updateStore.size > 0) {
|
|
217
|
+
transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
|
|
194
218
|
}
|
|
219
|
+
return transactionQueryBuilder.join(";\n");
|
|
195
220
|
}
|
|
196
|
-
if (createStore.size > 0) {
|
|
197
|
-
transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
|
|
198
|
-
}
|
|
199
|
-
if (updateStore.size > 0) {
|
|
200
|
-
transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
|
|
201
|
-
}
|
|
202
|
-
if (updateStore.size > 0) {
|
|
203
|
-
transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
|
|
204
|
-
}
|
|
205
|
-
return transactionQueryBuilder.join(";\n");
|
|
206
221
|
}
|
package/lib/Utils.d.ts
CHANGED
|
@@ -1,8 +1,9 @@
|
|
|
1
1
|
import { RdfStore } from "rdf-stores";
|
|
2
2
|
import type { Term, Quad_Subject, Quad_Object } from "@rdfjs/types";
|
|
3
3
|
import type { IngestConfig } from "./SPARQLIngest.js";
|
|
4
|
+
import { Logger } from "winston";
|
|
4
5
|
export declare function getSubjects(store: RdfStore, predicate: Term | null, object: Term | null, graph?: Term | null): Quad_Subject[];
|
|
5
6
|
export declare function getObjects(store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null): Quad_Object[];
|
|
6
7
|
export declare function splitStore(store: RdfStore, threshold: number): RdfStore[];
|
|
7
8
|
export declare function sanitizeQuads(store: RdfStore): void;
|
|
8
|
-
export declare function doSPARQLRequest(query: string[], config: IngestConfig): Promise<void>;
|
|
9
|
+
export declare function doSPARQLRequest(query: string[], config: IngestConfig, logger: Logger): Promise<void>;
|
package/lib/Utils.js
CHANGED
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { XSD } from "@treecg/types";
|
|
2
2
|
import { DataFactory } from "rdf-data-factory";
|
|
3
3
|
import { RdfStore } from "rdf-stores";
|
|
4
|
-
import { getLoggerFor } from "./LogUtil.js";
|
|
5
4
|
import { Agent } from "undici";
|
|
6
5
|
const df = new DataFactory();
|
|
7
6
|
export function getSubjects(store, predicate, object, graph) {
|
|
@@ -63,8 +62,7 @@ export function sanitizeQuads(store) {
|
|
|
63
62
|
}
|
|
64
63
|
}
|
|
65
64
|
}
|
|
66
|
-
export async function doSPARQLRequest(query, config) {
|
|
67
|
-
const logger = getLoggerFor("doSPARQLRequest");
|
|
65
|
+
export async function doSPARQLRequest(query, config, logger) {
|
|
68
66
|
try {
|
|
69
67
|
let queries = [];
|
|
70
68
|
const jointQuery = query.join("\n");
|