@rdfc/sparql-ingest-processor-ts 0.5.1 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,41 +1,135 @@
1
1
  # sparql-ingest-processor-ts
2
2
 
3
- [![Bun CI](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml/badge.svg)](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml) [![npm](https://img.shields.io/npm/v/@rdfc/sparql-ingest-processor-ts.svg?style=popout)](https://npmjs.com/package/@rdfc/sparql-ingest-processor-ts)
3
+ [![Build and tests with Node.js](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml/badge.svg)](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml) [![npm](https://img.shields.io/npm/v/@rdfc/sparql-ingest-processor-ts.svg?style=popout)](https://npmjs.com/package/@rdfc/sparql-ingest-processor-ts)
4
4
 
5
- Typescript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for producing the corresponding SPARQL Update queries that write a stream of [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) into a SPARQL triple store. Currently this repository exposes one function:
5
+ TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) into a SPARQL endpoint.
6
6
 
7
- ### [`js:SPARQLIngest`](https://github.com/rdf-connect/sparql-ingest-processor-ts/blob/main/processors.ttl#L9)
7
+ This processor takes a stream of RDF records, transforms them into [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
8
+ It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content.
8
9
 
9
- This processor is able to take an input stream of SDS records (described by the `sds:stream` and `sds:payload` properties) and produce corresponding [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries (`INSERT DATA`, `DELETE INSERT WHERE` and `DELETE WHERE`) to be executed over a graph store via the [SPARQL protocol](https://www.w3.org/TR/sparql11-protocol/).
10
+ ---
10
11
 
11
- By default, this processor will produce a `DELETE INSERT WHERE` query that will overwrite all the triples present in the payload of the received SDS record. However, specific query operations can be generated based on configurable change semantics that can be included in the SDS record payload. The following example shows a configuration that specifies the predicate `ex:changeType` and the values `as:Create`, `as:Update` and `as:Delete` as the expected values for generating `INSERT DATA`, `DELETE INSERT WHERE` and `DELETE WHERE` queries, respectively.
12
+ ## Usage
13
+
14
+ ### Installation
15
+
16
+ ```bash
17
+ npm install
18
+ npm run build
19
+ ```
20
+
21
+ Or install from NPM:
22
+
23
+ ```bash
24
+ npm install @rdfc/sparql-ingest-processor-ts
25
+ ```
26
+
27
+ ---
28
+
29
+ ### Pipeline Configuration Example
12
30
 
13
31
  ```turtle
14
- [ ] a js:SPARQLIngest;
15
- js:memberStream <inputStream>;
16
- js:ingestConfig [
17
- js:memberIsGraph false;
18
- js:memberShape "Some SHACL shape", "Another SHACL shape";
19
- js:changeSemantics [
20
- js:changeTypePath "http://ex.org/changeType";
21
- js:createValue "http://ex.org/Create";
22
- js:updateValue "http://ex.org/Update";
23
- js:deleteValue "http://ex.org/Delete"
32
+ @prefix rdfc: <https://w3id.org/rdf-connect#>.
33
+ @prefix owl: <http://www.w3.org/2002/07/owl#>.
34
+
35
+ ### Import the processor definitions
36
+ <> owl:imports <./node_modules/@rdfc/sparql-ingest-processor-ts/processors.ttl>.
37
+
38
+ ### Define the channels your processor needs
39
+ <in> a rdfc:Reader.
40
+ <out> a rdfc:Writer.
41
+
42
+ ### Attach the processor to the pipeline under the NodeRunner
43
+ # Add the `rdfc:processor <ingester>` statement under the `rdfc:consistsOf` statement of the `rdfc:NodeRunner`
44
+
45
+ ### Define and configure the processor
46
+ <ingester> a rdfc:SPARQLIngest;
47
+ rdfc:memberStream <in>;
48
+ rdfc:ingestConfig [
49
+ rdfc:memberIsGraph false;
50
+ rdfc:memberShape "http://ex.org/Shape1", "http://ex.org/Shape2";
51
+ rdfc:changeSemantics [
52
+ rdfc:changeTypePath "http://ex.org/changeType";
53
+ rdfc:createValue "http://ex.org/Create";
54
+ rdfc:updateValue "http://ex.org/Update";
55
+ rdfc:deleteValue "http://ex.org/Delete"
24
56
  ];
25
- js:targetNamedGraph "http://ex.org/myGraph";
26
- js:transactionIdPath "http://ex.org/trancationId"
57
+ rdfc:targetNamedGraph "http://ex.org/myGraph";
58
+ rdfc:transactionConfig [
59
+ rdfc:transactionIdPath "http://ex.org/transactionId";
60
+ rdfc:transactionEndPath "http://ex.org/transactionEnd"
61
+ ];
62
+ rdfc:graphStoreUrl "http://example.org/sparql";
63
+ rdfc:forVirtuoso false
27
64
  ];
28
- js:sparqlWriter <outputStream>.
65
+ rdfc:sparqlWriter <out>.
29
66
  ```
30
67
 
31
- For the case of delete operations, additional information can be provided depending on the content of the SDS record payload signaling a delete:
68
+ ---
69
+
70
+ ## Configuration
71
+
72
+ ### Parameters of `rdfc:SPARQLIngest`:
73
+ - `rdfc:memberStream` (**rdfc:Reader**, required): Input SDS record stream.
74
+ - `rdfc:ingestConfig` (**rdfc:IngestConfig**, required): Configuration for ingest behavior.
75
+ - `rdfc:sparqlWriter` (**rdfc:Writer**, optional): Output stream of generated SPARQL queries.
76
+
77
+ ---
78
+
79
+ ### Parameters of `rdfc:IngestConfig`:
80
+ - `rdfc:memberIsGraph` (**boolean**, required): Whether each SDS record represents a named graph.
81
+ - `rdfc:memberShape` (**string**, optional, repeatable): SHACL shape identifiers used to guide query construction when payloads are incomplete.
82
+ - `rdfc:changeSemantics` (**rdfc:ChangeSemantics**, optional): Configures mapping between change types (create/update/delete) and SPARQL operations.
83
+ - `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph (ignored if `memberIsGraph = true`).
84
+ - `rdfc:transactionConfig` (**rdfc:TransactionConfig**, optional): Groups records by transaction ID for atomic updates.
85
+ - `rdfc:graphStoreUrl` (**string**, optional): SPARQL Graph Store endpoint URL.
86
+ - `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling.
87
+ - `rdfc:accessToken` (**string**, optional): Access token for authenticated graph stores.
88
+ - `rdfc:measurePerformance` (**rdfc:PerformanceConfig**, optional): Enables performance measurement of SPARQL queries.
89
+
90
+ ---
91
+
92
+ ### Parameters of `rdfc:ChangeSemantics`:
93
+ - `rdfc:changeTypePath` (**string**, required): Predicate identifying the type of change in SDS records.
94
+ - `rdfc:createValue` (**string**, required): Value representing a create operation.
95
+ - `rdfc:updateValue` (**string**, required): Value representing an update operation.
96
+ - `rdfc:deleteValue` (**string**, required): Value representing a delete operation.
97
+
98
+ ---
99
+
100
+ ### Parameters of `rdfc:TransactionConfig`:
101
+ - `rdfc:transactionIdPath` (**string**, required): Predicate identifying the transaction ID.
102
+ - `rdfc:transactionEndPath` (**string**, required): Predicate marking the last record in a transaction.
103
+
104
+ ---
105
+
106
+ ### Parameters of `rdfc:PerformanceConfig`:
107
+ - `rdfc:name` (**string**, required): Name of the performance measurement run.
108
+ - `rdfc:outputPath` (**string**, required): Directory where the measurements are written; the output file is `<outputPath>/<name>.json`.
109
+ - `rdfc:failureIsFatal` (**boolean**, optional): If true, a failed SPARQL request aborts processing; otherwise the failure is recorded as `-1` in the measurements and processing continues.
110
+ - `rdfc:queryTimeout` (**integer**, optional): Maximum query execution time in milliseconds.
111
+
112
+ ---
113
+
114
+ ## Example
115
+
116
+ ```turtle
117
+ <ingester> a rdfc:SPARQLIngest;
118
+ rdfc:memberStream <in>;
119
+ rdfc:ingestConfig [
120
+ rdfc:memberIsGraph true;
121
+ rdfc:targetNamedGraph "http://example.org/targetGraph";
122
+ rdfc:graphStoreUrl "http://example.org/sparql"
123
+ ];
124
+ rdfc:sparqlWriter <out>.
125
+ ```
32
126
 
33
- 1. The payload is complete and contains all the triples that must be deleted from the triple store. In this case no additional information is needed.
34
- 2. The payload only contains the type of the payload's main entity (or member) via `rdf:type`. In this case one or more SHACL shapes can be configured via the `js:memberShape` property. The processor will identify the corresponding shape of an input SDS record (via the shape's target class) and the proper query pattern will be generated.
35
- 3. The payload does not contain the type of the payload's main entity (or member). In this case, it is not possible to identify the corresponding SHACL shape, so a query reflecting all shapes via `OPTIONAL` clauses will be generated.
127
+ ---
36
128
 
37
- If the main entity (member) of the SDS record payload is always a named graph, this can be configured by setting `js:memberIsGraph` to `true`. In this scenario, all resulting queries will use the corresponding `GRAPH` and `WITH` clauses.
129
+ ## Notes
38
130
 
39
- If a specific named graph should be targeted by all the resulting SPARQL Update queries, this can be configured via the `js:targetNamedGraph` property. This property will be ignored if the `js:memberIsGraph` property is `true`.
131
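+ - Delete operations are handled differently depending on how complete the SDS record payload is: a complete payload is deleted as given; a payload that only carries the member's `rdf:type` is matched to a configured SHACL shape (via the shape's target class); a payload without a type yields a query that covers all configured shapes via `OPTIONAL` clauses.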
132
+ - When `memberIsGraph = true`, queries are wrapped with `GRAPH` and `WITH` clauses.
133
+ - Transactions can buffer multiple SDS records and commit them together using `rdfc:transactionConfig`.
134
+ - SHACL shapes (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
40
135
 
41
- Lastly, the main entity (member) of the SDS record payload may contain a transaction ID when the member is part of a larger group of members that must be updated all together in the targeted triple store. This property can be indicated to the processor via the `js:transactionIdPath` configuration property. The processor will then buffer all records containing the same transaction ID and execute the corresponding SPARQL Update query for all members at once.
@@ -1,5 +1,7 @@
1
- import type { Stream, Writer } from "@rdfc/js-runner";
1
+ import { Processor, Reader, Writer } from "@rdfc/js-runner";
2
2
  import { RdfStore } from "rdf-stores";
3
+ import type { Term } from "@rdfjs/types";
4
+ import { Logger } from "winston";
3
5
  export type ChangeSemantics = {
4
6
  changeTypePath: string;
5
7
  createValue: string;
@@ -17,7 +19,7 @@ export type PerformanceConfig = {
17
19
  failureIsFatal?: boolean;
18
20
  };
19
21
  export type IngestConfig = {
20
- memberIsGraph: boolean;
22
+ memberIsGraph?: boolean;
21
23
  memberShapes?: string[];
22
24
  changeSemantics?: ChangeSemantics;
23
25
  targetNamedGraph?: string;
@@ -32,4 +34,21 @@ export type TransactionMember = {
32
34
  transactionId: string;
33
35
  store: RdfStore;
34
36
  };
35
- export declare function sparqlIngest(memberStream: Stream<string>, config: IngestConfig, sparqlWriter?: Writer<string>): Promise<void>;
37
+ type SPARQLIngestArgs = {
38
+ memberStream: Reader;
39
+ config: IngestConfig;
40
+ sparqlWriter?: Writer;
41
+ };
42
+ export declare class SPARQLIngest extends Processor<SPARQLIngestArgs> {
43
+ protected transactionMembers: TransactionMember[];
44
+ protected requestsPerformance: number[];
45
+ protected createTransactionQueriesLogger: Logger;
46
+ protected doSPARQLRequestLogger: Logger;
47
+ init(this: SPARQLIngestArgs & this): Promise<void>;
48
+ transform(this: SPARQLIngestArgs & this): Promise<void>;
49
+ produce(this: SPARQLIngestArgs & this): Promise<void>;
50
+ verifyTransaction(stores: RdfStore[], transactionIdPath: string, transactionId: Term): void;
51
+ getNamedGraphIfAny(memberIRI: Term, memberIsGraph: boolean | undefined, targetNamedGraph?: string): string | undefined;
52
+ createTransactionQueries(transactionMembers: TransactionMember[], config: IngestConfig): string;
53
+ }
54
+ export {};
@@ -1,206 +1,221 @@
1
+ import { extendLogger, Processor } from "@rdfc/js-runner";
1
2
  import { SDS } from "@treecg/types";
2
3
  import { DataFactory } from "rdf-data-factory";
3
4
  import { RdfStore } from "rdf-stores";
4
5
  import { Parser } from "n3";
5
6
  import { writeFile } from "fs/promises";
6
- import { CREATE, UPDATE, DELETE } from "./SPARQLQueries.js";
7
- import { doSPARQLRequest, sanitizeQuads, getObjects } from "./Utils.js";
8
- import { getLoggerFor } from "./LogUtil.js";
7
+ import { CREATE, DELETE, UPDATE } from "./SPARQLQueries.js";
8
+ import { doSPARQLRequest, getObjects, sanitizeQuads } from "./Utils.js";
9
9
  const df = new DataFactory();
10
- export async function sparqlIngest(memberStream, config, sparqlWriter) {
11
- const logger = getLoggerFor("sparqlIngest");
12
- let transactionMembers = [];
13
- const requestsPerformance = [];
14
- memberStream.data(async (rawQuads) => {
15
- logger.debug(`Raw member data received: \n${rawQuads}`);
16
- const quads = new Parser().parse(rawQuads);
17
- logger.verbose(`Parsed ${quads.length} quads from received member data`);
18
- const store = RdfStore.createDefault();
19
- quads.forEach(q => store.addQuad(q));
20
- const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
21
- logger.verbose(`Member IRI found: ${memberIRI ? memberIRI.value : "none"}`);
22
- if (memberIRI) {
23
- const sdsQuads = store.getQuads(null, null, null, SDS.terms.custom("DataDescription"));
24
- sdsQuads.forEach(q => store.removeQuad(q));
25
- if (config.transactionConfig) {
26
- const transactionId = getObjects(store, null, df.namedNode(config.transactionConfig.transactionIdPath), null)[0];
27
- if (transactionId) {
28
- store.removeQuad(df.quad(memberIRI, df.namedNode(config.transactionConfig.transactionIdPath), transactionId));
29
- const isLastOfTransaction = getObjects(store, null, df.namedNode(config.transactionConfig.transactionEndPath), null)[0];
30
- if (isLastOfTransaction) {
31
- logger.info(`Last member of ${transactionId.value} received!`);
32
- verifyTransaction(transactionMembers.map(ts => ts.store), config.transactionConfig.transactionIdPath, transactionId);
33
- store.removeQuad(df.quad(memberIRI, df.namedNode(config.transactionConfig.transactionEndPath), isLastOfTransaction));
34
- transactionMembers.push({
35
- memberId: memberIRI.value,
36
- transactionId: transactionId.value,
37
- store
38
- });
10
+ export class SPARQLIngest extends Processor {
11
+ transactionMembers = [];
12
+ requestsPerformance = [];
13
+ createTransactionQueriesLogger;
14
+ doSPARQLRequestLogger;
15
+ async init() {
16
+ this.createTransactionQueriesLogger = extendLogger(this.logger, "createTransactionQueries");
17
+ this.doSPARQLRequestLogger = extendLogger(this.logger, "doSPARQLRequest");
18
+ }
19
+ async transform() {
20
+ for await (const rawQuads of this.memberStream.strings()) {
21
+ this.logger.debug(`Raw member data received: \n${rawQuads}`);
22
+ const quads = new Parser().parse(rawQuads);
23
+ this.logger.verbose(`Parsed ${quads.length} quads from received member data`);
24
+ const store = RdfStore.createDefault();
25
+ quads.forEach(q => store.addQuad(q));
26
+ let query;
27
+ const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
28
+ if (memberIRI) {
29
+ this.logger.verbose(`Member IRI found in SDS metadata: ${memberIRI.value}`);
30
+ const sdsQuads = store.getQuads(null, null, null, SDS.terms.custom("DataDescription"));
31
+ sdsQuads.forEach(q => store.removeQuad(q));
32
+ if (this.config.transactionConfig) {
33
+ const transactionId = getObjects(store, null, df.namedNode(this.config.transactionConfig.transactionIdPath), null)[0];
34
+ if (transactionId) {
35
+ store.removeQuad(df.quad(memberIRI, df.namedNode(this.config.transactionConfig.transactionIdPath), transactionId));
36
+ const isLastOfTransaction = getObjects(store, null, df.namedNode(this.config.transactionConfig.transactionEndPath), null)[0];
37
+ if (isLastOfTransaction) {
38
+ this.logger.info(`Last member of ${transactionId.value} received!`);
39
+ this.verifyTransaction(this.transactionMembers.map(ts => ts.store), this.config.transactionConfig.transactionIdPath, transactionId);
40
+ store.removeQuad(df.quad(memberIRI, df.namedNode(this.config.transactionConfig.transactionEndPath), isLastOfTransaction));
41
+ this.transactionMembers.push({
42
+ memberId: memberIRI.value,
43
+ transactionId: transactionId.value,
44
+ store
45
+ });
46
+ }
47
+ else if (this.transactionMembers.length > 0) {
48
+ this.verifyTransaction(this.transactionMembers.map(ts => ts.store), this.config.transactionConfig.transactionIdPath, transactionId);
49
+ this.transactionMembers.push({
50
+ memberId: memberIRI.value,
51
+ transactionId: transactionId.value,
52
+ store
53
+ });
54
+ return;
55
+ }
56
+ else {
57
+ this.logger.info(`New transaction ${transactionId.value} started!`);
58
+ if (this.transactionMembers.length > 0) {
59
+ this.logger.error(`Received new transaction ${transactionId.value}, `
60
+ + `but older transaction ${this.transactionMembers[0].transactionId} hasn't been finalized `);
61
+ throw new Error(`Received new transaction ${transactionId.value}, `
62
+ + `but older transaction ${this.transactionMembers[0].transactionId} hasn't been finalized `);
63
+ }
64
+ this.transactionMembers.push({
65
+ memberId: memberIRI.value,
66
+ transactionId: transactionId.value,
67
+ store
68
+ });
69
+ return;
70
+ }
39
71
  }
40
- else if (transactionMembers.length > 0) {
41
- verifyTransaction(transactionMembers.map(ts => ts.store), config.transactionConfig.transactionIdPath, transactionId);
42
- transactionMembers.push({
43
- memberId: memberIRI.value,
44
- transactionId: transactionId.value,
45
- store
46
- });
47
- return;
72
+ }
73
+ if (this.config.changeSemantics) {
74
+ if (this.transactionMembers.length > 0) {
75
+ query = [this.createTransactionQueries(this.transactionMembers, this.config)];
76
+ this.transactionMembers = [];
48
77
  }
49
78
  else {
50
- logger.info(`New transaction ${transactionId.value} started!`);
51
- if (transactionMembers.length > 0)
52
- throw new Error(`Received new transaction ${transactionId.value}, `
53
- + `but older transaction ${transactionMembers[0].transactionId} hasn't been finalized `);
54
- transactionMembers.push({
55
- memberId: memberIRI.value,
56
- transactionId: transactionId.value,
57
- store
58
- });
59
- return;
79
+ const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
80
+ const ctv = store.getQuads(null, df.namedNode(this.config.changeSemantics.changeTypePath))[0];
81
+ store.removeQuad(ctv);
82
+ sanitizeQuads(store);
83
+ if (ctv.object.value === this.config.changeSemantics.createValue) {
84
+ this.logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
85
+ query = CREATE(store, this.config.forVirtuoso, ng);
86
+ }
87
+ else if (ctv.object.value === this.config.changeSemantics.updateValue) {
88
+ this.logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
89
+ query = UPDATE(store, this.config.forVirtuoso, ng);
90
+ }
91
+ else if (ctv.object.value === this.config.changeSemantics.deleteValue) {
92
+ this.logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
93
+ query = [DELETE(store, [memberIRI.value], this.config.memberShapes, ng)];
94
+ }
95
+ else {
96
+ this.logger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
97
+ throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
98
+ }
60
99
  }
61
100
  }
62
- }
63
- let query;
64
- if (config.changeSemantics) {
65
- if (transactionMembers.length > 0) {
66
- query = [createTransactionQueries(transactionMembers, config)];
67
- transactionMembers = [];
68
- }
69
101
  else {
70
- const ng = getNamedGraphIfAny(memberIRI, config.memberIsGraph, config.targetNamedGraph);
71
- const ctv = store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
72
- store.removeQuad(ctv);
73
- sanitizeQuads(store);
74
- if (ctv.object.value === config.changeSemantics.createValue) {
75
- logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
76
- query = CREATE(store, config.forVirtuoso, ng);
77
- }
78
- else if (ctv.object.value === config.changeSemantics.updateValue) {
79
- logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
80
- query = UPDATE(store, config.forVirtuoso, ng);
81
- }
82
- else if (ctv.object.value === config.changeSemantics.deleteValue) {
83
- logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
84
- query = [DELETE(store, [memberIRI.value], config.memberShapes, ng)];
102
+ if (this.transactionMembers.length > 0) {
103
+ this.transactionMembers.forEach(ts => {
104
+ ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
105
+ });
106
+ this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
107
+ query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
85
108
  }
86
109
  else {
87
- throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
110
+ const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
111
+ this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
112
+ query = UPDATE(store, this.config.forVirtuoso, ng);
88
113
  }
89
114
  }
90
115
  }
91
116
  else {
92
- if (transactionMembers.length > 0) {
93
- transactionMembers.forEach(ts => {
94
- ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
95
- });
96
- logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
97
- query = UPDATE(store, config.forVirtuoso, config.targetNamedGraph);
98
- }
99
- else {
100
- const ng = getNamedGraphIfAny(memberIRI, config.memberIsGraph, config.targetNamedGraph);
101
- logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
102
- query = UPDATE(store, config.forVirtuoso, ng);
103
- }
117
+ this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for received triples (${store.size})`);
118
+ query = UPDATE(store, this.config.forVirtuoso);
104
119
  }
105
120
  if (query && query.length > 0) {
106
- logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
107
- if (config.graphStoreUrl) {
121
+ this.logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
122
+ if (this.config.graphStoreUrl) {
108
123
  try {
109
124
  const t0 = Date.now();
110
- await doSPARQLRequest(query, config);
125
+ await doSPARQLRequest(query, this.config, this.doSPARQLRequestLogger);
111
126
  const reqTime = Date.now() - t0;
112
- if (config.measurePerformance) {
113
- requestsPerformance.push(reqTime);
127
+ if (this.config.measurePerformance) {
128
+ this.requestsPerformance.push(reqTime);
114
129
  }
115
- logger.info(`Executed query on remote SPARQL server ${config.graphStoreUrl} (took ${reqTime} ms)`);
130
+ this.logger.info(`Executed query on remote SPARQL server ${this.config.graphStoreUrl} (took ${reqTime} ms)`);
116
131
  }
117
132
  catch (error) {
118
- if (!config.measurePerformance || config.measurePerformance.failureIsFatal) {
133
+ if (!this.config.measurePerformance || this.config.measurePerformance.failureIsFatal) {
134
+ this.logger.error(`Error executing query on remote SPARQL server ${this.config.graphStoreUrl}: ${error}`);
119
135
  throw error;
120
136
  }
121
137
  else {
122
- if (config.measurePerformance) {
123
- requestsPerformance.push(-1);
138
+ if (this.config.measurePerformance) {
139
+ this.requestsPerformance.push(-1);
124
140
  }
125
141
  }
126
142
  }
127
143
  }
128
- if (sparqlWriter) {
129
- await sparqlWriter.push(query.join("\n"));
144
+ if (this.sparqlWriter) {
145
+ await this.sparqlWriter.string(query.join("\n"));
130
146
  }
131
147
  }
132
148
  else {
133
- logger.warn(`No query generated for member ${memberIRI.value}`);
149
+ this.logger.warn(`No query generated for member ${memberIRI.value}`);
134
150
  }
135
151
  }
136
- else {
137
- throw new Error(`[sparqlIngest] No member IRI found in received RDF data: \n${rawQuads}`);
152
+ if (this.sparqlWriter) {
153
+ this.logger.info("Closing SPARQL writer");
138
154
  }
139
- });
140
- memberStream.on("end", async () => {
141
- if (sparqlWriter) {
142
- await sparqlWriter.end();
155
+ if (this.config.measurePerformance) {
156
+ await writeFile(`${this.config.measurePerformance.outputPath}/${this.config.measurePerformance.name}.json`, JSON.stringify(this.requestsPerformance), "utf-8");
143
157
  }
144
- if (config.measurePerformance) {
145
- await writeFile(`${config.measurePerformance.outputPath}/${config.measurePerformance.name}.json`, JSON.stringify(requestsPerformance), "utf-8");
146
- }
147
- });
148
- }
149
- function verifyTransaction(stores, transactionIdPath, transactionId) {
150
- for (const store of stores) {
151
- const tIds = getObjects(store, null, df.namedNode(transactionIdPath), null);
152
- for (const tid of tIds) {
153
- if (!tid.equals(transactionId)) {
154
- throw new Error(`[sparqlIngest] Received non-matching transaction ID ${transactionId.value} `
155
- + `with previous transaction: ${tid.value}`);
158
+ }
159
+ async produce() {
160
+ }
161
+ verifyTransaction(stores, transactionIdPath, transactionId) {
162
+ for (const store of stores) {
163
+ const tIds = getObjects(store, null, df.namedNode(transactionIdPath), null);
164
+ for (const tid of tIds) {
165
+ if (!tid.equals(transactionId)) {
166
+ this.logger.error(`[sparqlIngest] Received non-matching transaction ID ${transactionId.value} `
167
+ + `with previous transaction: ${tid.value}`);
168
+ throw new Error(`[sparqlIngest] Received non-matching transaction ID ${transactionId.value} `
169
+ + `with previous transaction: ${tid.value}`);
170
+ }
156
171
  }
157
172
  }
158
173
  }
159
- }
160
- function getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
161
- let ng;
162
- if (memberIsGraph) {
163
- ng = memberIRI.value;
164
- }
165
- else if (targetNamedGraph) {
166
- ng = targetNamedGraph;
174
+ getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
175
+ let ng;
176
+ if (memberIsGraph) {
177
+ ng = memberIRI.value;
178
+ }
179
+ else if (targetNamedGraph) {
180
+ ng = targetNamedGraph;
181
+ }
182
+ return ng;
167
183
  }
168
- return ng;
169
- }
170
- function createTransactionQueries(transactionMembers, config) {
171
- const logger = getLoggerFor("createTransactionQueries");
172
- logger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
173
- + ` members of transaction ${transactionMembers[0].transactionId}`);
174
- const createStore = RdfStore.createDefault();
175
- const updateStore = RdfStore.createDefault();
176
- const deleteStore = RdfStore.createDefault();
177
- const deleteMembers = [];
178
- const transactionQueryBuilder = [];
179
- for (const tsm of transactionMembers) {
180
- const ctv = tsm.store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
181
- tsm.store.removeQuad(ctv);
182
- if (ctv.object.value === config.changeSemantics.createValue) {
183
- tsm.store.getQuads(null, null, null, null).forEach(q => createStore.addQuad(q));
184
+ createTransactionQueries(transactionMembers, config) {
185
+ this.createTransactionQueriesLogger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
186
+ + ` members of transaction ${transactionMembers[0].transactionId}`);
187
+ const createStore = RdfStore.createDefault();
188
+ const updateStore = RdfStore.createDefault();
189
+ const deleteStore = RdfStore.createDefault();
190
+ const deleteMembers = [];
191
+ const transactionQueryBuilder = [];
192
+ for (const tsm of transactionMembers) {
193
+ const ctv = tsm.store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
194
+ tsm.store.removeQuad(ctv);
195
+ if (ctv.object.value === config.changeSemantics.createValue) {
196
+ tsm.store.getQuads(null, null, null, null).forEach(q => createStore.addQuad(q));
197
+ }
198
+ else if (ctv.object.value === config.changeSemantics.updateValue) {
199
+ tsm.store.getQuads(null, null, null, null).forEach(q => updateStore.addQuad(q));
200
+ }
201
+ else if (ctv.object.value === config.changeSemantics.deleteValue) {
202
+ tsm.store.getQuads(null, null, null, null).forEach(q => deleteStore.addQuad(q));
203
+ deleteMembers.push(tsm.memberId);
204
+ }
205
+ else {
206
+ this.createTransactionQueriesLogger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
207
+ throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
208
+ }
184
209
  }
185
- else if (ctv.object.value === config.changeSemantics.updateValue) {
186
- tsm.store.getQuads(null, null, null, null).forEach(q => updateStore.addQuad(q));
210
+ if (createStore.size > 0) {
211
+ transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
187
212
  }
188
- else if (ctv.object.value === config.changeSemantics.deleteValue) {
189
- tsm.store.getQuads(null, null, null, null).forEach(q => deleteStore.addQuad(q));
190
- deleteMembers.push(tsm.memberId);
213
+ if (updateStore.size > 0) {
214
+ transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
191
215
  }
192
- else {
193
- throw new Error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
216
+ if (updateStore.size > 0) {
217
+ transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
194
218
  }
219
+ return transactionQueryBuilder.join(";\n");
195
220
  }
196
- if (createStore.size > 0) {
197
- transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
198
- }
199
- if (updateStore.size > 0) {
200
- transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
201
- }
202
- if (updateStore.size > 0) {
203
- transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
204
- }
205
- return transactionQueryBuilder.join(";\n");
206
221
  }
package/lib/Utils.d.ts CHANGED
@@ -1,8 +1,9 @@
1
1
  import { RdfStore } from "rdf-stores";
2
2
  import type { Term, Quad_Subject, Quad_Object } from "@rdfjs/types";
3
3
  import type { IngestConfig } from "./SPARQLIngest.js";
4
+ import { Logger } from "winston";
4
5
  export declare function getSubjects(store: RdfStore, predicate: Term | null, object: Term | null, graph?: Term | null): Quad_Subject[];
5
6
  export declare function getObjects(store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null): Quad_Object[];
6
7
  export declare function splitStore(store: RdfStore, threshold: number): RdfStore[];
7
8
  export declare function sanitizeQuads(store: RdfStore): void;
8
- export declare function doSPARQLRequest(query: string[], config: IngestConfig): Promise<void>;
9
+ export declare function doSPARQLRequest(query: string[], config: IngestConfig, logger: Logger): Promise<void>;
package/lib/Utils.js CHANGED
@@ -1,7 +1,6 @@
1
1
  import { XSD } from "@treecg/types";
2
2
  import { DataFactory } from "rdf-data-factory";
3
3
  import { RdfStore } from "rdf-stores";
4
- import { getLoggerFor } from "./LogUtil.js";
5
4
  import { Agent } from "undici";
6
5
  const df = new DataFactory();
7
6
  export function getSubjects(store, predicate, object, graph) {
@@ -63,8 +62,7 @@ export function sanitizeQuads(store) {
63
62
  }
64
63
  }
65
64
  }
66
- export async function doSPARQLRequest(query, config) {
67
- const logger = getLoggerFor("doSPARQLRequest");
65
+ export async function doSPARQLRequest(query, config, logger) {
68
66
  try {
69
67
  let queries = [];
70
68
  const jointQuery = query.join("\n");