@rdfc/sparql-ingest-processor-ts 2.0.3 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,10 +2,10 @@
2
2
 
3
3
  [![Build and tests with Node.js](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml/badge.svg)](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml) [![npm](https://img.shields.io/npm/v/@rdfc/sparql-ingest-processor-ts.svg?style=popout)](https://npmjs.com/package/@rdfc/sparql-ingest-processor-ts)
4
4
 
5
- TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) into a SPARQL endpoint.
5
+ TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) or, more generally, any stream of RDF quads into a SPARQL endpoint.
6
6
 
7
- This processor takes a stream of RDF records, transforms them into [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
8
- It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content.
7
+ This processor takes a stream of RDF records, transforms them into corresponding [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
8
+ It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content. It also supports direct quad ingestion via the SPARQL Graph Store Protocol.
9
9
 
10
10
  ---
11
11
 
@@ -46,8 +46,7 @@ npm install @rdfc/sparql-ingest-processor-ts
46
46
  <ingester> a rdfc:SPARQLIngest;
47
47
  rdfc:memberStream <in>;
48
48
  rdfc:ingestConfig [
49
- rdfc:memberIsGraph false;
50
- rdfc:memberShape "http://ex.org/Shape1", "http://ex.org/Shape2";
49
+ rdfc:memberShape "http://ex.org/Shape";
51
50
  rdfc:changeSemantics [
52
51
  rdfc:changeTypePath "http://ex.org/changeType";
53
52
  rdfc:createValue "http://ex.org/Create";
@@ -60,7 +59,14 @@ npm install @rdfc/sparql-ingest-processor-ts
60
59
  rdfc:transactionEndPath "http://ex.org/transactionEnd"
61
60
  ];
62
61
  rdfc:graphStoreUrl "http://example.org/sparql";
63
- rdfc:forVirtuoso false
62
+ rdfc:forVirtuoso false;
63
+ rdfc:accessToken "myAccessToken";
64
+ rdfc:measurePerformance [
65
+ rdfc:name "myPerformanceMeasurement";
66
+ rdfc:outputPath "/path/to/output.json";
67
+ rdfc:failureIsFatal true;
68
+ rdfc:queryTimeout 30000
69
+ ]
64
70
  ];
65
71
  rdfc:sparqlWriter <out>.
66
72
  ```
@@ -77,13 +83,12 @@ npm install @rdfc/sparql-ingest-processor-ts
77
83
  ---
78
84
 
79
85
  ### Parameters of `rdfc:IngestConfig`:
80
- - `rdfc:memberIsGraph` (**boolean**, required): Whether each SDS record represents a named graph.
81
- - `rdfc:memberShape` (**string**, optional, repeatable): SHACL shape identifiers used to guide query construction when payloads are incomplete.
86
+ - `rdfc:memberShape` (**string**, optional): SHACL shape used to guide query construction when payloads are incomplete.
82
87
  - `rdfc:changeSemantics` (**rdfc:ChangeSemantics**, optional): Configures mapping between change types (create/update/delete) and SPARQL operations.
83
- - `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph (ignored if `memberIsGraph = true`).
88
+ - `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph.
84
89
  - `rdfc:transactionConfig` (**rdfc:TransactionConfig**, optional): Groups records by transaction ID for atomic updates.
85
90
  - `rdfc:graphStoreUrl` (**string**, optional): SPARQL Graph Store endpoint URL.
86
- - `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling.
91
+ - `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling to avoid query size limits.
87
92
  - `rdfc:accessToken` (**string**, optional): Access token for authenticated graph stores.
88
93
  - `rdfc:measurePerformance` (**rdfc:PerformanceConfig**, optional): Enables performance measurement of SPARQL queries.
89
94
 
@@ -117,7 +122,6 @@ npm install @rdfc/sparql-ingest-processor-ts
117
122
  <ingester> a rdfc:SPARQLIngest;
118
123
  rdfc:memberStream <in>;
119
124
  rdfc:ingestConfig [
120
- rdfc:memberIsGraph true;
121
125
  rdfc:targetNamedGraph "http://example.org/targetGraph";
122
126
  rdfc:graphStoreUrl "http://example.org/sparql"
123
127
  ];
@@ -128,8 +132,7 @@ npm install @rdfc/sparql-ingest-processor-ts
128
132
 
129
133
  ## Notes
130
134
 
131
- - Delete operations can be handled differently depending on how complete the SDS record payload is.
132
- - When `memberIsGraph = true`, queries are wrapped with `GRAPH` and `WITH` clauses.
133
- - Transactions can buffer multiple SDS records and commit them together using `rdfc:transactionConfig`.
134
- - SHACL shapes (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
135
+ - Delete operations can be handled differently depending on how complete the input record is.
136
+ - Transactions can buffer multiple input records and commit them together using `rdfc:transactionConfig`.
137
+ - A SHACL shape (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
135
138
 
@@ -26,8 +26,7 @@ export declare enum OperationMode {
26
26
  export type IngestConfig = {
27
27
  operationMode?: OperationMode;
28
28
  memberBatchSize?: number;
29
- memberIsGraph?: boolean;
30
- memberShapes?: string[];
29
+ memberShape?: string;
31
30
  changeSemantics?: ChangeSemantics;
32
31
  targetNamedGraph?: string;
33
32
  transactionConfig?: TransactionConfig;
@@ -57,7 +56,6 @@ export declare class SPARQLIngest extends Processor<SPARQLIngestArgs> {
57
56
  transform(this: SPARQLIngestArgs & this): Promise<void>;
58
57
  produce(this: SPARQLIngestArgs & this): Promise<void>;
59
58
  verifyTransaction(stores: RdfStore[], transactionIdPath: string, transactionId: Term): void;
60
- getNamedGraphIfAny(memberIRI: Term, memberIsGraph: boolean | undefined, targetNamedGraph?: string): string | undefined;
61
59
  createTransactionQueries(transactionMembers: TransactionMember[], config: IngestConfig): string;
62
60
  }
63
61
  export {};
@@ -34,11 +34,17 @@ export class SPARQLIngest extends Processor {
34
34
  }
35
35
  async transform() {
36
36
  for await (const rawQuads of this.memberStream.strings()) {
37
- this.logger.debug(`Raw member data received: \n${rawQuads}`);
38
37
  const quads = new Parser().parse(rawQuads);
39
38
  this.logger.verbose(`Parsed ${quads.length} quads from received member data`);
40
39
  const store = RdfStore.createDefault();
41
- quads.forEach(q => store.addQuad(q));
40
+ quads.forEach(q => {
41
+ if (q.graph.equals(df.defaultGraph()) && this.config.targetNamedGraph) {
42
+ store.addQuad(df.quad(q.subject, q.predicate, q.object, df.namedNode(this.config.targetNamedGraph)));
43
+ }
44
+ else {
45
+ store.addQuad(q);
46
+ }
47
+ });
42
48
  let query;
43
49
  const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
44
50
  if (memberIRI) {
@@ -92,21 +98,19 @@ export class SPARQLIngest extends Processor {
92
98
  this.transactionMembers = [];
93
99
  }
94
100
  else {
95
- const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
96
101
  const ctv = store.getQuads(null, df.namedNode(this.config.changeSemantics.changeTypePath))[0];
97
- store.removeQuad(ctv);
98
102
  sanitizeQuads(store);
99
103
  if (ctv.object.value === this.config.changeSemantics.createValue) {
100
104
  this.logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
101
- query = CREATE(store, this.config.forVirtuoso, ng);
105
+ query = CREATE(store, this.config.forVirtuoso);
102
106
  }
103
107
  else if (ctv.object.value === this.config.changeSemantics.updateValue) {
104
108
  this.logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
105
- query = UPDATE(store, this.config.forVirtuoso, ng);
109
+ query = UPDATE(store, this.config.forVirtuoso);
106
110
  }
107
111
  else if (ctv.object.value === this.config.changeSemantics.deleteValue) {
108
112
  this.logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
109
- query = [DELETE(store, [memberIRI.value], this.config.memberShapes, ng)];
113
+ query = DELETE(store, memberIRI.value, this.config.memberShape);
110
114
  }
111
115
  else {
112
116
  this.logger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
@@ -117,42 +121,45 @@ export class SPARQLIngest extends Processor {
117
121
  else {
118
122
  if (this.transactionMembers.length > 0) {
119
123
  this.transactionMembers.forEach(ts => {
120
- ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
124
+ ts.store.getQuads().forEach(q => store.addQuad(q));
121
125
  });
122
126
  this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
123
- query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
127
+ query = UPDATE(store, this.config.forVirtuoso);
124
128
  }
125
129
  else {
126
130
  if (this.config.operationMode === OperationMode.REPLICATION) {
127
- this.memberBatch.push(...store.getQuads(null, null, null, null));
131
+ this.memberBatch.push(...store.getQuads().map(q => {
132
+ if (q.graph.equals(df.defaultGraph()) && this.config.targetNamedGraph) {
133
+ q.graph = df.namedNode(this.config.targetNamedGraph);
134
+ }
135
+ return q;
136
+ }));
128
137
  this.batchCount++;
129
138
  if (this.batchCount < this.config.memberBatchSize) {
130
139
  continue;
131
140
  }
132
141
  }
133
142
  else {
134
- const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
135
- this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
136
- query = UPDATE(store, this.config.forVirtuoso, ng);
143
+ this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for member ${memberIRI.value}`);
144
+ query = UPDATE(store, this.config.forVirtuoso);
137
145
  }
138
146
  }
139
147
  }
140
148
  }
141
149
  else {
142
150
  if (this.config.operationMode === OperationMode.REPLICATION) {
143
- this.memberBatch.push(...store.getQuads(null, null, null, null));
151
+ this.memberBatch.push(...store.getQuads());
144
152
  this.batchCount++;
145
153
  if (this.batchCount < this.config.memberBatchSize) {
146
154
  continue;
147
155
  }
148
156
  }
149
157
  else {
150
- this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for received triples (${store.size})`);
151
- query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
158
+ this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for received quads (${store.size})`);
159
+ query = UPDATE(store, this.config.forVirtuoso);
152
160
  }
153
161
  }
154
162
  if (query && query.length > 0) {
155
- this.logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
156
163
  if (this.config.graphStoreUrl) {
157
164
  try {
158
165
  const t0 = Date.now();
@@ -176,7 +183,14 @@ export class SPARQLIngest extends Processor {
176
183
  }
177
184
  }
178
185
  if (this.sparqlWriter) {
179
- await this.sparqlWriter.string(query.join("\n"));
186
+ if (this.config.forVirtuoso) {
187
+ for (const q of query) {
188
+ await this.sparqlWriter.string(q);
189
+ }
190
+ }
191
+ else {
192
+ await this.sparqlWriter.string(query.join("\n"));
193
+ }
180
194
  }
181
195
  }
182
196
  else {
@@ -256,16 +270,6 @@ export class SPARQLIngest extends Processor {
256
270
  }
257
271
  }
258
272
  }
259
- getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
260
- let ng;
261
- if (memberIsGraph) {
262
- ng = memberIRI.value;
263
- }
264
- else if (targetNamedGraph) {
265
- ng = targetNamedGraph;
266
- }
267
- return ng;
268
- }
269
273
  createTransactionQueries(transactionMembers, config) {
270
274
  this.createTransactionQueriesLogger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
271
275
  + ` members of transaction ${transactionMembers[0].transactionId}`);
@@ -276,15 +280,14 @@ export class SPARQLIngest extends Processor {
276
280
  const transactionQueryBuilder = [];
277
281
  for (const tsm of transactionMembers) {
278
282
  const ctv = tsm.store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
279
- tsm.store.removeQuad(ctv);
280
283
  if (ctv.object.value === config.changeSemantics.createValue) {
281
- tsm.store.getQuads(null, null, null, null).forEach(q => createStore.addQuad(q));
284
+ tsm.store.getQuads().forEach(q => createStore.addQuad(q));
282
285
  }
283
286
  else if (ctv.object.value === config.changeSemantics.updateValue) {
284
- tsm.store.getQuads(null, null, null, null).forEach(q => updateStore.addQuad(q));
287
+ tsm.store.getQuads().forEach(q => updateStore.addQuad(q));
285
288
  }
286
289
  else if (ctv.object.value === config.changeSemantics.deleteValue) {
287
- tsm.store.getQuads(null, null, null, null).forEach(q => deleteStore.addQuad(q));
290
+ tsm.store.getQuads().forEach(q => deleteStore.addQuad(q));
288
291
  deleteMembers.push(tsm.memberId);
289
292
  }
290
293
  else {
@@ -293,14 +296,16 @@ export class SPARQLIngest extends Processor {
293
296
  }
294
297
  }
295
298
  if (createStore.size > 0) {
296
- transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
299
+ transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso).join("\n"));
297
300
  }
298
301
  if (updateStore.size > 0) {
299
- transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
302
+ transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso).join("\n"));
300
303
  }
301
304
  if (deleteStore.size > 0) {
302
- transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
305
+ deleteMembers.forEach(dm => {
306
+ transactionQueryBuilder.push(DELETE(deleteStore, dm, config.memberShape).join("\n"));
307
+ });
303
308
  }
304
- return transactionQueryBuilder.join(";\n");
309
+ return transactionQueryBuilder.join("\n");
305
310
  }
306
311
  }
@@ -1,4 +1,4 @@
1
1
  import { RdfStore } from "rdf-stores";
2
- export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean, namedGraph?: string, multipleNamedGraphs?: boolean) => string[];
3
- export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean, namedGraph?: string, multipleNamedGraphs?: boolean) => string[];
4
- export declare const DELETE: (store: RdfStore, memberIRIs: string[], memberShapes?: string[], namedGraph?: string, multipleNamedGraphs?: boolean) => string;
2
+ export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
3
+ export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
4
+ export declare const DELETE: (store: RdfStore, memberIRI: string, memberShape?: string) => string[];
@@ -2,73 +2,81 @@ import { RDF, SHACL } from "@treecg/types";
2
2
  import { Writer as N3Writer, Parser } from "n3";
3
3
  import { RdfStore } from "rdf-stores";
4
4
  import { DataFactory } from "rdf-data-factory";
5
- import { getObjects, getSubjects, splitStore } from "./Utils.js";
5
+ import { getObjects, getSubjects, splitStoreOnSize, splitStorePerNamedGraph } from "./Utils.js";
6
6
  const df = new DataFactory();
7
- export const CREATE = (store, forVirtuoso, namedGraph, multipleNamedGraphs) => {
8
- const stores = splitStore(store, forVirtuoso ? 500 : 50000);
9
- return stores.map((subStore, i) => {
10
- return `
11
- INSERT DATA {
12
- ${namedGraph ? `GRAPH <${namedGraph}> {` : ""}
13
- ${new N3Writer().quadsToString(subStore.getQuads())}
14
- ${namedGraph ? `}` : ""}
15
- }
16
- ${i === stores.length - 1 ? "" : ";"}
17
- `;
18
- });
7
+ export const CREATE = (store, forVirtuoso) => {
8
+ const queries = [];
9
+ const storesPerGraph = splitStorePerNamedGraph(store);
10
+ for (const { graph, store } of storesPerGraph) {
11
+ const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
12
+ subStores.forEach((s, i) => {
13
+ queries.push(`
14
+ INSERT DATA {
15
+ ${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
16
+ ${new N3Writer().quadsToString(s.getQuads().map(q => {
17
+ return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
18
+ }))}
19
+ ${graph.equals(df.defaultGraph()) ? "" : `}`}
20
+ };
21
+ `);
22
+ });
23
+ }
24
+ return queries;
19
25
  };
20
- export const UPDATE = (store, forVirtuoso, namedGraph, multipleNamedGraphs) => {
21
- const formattedQuery = formatQuery(store);
22
- const stores = splitStore(store, forVirtuoso ? 500 : 50000);
23
- const queries = [
24
- `
25
- ${namedGraph ? `WITH <${namedGraph}>` : ""}
26
+ export const UPDATE = (store, forVirtuoso) => {
27
+ const queries = [];
28
+ const storesPerGraph = splitStorePerNamedGraph(store);
29
+ for (const { graph, store } of storesPerGraph) {
30
+ const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
31
+ const formattedQuery = formatQuery(store);
32
+ const deleteInsertQuery = [`
33
+ ${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
26
34
  DELETE {
27
35
  ${formattedQuery[0]}
28
36
  }
29
37
  WHERE {
30
38
  ${formattedQuery[0]}
31
39
  };
32
- `
33
- ];
34
- stores.forEach((subStore, i) => {
35
- queries.push(`
36
- INSERT DATA {
37
- ${namedGraph ? `GRAPH <${namedGraph}> {` : ""}
38
- ${new N3Writer().quadsToString(subStore.getQuads())}
39
- ${namedGraph ? `}` : ""}
40
- }
41
- ${i === stores.length - 1 ? "" : ";"}
42
- `);
43
- });
40
+ `];
41
+ subStores.forEach((s, i) => {
42
+ deleteInsertQuery.push(`
43
+ INSERT DATA {
44
+ ${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
45
+ ${new N3Writer().quadsToString(s.getQuads().map(q => {
46
+ return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
47
+ }))}
48
+ ${graph.equals(df.defaultGraph()) ? "" : `}`}
49
+ };
50
+ `);
51
+ });
52
+ queries.push(...deleteInsertQuery);
53
+ }
44
54
  return queries;
45
55
  };
46
- export const DELETE = (store, memberIRIs, memberShapes, namedGraph, multipleNamedGraphs) => {
47
- const deleteBuilder = [];
48
- const whereBuilder = [];
49
- let indexStart = 0;
50
- for (const memberIRI of memberIRIs) {
51
- const formatted = formatQuery(store, memberIRI, memberShapes, indexStart);
52
- deleteBuilder.push(formatted.length > 1 ? formatted[1] : formatted[0]);
53
- whereBuilder.push(formatted[0]);
54
- indexStart++;
56
+ export const DELETE = (store, memberIRI, memberShape) => {
57
+ const queries = [];
58
+ const storesPerGraph = splitStorePerNamedGraph(store);
59
+ for (const { graph, store } of storesPerGraph) {
60
+ const formatted = formatQuery(store, memberIRI, memberShape);
61
+ const deleteBuilder = formatted.length > 1 ? formatted[1] : formatted[0];
62
+ const whereBuilder = formatted[0];
63
+ queries.push(`
64
+ ${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
65
+ DELETE {
66
+ ${deleteBuilder}
67
+ } WHERE {
68
+ ${whereBuilder}
69
+ };
70
+ `);
55
71
  }
56
- return `
57
- ${namedGraph ? `WITH <${namedGraph}>` : ""}
58
- DELETE {
59
- ${deleteBuilder.join("\n")}
60
- } WHERE {
61
- ${whereBuilder.join("\n")}
62
- }
63
- `;
72
+ return queries;
64
73
  };
65
- function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
74
+ function formatQuery(memberStore, memberIRI, memberShape, indexStart = 0) {
66
75
  const subjectSet = new Set();
67
76
  const blankNodeMap = new Map();
68
77
  const queryBuilder = [];
69
- const formattedQueries = [];
70
78
  let i = indexStart;
71
- if (!memberShapes || memberShapes.length === 0) {
79
+ if (!memberShape) {
72
80
  for (const quad of memberStore.getQuads()) {
73
81
  if (!subjectSet.has(quad.subject.value)) {
74
82
  subjectSet.add(quad.subject.value);
@@ -82,7 +90,8 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
82
90
  if (quad.object.termType === "BlankNode") {
83
91
  blankNodeMap.set(quad.object.value, `?bn_ref_${i}`);
84
92
  }
85
- queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal" ? `"${quad.object.value}"^^<${quad.object.datatype.value}>`
93
+ queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal"
94
+ ? `"${quad.object.value}"^^<${quad.object.datatype.value}>`
86
95
  : quad.object.termType === "BlankNode" ? `${blankNodeMap.get(quad.object.value)} `
87
96
  : `<${quad.object.value}>`}.`);
88
97
  queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} ?p_${i} ?o_${i}.`);
@@ -91,51 +100,28 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
91
100
  i++;
92
101
  }
93
102
  }
94
- formattedQueries.push(queryBuilder.join("\n"));
103
+ return [queryBuilder.join("\n")];
95
104
  }
96
105
  else {
97
- const shapeIndex = new Map();
98
- memberShapes.forEach(msh => {
99
- const shapeStore = RdfStore.createDefault();
100
- new Parser().parse(msh).forEach(quad => shapeStore.addQuad(quad));
101
- shapeIndex.set(extractMainTargetClass(shapeStore).value, shapeStore);
102
- });
106
+ const shapeStore = RdfStore.createDefault();
107
+ new Parser().parse(memberShape).forEach(quad => shapeStore.addQuad(quad));
103
108
  queryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
104
- const memberType = getObjects(memberStore, df.namedNode(memberIRI), RDF.terms.type)[0];
105
- if (memberType) {
106
- i++;
107
- const mshStore = shapeIndex.get(memberType.value);
108
- const propShapes = getObjects(mshStore, null, SHACL.terms.property, null);
109
- for (const propSh of propShapes) {
110
- const pred = getObjects(mshStore, propSh, SHACL.terms.path, null)[0];
111
- queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
112
- queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
113
- i++;
114
- }
115
- formattedQueries.push(queryBuilder.join("\n"));
116
- }
117
- else {
118
- const deleteQueryBuilder = [];
119
- deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
109
+ const deleteQueryBuilder = [];
110
+ deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
111
+ i++;
112
+ const propShapes = getObjects(shapeStore, null, SHACL.terms.property, null);
113
+ queryBuilder.push(" OPTIONAL { ");
114
+ for (const propSh of propShapes) {
115
+ const pred = getObjects(shapeStore, propSh, SHACL.terms.path, null)[0];
116
+ queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
117
+ deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
118
+ queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
119
+ deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
120
120
  i++;
121
- shapeIndex.forEach(mshStore => {
122
- const propShapes = getObjects(mshStore, null, SHACL.terms.property, null);
123
- queryBuilder.push(" OPTIONAL { ");
124
- for (const propSh of propShapes) {
125
- const pred = getObjects(mshStore, propSh, SHACL.terms.path, null)[0];
126
- queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
127
- deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
128
- queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
129
- deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
130
- i++;
131
- }
132
- queryBuilder.push(" }");
133
- });
134
- formattedQueries.push(queryBuilder.join("\n"));
135
- formattedQueries.push(deleteQueryBuilder.join("\n"));
136
121
  }
122
+ queryBuilder.push(" }");
123
+ return [queryBuilder.join("\n"), deleteQueryBuilder.join("\n")];
137
124
  }
138
- return formattedQueries;
139
125
  }
140
126
  function extractMainTargetClass(store) {
141
127
  const nodeShapes = getSubjects(store, RDF.terms.type, SHACL.terms.NodeShape, null);
package/lib/Utils.d.ts CHANGED
@@ -1,9 +1,13 @@
1
1
  import { RdfStore } from "rdf-stores";
2
- import type { Term, Quad_Subject, Quad_Object, Quad } from "@rdfjs/types";
2
+ import type { Term, Quad_Subject, Quad_Object, Quad, Quad_Graph } from "@rdfjs/types";
3
3
  import type { IngestConfig } from "./SPARQLIngest.js";
4
4
  import { Logger } from "winston";
5
5
  export declare function getSubjects(store: RdfStore, predicate: Term | null, object: Term | null, graph?: Term | null): Quad_Subject[];
6
6
  export declare function getObjects(store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null): Quad_Object[];
7
- export declare function splitStore(store: RdfStore, threshold: number): RdfStore[];
7
+ export declare function splitStorePerNamedGraph(store: RdfStore): {
8
+ graph: Quad_Graph;
9
+ store: RdfStore;
10
+ }[];
11
+ export declare function splitStoreOnSize(store: RdfStore, threshold: number): RdfStore[];
8
12
  export declare function sanitizeQuads(store: RdfStore): void;
9
13
  export declare function doSPARQLRequest(query: string[] | Quad[], config: IngestConfig, logger: Logger): Promise<void>;
package/lib/Utils.js CHANGED
@@ -14,7 +14,20 @@ export function getObjects(store, subject, predicate, graph) {
14
14
  return quad.object;
15
15
  });
16
16
  }
17
- export function splitStore(store, threshold) {
17
+ export function splitStorePerNamedGraph(store) {
18
+ const stores = [];
19
+ const namedGraphs = new Set();
20
+ store.getQuads(null, null, null, null)
21
+ .forEach(q => namedGraphs.add(q.graph));
22
+ namedGraphs.forEach(ng => {
23
+ const subStore = RdfStore.createDefault();
24
+ const quads = store.getQuads(null, null, null, ng);
25
+ quads.forEach(q => subStore.addQuad(q));
26
+ stores.push({ graph: ng, store: subStore });
27
+ });
28
+ return stores;
29
+ }
30
+ export function splitStoreOnSize(store, threshold) {
18
31
  const stores = [];
19
32
  if (store.size < threshold) {
20
33
  stores.push(store);
@@ -92,7 +105,7 @@ export async function doSPARQLRequest(query, config, logger) {
92
105
  return;
93
106
  }
94
107
  let queries = [];
95
- const jointQuery = query.join("\n");
108
+ const jointQuery = query.join(";\n");
96
109
  if (config.forVirtuoso && Buffer.byteLength(jointQuery, 'utf8') > 1e6) {
97
110
  queries = query;
98
111
  }
@@ -100,7 +113,6 @@ export async function doSPARQLRequest(query, config, logger) {
100
113
  queries.push(jointQuery);
101
114
  }
102
115
  for (const q of queries) {
103
- logger.debug(`Executing SPARQL query: \n${q}`);
104
116
  const res = await fetch(config.graphStoreUrl, {
105
117
  method: "POST",
106
118
  headers: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rdfc/sparql-ingest-processor-ts",
3
- "version": "2.0.3",
3
+ "version": "2.1.0",
4
4
  "description": "SPARQL Update function to be within RDF-Connect pipelines",
5
5
  "author": "Julián Rojas",
6
6
  "contributors": [
@@ -31,7 +31,7 @@
31
31
  "n3": "^2.0.1",
32
32
  "rdf-data-factory": "^2.0.2",
33
33
  "rdf-stores": "^2.1.1",
34
- "undici": "^7.20.0",
34
+ "undici": "^7.21.0",
35
35
  "winston": "^3.19.0"
36
36
  },
37
37
  "devDependencies": {
@@ -50,7 +50,7 @@
50
50
  "ts-patch": "^3.3.0",
51
51
  "tsc-alias": "^1.8.16",
52
52
  "typescript": "^5.9.3",
53
- "vite-tsconfig-paths": "^6.0.5",
53
+ "vite-tsconfig-paths": "^6.1.0",
54
54
  "vitest": "^4.0.18"
55
55
  }
56
56
  }
package/processors.ttl CHANGED
@@ -43,16 +43,11 @@ rdfc:SPARQLIngest rdfc:jsImplementationOf rdfc:Processor;
43
43
  sh:datatype xsd:integer;
44
44
  sh:name "memberBatchSize";
45
45
  sh:maxCount 1;
46
- ], [
47
- sh:path rdfc:memberIsGraph;
48
- sh:datatype xsd:boolean;
49
- sh:name "memberIsGraph";
50
- sh:maxCount 1;
51
46
  ], [
52
47
  sh:path rdfc:memberShape;
53
48
  sh:datatype xsd:string;
54
- sh:name "memberShapes";
55
- sh:minCount 0;
49
+ sh:name "memberShape";
50
+ sh:maxCount 1;
56
51
  ], [
57
52
  sh:path rdfc:changeSemantics;
58
53
  sh:class rdfc:ChangeSemantics;
package/lib/LogUtil.d.ts DELETED
@@ -1,9 +0,0 @@
1
- import { Logger } from "winston";
2
- export declare function getLoggerFor(loggable: string | Instance): Logger;
3
- interface Constructor {
4
- name: string;
5
- }
6
- interface Instance {
7
- constructor: Constructor;
8
- }
9
- export {};
package/lib/LogUtil.js DELETED
@@ -1,54 +0,0 @@
1
- import winston, { format } from "winston";
2
- const PROCESSOR_NAME = "sparql-ingest";
3
- const consoleTransport = new winston.transports.Console({
4
- stderrLevels: [
5
- "error",
6
- "warn",
7
- "info",
8
- "http",
9
- "verbose",
10
- "debug",
11
- "silly"
12
- ]
13
- });
14
- if (typeof process !== "undefined") {
15
- consoleTransport.level =
16
- process.env.LOG_LEVEL ||
17
- (process.env.DEBUG?.includes(PROCESSOR_NAME) ||
18
- process.env.DEBUG === "*"
19
- ? "debug"
20
- : "info");
21
- }
22
- const classLoggers = new WeakMap();
23
- const stringLoggers = new Map();
24
- export function getLoggerFor(loggable) {
25
- let logger;
26
- if (typeof loggable === "string") {
27
- if (stringLoggers.has(loggable)) {
28
- logger = stringLoggers.get(loggable);
29
- }
30
- else {
31
- logger = createLogger(loggable);
32
- stringLoggers.set(loggable, logger);
33
- }
34
- }
35
- else {
36
- const { constructor } = loggable;
37
- if (classLoggers.has(constructor)) {
38
- logger = classLoggers.get(constructor);
39
- }
40
- else {
41
- logger = createLogger(constructor.name);
42
- classLoggers.set(constructor, logger);
43
- }
44
- }
45
- return logger;
46
- }
47
- function createLogger(label) {
48
- return winston.createLogger({
49
- format: format.combine(format.label({ label }), format.colorize(), format.timestamp(), format.metadata({
50
- fillExcept: ["level", "timestamp", "label", "message"],
51
- }), format.printf(({ level: levelInner, message, label: labelInner, timestamp, }) => `${timestamp} {${PROCESSOR_NAME}} [${labelInner}] ${levelInner}: ${message}`)),
52
- transports: [consoleTransport],
53
- });
54
- }