@rdfc/sparql-ingest-processor-ts 2.0.3 → 2.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,10 +2,10 @@
2
2
 
3
3
  [![Build and tests with Node.js](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml/badge.svg)](https://github.com/rdf-connect/sparql-ingest-processor-ts/actions/workflows/build-test.yml) [![npm](https://img.shields.io/npm/v/@rdfc/sparql-ingest-processor-ts.svg?style=popout)](https://npmjs.com/package/@rdfc/sparql-ingest-processor-ts)
4
4
 
5
- TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) into a SPARQL endpoint.
5
+ TypeScript [RDF-Connect](https://rdf-connect.github.io/rdfc.github.io/) processor for ingesting [SDS records](https://treecg.github.io/SmartDataStreams-Spec/) or, more generally, a stream of RDF quads into a SPARQL endpoint.
6
6
 
7
- This processor takes a stream of RDF records, transforms them into [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
8
- It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content.
7
+ This processor takes a stream of RDF records, transforms them into corresponding [SPARQL Update](https://www.w3.org/TR/sparql11-update/) queries, and executes them against a SPARQL Graph Store via the [SPARQL Protocol](https://www.w3.org/TR/sparql11-protocol/).
8
+ It supports `INSERT DATA`, `DELETE INSERT WHERE`, and `DELETE WHERE` queries, configurable through change semantics or SDS record content. It also supports direct quad ingestion via the SPARQL Graph Store Protocol.
9
9
 
10
10
  ---
11
11
 
@@ -46,8 +46,7 @@ npm install @rdfc/sparql-ingest-processor-ts
46
46
  <ingester> a rdfc:SPARQLIngest;
47
47
  rdfc:memberStream <in>;
48
48
  rdfc:ingestConfig [
49
- rdfc:memberIsGraph false;
50
- rdfc:memberShape "http://ex.org/Shape1", "http://ex.org/Shape2";
49
+ rdfc:memberShape "http://ex.org/Shape";
51
50
  rdfc:changeSemantics [
52
51
  rdfc:changeTypePath "http://ex.org/changeType";
53
52
  rdfc:createValue "http://ex.org/Create";
@@ -60,7 +59,14 @@ npm install @rdfc/sparql-ingest-processor-ts
60
59
  rdfc:transactionEndPath "http://ex.org/transactionEnd"
61
60
  ];
62
61
  rdfc:graphStoreUrl "http://example.org/sparql";
63
- rdfc:forVirtuoso false
62
+ rdfc:forVirtuoso false;
63
+ rdfc:accessToken "myAccessToken";
64
+ rdfc:measurePerformance [
65
+ rdfc:name "myPerformanceMeasurement";
66
+ rdfc:outputPath "/path/to/output.json";
67
+ rdfc:failureIsFatal true;
68
+ rdfc:queryTimeout 30000
69
+ ]
64
70
  ];
65
71
  rdfc:sparqlWriter <out>.
66
72
  ```
@@ -77,13 +83,12 @@ npm install @rdfc/sparql-ingest-processor-ts
77
83
  ---
78
84
 
79
85
  ### Parameters of `rdfc:IngestConfig`:
80
- - `rdfc:memberIsGraph` (**boolean**, required): Whether each SDS record represents a named graph.
81
- - `rdfc:memberShape` (**string**, optional, repeatable): SHACL shape identifiers used to guide query construction when payloads are incomplete.
86
+ - `rdfc:memberShape` (**string**, optional): SHACL shape used to guide query construction when payloads are incomplete.
82
87
  - `rdfc:changeSemantics` (**rdfc:ChangeSemantics**, optional): Configures mapping between change types (create/update/delete) and SPARQL operations.
83
- - `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph (ignored if `memberIsGraph = true`).
88
+ - `rdfc:targetNamedGraph` (**string**, optional): Force all operations into a specific named graph.
84
89
  - `rdfc:transactionConfig` (**rdfc:TransactionConfig**, optional): Groups records by transaction ID for atomic updates.
85
90
  - `rdfc:graphStoreUrl` (**string**, optional): SPARQL Graph Store endpoint URL.
86
- - `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling.
91
+ - `rdfc:forVirtuoso` (**boolean**, optional): Enables Virtuoso-specific handling to avoid query size limits.
87
92
  - `rdfc:accessToken` (**string**, optional): Access token for authenticated graph stores.
88
93
  - `rdfc:measurePerformance` (**rdfc:PerformanceConfig**, optional): Enables performance measurement of SPARQL queries.
89
94
 
@@ -117,7 +122,6 @@ npm install @rdfc/sparql-ingest-processor-ts
117
122
  <ingester> a rdfc:SPARQLIngest;
118
123
  rdfc:memberStream <in>;
119
124
  rdfc:ingestConfig [
120
- rdfc:memberIsGraph true;
121
125
  rdfc:targetNamedGraph "http://example.org/targetGraph";
122
126
  rdfc:graphStoreUrl "http://example.org/sparql"
123
127
  ];
@@ -128,8 +132,7 @@ npm install @rdfc/sparql-ingest-processor-ts
128
132
 
129
133
  ## Notes
130
134
 
131
- - Delete operations can be handled differently depending on how complete the SDS record payload is.
132
- - When `memberIsGraph = true`, queries are wrapped with `GRAPH` and `WITH` clauses.
133
- - Transactions can buffer multiple SDS records and commit them together using `rdfc:transactionConfig`.
134
- - SHACL shapes (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
135
+ - Delete operations can be handled differently depending on how complete the input record is.
136
+ - Transactions can buffer multiple input records and commit them together using `rdfc:transactionConfig`.
137
+ - A SHACL shape (`rdfc:memberShape`) can be provided to help identify deletion targets when payloads are incomplete.
135
138
 
@@ -26,8 +26,7 @@ export declare enum OperationMode {
26
26
  export type IngestConfig = {
27
27
  operationMode?: OperationMode;
28
28
  memberBatchSize?: number;
29
- memberIsGraph?: boolean;
30
- memberShapes?: string[];
29
+ memberShape?: string;
31
30
  changeSemantics?: ChangeSemantics;
32
31
  targetNamedGraph?: string;
33
32
  transactionConfig?: TransactionConfig;
@@ -57,7 +56,6 @@ export declare class SPARQLIngest extends Processor<SPARQLIngestArgs> {
57
56
  transform(this: SPARQLIngestArgs & this): Promise<void>;
58
57
  produce(this: SPARQLIngestArgs & this): Promise<void>;
59
58
  verifyTransaction(stores: RdfStore[], transactionIdPath: string, transactionId: Term): void;
60
- getNamedGraphIfAny(memberIRI: Term, memberIsGraph: boolean | undefined, targetNamedGraph?: string): string | undefined;
61
59
  createTransactionQueries(transactionMembers: TransactionMember[], config: IngestConfig): string;
62
60
  }
63
61
  export {};
@@ -38,7 +38,15 @@ export class SPARQLIngest extends Processor {
38
38
  const quads = new Parser().parse(rawQuads);
39
39
  this.logger.verbose(`Parsed ${quads.length} quads from received member data`);
40
40
  const store = RdfStore.createDefault();
41
- quads.forEach(q => store.addQuad(q));
41
+ quads.forEach(q => {
42
+ if (q.graph.equals(df.defaultGraph()) && this.config.targetNamedGraph) {
43
+ store.addQuad(df.quad(q.subject, q.predicate, q.object, df.namedNode(this.config.targetNamedGraph)));
44
+ }
45
+ else {
46
+ store.addQuad(q);
47
+ }
48
+ });
49
+ sanitizeQuads(store);
42
50
  let query;
43
51
  const memberIRI = getObjects(store, null, SDS.terms.payload, SDS.terms.custom("DataDescription"))[0];
44
52
  if (memberIRI) {
@@ -92,21 +100,18 @@ export class SPARQLIngest extends Processor {
92
100
  this.transactionMembers = [];
93
101
  }
94
102
  else {
95
- const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
96
103
  const ctv = store.getQuads(null, df.namedNode(this.config.changeSemantics.changeTypePath))[0];
97
- store.removeQuad(ctv);
98
- sanitizeQuads(store);
99
104
  if (ctv.object.value === this.config.changeSemantics.createValue) {
100
105
  this.logger.info(`Preparing 'INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
101
- query = CREATE(store, this.config.forVirtuoso, ng);
106
+ query = CREATE(store, this.config.forVirtuoso);
102
107
  }
103
108
  else if (ctv.object.value === this.config.changeSemantics.updateValue) {
104
109
  this.logger.info(`Preparing 'DELETE {} INSERT {} WHERE {}' SPARQL query for member ${memberIRI.value}`);
105
- query = UPDATE(store, this.config.forVirtuoso, ng);
110
+ query = UPDATE(store, this.config.forVirtuoso);
106
111
  }
107
112
  else if (ctv.object.value === this.config.changeSemantics.deleteValue) {
108
113
  this.logger.info(`Preparing 'DELETE WHERE {}' SPARQL query for member ${memberIRI.value}`);
109
- query = [DELETE(store, [memberIRI.value], this.config.memberShapes, ng)];
114
+ query = DELETE(store, memberIRI.value, this.config.memberShape);
110
115
  }
111
116
  else {
112
117
  this.logger.error(`[sparqlIngest] Unrecognized change type value: ${ctv.object.value}`);
@@ -117,42 +122,40 @@ export class SPARQLIngest extends Processor {
117
122
  else {
118
123
  if (this.transactionMembers.length > 0) {
119
124
  this.transactionMembers.forEach(ts => {
120
- ts.store.getQuads(null, null, null, null).forEach(q => store.addQuad(q));
125
+ ts.store.getQuads().forEach(q => store.addQuad(q));
121
126
  });
122
127
  this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for transaction member ${memberIRI.value}`);
123
- query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
128
+ query = UPDATE(store, this.config.forVirtuoso);
124
129
  }
125
130
  else {
126
131
  if (this.config.operationMode === OperationMode.REPLICATION) {
127
- this.memberBatch.push(...store.getQuads(null, null, null, null));
132
+ this.memberBatch.push(...store.getQuads());
128
133
  this.batchCount++;
129
134
  if (this.batchCount < this.config.memberBatchSize) {
130
135
  continue;
131
136
  }
132
137
  }
133
138
  else {
134
- const ng = this.getNamedGraphIfAny(memberIRI, this.config.memberIsGraph, this.config.targetNamedGraph);
135
- this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for member ${memberIRI.value}`);
136
- query = UPDATE(store, this.config.forVirtuoso, ng);
139
+ this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for member ${memberIRI.value}`);
140
+ query = UPDATE(store, this.config.forVirtuoso);
137
141
  }
138
142
  }
139
143
  }
140
144
  }
141
145
  else {
142
146
  if (this.config.operationMode === OperationMode.REPLICATION) {
143
- this.memberBatch.push(...store.getQuads(null, null, null, null));
147
+ this.memberBatch.push(...store.getQuads());
144
148
  this.batchCount++;
145
149
  if (this.batchCount < this.config.memberBatchSize) {
146
150
  continue;
147
151
  }
148
152
  }
149
153
  else {
150
- this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL query for received triples (${store.size})`);
151
- query = UPDATE(store, this.config.forVirtuoso, this.config.targetNamedGraph);
154
+ this.logger.info(`Preparing 'DELETE {} WHERE {} + INSERT DATA {}' SPARQL queries for received quads (${store.size})`);
155
+ query = UPDATE(store, this.config.forVirtuoso);
152
156
  }
153
157
  }
154
158
  if (query && query.length > 0) {
155
- this.logger.debug(`Complete SPARQL query generated for received member: \n${query.join("\n")}`);
156
159
  if (this.config.graphStoreUrl) {
157
160
  try {
158
161
  const t0 = Date.now();
@@ -176,7 +179,14 @@ export class SPARQLIngest extends Processor {
176
179
  }
177
180
  }
178
181
  if (this.sparqlWriter) {
179
- await this.sparqlWriter.string(query.join("\n"));
182
+ if (this.config.forVirtuoso) {
183
+ for (const q of query) {
184
+ await this.sparqlWriter.string(q);
185
+ }
186
+ }
187
+ else {
188
+ await this.sparqlWriter.string(query.join("\n"));
189
+ }
180
190
  }
181
191
  }
182
192
  else {
@@ -256,16 +266,6 @@ export class SPARQLIngest extends Processor {
256
266
  }
257
267
  }
258
268
  }
259
- getNamedGraphIfAny(memberIRI, memberIsGraph, targetNamedGraph) {
260
- let ng;
261
- if (memberIsGraph) {
262
- ng = memberIRI.value;
263
- }
264
- else if (targetNamedGraph) {
265
- ng = targetNamedGraph;
266
- }
267
- return ng;
268
- }
269
269
  createTransactionQueries(transactionMembers, config) {
270
270
  this.createTransactionQueriesLogger.info(`Creating multi-operation SPARQL UPDATE query for ${transactionMembers.length}`
271
271
  + ` members of transaction ${transactionMembers[0].transactionId}`);
@@ -276,15 +276,14 @@ export class SPARQLIngest extends Processor {
276
276
  const transactionQueryBuilder = [];
277
277
  for (const tsm of transactionMembers) {
278
278
  const ctv = tsm.store.getQuads(null, df.namedNode(config.changeSemantics.changeTypePath))[0];
279
- tsm.store.removeQuad(ctv);
280
279
  if (ctv.object.value === config.changeSemantics.createValue) {
281
- tsm.store.getQuads(null, null, null, null).forEach(q => createStore.addQuad(q));
280
+ tsm.store.getQuads().forEach(q => createStore.addQuad(q));
282
281
  }
283
282
  else if (ctv.object.value === config.changeSemantics.updateValue) {
284
- tsm.store.getQuads(null, null, null, null).forEach(q => updateStore.addQuad(q));
283
+ tsm.store.getQuads().forEach(q => updateStore.addQuad(q));
285
284
  }
286
285
  else if (ctv.object.value === config.changeSemantics.deleteValue) {
287
- tsm.store.getQuads(null, null, null, null).forEach(q => deleteStore.addQuad(q));
286
+ tsm.store.getQuads().forEach(q => deleteStore.addQuad(q));
288
287
  deleteMembers.push(tsm.memberId);
289
288
  }
290
289
  else {
@@ -293,14 +292,16 @@ export class SPARQLIngest extends Processor {
293
292
  }
294
293
  }
295
294
  if (createStore.size > 0) {
296
- transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
295
+ transactionQueryBuilder.push(CREATE(createStore, config.forVirtuoso).join("\n"));
297
296
  }
298
297
  if (updateStore.size > 0) {
299
- transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso, config.targetNamedGraph).join("\n"));
298
+ transactionQueryBuilder.push(UPDATE(updateStore, config.forVirtuoso).join("\n"));
300
299
  }
301
300
  if (deleteStore.size > 0) {
302
- transactionQueryBuilder.push(DELETE(deleteStore, deleteMembers, config.memberShapes, config.targetNamedGraph));
301
+ deleteMembers.forEach(dm => {
302
+ transactionQueryBuilder.push(DELETE(deleteStore, dm, config.memberShape).join("\n"));
303
+ });
303
304
  }
304
- return transactionQueryBuilder.join(";\n");
305
+ return transactionQueryBuilder.join("\n");
305
306
  }
306
307
  }
@@ -1,4 +1,4 @@
1
1
  import { RdfStore } from "rdf-stores";
2
- export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean, namedGraph?: string, multipleNamedGraphs?: boolean) => string[];
3
- export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean, namedGraph?: string, multipleNamedGraphs?: boolean) => string[];
4
- export declare const DELETE: (store: RdfStore, memberIRIs: string[], memberShapes?: string[], namedGraph?: string, multipleNamedGraphs?: boolean) => string;
2
+ export declare const CREATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
3
+ export declare const UPDATE: (store: RdfStore, forVirtuoso?: boolean) => string[];
4
+ export declare const DELETE: (store: RdfStore, memberIRI: string, memberShape?: string) => string[];
@@ -2,73 +2,81 @@ import { RDF, SHACL } from "@treecg/types";
2
2
  import { Writer as N3Writer, Parser } from "n3";
3
3
  import { RdfStore } from "rdf-stores";
4
4
  import { DataFactory } from "rdf-data-factory";
5
- import { getObjects, getSubjects, splitStore } from "./Utils.js";
5
+ import { getObjects, getSubjects, splitStoreOnSize, splitStorePerNamedGraph } from "./Utils.js";
6
6
  const df = new DataFactory();
7
- export const CREATE = (store, forVirtuoso, namedGraph, multipleNamedGraphs) => {
8
- const stores = splitStore(store, forVirtuoso ? 500 : 50000);
9
- return stores.map((subStore, i) => {
10
- return `
11
- INSERT DATA {
12
- ${namedGraph ? `GRAPH <${namedGraph}> {` : ""}
13
- ${new N3Writer().quadsToString(subStore.getQuads())}
14
- ${namedGraph ? `}` : ""}
15
- }
16
- ${i === stores.length - 1 ? "" : ";"}
17
- `;
18
- });
7
+ export const CREATE = (store, forVirtuoso) => {
8
+ const queries = [];
9
+ const storesPerGraph = splitStorePerNamedGraph(store);
10
+ for (const { graph, store } of storesPerGraph) {
11
+ const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
12
+ subStores.forEach((s, i) => {
13
+ queries.push(`
14
+ INSERT DATA {
15
+ ${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
16
+ ${new N3Writer().quadsToString(s.getQuads().map(q => {
17
+ return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
18
+ }))}
19
+ ${graph.equals(df.defaultGraph()) ? "" : `}`}
20
+ };
21
+ `);
22
+ });
23
+ }
24
+ return queries;
19
25
  };
20
- export const UPDATE = (store, forVirtuoso, namedGraph, multipleNamedGraphs) => {
21
- const formattedQuery = formatQuery(store);
22
- const stores = splitStore(store, forVirtuoso ? 500 : 50000);
23
- const queries = [
24
- `
25
- ${namedGraph ? `WITH <${namedGraph}>` : ""}
26
+ export const UPDATE = (store, forVirtuoso) => {
27
+ const queries = [];
28
+ const storesPerGraph = splitStorePerNamedGraph(store);
29
+ for (const { graph, store } of storesPerGraph) {
30
+ const subStores = splitStoreOnSize(store, forVirtuoso ? 500 : 50000);
31
+ const formattedQuery = formatQuery(store);
32
+ const deleteInsertQuery = [`
33
+ ${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
26
34
  DELETE {
27
35
  ${formattedQuery[0]}
28
36
  }
29
37
  WHERE {
30
38
  ${formattedQuery[0]}
31
39
  };
32
- `
33
- ];
34
- stores.forEach((subStore, i) => {
35
- queries.push(`
36
- INSERT DATA {
37
- ${namedGraph ? `GRAPH <${namedGraph}> {` : ""}
38
- ${new N3Writer().quadsToString(subStore.getQuads())}
39
- ${namedGraph ? `}` : ""}
40
- }
41
- ${i === stores.length - 1 ? "" : ";"}
42
- `);
43
- });
40
+ `];
41
+ subStores.forEach((s, i) => {
42
+ deleteInsertQuery.push(`
43
+ INSERT DATA {
44
+ ${graph.equals(df.defaultGraph()) ? "" : `GRAPH <${graph.value}> {`}
45
+ ${new N3Writer().quadsToString(s.getQuads().map(q => {
46
+ return df.quad(q.subject, q.predicate, q.object, df.defaultGraph());
47
+ }))}
48
+ ${graph.equals(df.defaultGraph()) ? "" : `}`}
49
+ };
50
+ `);
51
+ });
52
+ queries.push(...deleteInsertQuery);
53
+ }
44
54
  return queries;
45
55
  };
46
- export const DELETE = (store, memberIRIs, memberShapes, namedGraph, multipleNamedGraphs) => {
47
- const deleteBuilder = [];
48
- const whereBuilder = [];
49
- let indexStart = 0;
50
- for (const memberIRI of memberIRIs) {
51
- const formatted = formatQuery(store, memberIRI, memberShapes, indexStart);
52
- deleteBuilder.push(formatted.length > 1 ? formatted[1] : formatted[0]);
53
- whereBuilder.push(formatted[0]);
54
- indexStart++;
56
+ export const DELETE = (store, memberIRI, memberShape) => {
57
+ const queries = [];
58
+ const storesPerGraph = splitStorePerNamedGraph(store);
59
+ for (const { graph, store } of storesPerGraph) {
60
+ const formatted = formatQuery(store, memberIRI, memberShape);
61
+ const deleteBuilder = formatted.length > 1 ? formatted[1] : formatted[0];
62
+ const whereBuilder = formatted[0];
63
+ queries.push(`
64
+ ${graph.equals(df.defaultGraph()) ? "" : `WITH <${graph.value}>`}
65
+ DELETE {
66
+ ${deleteBuilder}
67
+ } WHERE {
68
+ ${whereBuilder}
69
+ };
70
+ `);
55
71
  }
56
- return `
57
- ${namedGraph ? `WITH <${namedGraph}>` : ""}
58
- DELETE {
59
- ${deleteBuilder.join("\n")}
60
- } WHERE {
61
- ${whereBuilder.join("\n")}
62
- }
63
- `;
72
+ return queries;
64
73
  };
65
- function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
74
+ function formatQuery(memberStore, memberIRI, memberShape, indexStart = 0) {
66
75
  const subjectSet = new Set();
67
76
  const blankNodeMap = new Map();
68
77
  const queryBuilder = [];
69
- const formattedQueries = [];
70
78
  let i = indexStart;
71
- if (!memberShapes || memberShapes.length === 0) {
79
+ if (!memberShape) {
72
80
  for (const quad of memberStore.getQuads()) {
73
81
  if (!subjectSet.has(quad.subject.value)) {
74
82
  subjectSet.add(quad.subject.value);
@@ -82,7 +90,8 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
82
90
  if (quad.object.termType === "BlankNode") {
83
91
  blankNodeMap.set(quad.object.value, `?bn_ref_${i}`);
84
92
  }
85
- queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal" ? `"${quad.object.value}"^^<${quad.object.datatype.value}>`
93
+ queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} <${quad.predicate.value}> ${quad.object.termType === "Literal"
94
+ ? `"${quad.object.value}"^^<${quad.object.datatype.value}>`
86
95
  : quad.object.termType === "BlankNode" ? `${blankNodeMap.get(quad.object.value)} `
87
96
  : `<${quad.object.value}>`}.`);
88
97
  queryBuilder.push(`${blankNodeMap.get(quad.subject.value)} ?p_${i} ?o_${i}.`);
@@ -91,51 +100,28 @@ function formatQuery(memberStore, memberIRI, memberShapes, indexStart = 0) {
91
100
  i++;
92
101
  }
93
102
  }
94
- formattedQueries.push(queryBuilder.join("\n"));
103
+ return [queryBuilder.join("\n")];
95
104
  }
96
105
  else {
97
- const shapeIndex = new Map();
98
- memberShapes.forEach(msh => {
99
- const shapeStore = RdfStore.createDefault();
100
- new Parser().parse(msh).forEach(quad => shapeStore.addQuad(quad));
101
- shapeIndex.set(extractMainTargetClass(shapeStore).value, shapeStore);
102
- });
106
+ const shapeStore = RdfStore.createDefault();
107
+ new Parser().parse(memberShape).forEach(quad => shapeStore.addQuad(quad));
103
108
  queryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
104
- const memberType = getObjects(memberStore, df.namedNode(memberIRI), RDF.terms.type)[0];
105
- if (memberType) {
106
- i++;
107
- const mshStore = shapeIndex.get(memberType.value);
108
- const propShapes = getObjects(mshStore, null, SHACL.terms.property, null);
109
- for (const propSh of propShapes) {
110
- const pred = getObjects(mshStore, propSh, SHACL.terms.path, null)[0];
111
- queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
112
- queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
113
- i++;
114
- }
115
- formattedQueries.push(queryBuilder.join("\n"));
116
- }
117
- else {
118
- const deleteQueryBuilder = [];
119
- deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
109
+ const deleteQueryBuilder = [];
110
+ deleteQueryBuilder.push(`<${memberIRI}> ?p_${i} ?o_${i}.`);
111
+ i++;
112
+ const propShapes = getObjects(shapeStore, null, SHACL.terms.property, null);
113
+ queryBuilder.push(" OPTIONAL { ");
114
+ for (const propSh of propShapes) {
115
+ const pred = getObjects(shapeStore, propSh, SHACL.terms.path, null)[0];
116
+ queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
117
+ deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
118
+ queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
119
+ deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
120
120
  i++;
121
- shapeIndex.forEach(mshStore => {
122
- const propShapes = getObjects(mshStore, null, SHACL.terms.property, null);
123
- queryBuilder.push(" OPTIONAL { ");
124
- for (const propSh of propShapes) {
125
- const pred = getObjects(mshStore, propSh, SHACL.terms.path, null)[0];
126
- queryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
127
- deleteQueryBuilder.push(`<${memberIRI}> <${pred.value}> ?subEnt_${i}.`);
128
- queryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
129
- deleteQueryBuilder.push(`?subEnt_${i} ?p_${i} ?o_${i}.`);
130
- i++;
131
- }
132
- queryBuilder.push(" }");
133
- });
134
- formattedQueries.push(queryBuilder.join("\n"));
135
- formattedQueries.push(deleteQueryBuilder.join("\n"));
136
121
  }
122
+ queryBuilder.push(" }");
123
+ return [queryBuilder.join("\n"), deleteQueryBuilder.join("\n")];
137
124
  }
138
- return formattedQueries;
139
125
  }
140
126
  function extractMainTargetClass(store) {
141
127
  const nodeShapes = getSubjects(store, RDF.terms.type, SHACL.terms.NodeShape, null);
package/lib/Utils.d.ts CHANGED
@@ -1,9 +1,13 @@
1
1
  import { RdfStore } from "rdf-stores";
2
- import type { Term, Quad_Subject, Quad_Object, Quad } from "@rdfjs/types";
2
+ import type { Term, Quad_Subject, Quad_Object, Quad, Quad_Graph } from "@rdfjs/types";
3
3
  import type { IngestConfig } from "./SPARQLIngest.js";
4
4
  import { Logger } from "winston";
5
5
  export declare function getSubjects(store: RdfStore, predicate: Term | null, object: Term | null, graph?: Term | null): Quad_Subject[];
6
6
  export declare function getObjects(store: RdfStore, subject: Term | null, predicate: Term | null, graph?: Term | null): Quad_Object[];
7
- export declare function splitStore(store: RdfStore, threshold: number): RdfStore[];
7
+ export declare function splitStorePerNamedGraph(store: RdfStore): {
8
+ graph: Quad_Graph;
9
+ store: RdfStore;
10
+ }[];
11
+ export declare function splitStoreOnSize(store: RdfStore, threshold: number): RdfStore[];
8
12
  export declare function sanitizeQuads(store: RdfStore): void;
9
13
  export declare function doSPARQLRequest(query: string[] | Quad[], config: IngestConfig, logger: Logger): Promise<void>;
package/lib/Utils.js CHANGED
@@ -14,7 +14,20 @@ export function getObjects(store, subject, predicate, graph) {
14
14
  return quad.object;
15
15
  });
16
16
  }
17
- export function splitStore(store, threshold) {
17
+ export function splitStorePerNamedGraph(store) {
18
+ const stores = [];
19
+ const namedGraphs = new Set();
20
+ store.getQuads(null, null, null, null)
21
+ .forEach(q => namedGraphs.add(q.graph));
22
+ namedGraphs.forEach(ng => {
23
+ const subStore = RdfStore.createDefault();
24
+ const quads = store.getQuads(null, null, null, ng);
25
+ quads.forEach(q => subStore.addQuad(q));
26
+ stores.push({ graph: ng, store: subStore });
27
+ });
28
+ return stores;
29
+ }
30
+ export function splitStoreOnSize(store, threshold) {
18
31
  const stores = [];
19
32
  if (store.size < threshold) {
20
33
  stores.push(store);
@@ -74,7 +87,8 @@ export async function doSPARQLRequest(query, config, logger) {
74
87
  if (config.accessToken) {
75
88
  url.searchParams.append("access-token", config.accessToken);
76
89
  }
77
- logger.debug(`Executing SPARQL Graph Store request (POST) with ${quads.length} quads.`);
90
+ logger.verbose(`[doSPARQLRequest] Executing SPARQL Graph Store request (POST) with ${quads.length} quads.`);
91
+ logger.debug(`[doSPARQLRequest] POSTing the following RDF quads:\n${serialized}`);
78
92
  const res = await fetch(url.toString(), {
79
93
  method: "POST",
80
94
  headers: {
@@ -92,7 +106,7 @@ export async function doSPARQLRequest(query, config, logger) {
92
106
  return;
93
107
  }
94
108
  let queries = [];
95
- const jointQuery = query.join("\n");
109
+ const jointQuery = query.join(";\n");
96
110
  if (config.forVirtuoso && Buffer.byteLength(jointQuery, 'utf8') > 1e6) {
97
111
  queries = query;
98
112
  }
@@ -100,7 +114,7 @@ export async function doSPARQLRequest(query, config, logger) {
100
114
  queries.push(jointQuery);
101
115
  }
102
116
  for (const q of queries) {
103
- logger.debug(`Executing SPARQL query: \n${q}`);
117
+ logger.debug(`[doSPARQLRequest] Executing SPARQL query: \n${q}`);
104
118
  const res = await fetch(config.graphStoreUrl, {
105
119
  method: "POST",
106
120
  headers: {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@rdfc/sparql-ingest-processor-ts",
3
- "version": "2.0.3",
3
+ "version": "2.1.1",
4
4
  "description": "SPARQL Update function to be within RDF-Connect pipelines",
5
5
  "author": "Julián Rojas",
6
6
  "contributors": [
@@ -28,10 +28,10 @@
28
28
  },
29
29
  "dependencies": {
30
30
  "@treecg/types": "^0.4.6",
31
- "n3": "^2.0.1",
31
+ "n3": "^1.26.0",
32
32
  "rdf-data-factory": "^2.0.2",
33
33
  "rdf-stores": "^2.1.1",
34
- "undici": "^7.20.0",
34
+ "undici": "^7.21.0",
35
35
  "winston": "^3.19.0"
36
36
  },
37
37
  "devDependencies": {
@@ -50,7 +50,7 @@
50
50
  "ts-patch": "^3.3.0",
51
51
  "tsc-alias": "^1.8.16",
52
52
  "typescript": "^5.9.3",
53
- "vite-tsconfig-paths": "^6.0.5",
53
+ "vite-tsconfig-paths": "^6.1.0",
54
54
  "vitest": "^4.0.18"
55
55
  }
56
56
  }
package/processors.ttl CHANGED
@@ -43,16 +43,11 @@ rdfc:SPARQLIngest rdfc:jsImplementationOf rdfc:Processor;
43
43
  sh:datatype xsd:integer;
44
44
  sh:name "memberBatchSize";
45
45
  sh:maxCount 1;
46
- ], [
47
- sh:path rdfc:memberIsGraph;
48
- sh:datatype xsd:boolean;
49
- sh:name "memberIsGraph";
50
- sh:maxCount 1;
51
46
  ], [
52
47
  sh:path rdfc:memberShape;
53
48
  sh:datatype xsd:string;
54
- sh:name "memberShapes";
55
- sh:minCount 0;
49
+ sh:name "memberShape";
50
+ sh:maxCount 1;
56
51
  ], [
57
52
  sh:path rdfc:changeSemantics;
58
53
  sh:class rdfc:ChangeSemantics;
package/lib/LogUtil.d.ts DELETED
@@ -1,9 +0,0 @@
1
- import { Logger } from "winston";
2
- export declare function getLoggerFor(loggable: string | Instance): Logger;
3
- interface Constructor {
4
- name: string;
5
- }
6
- interface Instance {
7
- constructor: Constructor;
8
- }
9
- export {};
package/lib/LogUtil.js DELETED
@@ -1,54 +0,0 @@
1
- import winston, { format } from "winston";
2
- const PROCESSOR_NAME = "sparql-ingest";
3
- const consoleTransport = new winston.transports.Console({
4
- stderrLevels: [
5
- "error",
6
- "warn",
7
- "info",
8
- "http",
9
- "verbose",
10
- "debug",
11
- "silly"
12
- ]
13
- });
14
- if (typeof process !== "undefined") {
15
- consoleTransport.level =
16
- process.env.LOG_LEVEL ||
17
- (process.env.DEBUG?.includes(PROCESSOR_NAME) ||
18
- process.env.DEBUG === "*"
19
- ? "debug"
20
- : "info");
21
- }
22
- const classLoggers = new WeakMap();
23
- const stringLoggers = new Map();
24
- export function getLoggerFor(loggable) {
25
- let logger;
26
- if (typeof loggable === "string") {
27
- if (stringLoggers.has(loggable)) {
28
- logger = stringLoggers.get(loggable);
29
- }
30
- else {
31
- logger = createLogger(loggable);
32
- stringLoggers.set(loggable, logger);
33
- }
34
- }
35
- else {
36
- const { constructor } = loggable;
37
- if (classLoggers.has(constructor)) {
38
- logger = classLoggers.get(constructor);
39
- }
40
- else {
41
- logger = createLogger(constructor.name);
42
- classLoggers.set(constructor, logger);
43
- }
44
- }
45
- return logger;
46
- }
47
- function createLogger(label) {
48
- return winston.createLogger({
49
- format: format.combine(format.label({ label }), format.colorize(), format.timestamp(), format.metadata({
50
- fillExcept: ["level", "timestamp", "label", "message"],
51
- }), format.printf(({ level: levelInner, message, label: labelInner, timestamp, }) => `${timestamp} {${PROCESSOR_NAME}} [${labelInner}] ${levelInner}: ${message}`)),
52
- transports: [consoleTransport],
53
- });
54
- }