@lde/pipeline 0.28.4 → 0.28.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +11 -0
- package/dist/sparql/executor.d.ts +35 -0
- package/dist/sparql/executor.d.ts.map +1 -1
- package/dist/sparql/executor.js +32 -1
- package/dist/sparql/index.d.ts +1 -1
- package/dist/sparql/index.d.ts.map +1 -1
- package/dist/sparql/index.js +1 -1
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -116,6 +116,17 @@ const executor = new SparqlConstructExecutor({
|
|
|
116
116
|
});
|
|
117
117
|
```
|
|
118
118
|
|
|
119
|
+
SPARQL CONSTRUCT queries can produce duplicate triples — for example, constant triples (like `?dataset a edm:ProvidedCHO`) are emitted for every solution row. Enable `deduplicate` to remove duplicates inline on the stream using a string-based identity set (inspired by [Comunica's `distinctConstruct`](https://comunica.dev/docs/query/advanced/context/#14--distinct-construct)):
|
|
120
|
+
|
|
121
|
+
```typescript
|
|
122
|
+
const executor = new SparqlConstructExecutor({
|
|
123
|
+
query: 'CONSTRUCT { ?s a edm:ProvidedCHO . ?s ?p ?o } WHERE { ?s ?p ?o }',
|
|
124
|
+
deduplicate: true,
|
|
125
|
+
});
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
The dedup set is scoped to each `execute()` call, so memory stays bounded to the number of unique quads per batch. A standalone `deduplicateQuads()` function is also exported for use outside the executor.
|
|
129
|
+
|
|
119
130
|
`Executor` is an interface, so you can implement your own for logic that's hard to express in pure SPARQL — for example, cleaning up messy date notations or converting locale-specific dates to ISO 8601. The decorator pattern lets you wrap a SPARQL executor and post-process its quad stream in TypeScript:
|
|
120
131
|
|
|
121
132
|
```typescript
|
|
@@ -54,6 +54,22 @@ export interface SparqlConstructExecutorOptions {
|
|
|
54
54
|
* @default false
|
|
55
55
|
*/
|
|
56
56
|
lineBuffer?: boolean;
|
|
57
|
+
/**
|
|
58
|
+
* Deduplicate triples in the CONSTRUCT output stream.
|
|
59
|
+
*
|
|
60
|
+
* SPARQL CONSTRUCT queries can produce duplicate triples — for example,
|
|
61
|
+
* constant triples (like `?dataset a edm:ProvidedCHO`) are emitted for
|
|
62
|
+
* every solution row. When enabled, a streaming identity filter removes
|
|
63
|
+
* duplicates inline without buffering quads (only a string key per unique quad is retained).
|
|
64
|
+
*
|
|
65
|
+
* The dedup set is scoped to each {@link execute} call, so memory stays
|
|
66
|
+
* bounded to the number of unique quads per call (typically one batch).
|
|
67
|
+
*
|
|
68
|
+
* Inspired by Comunica's `distinctConstruct` context option.
|
|
69
|
+
*
|
|
70
|
+
* @default false
|
|
71
|
+
*/
|
|
72
|
+
deduplicate?: boolean;
|
|
57
73
|
}
|
|
58
74
|
/**
|
|
59
75
|
* A streaming SPARQL CONSTRUCT executor.
|
|
@@ -88,6 +104,7 @@ export declare class SparqlConstructExecutor implements Executor {
|
|
|
88
104
|
private readonly fetcher;
|
|
89
105
|
private readonly retries;
|
|
90
106
|
private readonly lineBuffer;
|
|
107
|
+
private readonly deduplicate;
|
|
91
108
|
private readonly generator;
|
|
92
109
|
constructor(options: SparqlConstructExecutorOptions);
|
|
93
110
|
/**
|
|
@@ -142,4 +159,22 @@ export declare class LineBufferTransform extends Transform {
|
|
|
142
159
|
_transform(chunk: Buffer, _encoding: string, callback: () => void): void;
|
|
143
160
|
_flush(callback: () => void): void;
|
|
144
161
|
}
|
|
162
|
+
/**
|
|
163
|
+
* Remove duplicate quads from an async quad stream.
|
|
164
|
+
*
|
|
165
|
+
* Uses string-based identity (the same approach as Comunica's
|
|
166
|
+
* `distinctConstruct`): each quad is serialised via
|
|
167
|
+
* [`rdf-string`](https://github.com/rubensworks/rdf-string.js) and checked
|
|
168
|
+
* against a {@link Set}. Only the first occurrence of each unique quad is
|
|
169
|
+
* yielded.
|
|
170
|
+
*
|
|
171
|
+
* @example
|
|
172
|
+
* ```typescript
|
|
173
|
+
* const unique = deduplicateQuads(quadStream);
|
|
174
|
+
* for await (const quad of unique) {
|
|
175
|
+
* // each quad appears at most once
|
|
176
|
+
* }
|
|
177
|
+
* ```
|
|
178
|
+
*/
|
|
179
|
+
export declare function deduplicateQuads(quads: AsyncIterable<Quad>): AsyncIterable<Quad>;
|
|
145
180
|
//# sourceMappingURL=executor.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAiB,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAiB,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAWxC;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;IAEhC;;;;;;;;;OASG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IAErB;;;;;;;;;;;;;;OAcG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAU;IACrC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IACtC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAqBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IA0C/B;;;;OAIG;YACW,UAAU;IAmBxB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,mBAAoB,SAAQ,SAAS;IAChD,OAAO,CAAC,SAAS,CAAM;IAEd,UAAU,CACjB,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,IAAI;IAWb,MAAM,CAAC,QAAQ,EAAE,MAAM,IAAI;CAMrC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAuB,gBAAgB,CACrC,KAAK,EAAE,aAAa,CAAC
,IAAI,CAAC,GACzB,aAAa,CAAC,IAAI,CAAC,CASrB"}
|
package/dist/sparql/executor.js
CHANGED
|
@@ -8,6 +8,7 @@ import { Parser } from '@traqula/parser-sparql-1-1';
|
|
|
8
8
|
import { Generator } from '@traqula/generator-sparql-1-1';
|
|
9
9
|
import isNetworkError from 'is-network-error';
|
|
10
10
|
import pRetry from 'p-retry';
|
|
11
|
+
import { quadToStringQuad } from 'rdf-string';
|
|
11
12
|
import { withDefaultGraph } from './graph.js';
|
|
12
13
|
import { injectValues } from './values.js';
|
|
13
14
|
/**
|
|
@@ -52,11 +53,13 @@ export class SparqlConstructExecutor {
|
|
|
52
53
|
fetcher;
|
|
53
54
|
retries;
|
|
54
55
|
lineBuffer;
|
|
56
|
+
deduplicate;
|
|
55
57
|
generator = new Generator();
|
|
56
58
|
constructor(options) {
|
|
57
59
|
this.rawQuery = options.query;
|
|
58
60
|
this.retries = options.retries ?? 3;
|
|
59
61
|
this.lineBuffer = options.lineBuffer ?? false;
|
|
62
|
+
this.deduplicate = options.deduplicate ?? false;
|
|
60
63
|
if (!options.query.includes('#subjectFilter#')) {
|
|
61
64
|
const parsed = new Parser().parse(options.query);
|
|
62
65
|
if (parsed.type !== 'query' || parsed.subType !== 'construct') {
|
|
@@ -102,10 +105,11 @@ export class SparqlConstructExecutor {
|
|
|
102
105
|
let query = this.generator.generate(ast);
|
|
103
106
|
assertSafeIri(dataset.iri.toString());
|
|
104
107
|
query = query.replaceAll('?dataset', `<${dataset.iri}>`);
|
|
105
|
-
|
|
108
|
+
const quads = await pRetry(() => this.fetchQuads(endpoint.toString(), query), {
|
|
106
109
|
retries: this.retries,
|
|
107
110
|
shouldRetry: ({ error }) => isTransientError(error),
|
|
108
111
|
});
|
|
112
|
+
return this.deduplicate ? deduplicateQuads(quads) : quads;
|
|
109
113
|
}
|
|
110
114
|
/**
|
|
111
115
|
* Fetch quads from the endpoint, optionally line-buffering the response
|
|
@@ -176,6 +180,33 @@ export class LineBufferTransform extends Transform {
|
|
|
176
180
|
callback();
|
|
177
181
|
}
|
|
178
182
|
}
|
|
183
|
+
/**
|
|
184
|
+
* Remove duplicate quads from an async quad stream.
|
|
185
|
+
*
|
|
186
|
+
* Uses string-based identity (the same approach as Comunica's
|
|
187
|
+
* `distinctConstruct`): each quad is serialised via
|
|
188
|
+
* [`rdf-string`](https://github.com/rubensworks/rdf-string.js) and checked
|
|
189
|
+
* against a {@link Set}. Only the first occurrence of each unique quad is
|
|
190
|
+
* yielded.
|
|
191
|
+
*
|
|
192
|
+
* @example
|
|
193
|
+
* ```typescript
|
|
194
|
+
* const unique = deduplicateQuads(quadStream);
|
|
195
|
+
* for await (const quad of unique) {
|
|
196
|
+
* // each quad appears at most once
|
|
197
|
+
* }
|
|
198
|
+
* ```
|
|
199
|
+
*/
|
|
200
|
+
export async function* deduplicateQuads(quads) {
|
|
201
|
+
const seen = new Set();
|
|
202
|
+
for await (const quad of quads) {
|
|
203
|
+
const key = Object.values(quadToStringQuad(quad)).join(' ');
|
|
204
|
+
if (!seen.has(key)) {
|
|
205
|
+
seen.add(key);
|
|
206
|
+
yield quad;
|
|
207
|
+
}
|
|
208
|
+
}
|
|
209
|
+
}
|
|
179
210
|
const transientStatusPattern = /HTTP status (\d+)/;
|
|
180
211
|
function isTransientError(error) {
|
|
181
212
|
if (isNetworkError(error))
|
package/dist/sparql/index.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { SparqlConstructExecutor, LineBufferTransform, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type VariableBindings, } from './executor.js';
|
|
1
|
+
export { deduplicateQuads, SparqlConstructExecutor, LineBufferTransform, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type VariableBindings, } from './executor.js';
|
|
2
2
|
export { SparqlItemSelector, type SparqlItemSelectorOptions, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,mBAAmB,EACnB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,uBAAuB,EACvB,mBAAmB,EACnB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/sparql/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { SparqlConstructExecutor, LineBufferTransform, NotSupported, readQueryFile, } from './executor.js';
|
|
1
|
+
export { deduplicateQuads, SparqlConstructExecutor, LineBufferTransform, NotSupported, readQueryFile, } from './executor.js';
|
|
2
2
|
export { SparqlItemSelector, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.28.
|
|
3
|
+
"version": "0.28.5",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -37,6 +37,7 @@
|
|
|
37
37
|
"is-network-error": "^1.3.1",
|
|
38
38
|
"n3": "^2.0.1",
|
|
39
39
|
"p-retry": "^7.1.1",
|
|
40
|
+
"rdf-string": "^2.0.1",
|
|
40
41
|
"tslib": "^2.3.0"
|
|
41
42
|
}
|
|
42
43
|
}
|