@lde/pipeline 0.6.22 → 0.6.24
- package/dist/asyncQueue.d.ts +29 -0
- package/dist/asyncQueue.d.ts.map +1 -0
- package/dist/asyncQueue.js +106 -0
- package/dist/builder.d.ts +6 -0
- package/dist/builder.d.ts.map +1 -1
- package/dist/builder.js +1 -0
- package/dist/config.d.ts +1 -0
- package/dist/config.d.ts.map +1 -1
- package/dist/config.js +5 -1
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/stage.d.ts +8 -2
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +97 -17
- package/dist/writer/sparqlUpdateWriter.d.ts +6 -0
- package/dist/writer/sparqlUpdateWriter.d.ts.map +1 -1
- package/dist/writer/sparqlUpdateWriter.js +9 -3
- package/package.json +3 -3
package/dist/asyncQueue.d.ts
ADDED
@@ -0,0 +1,29 @@
+/**
+ * A bounded async channel: producers `push()` items, a single consumer
+ * iterates with `for await...of`. Backpressure is applied when the buffer
+ * reaches `capacity` — `push()` will block until the consumer pulls.
+ */
+export declare class AsyncQueue<T> implements AsyncIterable<T> {
+    private buffer;
+    private readonly capacity;
+    private closed;
+    private error;
+    /** Resolvers for a blocked consumer waiting for data or close/abort. */
+    private consumerResolve?;
+    private consumerReject?;
+    /** Resolvers for blocked producers waiting for buffer space. */
+    private producerResolvers;
+    constructor(capacity?: number);
+    /**
+     * Push an item into the queue. Blocks (returns a Promise) when the buffer
+     * is full. Throws if the queue has been closed or aborted.
+     */
+    push(item: T): Promise<void>;
+    /** Signal that no more items will be pushed. */
+    close(): void;
+    /** Signal an error. Unblocks all waiting producers and the consumer. */
+    abort(error: unknown): void;
+    [Symbol.asyncIterator](): AsyncIterator<T, undefined>;
+    private pull;
+}
+//# sourceMappingURL=asyncQueue.d.ts.map
package/dist/asyncQueue.d.ts.map
ADDED
@@ -0,0 +1 @@
+
{"version":3,"file":"asyncQueue.d.ts","sourceRoot":"","sources":["../src/asyncQueue.ts"],"names":[],"mappings":"AAAA;;;;GAIG;AACH,qBAAa,UAAU,CAAC,CAAC,CAAE,YAAW,aAAa,CAAC,CAAC,CAAC;IACpD,OAAO,CAAC,MAAM,CAAW;IACzB,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,MAAM,CAAS;IACvB,OAAO,CAAC,KAAK,CAAsB;IAEnC,wEAAwE;IACxE,OAAO,CAAC,eAAe,CAAC,CAAgD;IACxE,OAAO,CAAC,cAAc,CAAC,CAA4B;IAEnD,gEAAgE;IAChE,OAAO,CAAC,iBAAiB,CAGjB;gBAEI,QAAQ,SAAM;IAI1B;;;OAGG;IACG,IAAI,CAAC,IAAI,EAAE,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA2BlC,gDAAgD;IAChD,KAAK,IAAI,IAAI;IAab,wEAAwE;IACxE,KAAK,CAAC,KAAK,EAAE,OAAO,GAAG,IAAI;IAoB3B,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,aAAa,CAAC,CAAC,EAAE,SAAS,CAAC;IAMrD,OAAO,CAAC,IAAI;CA2Bb"}
package/dist/asyncQueue.js
ADDED
@@ -0,0 +1,106 @@
+/**
+ * A bounded async channel: producers `push()` items, a single consumer
+ * iterates with `for await...of`. Backpressure is applied when the buffer
+ * reaches `capacity` — `push()` will block until the consumer pulls.
+ */
+export class AsyncQueue {
+    buffer = [];
+    capacity;
+    closed = false;
+    error = undefined;
+    /** Resolvers for a blocked consumer waiting for data or close/abort. */
+    consumerResolve;
+    consumerReject;
+    /** Resolvers for blocked producers waiting for buffer space. */
+    producerResolvers = [];
+    constructor(capacity = 128) {
+        this.capacity = capacity;
+    }
+    /**
+     * Push an item into the queue. Blocks (returns a Promise) when the buffer
+     * is full. Throws if the queue has been closed or aborted.
+     */
+    async push(item) {
+        if (this.error !== undefined) {
+            throw this.error;
+        }
+        if (this.closed) {
+            throw new Error('Cannot push to a closed queue');
+        }
+        // If a consumer is already waiting, deliver directly.
+        if (this.consumerResolve) {
+            const resolve = this.consumerResolve;
+            this.consumerResolve = undefined;
+            this.consumerReject = undefined;
+            resolve({ value: item, done: false });
+            return;
+        }
+        // Wait for space if buffer is at capacity.
+        if (this.buffer.length >= this.capacity) {
+            await new Promise((resolve, reject) => {
+                this.producerResolvers.push({ resolve, reject });
+            });
+        }
+        this.buffer.push(item);
+    }
+    /** Signal that no more items will be pushed. */
+    close() {
+        if (this.closed)
+            return;
+        this.closed = true;
+        // Wake a waiting consumer with done signal if buffer is empty.
+        if (this.buffer.length === 0 && this.consumerResolve) {
+            const resolve = this.consumerResolve;
+            this.consumerResolve = undefined;
+            this.consumerReject = undefined;
+            resolve({ value: undefined, done: true });
+        }
+    }
+    /** Signal an error. Unblocks all waiting producers and the consumer. */
+    abort(error) {
+        if (this.error !== undefined)
+            return; // first error wins
+        this.error = error;
+        this.closed = true;
+        // Reject all blocked producers.
+        for (const { reject } of this.producerResolvers) {
+            reject(error);
+        }
+        this.producerResolvers = [];
+        // Reject or resolve the consumer depending on buffered items.
+        if (this.consumerReject) {
+            const reject = this.consumerReject;
+            this.consumerResolve = undefined;
+            this.consumerReject = undefined;
+            reject(error);
+        }
+    }
+    [Symbol.asyncIterator]() {
+        return {
+            next: () => this.pull(),
+        };
+    }
+    pull() {
+        // Drain buffer first.
+        if (this.buffer.length > 0) {
+            const item = this.buffer.shift();
+            // Unblock one waiting producer.
+            if (this.producerResolvers.length > 0) {
+                this.producerResolvers.shift().resolve();
+            }
+            return Promise.resolve({ value: item, done: false });
+        }
+        // Buffer empty — check for error or closed.
+        if (this.error !== undefined) {
+            return Promise.reject(this.error);
+        }
+        if (this.closed) {
+            return Promise.resolve({ value: undefined, done: true });
+        }
+        // Wait for a producer to push or for close/abort.
+        return new Promise((resolve, reject) => {
+            this.consumerResolve = resolve;
+            this.consumerReject = reject;
+        });
+    }
+}
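A minimal usage sketch of the new queue, not part of this diff: it assumes AsyncQueue is re-exported from the package root (the index barrel appears to gain that export in this release); the capacity, item type, and logging are illustrative only.

import { AsyncQueue } from '@lde/pipeline';

async function demo(): Promise<void> {
  // Capacity 2: the third push() stays pending until the consumer pulls.
  const queue = new AsyncQueue<number>(2);

  const producer = (async () => {
    for (let i = 0; i < 5; i++) {
      await queue.push(i); // blocks while the buffer is full (backpressure)
    }
    queue.close(); // no more items; lets the for await loop terminate
  })();

  for await (const item of queue) {
    console.log('consumed', item);
  }
  await producer; // surfaces any producer-side error
}

demo().catch(console.error);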
package/dist/builder.d.ts
CHANGED
@@ -30,6 +30,11 @@ export interface WriterConfig {
     type: 'file' | 'sparql';
     outputDir?: string;
     endpoint?: URL;
+    /**
+     * Value for the Authorization header sent with SPARQL UPDATE requests, e.g.
+     * `"Basic dXNlcjpwYXNz"`, `"Bearer my-token"`, or `"GDB eyJ…"`.
+     */
+    auth?: string;
 }
 /**
  * Complete pipeline configuration.
@@ -110,5 +115,6 @@ export declare function fileWriter(options: {
  */
 export declare function sparqlWriter(options: {
     endpoint: URL;
+    auth?: string;
 }): WriterConfig;
 //# sourceMappingURL=builder.d.ts.map
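A hedged sketch of the new auth option on sparqlWriter(); the signature is taken from this diff, while the endpoint URL and credential value are placeholders.

import { sparqlWriter } from '@lde/pipeline';

// The auth string is sent verbatim as the Authorization header of every SPARQL UPDATE request.
const writer = sparqlWriter({
  endpoint: new URL('https://example.org/repositories/my-repo/statements'),
  auth: 'Basic dXNlcjpwYXNz', // or 'Bearer my-token', or a GraphDB 'GDB …' token
});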
package/dist/builder.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"builder.d.ts","sourceRoot":"","sources":["../src/builder.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,IAAI,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC1B;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,GAAG,CAAC;
+
{"version":3,"file":"builder.d.ts","sourceRoot":"","sources":["../src/builder.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,IAAI,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC1B;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,GAAG,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;CACf;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;IACzB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,CAAW;IAC5B,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,YAAY,CAAC,CAAe;IAEpC;;OAEG;IACH,MAAM,CAAC,MAAM,IAAI,eAAe;IAIhC;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKtC;;OAEG;IACH,UAAU,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKtC;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;IAKzB;;OAEG;IACH,QAAQ,CAAC,GAAG,KAAK,EAAE,IAAI,EAAE,GAAG,IAAI;IAKhC;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKrC;;;OAGG;IACH,KAAK,IAAI,cAAc;CAYxB;AAID;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,GAAG,GAAG,gBAAgB,CAMjE;AAED;;;;GAIG;AACH,wBAAgB,MAAM,CAAC,GAAG,WAAW,EAAE,GAAG,EAAE,GAAG,sBAAsB,CAKpE;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GAAG,YAAY,CAKvE;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE;IACpC,QAAQ,EAAE,GAAG,CAAC;IACd,IAAI,CAAC,EAAE,MAAM,CAAC;CACf,GAAG,YAAY,CAMf"}
package/dist/builder.js
CHANGED
package/dist/config.d.ts
CHANGED
package/dist/config.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AACA,OAAO,EACL,cAAc,EACd,YAAY,EAIb,MAAM,cAAc,CAAC;AAKtB;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE;QACT,IAAI,EAAE,UAAU,GAAG,QAAQ,CAAC;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;IACF,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,cAAc,CAAC;QACrB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;QACxB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;
+
{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AACA,OAAO,EACL,cAAc,EACd,YAAY,EAIb,MAAM,cAAc,CAAC;AAKtB;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE;QACT,IAAI,EAAE,UAAU,GAAG,QAAQ,CAAC;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;IACF,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,cAAc,CAAC;QACrB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;QACxB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,IAAI,CAAC,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,iBAAiB,GAAG,iBAAiB,CAEzE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,cAAc,CAAC,CAWzB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,iBAAiB,GAAG,cAAc,CAOtE"}
package/dist/config.js
CHANGED
@@ -102,7 +102,11 @@ function normalizeWriters(raw) {
                 if (!writer.endpoint) {
                     throw new Error('SPARQL writer requires endpoint');
                 }
-                return {
+                return {
+                    type: 'sparql',
+                    endpoint: new URL(writer.endpoint),
+                    auth: writer.auth,
+                };
             default:
                 throw new Error(`Unknown writer type: ${writer.type}`);
         }
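For illustration, the shape of a raw sparql writer entry that normalizeWriters() now accepts; only the type, endpoint, and auth keys are grounded in this hunk, the URL is a placeholder, and the surrounding configuration structure is not shown here.

// Hypothetical raw entry as it might appear in the pipeline configuration.
const rawWriter = {
  type: 'sparql' as const,
  endpoint: 'https://example.org/repositories/my-repo/statements',
  auth: 'Bearer my-token', // optional; forwarded unchanged
};

// normalizeWriters() turns this into a WriterConfig:
// { type: 'sparql', endpoint: new URL(rawWriter.endpoint), auth: rawWriter.auth }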
package/dist/index.d.ts
CHANGED
package/dist/index.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC;AACtC,cAAc,cAAc,CAAC;AAC7B,cAAc,aAAa,CAAC;AAC5B,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
package/dist/index.js
CHANGED
package/dist/stage.d.ts
CHANGED
@@ -8,15 +8,21 @@ export interface StageOptions {
     selector?: StageSelector;
     /** Maximum number of bindings per executor call. @default 10 */
     batchSize?: number;
+    /** Maximum concurrent in-flight executor batches. @default 10 */
+    maxConcurrency?: number;
+}
+export interface RunOptions {
+    onProgress?: (elementsProcessed: number, quadsGenerated: number) => void;
 }
 export declare class Stage {
     readonly name: string;
     private readonly executors;
     private readonly selector?;
     private readonly batchSize;
+    private readonly maxConcurrency;
     constructor(options: StageOptions);
-    run(dataset: Dataset, distribution: Distribution, writer: Writer): Promise<NotSupported | void>;
-    private
+    run(dataset: Dataset, distribution: Distribution, writer: Writer, options?: RunOptions): Promise<NotSupported | void>;
+    private runWithSelector;
     private executeAll;
 }
 /** Stage-level selector that yields variable bindings for use in executor queries. Pagination is an implementation detail. */
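A sketch of how the widened Stage API might be called, with the option and callback names taken from this declaration file; the executors, selector, dataset, distribution, and writer values are assumed to exist elsewhere and are only declared here so the sketch type-checks.

import { Stage } from '@lde/pipeline';

// Assumed inputs, declared ambiently for the sake of the example.
declare const executors: ConstructorParameters<typeof Stage>[0]['executors'];
declare const selector: ConstructorParameters<typeof Stage>[0]['selector'];
declare const dataset: Parameters<Stage['run']>[0];
declare const distribution: Parameters<Stage['run']>[1];
declare const writer: Parameters<Stage['run']>[2];

const stage = new Stage({
  name: 'example-stage',
  executors,
  selector,
  batchSize: 10,
  maxConcurrency: 4, // new: cap on concurrently executing executor batches
});

await stage.run(dataset, distribution, writer, {
  // new: invoked as batches complete
  onProgress: (elements, quads) => console.log(`${elements} elements, ${quads} quads`),
});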
package/dist/stage.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,aAAa,CAAC;IACzB,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;CACzB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAgB;IAC1C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAU3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAajB,eAAe;YA8Gf,UAAU;CAqBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
package/dist/stage.js
CHANGED
@@ -1,10 +1,12 @@
 import { NotSupported } from './sparql/executor.js';
 import { batch } from './batch.js';
+import { AsyncQueue } from './asyncQueue.js';
 export class Stage {
     name;
     executors;
     selector;
     batchSize;
+    maxConcurrency;
     constructor(options) {
         this.name = options.name;
         this.executors = Array.isArray(options.executors)
@@ -12,37 +14,115 @@ export class Stage {
             : [options.executors];
         this.selector = options.selector;
         this.batchSize = options.batchSize ?? 10;
+        this.maxConcurrency = options.maxConcurrency ?? 10;
     }
-    async run(dataset, distribution, writer) {
-
-
-
+    async run(dataset, distribution, writer, options) {
+        if (this.selector) {
+            return this.runWithSelector(dataset, distribution, writer, options);
+        }
+        const streams = await this.executeAll(dataset, distribution);
         if (streams instanceof NotSupported) {
             return streams;
         }
         await writer.write(dataset, mergeStreams(streams));
     }
-    async
-
-
-
-
-
+    async runWithSelector(dataset, distribution, writer, options) {
+        // Peek the first batch to detect an empty selector before starting the
+        // writer (important because e.g. SparqlUpdateWriter does CLEAR GRAPH).
+        const batches = batch(this.selector, this.batchSize);
+        const iter = batches[Symbol.asyncIterator]();
+        const first = await iter.next();
+        if (first.done) {
+            return new NotSupported('All executors returned NotSupported');
+        }
+        // Reconstruct a full iterable including the peeked first batch.
+        const allBatches = (async function* () {
+            yield first.value;
+            // Continue yielding remaining batches from the same iterator.
+            for (;;) {
+                const next = await iter.next();
+                if (next.done)
+                    break;
+                yield next.value;
+            }
+        })();
+        const queue = new AsyncQueue();
+        let elementsProcessed = 0;
+        let quadsGenerated = 0;
+        let hasResults = false;
+        const dispatch = async () => {
+            const inFlight = new Set();
+            let firstError;
+            const track = (promise) => {
+                const p = promise.then(() => {
+                    inFlight.delete(p);
+                }, (err) => {
+                    inFlight.delete(p);
+                    firstError ??= err;
                 });
-
-
+                inFlight.add(p);
+            };
+            try {
+                for await (const bindings of allBatches) {
+                    if (firstError)
+                        break;
+                    for (const executor of this.executors) {
+                        if (firstError)
+                            break;
+                        // Respect maxConcurrency: wait for a slot to open.
+                        if (inFlight.size >= this.maxConcurrency) {
+                            await Promise.race(inFlight);
+                            if (firstError)
+                                break;
+                        }
+                        track((async () => {
+                            const result = await executor.execute(dataset, distribution, {
+                                bindings,
+                            });
+                            if (!(result instanceof NotSupported)) {
+                                hasResults = true;
+                                for await (const quad of result) {
+                                    await queue.push(quad);
+                                    quadsGenerated++;
+                                }
+                            }
+                            elementsProcessed += bindings.length;
+                            options?.onProgress?.(elementsProcessed, quadsGenerated);
+                        })());
+                    }
                 }
             }
-
-
+            catch (err) {
+                firstError ??= err;
+            }
+            // Wait for all remaining in-flight tasks to settle.
+            await Promise.all(inFlight);
+            if (firstError) {
+                queue.abort(firstError);
+            }
+            else {
+                queue.close();
+            }
+        };
+        const dispatchPromise = dispatch();
+        const writePromise = (async () => {
+            try {
+                await writer.write(dataset, queue);
+            }
+            catch (err) {
+                queue.abort(err);
+                throw err;
+            }
+        })();
+        await Promise.all([dispatchPromise, writePromise]);
+        if (!hasResults) {
             return new NotSupported('All executors returned NotSupported');
         }
-        return streams;
     }
     async executeAll(dataset, distribution) {
+        const results = await Promise.all(this.executors.map((executor) => executor.execute(dataset, distribution)));
         const streams = [];
-        for (const
-        const result = await executor.execute(dataset, distribution);
+        for (const result of results) {
         if (!(result instanceof NotSupported)) {
             streams.push(result);
         }
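The dispatch loop above bounds parallelism with a Set of in-flight promises and Promise.race, and funnels the first failure into firstError. The same pattern in isolation, as a generic helper that is not part of the package:

// Run `worker` over `items` with at most `limit` tasks in flight at once,
// remembering the first error and rethrowing it once everything has settled.
async function forEachConcurrent<T>(
  items: Iterable<T> | AsyncIterable<T>,
  limit: number,
  worker: (item: T) => Promise<void>,
): Promise<void> {
  const inFlight = new Set<Promise<void>>();
  let firstError: unknown;
  for await (const item of items) {
    if (firstError) break;
    if (inFlight.size >= limit) {
      await Promise.race(inFlight); // a slot opens when any task settles
      if (firstError) break;
    }
    const p = worker(item).then(
      () => { inFlight.delete(p); },
      (err) => { inFlight.delete(p); firstError ??= err; },
    );
    inFlight.add(p);
  }
  await Promise.all(inFlight); // wait for the tail to settle
  if (firstError) throw firstError;
}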
package/dist/writer/sparqlUpdateWriter.d.ts
CHANGED
@@ -6,6 +6,11 @@ export interface SparqlWriterOptions {
      * The SPARQL UPDATE endpoint URL.
      */
     endpoint: URL;
+    /**
+     * Value for the Authorization header, e.g.
+     * `"Basic dXNlcjpwYXNz"`, `"Bearer my-token"`, or `"GDB eyJ…"`.
+     */
+    auth?: string;
     /**
      * Optional fetch implementation for making HTTP requests.
      * @default globalThis.fetch
@@ -26,6 +31,7 @@ export interface SparqlWriterOptions {
  */
 export declare class SparqlUpdateWriter implements Writer {
     private readonly endpoint;
+    private readonly auth?;
     private readonly fetch;
     private readonly batchSize;
     constructor(options: SparqlWriterOptions);
package/dist/writer/sparqlUpdateWriter.d.ts.map
CHANGED
@@ -1 +1 @@
-
{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;
+
{"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAOlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAS1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAqB5B"}
package/dist/writer/sparqlUpdateWriter.js
CHANGED
@@ -8,10 +8,12 @@ import { serializeQuads } from './serialize.js';
  */
 export class SparqlUpdateWriter {
     endpoint;
+    auth;
     fetch;
     batchSize;
     constructor(options) {
         this.endpoint = options.endpoint;
+        this.auth = options.auth;
         this.fetch = options.fetch ?? globalThis.fetch;
         this.batchSize = options.batchSize ?? 10000;
     }
@@ -30,11 +32,15 @@ export class SparqlUpdateWriter {
         await this.executeUpdate(`INSERT DATA { GRAPH <${graphUri}> { ${turtleData} } }`);
     }
     async executeUpdate(query) {
+        const headers = {
+            'Content-Type': 'application/sparql-update',
+        };
+        if (this.auth) {
+            headers['Authorization'] = this.auth;
+        }
         const response = await this.fetch(this.endpoint.toString(), {
             method: 'POST',
-            headers
-            'Content-Type': 'application/sparql-update',
-            },
+            headers,
             body: query,
         });
         if (!response.ok) {
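A sketch of constructing the writer directly with the new option; the endpoint and token are placeholders, and the class is assumed to be re-exported from the package root.

import { SparqlUpdateWriter } from '@lde/pipeline';

const writer = new SparqlUpdateWriter({
  endpoint: new URL('https://example.org/repositories/my-repo/statements'),
  auth: 'Bearer my-token', // sent as the Authorization header on every UPDATE request
  batchSize: 10000, // optional; matches the default shown above
});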
package/package.json
CHANGED
@@ -1,6 +1,6 @@
 {
   "name": "@lde/pipeline",
-  "version": "0.6.
+  "version": "0.6.24",
   "repository": {
     "url": "https://github.com/ldengine/lde",
     "directory": "packages/pipeline"
@@ -24,12 +24,12 @@
   ],
   "dependencies": {
     "@lde/dataset": "0.6.8",
-    "@lde/dataset-registry-client": "0.6.
+    "@lde/dataset-registry-client": "0.6.15",
     "@lde/sparql-importer": "0.2.8",
     "@lde/sparql-server": "0.4.8",
     "@rdfjs/types": "^2.0.1",
     "c12": "^3.0.2",
-    "fetch-sparql-endpoint": "^
+    "fetch-sparql-endpoint": "^7.1.0",
     "filenamify-url": "^3.0.0",
     "n3": "^1.17.0",
     "sparqljs": "^3.7.3",