@lde/pipeline 0.6.25 → 0.6.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pipeline.d.ts +2 -2
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +2 -2
- package/dist/selector.d.ts +3 -3
- package/dist/selector.d.ts.map +1 -1
- package/dist/sparql/executor.d.ts +10 -13
- package/dist/sparql/executor.d.ts.map +1 -1
- package/dist/sparql/executor.js +29 -28
- package/dist/sparql/index.d.ts +2 -2
- package/dist/sparql/index.d.ts.map +1 -1
- package/dist/sparql/index.js +2 -2
- package/dist/sparql/selector.d.ts +5 -5
- package/dist/sparql/selector.d.ts.map +1 -1
- package/dist/sparql/selector.js +2 -2
- package/dist/stage.d.ts +6 -4
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +9 -6
- package/package.json +1 -1
package/dist/pipeline.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type {
|
|
1
|
+
import type { DatasetSelector } from './selector.js';
|
|
2
2
|
import { Stage } from './stage.js';
|
|
3
3
|
import type { Writer } from './writer/writer.js';
|
|
4
4
|
import { type DistributionResolver } from './distribution/resolver.js';
|
|
@@ -6,7 +6,7 @@ import type { StageOutputResolver } from './stageOutputResolver.js';
|
|
|
6
6
|
import type { ProgressReporter } from './progressReporter.js';
|
|
7
7
|
export interface PipelineOptions {
|
|
8
8
|
name: string;
|
|
9
|
-
|
|
9
|
+
datasetSelector: DatasetSelector;
|
|
10
10
|
stages: Stage[];
|
|
11
11
|
writer: Writer;
|
|
12
12
|
distributionResolver: DistributionResolver;
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -16,10 +16,10 @@ export class Pipeline {
|
|
|
16
16
|
this.options = options;
|
|
17
17
|
}
|
|
18
18
|
async run() {
|
|
19
|
-
const {
|
|
19
|
+
const { datasetSelector, reporter, name } = this.options;
|
|
20
20
|
const start = Date.now();
|
|
21
21
|
reporter?.pipelineStart(name);
|
|
22
|
-
const datasets = await
|
|
22
|
+
const datasets = await datasetSelector.select();
|
|
23
23
|
for await (const dataset of datasets) {
|
|
24
24
|
await this.processDataset(dataset);
|
|
25
25
|
}
|
package/dist/selector.d.ts
CHANGED
|
@@ -3,10 +3,10 @@ import { Client, Paginator } from '@lde/dataset-registry-client';
|
|
|
3
3
|
/**
|
|
4
4
|
* Select {@link Dataset}s for processing in a pipeline.
|
|
5
5
|
*/
|
|
6
|
-
export interface
|
|
6
|
+
export interface DatasetSelector {
|
|
7
7
|
select(): Promise<Paginator<Dataset>>;
|
|
8
8
|
}
|
|
9
|
-
export declare class ManualDatasetSelection implements
|
|
9
|
+
export declare class ManualDatasetSelection implements DatasetSelector {
|
|
10
10
|
private readonly datasets;
|
|
11
11
|
constructor(datasets: Dataset[]);
|
|
12
12
|
select(): Promise<Paginator<Dataset>>;
|
|
@@ -26,7 +26,7 @@ export declare class ManualDatasetSelection implements Selector {
|
|
|
26
26
|
* @param string options.query Optional custom SPARQL query to select datasets.
|
|
27
27
|
* @param object options.criteria Optional search criteria to select datasets.
|
|
28
28
|
*/
|
|
29
|
-
export declare class RegistrySelector implements
|
|
29
|
+
export declare class RegistrySelector implements DatasetSelector {
|
|
30
30
|
private readonly registry;
|
|
31
31
|
private readonly query?;
|
|
32
32
|
private readonly criteria?;
|
package/dist/selector.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,
|
|
1
|
+
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,eAAe;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;gBAEvB,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB;IAMK,MAAM;CAOb"}
|
|
@@ -45,12 +45,16 @@ export interface SparqlConstructExecutorOptions {
|
|
|
45
45
|
fetcher?: SparqlEndpointFetcher;
|
|
46
46
|
}
|
|
47
47
|
/**
|
|
48
|
-
* A streaming SPARQL CONSTRUCT executor
|
|
49
|
-
*
|
|
48
|
+
* A streaming SPARQL CONSTRUCT executor.
|
|
49
|
+
*
|
|
50
|
+
* Queries **without** `#subjectFilter#` are parsed once in the constructor
|
|
51
|
+
* (fast path). Queries that contain the template are stored as raw strings
|
|
52
|
+
* and parsed at {@link execute} time after substitution.
|
|
50
53
|
*
|
|
51
54
|
* Template substitution (applied in order):
|
|
52
|
-
* 1.
|
|
53
|
-
* 2.
|
|
55
|
+
* 1. `#subjectFilter#` — replaced with `distribution.subjectFilter` (deferred to execute)
|
|
56
|
+
* 2. `FROM <graph>` — set via `withDefaultGraph` if the distribution has a named graph
|
|
57
|
+
* 3. `?dataset` — replaced with the dataset IRI (string substitution on the serialised query)
|
|
54
58
|
*
|
|
55
59
|
* @example
|
|
56
60
|
* ```typescript
|
|
@@ -68,7 +72,8 @@ export interface SparqlConstructExecutorOptions {
|
|
|
68
72
|
* ```
|
|
69
73
|
*/
|
|
70
74
|
export declare class SparqlConstructExecutor implements Executor {
|
|
71
|
-
private readonly
|
|
75
|
+
private readonly rawQuery;
|
|
76
|
+
private readonly preParsed?;
|
|
72
77
|
private readonly fetcher;
|
|
73
78
|
private readonly generator;
|
|
74
79
|
constructor(options: SparqlConstructExecutorOptions);
|
|
@@ -89,14 +94,6 @@ export declare class SparqlConstructExecutor implements Executor {
|
|
|
89
94
|
*/
|
|
90
95
|
static fromFile(filename: string, options?: Omit<SparqlConstructExecutorOptions, 'query'>): Promise<SparqlConstructExecutor>;
|
|
91
96
|
}
|
|
92
|
-
/**
|
|
93
|
-
* Substitute template variables in a SPARQL query.
|
|
94
|
-
*
|
|
95
|
-
* - `#subjectFilter#` — replaced with the distribution's subject filter
|
|
96
|
-
* - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
|
|
97
|
-
* - `?dataset` — replaced with the dataset IRI
|
|
98
|
-
*/
|
|
99
|
-
export declare function substituteQueryTemplates(query: string, distribution: Distribution | null, dataset: Dataset): string;
|
|
100
97
|
/**
|
|
101
98
|
* Read a SPARQL query from a file.
|
|
102
99
|
*/
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAO5C;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED
|
|
1
|
+
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAO5C;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;;GAGG;AACH,MAAM,MAAM,UAAU,GAAG,QAAQ,GAAG,MAAM,CAAC,IAAI,CAAC,CAAC;AAEjD;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAkBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,UAAU,CAAC;IAiCtB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE"}
|
package/dist/sparql/executor.js
CHANGED
|
@@ -14,12 +14,16 @@ export class NotSupported {
|
|
|
14
14
|
}
|
|
15
15
|
}
|
|
16
16
|
/**
|
|
17
|
-
* A streaming SPARQL CONSTRUCT executor
|
|
18
|
-
*
|
|
17
|
+
* A streaming SPARQL CONSTRUCT executor.
|
|
18
|
+
*
|
|
19
|
+
* Queries **without** `#subjectFilter#` are parsed once in the constructor
|
|
20
|
+
* (fast path). Queries that contain the template are stored as raw strings
|
|
21
|
+
* and parsed at {@link execute} time after substitution.
|
|
19
22
|
*
|
|
20
23
|
* Template substitution (applied in order):
|
|
21
|
-
* 1.
|
|
22
|
-
* 2.
|
|
24
|
+
* 1. `#subjectFilter#` — replaced with `distribution.subjectFilter` (deferred to execute)
|
|
25
|
+
* 2. `FROM <graph>` — set via `withDefaultGraph` if the distribution has a named graph
|
|
26
|
+
* 3. `?dataset` — replaced with the dataset IRI (string substitution on the serialised query)
|
|
23
27
|
*
|
|
24
28
|
* @example
|
|
25
29
|
* ```typescript
|
|
@@ -37,16 +41,19 @@ export class NotSupported {
|
|
|
37
41
|
* ```
|
|
38
42
|
*/
|
|
39
43
|
export class SparqlConstructExecutor {
|
|
40
|
-
|
|
44
|
+
rawQuery;
|
|
45
|
+
preParsed;
|
|
41
46
|
fetcher;
|
|
42
47
|
generator = new Generator();
|
|
43
48
|
constructor(options) {
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
|
|
49
|
+
this.rawQuery = options.query;
|
|
50
|
+
if (!options.query.includes('#subjectFilter#')) {
|
|
51
|
+
const parsed = new Parser().parse(options.query);
|
|
52
|
+
if (parsed.type !== 'query' || parsed.queryType !== 'CONSTRUCT') {
|
|
53
|
+
throw new Error('Query must be a CONSTRUCT query');
|
|
54
|
+
}
|
|
55
|
+
this.preParsed = parsed;
|
|
48
56
|
}
|
|
49
|
-
this.query = parsed;
|
|
50
57
|
this.fetcher =
|
|
51
58
|
options.fetcher ??
|
|
52
59
|
new SparqlEndpointFetcher({
|
|
@@ -63,7 +70,18 @@ export class SparqlConstructExecutor {
|
|
|
63
70
|
*/
|
|
64
71
|
async execute(dataset, distribution, options) {
|
|
65
72
|
const endpoint = distribution.accessUrl;
|
|
66
|
-
let ast
|
|
73
|
+
let ast;
|
|
74
|
+
if (this.preParsed) {
|
|
75
|
+
ast = structuredClone(this.preParsed);
|
|
76
|
+
}
|
|
77
|
+
else {
|
|
78
|
+
const substituted = this.rawQuery.replace('#subjectFilter#', distribution.subjectFilter ?? '');
|
|
79
|
+
const parsed = new Parser().parse(substituted);
|
|
80
|
+
if (parsed.type !== 'query' || parsed.queryType !== 'CONSTRUCT') {
|
|
81
|
+
throw new Error('Query must be a CONSTRUCT query');
|
|
82
|
+
}
|
|
83
|
+
ast = parsed;
|
|
84
|
+
}
|
|
67
85
|
if (distribution.namedGraph) {
|
|
68
86
|
withDefaultGraph(ast, distribution.namedGraph);
|
|
69
87
|
}
|
|
@@ -86,23 +104,6 @@ export class SparqlConstructExecutor {
|
|
|
86
104
|
return new SparqlConstructExecutor({ ...options, query });
|
|
87
105
|
}
|
|
88
106
|
}
|
|
89
|
-
/**
|
|
90
|
-
* Substitute template variables in a SPARQL query.
|
|
91
|
-
*
|
|
92
|
-
* - `#subjectFilter#` — replaced with the distribution's subject filter
|
|
93
|
-
* - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
|
|
94
|
-
* - `?dataset` — replaced with the dataset IRI
|
|
95
|
-
*/
|
|
96
|
-
export function substituteQueryTemplates(query, distribution, dataset) {
|
|
97
|
-
const subjectFilter = distribution?.subjectFilter ?? '';
|
|
98
|
-
const namedGraph = distribution?.namedGraph
|
|
99
|
-
? `FROM <${distribution.namedGraph}>`
|
|
100
|
-
: '';
|
|
101
|
-
return query
|
|
102
|
-
.replace('#subjectFilter#', subjectFilter)
|
|
103
|
-
.replaceAll('?dataset', `<${dataset.iri}>`)
|
|
104
|
-
.replace('#namedGraph#', namedGraph);
|
|
105
|
-
}
|
|
106
107
|
/**
|
|
107
108
|
* Read a SPARQL query from a file.
|
|
108
109
|
*/
|
package/dist/sparql/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
export { SparqlConstructExecutor,
|
|
2
|
-
export {
|
|
1
|
+
export { SparqlConstructExecutor, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type QuadStream, type VariableBindings, } from './executor.js';
|
|
2
|
+
export { SparqlItemSelector, type SparqlItemSelectorOptions, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
|
5
5
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/sparql/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
export { SparqlConstructExecutor,
|
|
2
|
-
export {
|
|
1
|
+
export { SparqlConstructExecutor, NotSupported, readQueryFile, } from './executor.js';
|
|
2
|
+
export { SparqlItemSelector, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
2
|
-
import type {
|
|
2
|
+
import type { ItemSelector } from '../stage.js';
|
|
3
3
|
import type { VariableBindings } from './executor.js';
|
|
4
|
-
export interface
|
|
4
|
+
export interface SparqlItemSelectorOptions {
|
|
5
5
|
/** SELECT query projecting at least one named variable. A LIMIT in the query sets the default page size. */
|
|
6
6
|
query: string;
|
|
7
7
|
/** SPARQL endpoint URL. */
|
|
@@ -12,7 +12,7 @@ export interface SparqlSelectorOptions {
|
|
|
12
12
|
fetcher?: SparqlEndpointFetcher;
|
|
13
13
|
}
|
|
14
14
|
/**
|
|
15
|
-
* {@link
|
|
15
|
+
* {@link ItemSelector} that pages through SPARQL SELECT results,
|
|
16
16
|
* yielding all projected variable bindings (NamedNode values only) per row.
|
|
17
17
|
*
|
|
18
18
|
* Pagination is an internal detail — consumers iterate binding rows directly.
|
|
@@ -20,12 +20,12 @@ export interface SparqlSelectorOptions {
|
|
|
20
20
|
* (can be overridden by the `pageSize` option). Pagination continues
|
|
21
21
|
* until a page returns fewer results than the page size.
|
|
22
22
|
*/
|
|
23
|
-
export declare class
|
|
23
|
+
export declare class SparqlItemSelector implements ItemSelector {
|
|
24
24
|
private readonly parsed;
|
|
25
25
|
private readonly endpoint;
|
|
26
26
|
private readonly pageSize;
|
|
27
27
|
private readonly fetcher;
|
|
28
|
-
constructor(options:
|
|
28
|
+
constructor(options: SparqlItemSelectorOptions);
|
|
29
29
|
[Symbol.asyncIterator](): AsyncIterableIterator<VariableBindings>;
|
|
30
30
|
}
|
|
31
31
|
//# sourceMappingURL=selector.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAKtD,MAAM,WAAW,yBAAyB;IACxC,4GAA4G;IAC5G,KAAK,EAAE,MAAM,CAAC;IACd,2BAA2B;IAC3B,QAAQ,EAAE,GAAG,CAAC;IACd,sEAAsE;IACtE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;GAQG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,yBAAyB;IAmBvC,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,qBAAqB,CAAC,gBAAgB,CAAC;CAkCzE"}
|
package/dist/sparql/selector.js
CHANGED
|
@@ -3,7 +3,7 @@ import { Generator, Parser, } from 'sparqljs';
|
|
|
3
3
|
const parser = new Parser();
|
|
4
4
|
const generator = new Generator();
|
|
5
5
|
/**
|
|
6
|
-
* {@link
|
|
6
|
+
* {@link ItemSelector} that pages through SPARQL SELECT results,
|
|
7
7
|
* yielding all projected variable bindings (NamedNode values only) per row.
|
|
8
8
|
*
|
|
9
9
|
* Pagination is an internal detail — consumers iterate binding rows directly.
|
|
@@ -11,7 +11,7 @@ const generator = new Generator();
|
|
|
11
11
|
* (can be overridden by the `pageSize` option). Pagination continues
|
|
12
12
|
* until a page returns fewer results than the page size.
|
|
13
13
|
*/
|
|
14
|
-
export class
|
|
14
|
+
export class SparqlItemSelector {
|
|
15
15
|
parsed;
|
|
16
16
|
endpoint;
|
|
17
17
|
pageSize;
|
package/dist/stage.d.ts
CHANGED
|
@@ -2,10 +2,12 @@ import { Dataset, Distribution } from '@lde/dataset';
|
|
|
2
2
|
import type { Executor, VariableBindings } from './sparql/executor.js';
|
|
3
3
|
import { NotSupported } from './sparql/executor.js';
|
|
4
4
|
import type { Writer } from './writer/writer.js';
|
|
5
|
+
/** An item selector, or a factory that receives the runtime distribution. */
|
|
6
|
+
export type ItemSelectorInput = ItemSelector | ((distribution: Distribution) => ItemSelector);
|
|
5
7
|
export interface StageOptions {
|
|
6
8
|
name: string;
|
|
7
9
|
executors: Executor | Executor[];
|
|
8
|
-
|
|
10
|
+
itemSelector?: ItemSelectorInput;
|
|
9
11
|
/** Maximum number of bindings per executor call. @default 10 */
|
|
10
12
|
batchSize?: number;
|
|
11
13
|
/** Maximum concurrent in-flight executor batches. @default 10 */
|
|
@@ -20,7 +22,7 @@ export declare class Stage {
|
|
|
20
22
|
readonly name: string;
|
|
21
23
|
readonly stages: readonly Stage[];
|
|
22
24
|
private readonly executors;
|
|
23
|
-
private readonly
|
|
25
|
+
private readonly itemSelectorInput?;
|
|
24
26
|
private readonly batchSize;
|
|
25
27
|
private readonly maxConcurrency;
|
|
26
28
|
constructor(options: StageOptions);
|
|
@@ -28,7 +30,7 @@ export declare class Stage {
|
|
|
28
30
|
private runWithSelector;
|
|
29
31
|
private executeAll;
|
|
30
32
|
}
|
|
31
|
-
/**
|
|
32
|
-
export interface
|
|
33
|
+
/** Selects items (as variable bindings) for executors to process. Pagination is an implementation detail. */
|
|
34
|
+
export interface ItemSelector extends AsyncIterable<VariableBindings> {
|
|
33
35
|
}
|
|
34
36
|
//# sourceMappingURL=stage.d.ts.map
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,6EAA6E;AAC7E,MAAM,MAAM,iBAAiB,GACzB,YAAY,GACZ,CAAC,CAAC,YAAY,EAAE,YAAY,KAAK,YAAY,CAAC,CAAC;AAEnD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,iBAAiB,CAAC;IACjC,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAoB;IACvD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAuBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,6GAA6G;AAE7G,MAAM,WAAW,YAAa,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
|
package/dist/stage.js
CHANGED
|
@@ -5,7 +5,7 @@ export class Stage {
|
|
|
5
5
|
name;
|
|
6
6
|
stages;
|
|
7
7
|
executors;
|
|
8
|
-
|
|
8
|
+
itemSelectorInput;
|
|
9
9
|
batchSize;
|
|
10
10
|
maxConcurrency;
|
|
11
11
|
constructor(options) {
|
|
@@ -14,13 +14,16 @@ export class Stage {
|
|
|
14
14
|
this.executors = Array.isArray(options.executors)
|
|
15
15
|
? options.executors
|
|
16
16
|
: [options.executors];
|
|
17
|
-
this.
|
|
17
|
+
this.itemSelectorInput = options.itemSelector;
|
|
18
18
|
this.batchSize = options.batchSize ?? 10;
|
|
19
19
|
this.maxConcurrency = options.maxConcurrency ?? 10;
|
|
20
20
|
}
|
|
21
21
|
async run(dataset, distribution, writer, options) {
|
|
22
|
-
if (this.
|
|
23
|
-
|
|
22
|
+
if (this.itemSelectorInput) {
|
|
23
|
+
const selector = typeof this.itemSelectorInput === 'function'
|
|
24
|
+
? this.itemSelectorInput(distribution)
|
|
25
|
+
: this.itemSelectorInput;
|
|
26
|
+
return this.runWithSelector(selector, dataset, distribution, writer, options);
|
|
24
27
|
}
|
|
25
28
|
const streams = await this.executeAll(dataset, distribution);
|
|
26
29
|
if (streams instanceof NotSupported) {
|
|
@@ -28,10 +31,10 @@ export class Stage {
|
|
|
28
31
|
}
|
|
29
32
|
await writer.write(dataset, mergeStreams(streams));
|
|
30
33
|
}
|
|
31
|
-
async runWithSelector(dataset, distribution, writer, options) {
|
|
34
|
+
async runWithSelector(selector, dataset, distribution, writer, options) {
|
|
32
35
|
// Peek the first batch to detect an empty selector before starting the
|
|
33
36
|
// writer (important because e.g. SparqlUpdateWriter does CLEAR GRAPH).
|
|
34
|
-
const batches = batch(
|
|
37
|
+
const batches = batch(selector, this.batchSize);
|
|
35
38
|
const iter = batches[Symbol.asyncIterator]();
|
|
36
39
|
const first = await iter.next();
|
|
37
40
|
if (first.done) {
|