@lde/pipeline 0.6.26 → 0.6.27
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/pipeline.d.ts +2 -2
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +2 -2
- package/dist/selector.d.ts +3 -3
- package/dist/selector.d.ts.map +1 -1
- package/dist/sparql/index.d.ts +1 -1
- package/dist/sparql/index.d.ts.map +1 -1
- package/dist/sparql/index.js +1 -1
- package/dist/sparql/selector.d.ts +5 -5
- package/dist/sparql/selector.d.ts.map +1 -1
- package/dist/sparql/selector.js +2 -2
- package/dist/stage.d.ts +6 -6
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +6 -6
- package/package.json +1 -1
package/dist/pipeline.d.ts
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import type { Selector } from './selector.js';
|
|
1
|
+
import type { DatasetSelector } from './selector.js';
|
|
2
2
|
import { Stage } from './stage.js';
|
|
3
3
|
import type { Writer } from './writer/writer.js';
|
|
4
4
|
import { type DistributionResolver } from './distribution/resolver.js';
|
|
@@ -6,7 +6,7 @@ import type { StageOutputResolver } from './stageOutputResolver.js';
|
|
|
6
6
|
import type { ProgressReporter } from './progressReporter.js';
|
|
7
7
|
export interface PipelineOptions {
|
|
8
8
|
name: string;
|
|
9
|
-
|
|
9
|
+
datasetSelector: DatasetSelector;
|
|
10
10
|
stages: Stage[];
|
|
11
11
|
writer: Writer;
|
|
12
12
|
distributionResolver: DistributionResolver;
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -16,10 +16,10 @@ export class Pipeline {
|
|
|
16
16
|
this.options = options;
|
|
17
17
|
}
|
|
18
18
|
async run() {
|
|
19
|
-
const {
|
|
19
|
+
const { datasetSelector, reporter, name } = this.options;
|
|
20
20
|
const start = Date.now();
|
|
21
21
|
reporter?.pipelineStart(name);
|
|
22
|
-
const datasets = await
|
|
22
|
+
const datasets = await datasetSelector.select();
|
|
23
23
|
for await (const dataset of datasets) {
|
|
24
24
|
await this.processDataset(dataset);
|
|
25
25
|
}
|
package/dist/selector.d.ts
CHANGED
|
@@ -3,10 +3,10 @@ import { Client, Paginator } from '@lde/dataset-registry-client';
|
|
|
3
3
|
/**
|
|
4
4
|
* Select {@link Dataset}s for processing in a pipeline.
|
|
5
5
|
*/
|
|
6
|
-
export interface Selector {
|
|
6
|
+
export interface DatasetSelector {
|
|
7
7
|
select(): Promise<Paginator<Dataset>>;
|
|
8
8
|
}
|
|
9
|
-
export declare class ManualDatasetSelection implements Selector {
|
|
9
|
+
export declare class ManualDatasetSelection implements DatasetSelector {
|
|
10
10
|
private readonly datasets;
|
|
11
11
|
constructor(datasets: Dataset[]);
|
|
12
12
|
select(): Promise<Paginator<Dataset>>;
|
|
@@ -26,7 +26,7 @@ export declare class ManualDatasetSelection implements Selector {
|
|
|
26
26
|
* @param string options.query Optional custom SPARQL query to select datasets.
|
|
27
27
|
* @param object options.criteria Optional search criteria to select datasets.
|
|
28
28
|
*/
|
|
29
|
-
export declare class RegistrySelector implements Selector {
|
|
29
|
+
export declare class RegistrySelector implements DatasetSelector {
|
|
30
30
|
private readonly registry;
|
|
31
31
|
private readonly query?;
|
|
32
32
|
private readonly criteria?;
|
package/dist/selector.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,
|
|
1
|
+
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,eAAe;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;gBAEvB,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB;IAMK,MAAM;CAOb"}
|
package/dist/sparql/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
export { SparqlConstructExecutor, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type QuadStream, type VariableBindings, } from './executor.js';
|
|
2
|
-
export {
|
|
2
|
+
export { SparqlItemSelector, type SparqlItemSelectorOptions, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
|
5
5
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
|
package/dist/sparql/index.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
1
|
export { SparqlConstructExecutor, NotSupported, readQueryFile, } from './executor.js';
|
|
2
|
-
export {
|
|
2
|
+
export { SparqlItemSelector, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
2
|
-
import type {
|
|
2
|
+
import type { ItemSelector } from '../stage.js';
|
|
3
3
|
import type { VariableBindings } from './executor.js';
|
|
4
|
-
export interface SparqlSelectorOptions {
|
|
4
|
+
export interface SparqlItemSelectorOptions {
|
|
5
5
|
/** SELECT query projecting at least one named variable. A LIMIT in the query sets the default page size. */
|
|
6
6
|
query: string;
|
|
7
7
|
/** SPARQL endpoint URL. */
|
|
@@ -12,7 +12,7 @@ export interface SparqlSelectorOptions {
|
|
|
12
12
|
fetcher?: SparqlEndpointFetcher;
|
|
13
13
|
}
|
|
14
14
|
/**
|
|
15
|
-
* {@link
|
|
15
|
+
* {@link ItemSelector} that pages through SPARQL SELECT results,
|
|
16
16
|
* yielding all projected variable bindings (NamedNode values only) per row.
|
|
17
17
|
*
|
|
18
18
|
* Pagination is an internal detail — consumers iterate binding rows directly.
|
|
@@ -20,12 +20,12 @@ export interface SparqlSelectorOptions {
|
|
|
20
20
|
* (can be overridden by the `pageSize` option). Pagination continues
|
|
21
21
|
* until a page returns fewer results than the page size.
|
|
22
22
|
*/
|
|
23
|
-
export declare class
|
|
23
|
+
export declare class SparqlItemSelector implements ItemSelector {
|
|
24
24
|
private readonly parsed;
|
|
25
25
|
private readonly endpoint;
|
|
26
26
|
private readonly pageSize;
|
|
27
27
|
private readonly fetcher;
|
|
28
|
-
constructor(options:
|
|
28
|
+
constructor(options: SparqlItemSelectorOptions);
|
|
29
29
|
[Symbol.asyncIterator](): AsyncIterableIterator<VariableBindings>;
|
|
30
30
|
}
|
|
31
31
|
//# sourceMappingURL=selector.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,
|
|
1
|
+
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAKtD,MAAM,WAAW,yBAAyB;IACxC,4GAA4G;IAC5G,KAAK,EAAE,MAAM,CAAC;IACd,2BAA2B;IAC3B,QAAQ,EAAE,GAAG,CAAC;IACd,sEAAsE;IACtE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;GAQG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,yBAAyB;IAmBvC,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,qBAAqB,CAAC,gBAAgB,CAAC;CAkCzE"}
|
package/dist/sparql/selector.js
CHANGED
|
@@ -3,7 +3,7 @@ import { Generator, Parser, } from 'sparqljs';
|
|
|
3
3
|
const parser = new Parser();
|
|
4
4
|
const generator = new Generator();
|
|
5
5
|
/**
|
|
6
|
-
* {@link
|
|
6
|
+
* {@link ItemSelector} that pages through SPARQL SELECT results,
|
|
7
7
|
* yielding all projected variable bindings (NamedNode values only) per row.
|
|
8
8
|
*
|
|
9
9
|
* Pagination is an internal detail — consumers iterate binding rows directly.
|
|
@@ -11,7 +11,7 @@ const generator = new Generator();
|
|
|
11
11
|
* (can be overridden by the `pageSize` option). Pagination continues
|
|
12
12
|
* until a page returns fewer results than the page size.
|
|
13
13
|
*/
|
|
14
|
-
export class
|
|
14
|
+
export class SparqlItemSelector {
|
|
15
15
|
parsed;
|
|
16
16
|
endpoint;
|
|
17
17
|
pageSize;
|
package/dist/stage.d.ts
CHANGED
|
@@ -2,12 +2,12 @@ import { Dataset, Distribution } from '@lde/dataset';
|
|
|
2
2
|
import type { Executor, VariableBindings } from './sparql/executor.js';
|
|
3
3
|
import { NotSupported } from './sparql/executor.js';
|
|
4
4
|
import type { Writer } from './writer/writer.js';
|
|
5
|
-
/**
|
|
6
|
-
export type
|
|
5
|
+
/** An item selector, or a factory that receives the runtime distribution. */
|
|
6
|
+
export type ItemSelectorInput = ItemSelector | ((distribution: Distribution) => ItemSelector);
|
|
7
7
|
export interface StageOptions {
|
|
8
8
|
name: string;
|
|
9
9
|
executors: Executor | Executor[];
|
|
10
|
-
|
|
10
|
+
itemSelector?: ItemSelectorInput;
|
|
11
11
|
/** Maximum number of bindings per executor call. @default 10 */
|
|
12
12
|
batchSize?: number;
|
|
13
13
|
/** Maximum concurrent in-flight executor batches. @default 10 */
|
|
@@ -22,7 +22,7 @@ export declare class Stage {
|
|
|
22
22
|
readonly name: string;
|
|
23
23
|
readonly stages: readonly Stage[];
|
|
24
24
|
private readonly executors;
|
|
25
|
-
private readonly
|
|
25
|
+
private readonly itemSelectorInput?;
|
|
26
26
|
private readonly batchSize;
|
|
27
27
|
private readonly maxConcurrency;
|
|
28
28
|
constructor(options: StageOptions);
|
|
@@ -30,7 +30,7 @@ export declare class Stage {
|
|
|
30
30
|
private runWithSelector;
|
|
31
31
|
private executeAll;
|
|
32
32
|
}
|
|
33
|
-
/**
|
|
34
|
-
export interface
|
|
33
|
+
/** Selects items (as variable bindings) for executors to process. Pagination is an implementation detail. */
|
|
34
|
+
export interface ItemSelector extends AsyncIterable<VariableBindings> {
|
|
35
35
|
}
|
|
36
36
|
//# sourceMappingURL=stage.d.ts.map
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,6EAA6E;AAC7E,MAAM,MAAM,iBAAiB,GACzB,YAAY,GACZ,CAAC,CAAC,YAAY,EAAE,YAAY,KAAK,YAAY,CAAC,CAAC;AAEnD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,iBAAiB,CAAC;IACjC,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAAC,CAAoB;IACvD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAuBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,6GAA6G;AAE7G,MAAM,WAAW,YAAa,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
|
package/dist/stage.js
CHANGED
|
@@ -5,7 +5,7 @@ export class Stage {
|
|
|
5
5
|
name;
|
|
6
6
|
stages;
|
|
7
7
|
executors;
|
|
8
|
-
|
|
8
|
+
itemSelectorInput;
|
|
9
9
|
batchSize;
|
|
10
10
|
maxConcurrency;
|
|
11
11
|
constructor(options) {
|
|
@@ -14,15 +14,15 @@ export class Stage {
|
|
|
14
14
|
this.executors = Array.isArray(options.executors)
|
|
15
15
|
? options.executors
|
|
16
16
|
: [options.executors];
|
|
17
|
-
this.
|
|
17
|
+
this.itemSelectorInput = options.itemSelector;
|
|
18
18
|
this.batchSize = options.batchSize ?? 10;
|
|
19
19
|
this.maxConcurrency = options.maxConcurrency ?? 10;
|
|
20
20
|
}
|
|
21
21
|
async run(dataset, distribution, writer, options) {
|
|
22
|
-
if (this.
|
|
23
|
-
const selector = typeof this.
|
|
24
|
-
? this.
|
|
25
|
-
: this.
|
|
22
|
+
if (this.itemSelectorInput) {
|
|
23
|
+
const selector = typeof this.itemSelectorInput === 'function'
|
|
24
|
+
? this.itemSelectorInput(distribution)
|
|
25
|
+
: this.itemSelectorInput;
|
|
26
26
|
return this.runWithSelector(selector, dataset, distribution, writer, options);
|
|
27
27
|
}
|
|
28
28
|
const streams = await this.executeAll(dataset, distribution);
|