@lde/pipeline 0.6.26 → 0.6.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,63 +2,111 @@
2
2
 
3
3
  Framework for building RDF data processing pipelines with SPARQL.
4
4
 
5
- ## Features
6
-
7
- - **Pipeline** — orchestrates steps that process DCAT datasets
8
- - **PipelineBuilder** — fluent API for constructing pipelines from steps and selectors
9
- - **PipelineConfig** — load pipeline configuration from YAML/JSON files
10
- - **SparqlConstructExecutor** — streaming SPARQL CONSTRUCT with template substitution and variable bindings
11
- - **Distribution analysis** — probe and analyze dataset distributions
12
-
13
5
  ## Components
14
6
 
15
7
  A **Pipeline** consists of:
16
8
 
17
- - one **[Dataset Selector](#dataset-selector)**
18
- - one **[Distribution Resolver](#distribution-resolver)** that resolves the input dataset to a usable SPARQL distribution
9
+ - a **Dataset Selector** that selects which datasets to process
10
+ - a **Distribution Resolver** that resolves each dataset to a usable SPARQL endpoint
19
11
  - one or more **Stages**, each consisting of:
20
- - an optional **Selector** that filters resources
21
- - one or more **Executors** that generate triples for each selected resource
12
+ - an optional **Item Selector** that selects resources (as variable bindings) for fan-out
13
+ - one or more **Executors** that generate triples
22
14
 
23
15
  ### Dataset Selector
24
16
 
25
- Selects datasets, either manually by the user or dynamically by querying a DCAT Dataset Registry.
17
+ Selects datasets, either manually or by querying a DCAT Dataset Registry:
26
18
 
27
- ### Distribution Resolver
19
+ ```typescript
20
+ // From a registry
21
+ const selector = new RegistrySelector({
22
+ registry: new Client(new URL('https://example.com/sparql')),
23
+ });
28
24
 
29
- Resolves each selected dataset to a usable distribution.
25
+ // Manual
26
+ const selector = new ManualDatasetSelection([dataset]);
27
+ ```
30
28
 
31
- #### SPARQL Distribution Resolver
29
+ ### Item Selector
32
30
 
33
- If a working SPARQL endpoint is already available for the dataset, that is used.
34
- If not, and a valid RDF datadump is available, that is imported to a local SPARQL server.
31
+ Selects resources from the distribution as variable bindings; the stage then fans out executor calls per batch of those bindings. An item selector implements the `ItemSelector` interface:
35
32
 
36
- #### Other Distribution Resolvers
33
+ ```typescript
34
+ interface ItemSelector {
35
+ select(distribution: Distribution): AsyncIterable<VariableBindings>;
36
+ }
37
+ ```
37
38
 
38
- ### Bindings Selector
39
+ The distribution is received at run time, so selectors don't need the endpoint URL at construction time. Use `SparqlItemSelector` for SPARQL-based selection with automatic pagination:
39
40
 
40
- Selects resources from the dataset to fan out queries per result in the executor.
41
- Bindings are free, and replaced with `VALUES { ... }`.
41
+ ```typescript
42
+ new SparqlItemSelector({
43
+ query: 'SELECT DISTINCT ?class WHERE { ?s a ?class }',
44
+ });
45
+ ```
46
+
47
+ For dynamic queries that depend on the distribution, implement `ItemSelector` directly:
48
+
49
+ ```typescript
50
+ const itemSelector: ItemSelector = {
51
+ select: (distribution) => {
52
+ const query = buildQuery(distribution);
53
+ return new SparqlItemSelector({ query }).select(distribution);
54
+ },
55
+ };
56
+ ```
42
57
 
43
58
  ### Executor
44
59
 
60
+ Generates RDF triples. `SparqlConstructExecutor` runs a SPARQL CONSTRUCT query with template substitution and variable bindings:
61
+
62
+ ```typescript
63
+ const executor = new SparqlConstructExecutor({
64
+ query: 'CONSTRUCT { ?s ?p ?o } WHERE { ?s ?p ?o }',
65
+ });
66
+ ```
67
+
68
+ ### Writer
69
+
70
+ Writes generated quads to a destination:
71
+
72
+ - `SparqlUpdateWriter` — writes to a SPARQL endpoint via UPDATE queries
73
+ - `FileWriter` — writes to local files
74
+
45
75
  ## Usage
46
76
 
47
77
  ```typescript
48
78
  import {
49
- PipelineBuilder,
79
+ Pipeline,
80
+ Stage,
50
81
  SparqlConstructExecutor,
51
- collect,
82
+ SparqlItemSelector,
83
+ SparqlUpdateWriter,
84
+ ManualDatasetSelection,
85
+ SparqlDistributionResolver,
52
86
  } from '@lde/pipeline';
53
87
 
54
- // Build a pipeline from steps
55
- const pipeline = new PipelineBuilder().addStep(myStep).build();
56
-
57
- // Or use the SPARQL executor directly
58
- const executor = new SparqlConstructExecutor({
59
- query: 'CONSTRUCT { ?dataset ?p ?o } WHERE { ?s ?p ?o }',
88
+ const pipeline = new Pipeline({
89
+ name: 'example',
90
+ datasetSelector: new ManualDatasetSelection([dataset]),
91
+ distributionResolver: new SparqlDistributionResolver(),
92
+ stages: [
93
+ new Stage({
94
+ name: 'per-class',
95
+ itemSelector: new SparqlItemSelector({
96
+ query: 'SELECT DISTINCT ?class WHERE { ?s a ?class }',
97
+ }),
98
+ executors: new SparqlConstructExecutor({
99
+ query:
100
+ 'CONSTRUCT { ?class a <http://example.org/Class> } WHERE { ?s a ?class }',
101
+ }),
102
+ }),
103
+ ],
104
+ writer: new SparqlUpdateWriter({
105
+ endpoint: new URL('http://localhost:7200/repositories/lde/statements'),
106
+ }),
60
107
  });
61
- const result = await executor.execute(dataset);
108
+
109
+ await pipeline.run();
62
110
  ```
63
111
 
64
112
  ## Validation
@@ -1,4 +1,4 @@
1
- import type { Selector } from './selector.js';
1
+ import type { DatasetSelector } from './selector.js';
2
2
  import { Stage } from './stage.js';
3
3
  import type { Writer } from './writer/writer.js';
4
4
  import { type DistributionResolver } from './distribution/resolver.js';
@@ -6,7 +6,7 @@ import type { StageOutputResolver } from './stageOutputResolver.js';
6
6
  import type { ProgressReporter } from './progressReporter.js';
7
7
  export interface PipelineOptions {
8
8
  name: string;
9
- selector: Selector;
9
+ datasetSelector: DatasetSelector;
10
10
  stages: Stage[];
11
11
  writer: Writer;
12
12
  distributionResolver: DistributionResolver;
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AAC9C,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAEpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,IAAI,EAAE,MAAM,CAAC;IACb,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,MAAM,EAAE,MAAM,CAAC;IACf,oBAAoB,EAAE,oBAAoB,CAAC;IAC3C,mBAAmB,CAAC,EAAE,mBAAmB,CAAC;IAC1C,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAClD,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAED,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAkB;gBAE9B,OAAO,EAAE,eAAe;IAe9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAcZ,cAAc;YA2Bd,QAAQ;YAgCR,QAAQ;YAoDR,eAAe;YAoCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -16,10 +16,10 @@ export class Pipeline {
16
16
  this.options = options;
17
17
  }
18
18
  async run() {
19
- const { selector, reporter, name } = this.options;
19
+ const { datasetSelector, reporter, name } = this.options;
20
20
  const start = Date.now();
21
21
  reporter?.pipelineStart(name);
22
- const datasets = await selector.select();
22
+ const datasets = await datasetSelector.select();
23
23
  for await (const dataset of datasets) {
24
24
  await this.processDataset(dataset);
25
25
  }
@@ -3,10 +3,10 @@ import { Client, Paginator } from '@lde/dataset-registry-client';
3
3
  /**
4
4
  * Select {@link Dataset}s for processing in a pipeline.
5
5
  */
6
- export interface Selector {
6
+ export interface DatasetSelector {
7
7
  select(): Promise<Paginator<Dataset>>;
8
8
  }
9
- export declare class ManualDatasetSelection implements Selector {
9
+ export declare class ManualDatasetSelection implements DatasetSelector {
10
10
  private readonly datasets;
11
11
  constructor(datasets: Dataset[]);
12
12
  select(): Promise<Paginator<Dataset>>;
@@ -26,7 +26,7 @@ export declare class ManualDatasetSelection implements Selector {
26
26
  * @param string options.query Optional custom SPARQL query to select datasets.
27
27
  * @param object options.criteria Optional search criteria to select datasets.
28
28
  */
29
- export declare class RegistrySelector implements Selector {
29
+ export declare class RegistrySelector implements DatasetSelector {
30
30
  private readonly registry;
31
31
  private readonly query?;
32
32
  private readonly criteria?;
@@ -1 +1 @@
1
- {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,QAAQ;IACzC,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,QAAQ;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;gBAEvB,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB;IAMK,MAAM;CAOb"}
1
+ {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,eAAe;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;gBAEvB,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB;IAMK,MAAM;CAOb"}
@@ -1,5 +1,5 @@
1
1
  export { SparqlConstructExecutor, NotSupported, readQueryFile, type ExecuteOptions, type Executor, type SparqlConstructExecutorOptions, type QuadStream, type VariableBindings, } from './executor.js';
2
- export { SparqlSelector, type SparqlSelectorOptions } from './selector.js';
2
+ export { SparqlItemSelector, type SparqlItemSelectorOptions, } from './selector.js';
3
3
  export { injectValues } from './values.js';
4
4
  export { withDefaultGraph } from './graph.js';
5
5
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,cAAc,EAAE,KAAK,qBAAqB,EAAE,MAAM,eAAe,CAAC;AAE3E,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,uBAAuB,EACvB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,UAAU,EACf,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
@@ -1,4 +1,4 @@
1
1
  export { SparqlConstructExecutor, NotSupported, readQueryFile, } from './executor.js';
2
- export { SparqlSelector } from './selector.js';
2
+ export { SparqlItemSelector, } from './selector.js';
3
3
  export { injectValues } from './values.js';
4
4
  export { withDefaultGraph } from './graph.js';
@@ -1,31 +1,30 @@
1
+ import type { Distribution } from '@lde/dataset';
1
2
  import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
2
- import type { StageSelector } from '../stage.js';
3
+ import type { ItemSelector } from '../stage.js';
3
4
  import type { VariableBindings } from './executor.js';
4
- export interface SparqlSelectorOptions {
5
+ export interface SparqlItemSelectorOptions {
5
6
  /** SELECT query projecting at least one named variable. A LIMIT in the query sets the default page size. */
6
7
  query: string;
7
- /** SPARQL endpoint URL. */
8
- endpoint: URL;
9
8
  /** Results per page. Overrides any LIMIT in the query. @default 10 */
10
9
  pageSize?: number;
11
10
  /** Custom fetcher instance. */
12
11
  fetcher?: SparqlEndpointFetcher;
13
12
  }
14
13
  /**
15
- * {@link StageSelector} that pages through SPARQL SELECT results,
14
+ * {@link ItemSelector} that pages through SPARQL SELECT results,
16
15
  * yielding all projected variable bindings (NamedNode values only) per row.
17
16
  *
17
+ * The endpoint URL comes from the {@link Distribution} passed to {@link select}.
18
18
  * Pagination is an internal detail — consumers iterate binding rows directly.
19
19
  * If the query contains a LIMIT, it is used as the default page size
20
20
  * (can be overridden by the `pageSize` option). Pagination continues
21
21
  * until a page returns fewer results than the page size.
22
22
  */
23
- export declare class SparqlSelector implements StageSelector {
23
+ export declare class SparqlItemSelector implements ItemSelector {
24
24
  private readonly parsed;
25
- private readonly endpoint;
26
25
  private readonly pageSize;
27
26
  private readonly fetcher;
28
- constructor(options: SparqlSelectorOptions);
29
- [Symbol.asyncIterator](): AsyncIterableIterator<VariableBindings>;
27
+ constructor(options: SparqlItemSelectorOptions);
28
+ select(distribution: Distribution): AsyncIterableIterator<VariableBindings>;
30
29
  }
31
30
  //# sourceMappingURL=selector.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAKtD,MAAM,WAAW,qBAAqB;IACpC,4GAA4G;IAC5G,KAAK,EAAE,MAAM,CAAC;IACd,2BAA2B;IAC3B,QAAQ,EAAE,GAAG,CAAC;IACd,sEAAsE;IACtE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;GAQG;AACH,qBAAa,cAAe,YAAW,aAAa;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,qBAAqB;IAmBnC,CAAC,MAAM,CAAC,aAAa,CAAC,IAAI,qBAAqB,CAAC,gBAAgB,CAAC;CAkCzE"}
1
+ {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAKtD,MAAM,WAAW,yBAAyB;IACxC,4GAA4G;IAC5G,KAAK,EAAE,MAAM,CAAC;IACd,sEAAsE;IACtE,QAAQ,CAAC,EAAE,MAAM,CAAC;IAClB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;GASG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,yBAAyB;IAkBvC,MAAM,CACX,YAAY,EAAE,YAAY,GACzB,qBAAqB,CAAC,gBAAgB,CAAC;CAmC3C"}
@@ -3,17 +3,17 @@ import { Generator, Parser, } from 'sparqljs';
3
3
  const parser = new Parser();
4
4
  const generator = new Generator();
5
5
  /**
6
- * {@link StageSelector} that pages through SPARQL SELECT results,
6
+ * {@link ItemSelector} that pages through SPARQL SELECT results,
7
7
  * yielding all projected variable bindings (NamedNode values only) per row.
8
8
  *
9
+ * The endpoint URL comes from the {@link Distribution} passed to {@link select}.
9
10
  * Pagination is an internal detail — consumers iterate binding rows directly.
10
11
  * If the query contains a LIMIT, it is used as the default page size
11
12
  * (can be overridden by the `pageSize` option). Pagination continues
12
13
  * until a page returns fewer results than the page size.
13
14
  */
14
- export class SparqlSelector {
15
+ export class SparqlItemSelector {
15
16
  parsed;
16
- endpoint;
17
17
  pageSize;
18
18
  fetcher;
19
19
  constructor(options) {
@@ -26,17 +26,17 @@ export class SparqlSelector {
26
26
  throw new Error('Query must project at least one named variable (SELECT * is not supported)');
27
27
  }
28
28
  this.parsed = parsed;
29
- this.endpoint = options.endpoint;
30
29
  this.pageSize = options.pageSize ?? parsed.limit ?? 10;
31
30
  this.fetcher = options.fetcher ?? new SparqlEndpointFetcher();
32
31
  }
33
- async *[Symbol.asyncIterator]() {
32
+ async *select(distribution) {
33
+ const endpoint = distribution.accessUrl;
34
34
  let offset = 0;
35
35
  while (true) {
36
36
  this.parsed.limit = this.pageSize;
37
37
  this.parsed.offset = offset;
38
38
  const paginatedQuery = generator.stringify(this.parsed);
39
- const stream = (await this.fetcher.fetchBindings(this.endpoint.toString(), paginatedQuery));
39
+ const stream = (await this.fetcher.fetchBindings(endpoint.toString(), paginatedQuery));
40
40
  let pageSize = 0;
41
41
  for await (const record of stream) {
42
42
  const row = Object.fromEntries(Object.entries(record).filter(([, term]) => term.termType === 'NamedNode'));
package/dist/stage.d.ts CHANGED
@@ -2,12 +2,10 @@ import { Dataset, Distribution } from '@lde/dataset';
2
2
  import type { Executor, VariableBindings } from './sparql/executor.js';
3
3
  import { NotSupported } from './sparql/executor.js';
4
4
  import type { Writer } from './writer/writer.js';
5
- /** A selector, or a factory that receives the runtime distribution. */
6
- export type StageSelectorInput = StageSelector | ((distribution: Distribution) => StageSelector);
7
5
  export interface StageOptions {
8
6
  name: string;
9
7
  executors: Executor | Executor[];
10
- selector?: StageSelectorInput;
8
+ itemSelector?: ItemSelector;
11
9
  /** Maximum number of bindings per executor call. @default 10 */
12
10
  batchSize?: number;
13
11
  /** Maximum concurrent in-flight executor batches. @default 10 */
@@ -22,7 +20,7 @@ export declare class Stage {
22
20
  readonly name: string;
23
21
  readonly stages: readonly Stage[];
24
22
  private readonly executors;
25
- private readonly selectorInput?;
23
+ private readonly itemSelector?;
26
24
  private readonly batchSize;
27
25
  private readonly maxConcurrency;
28
26
  constructor(options: StageOptions);
@@ -30,7 +28,8 @@ export declare class Stage {
30
28
  private runWithSelector;
31
29
  private executeAll;
32
30
  }
33
- /** Stage-level selector that yields variable bindings for use in executor queries. Pagination is an implementation detail. */
34
- export interface StageSelector extends AsyncIterable<VariableBindings> {
31
+ /** Selects items (as variable bindings) for executors to process. Pagination is an implementation detail. */
32
+ export interface ItemSelector {
33
+ select(distribution: Distribution): AsyncIterable<VariableBindings>;
35
34
  }
36
35
  //# sourceMappingURL=stage.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,uEAAuE;AACvE,MAAM,MAAM,kBAAkB,GAC1B,aAAa,GACb,CAAC,CAAC,YAAY,EAAE,YAAY,KAAK,aAAa,CAAC,CAAC;AAEpD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAC9B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAC,CAAqB;IACpD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAuBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,8HAA8H;AAE9H,MAAM,WAAW,aAAc,SAAQ,aAAa,CAAC,gBAAgB,CAAC;CAAG"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAmBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACrE"}
package/dist/stage.js CHANGED
@@ -5,7 +5,7 @@ export class Stage {
5
5
  name;
6
6
  stages;
7
7
  executors;
8
- selectorInput;
8
+ itemSelector;
9
9
  batchSize;
10
10
  maxConcurrency;
11
11
  constructor(options) {
@@ -14,16 +14,13 @@ export class Stage {
14
14
  this.executors = Array.isArray(options.executors)
15
15
  ? options.executors
16
16
  : [options.executors];
17
- this.selectorInput = options.selector;
17
+ this.itemSelector = options.itemSelector;
18
18
  this.batchSize = options.batchSize ?? 10;
19
19
  this.maxConcurrency = options.maxConcurrency ?? 10;
20
20
  }
21
21
  async run(dataset, distribution, writer, options) {
22
- if (this.selectorInput) {
23
- const selector = typeof this.selectorInput === 'function'
24
- ? this.selectorInput(distribution)
25
- : this.selectorInput;
26
- return this.runWithSelector(selector, dataset, distribution, writer, options);
22
+ if (this.itemSelector) {
23
+ return this.runWithSelector(this.itemSelector.select(distribution), dataset, distribution, writer, options);
27
24
  }
28
25
  const streams = await this.executeAll(dataset, distribution);
29
26
  if (streams instanceof NotSupported) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.6.26",
3
+ "version": "0.6.28",
4
4
  "repository": {
5
5
  "url": "https://github.com/ldengine/lde",
6
6
  "directory": "packages/pipeline"