@lde/pipeline 0.14.0 → 0.15.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,9 +1,9 @@
1
1
  # Pipeline
2
2
 
3
- A framework for transforming large RDF datasets using pure [SPARQL](https://www.w3.org/TR/sparql11-query/) queries.
3
+ A framework for transforming large RDF datasets, primarily using [SPARQL](https://www.w3.org/TR/sparql11-query/) queries with TypeScript for the parts that are hard to express in SPARQL alone.
4
4
 
5
5
  - **SPARQL-native.** Data transformations are plain SPARQL query files — portable, transparent, testable and version-controlled.
6
- - **Composable.** Decorators wrap executors and resolvers to add behaviour (provenance, vocabulary detection, data import) without subclassing.
6
+ - **Composable.** Executors are an interface: wrap a SPARQL executor with custom TypeScript to handle edge cases like date parsing or string normalisation (see [Executor](#executor)).
7
7
  - **Extensible.** A plugin system lets packages like [@lde/pipeline-void](../pipeline-void) (or your own plugins) hook into the pipeline lifecycle.
8
8
 
9
9
  ## Components
@@ -61,7 +61,7 @@ const itemSelector: ItemSelector = {
61
61
 
62
62
  ### Executor
63
63
 
64
- Generates RDF triples. `SparqlConstructExecutor` runs a SPARQL CONSTRUCT query with template substitution and variable bindings:
64
+ Generates RDF triples. The built-in `SparqlConstructExecutor` runs a SPARQL CONSTRUCT query with template substitution and variable bindings:
65
65
 
66
66
  ```typescript
67
67
  const executor = new SparqlConstructExecutor({
@@ -69,6 +69,65 @@ const executor = new SparqlConstructExecutor({
69
69
  });
70
70
  ```
71
71
 
72
+ `Executor` is an interface, so you can implement your own for logic that's hard to express in pure SPARQL — for example, cleaning up messy date notations or converting locale-specific dates to ISO 8601. The decorator pattern lets you wrap a SPARQL executor and post-process its quad stream in TypeScript:
73
+
74
+ ```typescript
75
+ import { DataFactory } from 'n3';
76
+ import type { Quad, Literal } from '@rdfjs/types';
77
+ import type { Dataset, Distribution } from '@lde/dataset';
78
+ import {
79
+ type Executor,
80
+ type ExecuteOptions,
81
+ NotSupported,
82
+ } from '@lde/pipeline';
83
+
84
+ class TransformExecutor implements Executor {
85
+ constructor(
86
+ private readonly inner: Executor,
87
+ private readonly transform: (
88
+ quads: AsyncIterable<Quad>,
89
+ dataset: Dataset,
90
+ ) => AsyncIterable<Quad>,
91
+ ) {}
92
+
93
+ async execute(
94
+ dataset: Dataset,
95
+ distribution: Distribution,
96
+ options?: ExecuteOptions,
97
+ ): Promise<AsyncIterable<Quad> | NotSupported> {
98
+ const result = await this.inner.execute(dataset, distribution, options);
99
+ if (result instanceof NotSupported) return result;
100
+ return this.transform(result, dataset);
101
+ }
102
+ }
103
+ ```
104
+
105
+ Then use it to wrap any SPARQL executor:
106
+
107
+ ```typescript
108
+ new Stage({
109
+ name: 'dates',
110
+ executors: new TransformExecutor(
111
+ await SparqlConstructExecutor.fromFile('dates.rq'),
112
+ async function* (quads) {
113
+ for await (const quad of quads) {
114
+ if (quad.object.termType === 'Literal' && isMessyDate(quad.object)) {
115
+ const cleaned = DataFactory.literal(
116
+ parseDutchDate(quad.object.value),
117
+ DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#date'),
118
+ );
119
+ yield DataFactory.quad(quad.subject, quad.predicate, cleaned);
120
+ } else {
121
+ yield quad;
122
+ }
123
+ }
124
+ },
125
+ ),
126
+ });
127
+ ```
128
+
129
+ This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `VocabularyExecutor` for a real-world example of this pattern.
130
+
72
131
  ### Writer
73
132
 
74
133
  Writes generated quads to a destination:
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAsCD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA6Cd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAsCD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAeZ,cAAc;YA6Cd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -74,7 +74,9 @@ export class Pipeline {
74
74
  async run() {
75
75
  const start = Date.now();
76
76
  this.reporter?.pipelineStart?.(this.name);
77
+ const selectStart = Date.now();
77
78
  const datasets = await this.datasetSelector.select();
79
+ this.reporter?.datasetsSelected?.(datasets.total, Date.now() - selectStart);
78
80
  for await (const dataset of datasets) {
79
81
  await this.processDataset(dataset);
80
82
  }
@@ -8,7 +8,7 @@ export interface DistributionAnalysisResult {
8
8
  }
9
9
  export interface ProgressReporter {
10
10
  pipelineStart?(name: string): void;
11
- datasetsSelected?(count: number): void;
11
+ datasetsSelected?(count: number, duration: number): void;
12
12
  datasetStart?(dataset: Dataset): void;
13
13
  distributionsAnalyzed?(dataset: Dataset, results: DistributionAnalysisResult[]): void;
14
14
  distributionSelected?(dataset: Dataset, distribution: Distribution, importedFrom?: Distribution, importDuration?: number): void;
@@ -1 +1 @@
1
- {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE1D,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACvC,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,qBAAqB,CAAC,CACpB,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,0BAA0B,EAAE,GACpC,IAAI,CAAC;IACR,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,GACtB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,eAAe,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACzC,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACvD"}
1
+ {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAE1D,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;CAChB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,qBAAqB,CAAC,CACpB,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,0BAA0B,EAAE,GACpC,IAAI,CAAC;IACR,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,GACtB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;KACxB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,iBAAiB,EAAE,MAAM,CAAC;QAC1B,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,eAAe,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACzC,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QAAE,QAAQ,EAAE,MAAM,CAAA;KAAE,GAAG,IAAI,CAAC;CACvD"}
@@ -1,5 +1,5 @@
1
1
  import { Dataset } from '@lde/dataset';
2
- import { Client, Paginator } from '@lde/dataset-registry-client';
2
+ import { Client, Paginator, type SearchCriteria } from '@lde/dataset-registry-client';
3
3
  /**
4
4
  * Select {@link Dataset}s for processing in a pipeline.
5
5
  */
@@ -24,7 +24,7 @@ export declare class ManualDatasetSelection implements DatasetSelector {
24
24
  * @param {object} options
25
25
  * @param Client options.registry The Dataset Registry Client to query for datasets.
26
26
  * @param string options.query Optional custom SPARQL query to select datasets.
27
- * @param object options.criteria Optional search criteria to select datasets.
27
+ * @param SearchCriteria options.criteria Optional search criteria to select datasets.
28
28
  */
29
29
  export declare class RegistrySelector implements DatasetSelector {
30
30
  private readonly registry;
@@ -33,7 +33,7 @@ export declare class RegistrySelector implements DatasetSelector {
33
33
  constructor({ registry, query, criteria, }: {
34
34
  registry: Client;
35
35
  query?: string;
36
- criteria?: object;
36
+ criteria?: SearchCriteria;
37
37
  });
38
38
  select(): Promise<Paginator<Dataset>>;
39
39
  }
@@ -1 +1 @@
1
- {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,eAAe;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;gBAEvB,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB;IAMK,MAAM;CAOb"}
1
+ {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EACL,MAAM,EACN,SAAS,EACT,KAAK,cAAc,EACpB,MAAM,8BAA8B,CAAC;AAEtC;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,eAAe;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAiB;gBAE/B,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,cAAc,CAAC;KAC3B;IAMK,MAAM;CAOb"}
package/dist/selector.js CHANGED
@@ -1,4 +1,4 @@
1
- import { Paginator } from '@lde/dataset-registry-client';
1
+ import { Paginator, } from '@lde/dataset-registry-client';
2
2
  export class ManualDatasetSelection {
3
3
  datasets;
4
4
  constructor(datasets) {
@@ -21,7 +21,7 @@ export class ManualDatasetSelection {
21
21
  * @param {object} options
22
22
  * @param Client options.registry The Dataset Registry Client to query for datasets.
23
23
  * @param string options.query Optional custom SPARQL query to select datasets.
24
- * @param object options.criteria Optional search criteria to select datasets.
24
+ * @param SearchCriteria options.criteria Optional search criteria to select datasets.
25
25
  */
26
26
  export class RegistrySelector {
27
27
  registry;
package/package.json CHANGED
@@ -1,8 +1,8 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.14.0",
3
+ "version": "0.15.1",
4
4
  "repository": {
5
- "url": "git+https://github.com/ldengine/lde.git",
5
+ "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"
7
7
  },
8
8
  "type": "module",
@@ -24,7 +24,7 @@
24
24
  ],
25
25
  "dependencies": {
26
26
  "@lde/dataset": "0.7.0",
27
- "@lde/dataset-registry-client": "0.7.0",
27
+ "@lde/dataset-registry-client": "0.7.1",
28
28
  "@lde/sparql-importer": "0.3.0",
29
29
  "@lde/sparql-server": "0.4.10",
30
30
  "@rdfjs/types": "^2.0.1",