@lde/pipeline 0.15.0 → 0.15.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +62 -3
- package/dist/selector.d.ts +3 -3
- package/dist/selector.d.ts.map +1 -1
- package/dist/selector.js +2 -2
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# Pipeline
|
|
2
2
|
|
|
3
|
-
A framework for transforming large RDF datasets using
|
|
3
|
+
A framework for transforming large RDF datasets, primarily using [SPARQL](https://www.w3.org/TR/sparql11-query/) queries with TypeScript for the parts that are hard to express in SPARQL alone.
|
|
4
4
|
|
|
5
5
|
- **SPARQL-native.** Data transformations are plain SPARQL query files — portable, transparent, testable and version-controlled.
|
|
6
|
-
- **Composable.**
|
|
6
|
+
- **Composable.** Executors are an interface: wrap a SPARQL executor with custom TypeScript to handle edge cases like date parsing or string normalisation (see [Executor](#executor)).
|
|
7
7
|
- **Extensible.** A plugin system lets packages like [@lde/pipeline-void](../pipeline-void) (or your own plugins) hook into the pipeline lifecycle.
|
|
8
8
|
|
|
9
9
|
## Components
|
|
@@ -61,7 +61,7 @@ const itemSelector: ItemSelector = {
|
|
|
61
61
|
|
|
62
62
|
### Executor
|
|
63
63
|
|
|
64
|
-
Generates RDF triples. `SparqlConstructExecutor` runs a SPARQL CONSTRUCT query with template substitution and variable bindings:
|
|
64
|
+
Generates RDF triples. The built-in `SparqlConstructExecutor` runs a SPARQL CONSTRUCT query with template substitution and variable bindings:
|
|
65
65
|
|
|
66
66
|
```typescript
|
|
67
67
|
const executor = new SparqlConstructExecutor({
|
|
@@ -69,6 +69,65 @@ const executor = new SparqlConstructExecutor({
|
|
|
69
69
|
});
|
|
70
70
|
```
|
|
71
71
|
|
|
72
|
+
`Executor` is an interface, so you can implement your own for logic that's hard to express in pure SPARQL — for example, cleaning up messy date notations or converting locale-specific dates to ISO 8601. The decorator pattern lets you wrap a SPARQL executor and post-process its quad stream in TypeScript:
|
|
73
|
+
|
|
74
|
+
```typescript
|
|
75
|
+
import { DataFactory } from 'n3';
|
|
76
|
+
import type { Quad, Literal } from '@rdfjs/types';
|
|
77
|
+
import type { Dataset, Distribution } from '@lde/dataset';
|
|
78
|
+
import {
|
|
79
|
+
type Executor,
|
|
80
|
+
type ExecuteOptions,
|
|
81
|
+
NotSupported,
|
|
82
|
+
} from '@lde/pipeline';
|
|
83
|
+
|
|
84
|
+
class TransformExecutor implements Executor {
|
|
85
|
+
constructor(
|
|
86
|
+
private readonly inner: Executor,
|
|
87
|
+
private readonly transform: (
|
|
88
|
+
quads: AsyncIterable<Quad>,
|
|
89
|
+
dataset: Dataset,
|
|
90
|
+
) => AsyncIterable<Quad>,
|
|
91
|
+
) {}
|
|
92
|
+
|
|
93
|
+
async execute(
|
|
94
|
+
dataset: Dataset,
|
|
95
|
+
distribution: Distribution,
|
|
96
|
+
options?: ExecuteOptions,
|
|
97
|
+
): Promise<AsyncIterable<Quad> | NotSupported> {
|
|
98
|
+
const result = await this.inner.execute(dataset, distribution, options);
|
|
99
|
+
if (result instanceof NotSupported) return result;
|
|
100
|
+
return this.transform(result, dataset);
|
|
101
|
+
}
|
|
102
|
+
}
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
Then use it to wrap any SPARQL executor:
|
|
106
|
+
|
|
107
|
+
```typescript
|
|
108
|
+
new Stage({
|
|
109
|
+
name: 'dates',
|
|
110
|
+
executors: new TransformExecutor(
|
|
111
|
+
await SparqlConstructExecutor.fromFile('dates.rq'),
|
|
112
|
+
async function* (quads) {
|
|
113
|
+
for await (const quad of quads) {
|
|
114
|
+
if (quad.object.termType === 'Literal' && isMessyDate(quad.object)) {
|
|
115
|
+
const cleaned = DataFactory.literal(
|
|
116
|
+
parseDutchDate(quad.object.value),
|
|
117
|
+
DataFactory.namedNode('http://www.w3.org/2001/XMLSchema#date'),
|
|
118
|
+
);
|
|
119
|
+
yield DataFactory.quad(quad.subject, quad.predicate, cleaned);
|
|
120
|
+
} else {
|
|
121
|
+
yield quad;
|
|
122
|
+
}
|
|
123
|
+
}
|
|
124
|
+
},
|
|
125
|
+
),
|
|
126
|
+
});
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `VocabularyExecutor` for a real-world example of this pattern.
|
|
130
|
+
|
|
72
131
|
### Writer
|
|
73
132
|
|
|
74
133
|
Writes generated quads to a destination:
|
package/dist/selector.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
1
|
import { Dataset } from '@lde/dataset';
|
|
2
|
-
import { Client, Paginator } from '@lde/dataset-registry-client';
|
|
2
|
+
import { Client, Paginator, type SearchCriteria } from '@lde/dataset-registry-client';
|
|
3
3
|
/**
|
|
4
4
|
* Select {@link Dataset}s for processing in a pipeline.
|
|
5
5
|
*/
|
|
@@ -24,7 +24,7 @@ export declare class ManualDatasetSelection implements DatasetSelector {
|
|
|
24
24
|
* @param {object} options
|
|
25
25
|
* @param Client options.registry The Dataset Registry Client to query for datasets.
|
|
26
26
|
* @param string options.query Optional custom SPARQL query to select datasets.
|
|
27
|
-
* @param
|
|
27
|
+
* @param SearchCriteria options.criteria Optional search criteria to select datasets.
|
|
28
28
|
*/
|
|
29
29
|
export declare class RegistrySelector implements DatasetSelector {
|
|
30
30
|
private readonly registry;
|
|
@@ -33,7 +33,7 @@ export declare class RegistrySelector implements DatasetSelector {
|
|
|
33
33
|
constructor({ registry, query, criteria, }: {
|
|
34
34
|
registry: Client;
|
|
35
35
|
query?: string;
|
|
36
|
-
criteria?:
|
|
36
|
+
criteria?: SearchCriteria;
|
|
37
37
|
});
|
|
38
38
|
select(): Promise<Paginator<Dataset>>;
|
|
39
39
|
}
|
package/dist/selector.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,
|
|
1
|
+
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EACL,MAAM,EACN,SAAS,EACT,KAAK,cAAc,EACpB,MAAM,8BAA8B,CAAC;AAEtC;;GAEG;AACH,MAAM,WAAW,eAAe;IAC9B,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,eAAe;IAChD,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,eAAe;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAiB;gBAE/B,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,cAAc,CAAC;KAC3B;IAMK,MAAM;CAOb"}
|
package/dist/selector.js
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
import { Paginator } from '@lde/dataset-registry-client';
|
|
1
|
+
import { Paginator, } from '@lde/dataset-registry-client';
|
|
2
2
|
export class ManualDatasetSelection {
|
|
3
3
|
datasets;
|
|
4
4
|
constructor(datasets) {
|
|
@@ -21,7 +21,7 @@ export class ManualDatasetSelection {
|
|
|
21
21
|
* @param {object} options
|
|
22
22
|
* @param Client options.registry The Dataset Registry Client to query for datasets.
|
|
23
23
|
* @param string options.query Optional custom SPARQL query to select datasets.
|
|
24
|
-
* @param
|
|
24
|
+
* @param SearchCriteria options.criteria Optional search criteria to select datasets.
|
|
25
25
|
*/
|
|
26
26
|
export class RegistrySelector {
|
|
27
27
|
registry;
|
package/package.json
CHANGED
|
@@ -1,8 +1,8 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.15.0",
|
|
3
|
+
"version": "0.15.1",
|
|
4
4
|
"repository": {
|
|
5
|
-
"url": "git+https://github.com/
|
|
5
|
+
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
7
7
|
},
|
|
8
8
|
"type": "module",
|
|
@@ -24,7 +24,7 @@
|
|
|
24
24
|
],
|
|
25
25
|
"dependencies": {
|
|
26
26
|
"@lde/dataset": "0.7.0",
|
|
27
|
-
"@lde/dataset-registry-client": "0.7.
|
|
27
|
+
"@lde/dataset-registry-client": "0.7.1",
|
|
28
28
|
"@lde/sparql-importer": "0.3.0",
|
|
29
29
|
"@lde/sparql-server": "0.4.10",
|
|
30
30
|
"@rdfjs/types": "^2.0.1",
|