@lde/pipeline 0.28.7 → 0.28.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +29 -3
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/plugin/namespaceNormalization.d.ts +26 -0
- package/dist/plugin/namespaceNormalization.d.ts.map +1 -0
- package/dist/plugin/namespaceNormalization.js +40 -0
- package/dist/plugin/schemaOrgNormalization.d.ts +10 -3
- package/dist/plugin/schemaOrgNormalization.d.ts.map +1 -1
- package/dist/plugin/schemaOrgNormalization.js +14 -21
- package/package.json +3 -3
package/README.md
CHANGED
|
@@ -229,21 +229,47 @@ Writes generated quads to a destination:
|
|
|
229
229
|
|
|
230
230
|
Plugins hook into the pipeline lifecycle via the `PipelinePlugin` interface. Register them in the `plugins` array when constructing a `Pipeline`.
|
|
231
231
|
|
|
232
|
+
#### `namespaceNormalizationPlugin(options)`
|
|
233
|
+
|
|
234
|
+
Generic plugin that rewrites namespace prefixes in `void:class` and `void:property` quad objects. Accepts `from` and `to` options specifying the source and target namespace URI prefixes. `void:vocabulary` quads are left unchanged so consumers can see which namespace the source dataset actually uses.
|
|
235
|
+
|
|
236
|
+
```typescript
|
|
237
|
+
import { namespaceNormalizationPlugin } from '@lde/pipeline';
|
|
238
|
+
|
|
239
|
+
new Pipeline({
|
|
240
|
+
// ...
|
|
241
|
+
plugins: [
|
|
242
|
+
namespaceNormalizationPlugin({
|
|
243
|
+
from: 'http://example.org/',
|
|
244
|
+
to: 'https://example.org/',
|
|
245
|
+
}),
|
|
246
|
+
],
|
|
247
|
+
});
|
|
248
|
+
```
|
|
249
|
+
|
|
232
250
|
#### `provenancePlugin()`
|
|
233
251
|
|
|
234
252
|
Appends [PROV-O](https://www.w3.org/TR/prov-o/) provenance quads (`prov:Entity`, `prov:Activity`, `prov:startedAtTime`, `prov:endedAtTime`) to every stage’s output.
|
|
235
253
|
|
|
236
|
-
#### `schemaOrgNormalizationPlugin()`
|
|
254
|
+
#### `schemaOrgNormalizationPlugin(options?)`
|
|
255
|
+
|
|
256
|
+
Normalizes Schema.org namespace prefixes in `void:class` and `void:property` quad objects. By default, rewrites `http://schema.org/` to `https://schema.org/`. Pass `{ reverse: true }` to normalize in the opposite direction (`https://` to `http://`). `void:vocabulary` quads are left unchanged so consumers can see which namespace the source dataset actually uses.
|
|
237
257
|
|
|
238
|
-
|
|
258
|
+
This is a convenience wrapper around `namespaceNormalizationPlugin`.
|
|
239
259
|
|
|
240
260
|
```typescript
|
|
241
|
-
import { schemaOrgNormalizationPlugin, provenancePlugin } from '@lde/pipeline';
|
|
261
|
+
import { schemaOrgNormalizationPlugin, provenancePlugin } from '@lde/pipeline';
|
|
242
262
|
|
|
243
263
|
new Pipeline({
|
|
244
264
|
// ...
|
|
245
265
|
plugins: [schemaOrgNormalizationPlugin(), provenancePlugin()],
|
|
246
266
|
});
|
|
267
|
+
|
|
268
|
+
// Or reverse: normalize https to http
|
|
269
|
+
new Pipeline({
|
|
270
|
+
// ...
|
|
271
|
+
plugins: [schemaOrgNormalizationPlugin({reverse: true})],
|
|
272
|
+
});
|
|
247
273
|
```
|
|
248
274
|
|
|
249
275
|
## Usage
|
package/dist/index.d.ts
CHANGED
|
@@ -9,6 +9,7 @@ export * from './stageOutputResolver.js';
|
|
|
9
9
|
export * from './sparql/index.js';
|
|
10
10
|
export * from './distribution/index.js';
|
|
11
11
|
export * from './writer/index.js';
|
|
12
|
+
export * from './plugin/namespaceNormalization.js';
|
|
12
13
|
export * from './plugin/provenance.js';
|
|
13
14
|
export * from './plugin/schemaOrgNormalization.js';
|
|
14
15
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,gBAAgB,CAAC;AAC/B,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC;AAClC,cAAc,wBAAwB,CAAC;AACvC,cAAc,oCAAoC,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,gBAAgB,CAAC;AAC/B,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oCAAoC,CAAC;AACnD,cAAc,wBAAwB,CAAC;AACvC,cAAc,oCAAoC,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -9,5 +9,6 @@ export * from './stageOutputResolver.js';
|
|
|
9
9
|
export * from './sparql/index.js';
|
|
10
10
|
export * from './distribution/index.js';
|
|
11
11
|
export * from './writer/index.js';
|
|
12
|
+
export * from './plugin/namespaceNormalization.js';
|
|
12
13
|
export * from './plugin/provenance.js';
|
|
13
14
|
export * from './plugin/schemaOrgNormalization.js';
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
import type { QuadTransform } from '../stage.js';
|
|
2
|
+
import type { PipelinePlugin } from '../pipeline.js';
|
|
3
|
+
export interface NamespaceNormalizationOptions {
|
|
4
|
+
/** Namespace URI prefix to match (e.g. `http://schema.org/`). */
|
|
5
|
+
from: string;
|
|
6
|
+
/** Namespace URI prefix to replace with (e.g. `https://schema.org/`). */
|
|
7
|
+
to: string;
|
|
8
|
+
}
|
|
9
|
+
/**
|
|
10
|
+
* Creates a QuadTransform that rewrites namespace prefixes in `void:class` and
|
|
11
|
+
* `void:property` quad objects from {@link NamespaceNormalizationOptions.from}
|
|
12
|
+
* to {@link NamespaceNormalizationOptions.to}.
|
|
13
|
+
*
|
|
14
|
+
* `void:vocabulary` quads are left unchanged so consumers can see which
|
|
15
|
+
* namespace the source dataset actually uses.
|
|
16
|
+
*/
|
|
17
|
+
export declare function namespaceNormalizationTransform(options: NamespaceNormalizationOptions): QuadTransform;
|
|
18
|
+
/**
|
|
19
|
+
* Pipeline plugin that normalizes namespace prefixes in `void:class` and
|
|
20
|
+
* `void:property` quad objects.
|
|
21
|
+
*
|
|
22
|
+
* `void:vocabulary` quads are left unchanged so consumers can see which
|
|
23
|
+
* namespace the source dataset actually uses.
|
|
24
|
+
*/
|
|
25
|
+
export declare function namespaceNormalizationPlugin(options: NamespaceNormalizationOptions): PipelinePlugin;
|
|
26
|
+
//# sourceMappingURL=namespaceNormalization.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,gBAAgB,CAAC;AASnD,MAAM,WAAW,6BAA6B;IAC5C,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;GAOG;AACH,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,6BAA6B,GACrC,aAAa,CAEf;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,6BAA6B,GACrC,cAAc,CAKhB"}
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
import { DataFactory } from 'n3';
|
|
2
|
+
const { namedNode, quad } = DataFactory;
|
|
3
|
+
const VOID_CLASS = namedNode('http://rdfs.org/ns/void#class');
|
|
4
|
+
const VOID_PROPERTY = namedNode('http://rdfs.org/ns/void#property');
|
|
5
|
+
/**
|
|
6
|
+
* Creates a QuadTransform that rewrites namespace prefixes in `void:class` and
|
|
7
|
+
* `void:property` quad objects from {@link NamespaceNormalizationOptions.from}
|
|
8
|
+
* to {@link NamespaceNormalizationOptions.to}.
|
|
9
|
+
*
|
|
10
|
+
* `void:vocabulary` quads are left unchanged so consumers can see which
|
|
11
|
+
* namespace the source dataset actually uses.
|
|
12
|
+
*/
|
|
13
|
+
export function namespaceNormalizationTransform(options) {
|
|
14
|
+
return (quads) => normalizeNamespace(quads, options);
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Pipeline plugin that normalizes namespace prefixes in `void:class` and
|
|
18
|
+
* `void:property` quad objects.
|
|
19
|
+
*
|
|
20
|
+
* `void:vocabulary` quads are left unchanged so consumers can see which
|
|
21
|
+
* namespace the source dataset actually uses.
|
|
22
|
+
*/
|
|
23
|
+
export function namespaceNormalizationPlugin(options) {
|
|
24
|
+
return {
|
|
25
|
+
name: 'namespace-normalization',
|
|
26
|
+
beforeStageWrite: namespaceNormalizationTransform(options),
|
|
27
|
+
};
|
|
28
|
+
}
|
|
29
|
+
async function* normalizeNamespace(quads, { from, to }) {
|
|
30
|
+
for await (const q of quads) {
|
|
31
|
+
if ((q.predicate.equals(VOID_CLASS) || q.predicate.equals(VOID_PROPERTY)) &&
|
|
32
|
+
q.object.termType === 'NamedNode' &&
|
|
33
|
+
q.object.value.startsWith(from)) {
|
|
34
|
+
yield quad(q.subject, q.predicate, namedNode(to + q.object.value.slice(from.length)), q.graph);
|
|
35
|
+
}
|
|
36
|
+
else {
|
|
37
|
+
yield q;
|
|
38
|
+
}
|
|
39
|
+
}
|
|
40
|
+
}
|
|
@@ -1,13 +1,20 @@
|
|
|
1
1
|
import type { QuadTransform } from '../stage.js';
|
|
2
2
|
import type { PipelinePlugin } from '../pipeline.js';
|
|
3
|
+
export interface SchemaOrgNormalizationOptions {
|
|
4
|
+
/** When true, normalizes `https://schema.org/` to `http://schema.org/` instead. */
|
|
5
|
+
reverse?: boolean;
|
|
6
|
+
}
|
|
3
7
|
/** QuadTransform that normalizes `http://schema.org/` to `https://schema.org/` in `void:class` and `void:property` objects. */
|
|
4
8
|
export declare const schemaOrgNormalizationTransform: QuadTransform;
|
|
5
9
|
/**
|
|
6
|
-
* Pipeline plugin that normalizes
|
|
7
|
-
*
|
|
10
|
+
* Pipeline plugin that normalizes Schema.org namespace prefixes in `void:class`
|
|
11
|
+
* and `void:property` quad objects.
|
|
12
|
+
*
|
|
13
|
+
* By default, rewrites `http://schema.org/` to `https://schema.org/`. Pass
|
|
14
|
+
* `{ reverse: true }` to normalize in the opposite direction.
|
|
8
15
|
*
|
|
9
16
|
* `void:vocabulary` quads are left unchanged so consumers can see which
|
|
10
17
|
* namespace the source dataset actually uses.
|
|
11
18
|
*/
|
|
12
|
-
export declare function schemaOrgNormalizationPlugin(): PipelinePlugin;
|
|
19
|
+
export declare function schemaOrgNormalizationPlugin(options?: SchemaOrgNormalizationOptions): PipelinePlugin;
|
|
13
20
|
//# sourceMappingURL=schemaOrgNormalization.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"schemaOrgNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/schemaOrgNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,
|
|
1
|
+
{"version":3,"file":"schemaOrgNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/schemaOrgNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAC,aAAa,EAAC,MAAM,aAAa,CAAC;AAC/C,OAAO,KAAK,EAAC,cAAc,EAAC,MAAM,gBAAgB,CAAC;AASnD,MAAM,WAAW,6BAA6B;IAC5C,mFAAmF;IACnF,OAAO,CAAC,EAAE,OAAO,CAAC;CACnB;AAED,+HAA+H;AAC/H,eAAO,MAAM,+BAA+B,EAAE,aAI1C,CAAC;AAEL;;;;;;;;;GASG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,CAAC,EAAE,6BAA6B,GACtC,cAAc,CAOhB"}
|
|
@@ -1,33 +1,26 @@
|
|
|
1
|
-
import { DataFactory } from 'n3';
|
|
2
|
-
const { namedNode, quad } = DataFactory;
|
|
3
|
-
const VOID_CLASS = namedNode('http://rdfs.org/ns/void#class');
|
|
4
|
-
const VOID_PROPERTY = namedNode('http://rdfs.org/ns/void#property');
|
|
1
|
+
import { namespaceNormalizationPlugin, namespaceNormalizationTransform, } from './namespaceNormalization.js';
|
|
5
2
|
const HTTP_SCHEMA_ORG = 'http://schema.org/';
|
|
6
3
|
const HTTPS_SCHEMA_ORG = 'https://schema.org/';
|
|
7
4
|
/** QuadTransform that normalizes `http://schema.org/` to `https://schema.org/` in `void:class` and `void:property` objects. */
|
|
8
|
-
export const schemaOrgNormalizationTransform = (quads) => normalizeSchemaOrg(quads);
|
|
5
|
+
export const schemaOrgNormalizationTransform = namespaceNormalizationTransform({
|
|
6
|
+
from: HTTP_SCHEMA_ORG,
|
|
7
|
+
to: HTTPS_SCHEMA_ORG,
|
|
8
|
+
});
|
|
9
9
|
/**
|
|
10
|
-
* Pipeline plugin that normalizes
|
|
11
|
-
*
|
|
10
|
+
* Pipeline plugin that normalizes Schema.org namespace prefixes in `void:class`
|
|
11
|
+
* and `void:property` quad objects.
|
|
12
|
+
*
|
|
13
|
+
* By default, rewrites `http://schema.org/` to `https://schema.org/`. Pass
|
|
14
|
+
* `{ reverse: true }` to normalize in the opposite direction.
|
|
12
15
|
*
|
|
13
16
|
* `void:vocabulary` quads are left unchanged so consumers can see which
|
|
14
17
|
* namespace the source dataset actually uses.
|
|
15
18
|
*/
|
|
16
|
-
export function schemaOrgNormalizationPlugin() {
|
|
19
|
+
export function schemaOrgNormalizationPlugin(options) {
|
|
20
|
+
const from = options?.reverse ? HTTPS_SCHEMA_ORG : HTTP_SCHEMA_ORG;
|
|
21
|
+
const to = options?.reverse ? HTTP_SCHEMA_ORG : HTTPS_SCHEMA_ORG;
|
|
17
22
|
return {
|
|
23
|
+
...namespaceNormalizationPlugin({ from, to }),
|
|
18
24
|
name: 'schema-org-normalization',
|
|
19
|
-
beforeStageWrite: schemaOrgNormalizationTransform,
|
|
20
25
|
};
|
|
21
26
|
}
|
|
22
|
-
async function* normalizeSchemaOrg(quads) {
|
|
23
|
-
for await (const q of quads) {
|
|
24
|
-
if ((q.predicate.equals(VOID_CLASS) || q.predicate.equals(VOID_PROPERTY)) &&
|
|
25
|
-
q.object.termType === 'NamedNode' &&
|
|
26
|
-
q.object.value.startsWith(HTTP_SCHEMA_ORG)) {
|
|
27
|
-
yield quad(q.subject, q.predicate, namedNode(HTTPS_SCHEMA_ORG + q.object.value.slice(HTTP_SCHEMA_ORG.length)), q.graph);
|
|
28
|
-
}
|
|
29
|
-
else {
|
|
30
|
-
yield q;
|
|
31
|
-
}
|
|
32
|
-
}
|
|
33
|
-
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.28.7",
|
|
3
|
+
"version": "0.28.9",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -25,11 +25,11 @@
|
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.2",
|
|
28
|
-
"@lde/dataset-registry-client": "0.7.
|
|
28
|
+
"@lde/dataset-registry-client": "0.7.5",
|
|
29
29
|
"@lde/sparql-importer": "0.6.0",
|
|
30
30
|
"@lde/sparql-server": "0.4.10",
|
|
31
31
|
"@rdfjs/types": "^2.0.1",
|
|
32
|
-
"@traqula/generator-sparql-1-1": "^1.0.
|
|
32
|
+
"@traqula/generator-sparql-1-1": "^1.0.7",
|
|
33
33
|
"@traqula/parser-sparql-1-1": "^1.0.4",
|
|
34
34
|
"@traqula/rules-sparql-1-1": "^1.0.4",
|
|
35
35
|
"fetch-sparql-endpoint": "^7.1.0",
|