@lde/pipeline-void 0.2.31 → 0.2.32
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -12
- package/dist/provenance.d.ts +14 -5
- package/dist/provenance.d.ts.map +1 -1
- package/dist/provenance.js +24 -5
- package/dist/vocabularyAnalyzer.d.ts +11 -4
- package/dist/vocabularyAnalyzer.d.ts.map +1 -1
- package/dist/vocabularyAnalyzer.js +20 -4
- package/package.json +2 -1
package/README.md
CHANGED
|
@@ -9,10 +9,10 @@ VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets.
|
|
|
9
9
|
- `createLanguageStage(distribution)` — Per-class language tags
|
|
10
10
|
- `createObjectClassStage(distribution)` — Per-class object class partitions
|
|
11
11
|
|
|
12
|
-
##
|
|
12
|
+
## Executor decorators
|
|
13
13
|
|
|
14
|
-
- `
|
|
15
|
-
- `
|
|
14
|
+
- `VocabularyExecutor` — Wraps an executor; detects and appends `void:vocabulary` triples
|
|
15
|
+
- `ProvenanceExecutor` — Wraps an executor; appends PROV-O provenance metadata with automatic timing
|
|
16
16
|
|
|
17
17
|
## SPARQL Queries
|
|
18
18
|
|
|
@@ -43,20 +43,23 @@ Generic VOiD analysis queries included:
|
|
|
43
43
|
import {
|
|
44
44
|
createQueryStage,
|
|
45
45
|
createDatatypeStage,
|
|
46
|
-
|
|
47
|
-
|
|
46
|
+
VocabularyExecutor,
|
|
47
|
+
Stage,
|
|
48
48
|
} from '@lde/pipeline-void';
|
|
49
|
-
import {
|
|
50
|
-
|
|
51
|
-
const distribution = Distribution.sparql(new URL('http://example.com/sparql'));
|
|
49
|
+
import { SparqlConstructExecutor } from '@lde/pipeline';
|
|
52
50
|
|
|
53
51
|
// Simple CONSTRUCT query stage
|
|
54
|
-
const stage = await createQueryStage('triples.rq'
|
|
52
|
+
const stage = await createQueryStage('triples.rq');
|
|
55
53
|
await stage.run(dataset, distribution, writer);
|
|
56
54
|
|
|
57
|
-
//
|
|
58
|
-
const
|
|
59
|
-
|
|
55
|
+
// Executor decorator: vocabulary detection wraps entity-properties executor
|
|
56
|
+
const executor = await SparqlConstructExecutor.fromFile(
|
|
57
|
+
'queries/entity-properties.rq',
|
|
58
|
+
);
|
|
59
|
+
const entityPropertiesStage = new Stage({
|
|
60
|
+
name: 'entity-properties',
|
|
61
|
+
executors: new VocabularyExecutor(executor),
|
|
62
|
+
});
|
|
60
63
|
```
|
|
61
64
|
|
|
62
65
|
## Validation
|
package/dist/provenance.d.ts
CHANGED
|
@@ -1,14 +1,23 @@
|
|
|
1
|
+
import { Dataset, Distribution } from '@lde/dataset';
|
|
2
|
+
import { NotSupported, type Executor, type ExecuteOptions } from '@lde/pipeline';
|
|
1
3
|
import type { Quad } from '@rdfjs/types';
|
|
2
4
|
/**
|
|
3
|
-
*
|
|
4
|
-
* PROV-O provenance metadata.
|
|
5
|
+
* Executor decorator that passes through all quads from the inner executor
|
|
6
|
+
* and appends PROV-O provenance metadata.
|
|
7
|
+
*
|
|
8
|
+
* Timestamps are captured automatically: `startedAt` when `execute()` is
|
|
9
|
+
* called, `endedAt` when the inner quad stream is fully consumed.
|
|
5
10
|
*
|
|
6
11
|
* Appended quads:
|
|
7
|
-
* - `<
|
|
8
|
-
* - `<
|
|
12
|
+
* - `<dataset> a prov:Entity`
|
|
13
|
+
* - `<dataset> prov:wasGeneratedBy _:activity`
|
|
9
14
|
* - `_:activity a prov:Activity`
|
|
10
15
|
* - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
|
|
11
16
|
* - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
|
|
12
17
|
*/
|
|
13
|
-
export declare
|
|
18
|
+
export declare class ProvenanceExecutor implements Executor {
|
|
19
|
+
private readonly inner;
|
|
20
|
+
constructor(inner: Executor);
|
|
21
|
+
execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
|
|
22
|
+
}
|
|
14
23
|
//# sourceMappingURL=provenance.d.ts.map
|
package/dist/provenance.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC
|
|
1
|
+
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,YAAY,EACZ,KAAK,QAAQ,EACb,KAAK,cAAc,EACpB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC;;;;;;;;;;;;;GAaG;AACH,qBAAa,kBAAmB,YAAW,QAAQ;IACrC,OAAO,CAAC,QAAQ,CAAC,KAAK;gBAAL,KAAK,EAAE,QAAQ;IAEtC,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC;CAQ/C"}
|
package/dist/provenance.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { NotSupported, } from '@lde/pipeline';
|
|
1
2
|
import { DataFactory } from 'n3';
|
|
2
3
|
const { namedNode, literal, blankNode, quad } = DataFactory;
|
|
3
4
|
const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
|
|
@@ -8,20 +9,38 @@ const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime'
|
|
|
8
9
|
const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
|
|
9
10
|
const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
|
|
10
11
|
/**
|
|
11
|
-
*
|
|
12
|
-
* PROV-O provenance metadata.
|
|
12
|
+
* Executor decorator that passes through all quads from the inner executor
|
|
13
|
+
* and appends PROV-O provenance metadata.
|
|
14
|
+
*
|
|
15
|
+
* Timestamps are captured automatically: `startedAt` when `execute()` is
|
|
16
|
+
* called, `endedAt` when the inner quad stream is fully consumed.
|
|
13
17
|
*
|
|
14
18
|
* Appended quads:
|
|
15
|
-
* - `<
|
|
16
|
-
* - `<
|
|
19
|
+
* - `<dataset> a prov:Entity`
|
|
20
|
+
* - `<dataset> prov:wasGeneratedBy _:activity`
|
|
17
21
|
* - `_:activity a prov:Activity`
|
|
18
22
|
* - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
|
|
19
23
|
* - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
|
|
20
24
|
*/
|
|
21
|
-
export
|
|
25
|
+
export class ProvenanceExecutor {
|
|
26
|
+
inner;
|
|
27
|
+
constructor(inner) {
|
|
28
|
+
this.inner = inner;
|
|
29
|
+
}
|
|
30
|
+
async execute(dataset, distribution, options) {
|
|
31
|
+
const startedAt = new Date();
|
|
32
|
+
const result = await this.inner.execute(dataset, distribution, options);
|
|
33
|
+
if (result instanceof NotSupported) {
|
|
34
|
+
return result;
|
|
35
|
+
}
|
|
36
|
+
return withProvenance(result, dataset.iri.toString(), startedAt);
|
|
37
|
+
}
|
|
38
|
+
}
|
|
39
|
+
async function* withProvenance(quads, iri, startedAt) {
|
|
22
40
|
for await (const q of quads) {
|
|
23
41
|
yield q;
|
|
24
42
|
}
|
|
43
|
+
const endedAt = new Date();
|
|
25
44
|
const subject = namedNode(iri);
|
|
26
45
|
const activity = blankNode();
|
|
27
46
|
yield quad(subject, RDF_TYPE, PROV_ENTITY);
|
package/dist/vocabularyAnalyzer.d.ts
CHANGED
|
@@ -1,11 +1,18 @@
|
|
|
1
|
+
import { Dataset, Distribution } from '@lde/dataset';
|
|
2
|
+
import { NotSupported, type Executor, type ExecuteOptions } from '@lde/pipeline';
|
|
1
3
|
import type { Quad } from '@rdfjs/types';
|
|
2
4
|
/**
|
|
3
|
-
*
|
|
4
|
-
* `void:vocabulary` triples for detected vocabulary prefixes.
|
|
5
|
+
* Executor decorator that passes through all quads from the inner executor
|
|
6
|
+
* and appends `void:vocabulary` triples for detected vocabulary prefixes.
|
|
5
7
|
*
|
|
6
8
|
* Inspects quads with predicate `void:property` to detect known vocabulary
|
|
7
9
|
* namespace prefixes, then yields the corresponding `void:vocabulary` quads
|
|
8
|
-
* after all
|
|
10
|
+
* after all inner quads have been consumed.
|
|
9
11
|
*/
|
|
10
|
-
export declare
|
|
12
|
+
export declare class VocabularyExecutor implements Executor {
|
|
13
|
+
private readonly inner;
|
|
14
|
+
private readonly vocabularies;
|
|
15
|
+
constructor(inner: Executor, vocabularies?: readonly string[]);
|
|
16
|
+
execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
|
|
17
|
+
}
|
|
11
18
|
//# sourceMappingURL=vocabularyAnalyzer.d.ts.map
|
package/dist/vocabularyAnalyzer.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAczC;;;;;;;GAOG;AACH,
|
|
1
|
+
{"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,YAAY,EACZ,KAAK,QAAQ,EACb,KAAK,cAAc,EACpB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAczC;;;;;;;GAOG;AACH,qBAAa,kBAAmB,YAAW,QAAQ;IAE/C,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,YAAY;gBADZ,KAAK,EAAE,QAAQ,EACf,YAAY,GAAE,SAAS,MAAM,EAAwB;IAGlE,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC;CAO/C"}
|
package/dist/vocabularyAnalyzer.js
CHANGED
|
@@ -1,3 +1,4 @@
|
|
|
1
|
+
import { NotSupported, } from '@lde/pipeline';
|
|
1
2
|
import prefixes from '@zazuko/prefixes';
|
|
2
3
|
import { DataFactory } from 'n3';
|
|
3
4
|
const { namedNode, quad } = DataFactory;
|
|
@@ -8,14 +9,29 @@ const defaultVocabularies = [
|
|
|
8
9
|
...new Set(Object.values(prefixes)),
|
|
9
10
|
];
|
|
10
11
|
/**
|
|
11
|
-
*
|
|
12
|
-
* `void:vocabulary` triples for detected vocabulary prefixes.
|
|
12
|
+
* Executor decorator that passes through all quads from the inner executor
|
|
13
|
+
* and appends `void:vocabulary` triples for detected vocabulary prefixes.
|
|
13
14
|
*
|
|
14
15
|
* Inspects quads with predicate `void:property` to detect known vocabulary
|
|
15
16
|
* namespace prefixes, then yields the corresponding `void:vocabulary` quads
|
|
16
|
-
* after all
|
|
17
|
+
* after all inner quads have been consumed.
|
|
17
18
|
*/
|
|
18
|
-
export
|
|
19
|
+
export class VocabularyExecutor {
|
|
20
|
+
inner;
|
|
21
|
+
vocabularies;
|
|
22
|
+
constructor(inner, vocabularies = defaultVocabularies) {
|
|
23
|
+
this.inner = inner;
|
|
24
|
+
this.vocabularies = vocabularies;
|
|
25
|
+
}
|
|
26
|
+
async execute(dataset, distribution, options) {
|
|
27
|
+
const result = await this.inner.execute(dataset, distribution, options);
|
|
28
|
+
if (result instanceof NotSupported) {
|
|
29
|
+
return result;
|
|
30
|
+
}
|
|
31
|
+
return withVocabularies(result, dataset.iri.toString(), this.vocabularies);
|
|
32
|
+
}
|
|
33
|
+
}
|
|
34
|
+
async function* withVocabularies(quads, datasetIri, vocabularies) {
|
|
19
35
|
const detectedVocabularies = new Set();
|
|
20
36
|
for await (const q of quads) {
|
|
21
37
|
yield q;
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline-void",
|
|
3
|
-
"version": "0.2.31",
|
|
3
|
+
"version": "0.2.32",
|
|
4
4
|
"description": "VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets",
|
|
5
5
|
"repository": {
|
|
6
6
|
"url": "https://github.com/ldengine/lde",
|
|
@@ -24,6 +24,7 @@
|
|
|
24
24
|
"!**/*.tsbuildinfo"
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
|
+
"@lde/dataset": "0.6.9",
|
|
27
28
|
"@lde/pipeline": "0.6.28",
|
|
28
29
|
"@rdfjs/types": "^2.0.1",
|
|
29
30
|
"@zazuko/prefixes": "^2.6.1",
|