@lde/pipeline-void 0.2.16 → 0.2.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +26 -21
- package/dist/index.d.ts +1 -2
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -2
- package/dist/provenance.d.ts +5 -9
- package/dist/provenance.d.ts.map +1 -1
- package/dist/provenance.js +13 -16
- package/dist/sparqlQueryAnalyzer.d.ts +6 -35
- package/dist/sparqlQueryAnalyzer.d.ts.map +1 -1
- package/dist/sparqlQueryAnalyzer.js +10 -54
- package/dist/vocabularyAnalyzer.d.ts +7 -13
- package/dist/vocabularyAnalyzer.d.ts.map +1 -1
- package/dist/vocabularyAnalyzer.js +18 -37
- package/package.json +1 -2
package/README.md
CHANGED

@@ -2,19 +2,17 @@
 
 VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets.
 
-##
+## Query stages
 
-
+- `createQueryStage(filename, distribution)` — Create a `Stage` from a SPARQL CONSTRUCT query file
+- `createDatatypeStage(distribution)` — Per-class datatype partitions
+- `createLanguageStage(distribution)` — Per-class language tags
+- `createObjectClassStage(distribution)` — Per-class object class partitions
 
-##
+## Streaming transformers
 
-
-
-with `?class` bound via VALUES:
-
-- `createDatatypeStage` — per-class datatype partitions
-- `createLanguageStage` — per-class language tags
-- `createObjectClassStage` — per-class object class partitions
+- `withVocabularies(quads, datasetIri)` — Detect and append `void:vocabulary` triples
+- `withProvenance(quads, iri, startedAt, endedAt)` — Append PROV-O provenance metadata
 
 ## SPARQL Queries
 
@@ -43,23 +41,30 @@ Generic VOiD analysis queries included:
 
 ```typescript
 import {
-
-  Success,
+  createQueryStage,
   createDatatypeStage,
+  withVocabularies,
+  withProvenance,
 } from '@lde/pipeline-void';
 import { Distribution } from '@lde/dataset';
 
-// Simple CONSTRUCT query analyzer
-const analyzer = await SparqlQueryAnalyzer.fromFile('triples.rq');
-const result = await analyzer.execute(dataset);
-if (result instanceof Success) {
-  // result.data contains the VOiD statistics as RDF
-}
-
-// Per-class stage (streaming)
 const distribution = Distribution.sparql(new URL('http://example.com/sparql'));
-
+
+// Simple CONSTRUCT query stage
+const stage = await createQueryStage('triples.rq', distribution);
 const quads = await stage.run(dataset, distribution);
+
+// Per-class stage (streaming)
+const datatypeStage = await createDatatypeStage(distribution);
+const datatypeQuads = await datatypeStage.run(dataset, distribution);
+
+// Enrich with vocabulary detection and provenance
+const enriched = withProvenance(
+  withVocabularies(quads, dataset.iri.toString()),
+  dataset.iri.toString(),
+  startedAt,
+  endedAt
+);
 ```
 
 ## Validation
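The README example above builds `enriched` but stops there; the new API returns `AsyncIterable<Quad>` streams rather than materialised stores. A minimal consumption sketch (collecting into an n3 `Store` is an illustrative choice, not something the package prescribes):

```typescript
import { Store } from 'n3';
import type { Quad } from '@rdfjs/types';

// Drain a quad stream into an in-memory n3 Store.
async function collectQuads(quads: AsyncIterable<Quad>): Promise<Store> {
  const store = new Store();
  for await (const q of quads) {
    store.addQuad(q);
  }
  return store;
}

// With the `enriched` iterable from the README snippet:
// const store = await collectQuads(enriched);
// console.log(`VOiD description contains ${store.size} quads`);
```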
package/dist/index.d.ts
CHANGED

@@ -1,5 +1,4 @@
-export { Stage } from '@lde/pipeline';
-export { type Analyzer, BaseAnalyzer, Success, Failure, NotSupported, } from '@lde/pipeline/analyzer';
+export { Stage, NotSupported } from '@lde/pipeline';
 export * from './sparqlQueryAnalyzer.js';
 export * from './perClassAnalyzer.js';
 export * from './vocabularyAnalyzer.js';
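The practical effect of this change for consumers is the export surface: the analyzer result types (`Success`, `Failure`, `BaseAnalyzer`) are no longer re-exported, and `NotSupported` now comes from `@lde/pipeline`. A hedged sketch of how a downstream import might change (illustrative only):

```typescript
// 0.2.16 (analyzer-style API, removed in this release):
// import { SparqlQueryAnalyzer, Success } from '@lde/pipeline-void';

// 0.2.17 (stage/transformer API):
import {
  createQueryStage,
  withVocabularies,
  withProvenance,
  NotSupported,
} from '@lde/pipeline-void';
```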
package/dist/index.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AACpD,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,iBAAiB,CAAC"}
package/dist/index.js
CHANGED

@@ -1,5 +1,4 @@
-export { Stage } from '@lde/pipeline';
-export { BaseAnalyzer, Success, Failure, NotSupported, } from '@lde/pipeline/analyzer';
+export { Stage, NotSupported } from '@lde/pipeline';
 export * from './sparqlQueryAnalyzer.js';
 export * from './perClassAnalyzer.js';
 export * from './vocabularyAnalyzer.js';
package/dist/provenance.d.ts
CHANGED

@@ -1,18 +1,14 @@
-import type {
+import type { Quad } from '@rdfjs/types';
 /**
- *
+ * Streaming transformer that passes through all quads and appends
+ * PROV-O provenance metadata.
  *
- *
+ * Appended quads:
  * - `<iri> a prov:Entity`
  * - `<iri> prov:wasGeneratedBy _:activity`
  * - `_:activity a prov:Activity`
  * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
  * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
- *
- * @param data The dataset to add provenance to
- * @param iri The IRI of the entity
- * @param startedAt Start time of the activity
- * @param endedAt End time of the activity
 */
-export declare function withProvenance(
+export declare function withProvenance(quads: AsyncIterable<Quad>, iri: string, startedAt: Date, endedAt: Date): AsyncIterable<Quad>;
 //# sourceMappingURL=provenance.d.ts.map
package/dist/provenance.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,
+{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC;;;;;;;;;;GAUG;AACH,wBAAuB,cAAc,CACnC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,IAAI,EACf,OAAO,EAAE,IAAI,GACZ,aAAa,CAAC,IAAI,CAAC,CAqBrB"}
package/dist/provenance.js
CHANGED

@@ -1,4 +1,4 @@
-import { DataFactory
+import { DataFactory } from 'n3';
 const { namedNode, literal, blankNode, quad } = DataFactory;
 const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
 const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
@@ -8,28 +8,25 @@ const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime');
 const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
 const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
 /**
- *
+ * Streaming transformer that passes through all quads and appends
+ * PROV-O provenance metadata.
  *
- *
+ * Appended quads:
  * - `<iri> a prov:Entity`
  * - `<iri> prov:wasGeneratedBy _:activity`
  * - `_:activity a prov:Activity`
  * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
  * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
- *
- * @param data The dataset to add provenance to
- * @param iri The IRI of the entity
- * @param startedAt Start time of the activity
- * @param endedAt End time of the activity
 */
-export function withProvenance(
-    const
+export async function* withProvenance(quads, iri, startedAt, endedAt) {
+    for await (const q of quads) {
+        yield q;
+    }
     const subject = namedNode(iri);
     const activity = blankNode();
-
-
-
-
-
-    return store;
+    yield quad(subject, RDF_TYPE, PROV_ENTITY);
+    yield quad(subject, PROV_WAS_GENERATED_BY, activity);
+    yield quad(activity, RDF_TYPE, PROV_ACTIVITY);
+    yield quad(activity, PROV_STARTED_AT_TIME, literal(startedAt.toISOString(), XSD_DATE_TIME));
+    yield quad(activity, PROV_ENDED_AT_TIME, literal(endedAt.toISOString(), XSD_DATE_TIME));
 }
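`withProvenance` is now an async generator that forwards its input before yielding the five PROV-O quads. A small self-contained check of that ordering (the input quad and IRIs are invented for illustration):

```typescript
import { DataFactory } from 'n3';
import { withProvenance } from '@lde/pipeline-void';

const { namedNode, literal, quad } = DataFactory;

// One illustrative input quad.
async function* input() {
  yield quad(
    namedNode('http://example.com/dataset'),
    namedNode('http://rdfs.org/ns/void#triples'),
    literal('42')
  );
}

const startedAt = new Date();
const endedAt = new Date();

for await (const q of withProvenance(input(), 'http://example.com/dataset', startedAt, endedAt)) {
  // The input quad is yielded first, then the five appended PROV-O quads.
  console.log(q.subject.value, q.predicate.value, q.object.value);
}
```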
package/dist/sparqlQueryAnalyzer.d.ts
CHANGED

@@ -1,39 +1,10 @@
-import {
-import {
-import { BaseAnalyzer, Success, Failure, NotSupported } from '@lde/pipeline/analyzer';
-export interface SparqlQueryAnalyzerOptions {
-    /**
-     * Timeout for SPARQL queries in milliseconds.
-     * @default 300000 (5 minutes)
-     */
-    timeout?: number;
-    /**
-     * Custom SparqlEndpointFetcher instance.
-     */
-    fetcher?: SparqlEndpointFetcher;
-}
+import { Distribution } from '@lde/dataset';
+import { Stage } from '@lde/pipeline';
 /**
- *
+ * Create a Stage that executes a SPARQL CONSTRUCT query from the queries directory.
  *
- *
- *
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
- * - `?dataset` — replaced with the dataset IRI
- *
- * This class wraps the SparqlConstructExecutor from @lde/pipeline.
+ * Pre-processes `#subjectFilter#` before the query is parsed as SPARQL;
+ * `?dataset` and `FROM <graph>` are handled at the AST level by the executor.
 */
-export declare
-    readonly name: string;
-    private readonly query;
-    private readonly fetcher;
-    constructor(name: string, query: string, options?: SparqlQueryAnalyzerOptions);
-    /**
-     * Create an analyzer from a query file in the queries directory.
-     *
-     * @param filename Query filename (e.g., 'triples.rq')
-     * @param options Optional analyzer options
-     */
-    static fromFile(filename: string, options?: SparqlQueryAnalyzerOptions): Promise<SparqlQueryAnalyzer>;
-    execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
-}
+export declare function createQueryStage(filename: string, distribution: Distribution): Promise<Stage>;
 //# sourceMappingURL=sparqlQueryAnalyzer.d.ts.map
package/dist/sparqlQueryAnalyzer.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"sparqlQueryAnalyzer.d.ts","sourceRoot":"","sources":["../src/sparqlQueryAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
+{"version":3,"file":"sparqlQueryAnalyzer.d.ts","sourceRoot":"","sources":["../src/sparqlQueryAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,KAAK,EAA0C,MAAM,eAAe,CAAC;AAM9E;;;;;GAKG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,KAAK,CAAC,CAShB"}
package/dist/sparqlQueryAnalyzer.js
CHANGED

@@ -1,61 +1,17 @@
-import {
-import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
+import { Stage, SparqlConstructExecutor, readQueryFile } from '@lde/pipeline';
 import { resolve, dirname } from 'node:path';
 import { fileURLToPath } from 'node:url';
-import { BaseAnalyzer, Success, Failure, NotSupported, } from '@lde/pipeline/analyzer';
 const __dirname = dirname(fileURLToPath(import.meta.url));
 /**
- *
+ * Create a Stage that executes a SPARQL CONSTRUCT query from the queries directory.
  *
- *
- *
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
- * - `?dataset` — replaced with the dataset IRI
- *
- * This class wraps the SparqlConstructExecutor from @lde/pipeline.
+ * Pre-processes `#subjectFilter#` before the query is parsed as SPARQL;
+ * `?dataset` and `FROM <graph>` are handled at the AST level by the executor.
 */
-export
-
-
-
-
-
-        this.name = name;
-        this.query = query;
-        this.fetcher =
-            options?.fetcher ??
-            new SparqlEndpointFetcher({
-                timeout: options?.timeout ?? 300_000,
-            });
-    }
-    /**
-     * Create an analyzer from a query file in the queries directory.
-     *
-     * @param filename Query filename (e.g., 'triples.rq')
-     * @param options Optional analyzer options
-     */
-    static async fromFile(filename, options) {
-        const query = await readQueryFile(resolve(__dirname, 'queries', filename));
-        return new SparqlQueryAnalyzer(filename, query, options);
-    }
-    async execute(dataset) {
-        const sparqlDistribution = dataset.getSparqlDistribution();
-        if (sparqlDistribution === null) {
-            return new NotSupported('No SPARQL distribution available');
-        }
-        try {
-            const substituted = substituteQueryTemplates(this.query, sparqlDistribution, dataset);
-            const executor = new SparqlConstructExecutor({
-                query: substituted,
-                // eslint-disable-next-line @typescript-eslint/no-explicit-any
-                fetcher: this.fetcher,
-            });
-            const stream = await executor.execute(dataset, sparqlDistribution);
-            const store = await collect(stream);
-            return new Success(store);
-        }
-        catch (e) {
-            return new Failure(sparqlDistribution.accessUrl ?? new URL('unknown://'), e instanceof Error ? e.message : undefined);
-        }
-    }
+export async function createQueryStage(filename, distribution) {
+    const rawQuery = await readQueryFile(resolve(__dirname, 'queries', filename));
+    const subjectFilter = distribution.subjectFilter ?? '';
+    const query = rawQuery.replace('#subjectFilter#', subjectFilter);
+    const executor = new SparqlConstructExecutor({ query });
+    return new Stage({ name: filename, executors: executor });
 }
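The only template `createQueryStage` handles itself is `#subjectFilter#`, substituted as plain text before the query is parsed; `?dataset` and `FROM <graph>` are left to the executor. A sketch of what that pre-processing amounts to (the query text and filter value are invented for illustration):

```typescript
// Illustrative bundled query containing the placeholder (real queries live in dist/queries/).
const rawQuery = `
  CONSTRUCT { ?dataset <http://rdfs.org/ns/void#triples> ?count }
  WHERE {
    SELECT (COUNT(*) AS ?count) WHERE {
      ?s ?p ?o .
      #subjectFilter#
    }
  }
`;

// A distribution-supplied filter, e.g. restricting subjects to one namespace.
const subjectFilter = 'FILTER(STRSTARTS(STR(?s), "http://example.com/resource/"))';

// Plain string substitution, mirroring the implementation above.
const query = rawQuery.replace('#subjectFilter#', subjectFilter);
```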
package/dist/vocabularyAnalyzer.d.ts
CHANGED

@@ -1,17 +1,11 @@
-import {
-import { type Analyzer, Success, type Failure, type NotSupported } from '@lde/pipeline/analyzer';
+import type { Quad } from '@rdfjs/types';
 /**
- *
+ * Streaming transformer that passes through all quads and appends
+ * `void:vocabulary` triples for detected vocabulary prefixes.
  *
- *
- *
- *
+ * Inspects quads with predicate `void:property` to detect known vocabulary
+ * namespace prefixes, then yields the corresponding `void:vocabulary` quads
+ * after all input quads have been consumed.
 */
-export declare
-    private readonly inner;
-    readonly name: string;
-    constructor(inner: Analyzer);
-    execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
-    finish(): Promise<void>;
-}
+export declare function withVocabularies(quads: AsyncIterable<Quad>, datasetIri: string): AsyncIterable<Quad>;
 //# sourceMappingURL=vocabularyAnalyzer.d.ts.map
package/dist/vocabularyAnalyzer.d.ts.map
CHANGED

@@ -1 +1 @@
-{"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,
+{"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAkCzC;;;;;;;GAOG;AACH,wBAAuB,gBAAgB,CACrC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,UAAU,EAAE,MAAM,GACjB,aAAa,CAAC,IAAI,CAAC,CAqBrB"}
package/dist/vocabularyAnalyzer.js
CHANGED

@@ -1,5 +1,4 @@
-import { DataFactory
-import { Success, } from '@lde/pipeline/analyzer';
+import { DataFactory } from 'n3';
 const { namedNode, quad } = DataFactory;
 const VOID = 'http://rdfs.org/ns/void#';
 const voidProperty = namedNode(`${VOID}property`);
@@ -29,47 +28,29 @@ const vocabularyPrefixes = new Map([
     ['http://xmlns.com/foaf/0.1/', 'http://xmlns.com/foaf/0.1/'],
 ]);
 /**
- *
+ * Streaming transformer that passes through all quads and appends
+ * `void:vocabulary` triples for detected vocabulary prefixes.
  *
- *
- *
- *
+ * Inspects quads with predicate `void:property` to detect known vocabulary
+ * namespace prefixes, then yields the corresponding `void:vocabulary` quads
+ * after all input quads have been consumed.
 */
-export
-    inner;
-    name;
-    constructor(inner) {
-        this.inner = inner;
-        this.name = inner.name;
-    }
-    async execute(dataset) {
-        const result = await this.inner.execute(dataset);
-        if (!(result instanceof Success)) {
-            return result;
-        }
-        const enriched = addVocabularyTriples(result.data, dataset.iri.toString());
-        return new Success(enriched);
-    }
-    async finish() {
-        await this.inner.finish?.();
-    }
-}
-function addVocabularyTriples(data, datasetIri) {
-    const store = new Store([...data]);
-    const datasetNode = namedNode(datasetIri);
-    // Collect unique vocabulary URIs from void:property triples.
+export async function* withVocabularies(quads, datasetIri) {
     const detectedVocabularies = new Set();
-    for (const q of
-
-
-
-
-
+    for await (const q of quads) {
+        yield q;
+        if (q.predicate.equals(voidProperty)) {
+            const propertyUri = q.object.value;
+            for (const [prefix, vocabUri] of vocabularyPrefixes) {
+                if (propertyUri.startsWith(prefix)) {
+                    detectedVocabularies.add(vocabUri);
+                    break;
+                }
             }
         }
     }
+    const datasetNode = namedNode(datasetIri);
     for (const vocabUri of detectedVocabularies) {
-
+        yield quad(datasetNode, voidVocabulary, namedNode(vocabUri));
     }
-    return store;
 }
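A quick end-to-end check of `withVocabularies`: feed it a `void:property` quad whose object is a FOAF term (FOAF is one of the prefixes in the bundled map) and a matching `void:vocabulary` quad should be appended after the input. The input quad below is invented for illustration:

```typescript
import { DataFactory } from 'n3';
import { withVocabularies } from '@lde/pipeline-void';

const { namedNode, quad } = DataFactory;
const dataset = 'http://example.com/dataset';

// Illustrative input: a property partition mentioning foaf:name.
async function* input() {
  yield quad(
    namedNode(`${dataset}/propertyPartition/1`),
    namedNode('http://rdfs.org/ns/void#property'),
    namedNode('http://xmlns.com/foaf/0.1/name')
  );
}

for await (const q of withVocabularies(input(), dataset)) {
  console.log(q.predicate.value, q.object.value);
}
// Expected: the input quad is passed through first, then the appended
//   http://rdfs.org/ns/void#vocabulary http://xmlns.com/foaf/0.1/
```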
package/package.json
CHANGED

@@ -1,6 +1,6 @@
 {
   "name": "@lde/pipeline-void",
-  "version": "0.2.16",
+  "version": "0.2.17",
   "description": "VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets",
   "repository": {
     "url": "https://github.com/ldengine/lde",
@@ -27,7 +27,6 @@
     "@lde/dataset": "0.6.8",
     "@lde/pipeline": "0.6.15",
     "@rdfjs/types": "^2.0.1",
-    "fetch-sparql-endpoint": "^6.0.0",
     "n3": "^1.17.0",
     "tslib": "^2.3.0"
   }