@lde/pipeline-void 0.2.36 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +51 -55
- package/dist/index.d.ts +1 -3
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -3
- package/dist/stage.d.ts +19 -0
- package/dist/stage.d.ts.map +1 -0
- package/dist/stage.js +99 -0
- package/package.json +4 -3
- package/src/queries/class-partition.rq +19 -0
- package/src/queries/class-properties-objects.rq +17 -0
- package/src/queries/class-properties-subjects.rq +23 -0
- package/src/queries/class-property-datatypes.rq +28 -0
- package/src/queries/class-property-languages.rq +30 -0
- package/src/queries/class-property-object-classes.rq +28 -0
- package/src/queries/datatypes.rq +37 -0
- package/src/queries/entity-properties.rq +20 -0
- package/src/queries/licenses.rq +24 -0
- package/src/queries/object-literals.rq +14 -0
- package/src/queries/object-uri-space.rq +25 -0
- package/src/queries/object-uris.rq +14 -0
- package/src/queries/properties.rq +12 -0
- package/src/queries/subject-uri-space.rq +30 -0
- package/src/queries/subjects.rq +13 -0
- package/src/queries/triples.rq +12 -0
- package/dist/perClassAnalyzer.d.ts +0 -5
- package/dist/perClassAnalyzer.d.ts.map +0 -1
- package/dist/perClassAnalyzer.js +0 -44
- package/dist/provenance.d.ts +0 -23
- package/dist/provenance.d.ts.map +0 -1
- package/dist/provenance.js +0 -51
- package/dist/sparqlQueryAnalyzer.d.ts +0 -9
- package/dist/sparqlQueryAnalyzer.d.ts.map +0 -1
- package/dist/sparqlQueryAnalyzer.js +0 -15
package/README.md
CHANGED
|
@@ -1,71 +1,67 @@
|
|
|
1
|
-
# Pipeline
|
|
1
|
+
# Pipeline VoID
|
|
2
2
|
|
|
3
|
-
|
|
3
|
+
Extensions to [@lde/pipeline](../pipeline) for VoID (Vocabulary of Interlinked Datasets) statistical analysis of RDF datasets.
|
|
4
4
|
|
|
5
|
-
##
|
|
5
|
+
## Stage factories
|
|
6
6
|
|
|
7
|
-
|
|
8
|
-
- `createDatatypeStage(distribution)` — Per-class datatype partitions
|
|
9
|
-
- `createLanguageStage(distribution)` — Per-class language tags
|
|
10
|
-
- `createObjectClassStage(distribution)` — Per-class object class partitions
|
|
7
|
+
### Global stages (one CONSTRUCT query per dataset):
|
|
11
8
|
|
|
12
|
-
|
|
9
|
+
| Factory | Query |
|
|
10
|
+
| -------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
|
|
11
|
+
| `createClassPartitionStage()` | [`class-partition.rq`](src/queries/class-partition.rq) — Classes with entity counts |
|
|
12
|
+
| `createClassPropertiesSubjectsStage()` | [`class-properties-subjects.rq`](src/queries/class-properties-subjects.rq) — Properties per class (subject counts) |
|
|
13
|
+
| `createClassPropertiesObjectsStage()` | [`class-properties-objects.rq`](src/queries/class-properties-objects.rq) — Properties per class (object counts) |
|
|
14
|
+
| `createDatatypesStage()` | [`datatypes.rq`](src/queries/datatypes.rq) — Dataset-level datatypes |
|
|
15
|
+
| `createLicensesStage()` | [`licenses.rq`](src/queries/licenses.rq) — License detection |
|
|
16
|
+
| `createObjectLiteralsStage()` | [`object-literals.rq`](src/queries/object-literals.rq) — Literal object counts |
|
|
17
|
+
| `createObjectUrisStage()` | [`object-uris.rq`](src/queries/object-uris.rq) — URI object counts |
|
|
18
|
+
| `createPropertiesStage()` | [`properties.rq`](src/queries/properties.rq) — Distinct properties |
|
|
19
|
+
| `createSubjectsStage()` | [`subjects.rq`](src/queries/subjects.rq) — Distinct subjects |
|
|
20
|
+
| `createSubjectUriSpaceStage()` | [`subject-uri-space.rq`](src/queries/subject-uri-space.rq) — Subject URI namespaces |
|
|
21
|
+
| `createTriplesStage()` | [`triples.rq`](src/queries/triples.rq) — Total triple count |
|
|
22
|
+
|
|
23
|
+
### Per-class stages (iterated with a class selector):
|
|
24
|
+
|
|
25
|
+
| Factory | Query |
|
|
26
|
+
| ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
|
|
27
|
+
| `createPerClassDatatypeStage()` | [`class-property-datatypes.rq`](src/queries/class-property-datatypes.rq) — Per-class datatype partitions |
|
|
28
|
+
| `createPerClassLanguageStage()` | [`class-property-languages.rq`](src/queries/class-property-languages.rq) — Per-class language tags |
|
|
29
|
+
| `createPerClassObjectClassStage()` | [`class-property-object-classes.rq`](src/queries/class-property-object-classes.rq) — Per-class object class partitions |
|
|
13
30
|
|
|
14
|
-
-
|
|
15
|
-
- `ProvenanceExecutor` — Wraps an executor; appends PROV-O provenance metadata with automatic timing
|
|
31
|
+
### Domain-specific stages:
|
|
16
32
|
|
|
17
|
-
|
|
33
|
+
| Factory | Description |
|
|
34
|
+
| -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
35
|
+
| `createUriSpaceStage(uriSpaces)` | [`object-uri-space.rq`](src/queries/object-uri-space.rq) — Object URI namespace linksets, aggregated against a provided URI space map |
|
|
36
|
+
| `createVocabularyStage()` | [`entity-properties.rq`](src/queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection |
|
|
18
37
|
|
|
19
|
-
|
|
38
|
+
All factories return `Promise<Stage>`.
|
|
39
|
+
|
|
40
|
+
## Executor decorators
|
|
20
41
|
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
| `triples.rq` | Total triple count |
|
|
24
|
-
| `subjects.rq` | Distinct subjects |
|
|
25
|
-
| `properties.rq` | Distinct properties |
|
|
26
|
-
| `class-partition.rq` | Classes with entity counts |
|
|
27
|
-
| `class-properties-subjects.rq` | Properties per class (subject counts) |
|
|
28
|
-
| `class-properties-objects.rq` | Properties per class (object counts) |
|
|
29
|
-
| `class-property-datatypes.rq` | Per-class datatype partitions |
|
|
30
|
-
| `class-property-languages.rq` | Per-class language tags |
|
|
31
|
-
| `class-property-object-classes.rq` | Per-class object class partitions |
|
|
32
|
-
| `object-literals.rq` | Literal object counts |
|
|
33
|
-
| `object-uris.rq` | URI object counts |
|
|
34
|
-
| `object-uri-space.rq` | Object URI namespaces |
|
|
35
|
-
| `subject-uri-space.rq` | Subject URI namespaces |
|
|
36
|
-
| `datatypes.rq` | Dataset-level datatypes |
|
|
37
|
-
| `entity-properties.rq` | Property statistics |
|
|
38
|
-
| `licenses.rq` | License detection |
|
|
42
|
+
- `VocabularyExecutor` — Wraps an executor; detects known vocabulary namespace prefixes in `void:property` quads and appends `void:vocabulary` triples.
|
|
43
|
+
- `UriSpaceExecutor` — Wraps an executor; consumes `void:Linkset` quads, matches `void:objectsTarget` against configured URI spaces, and emits aggregated linksets.
|
|
39
44
|
|
|
40
45
|
## Usage
|
|
41
46
|
|
|
42
47
|
```typescript
|
|
43
48
|
import {
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
47
|
-
Stage,
|
|
49
|
+
createTriplesStage,
|
|
50
|
+
createClassPartitionStage,
|
|
51
|
+
createVocabularyStage,
|
|
48
52
|
} from '@lde/pipeline-void';
|
|
49
|
-
import {
|
|
50
|
-
|
|
51
|
-
// Simple CONSTRUCT query stage
|
|
52
|
-
const stage = await createQueryStage('triples.rq');
|
|
53
|
-
await stage.run(dataset, distribution, writer);
|
|
54
|
-
|
|
55
|
-
// Executor decorator: vocabulary detection wraps entity-properties executor
|
|
56
|
-
const executor = await SparqlConstructExecutor.fromFile(
|
|
57
|
-
'queries/entity-properties.rq',
|
|
58
|
-
);
|
|
59
|
-
const entityPropertiesStage = new Stage({
|
|
60
|
-
name: 'entity-properties',
|
|
61
|
-
executors: new VocabularyExecutor(executor),
|
|
62
|
-
});
|
|
63
|
-
```
|
|
64
|
-
|
|
65
|
-
## Validation
|
|
53
|
+
import { Pipeline, SparqlUpdateWriter, provenancePlugin } from '@lde/pipeline';
|
|
66
54
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
55
|
+
await new Pipeline({
|
|
56
|
+
datasetSelector: selector,
|
|
57
|
+
stages: [
|
|
58
|
+
createTriplesStage(),
|
|
59
|
+
createClassPartitionStage(),
|
|
60
|
+
createVocabularyStage(),
|
|
61
|
+
],
|
|
62
|
+
plugins: [provenancePlugin()],
|
|
63
|
+
writers: new SparqlUpdateWriter({
|
|
64
|
+
endpoint: new URL('http://localhost:7200/repositories/lde/statements'),
|
|
65
|
+
}),
|
|
66
|
+
}).run();
|
|
71
67
|
```
|
package/dist/index.d.ts
CHANGED
|
@@ -1,7 +1,5 @@
|
|
|
1
1
|
export { Stage, NotSupported } from '@lde/pipeline';
|
|
2
|
-
export * from './sparqlQueryAnalyzer.js';
|
|
3
|
-
export * from './perClassAnalyzer.js';
|
|
2
|
+
export * from './stage.js';
|
|
4
3
|
export * from './vocabularyAnalyzer.js';
|
|
5
|
-
export * from './provenance.js';
|
|
6
4
|
export * from './uriSpaceExecutor.js';
|
|
7
5
|
//# sourceMappingURL=index.d.ts.map
|
package/dist/index.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AACpD,cAAc,
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AACpD,cAAc,YAAY,CAAC;AAC3B,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC"}
|
package/dist/index.js
CHANGED
|
@@ -1,6 +1,4 @@
|
|
|
1
1
|
export { Stage, NotSupported } from '@lde/pipeline';
|
|
2
|
-
export * from './sparqlQueryAnalyzer.js';
|
|
3
|
-
export * from './perClassAnalyzer.js';
|
|
2
|
+
export * from './stage.js';
|
|
4
3
|
export * from './vocabularyAnalyzer.js';
|
|
5
|
-
export * from './provenance.js';
|
|
6
4
|
export * from './uriSpaceExecutor.js';
|
package/dist/stage.d.ts
ADDED
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
import { Stage } from '@lde/pipeline';
|
|
2
|
+
import type { Quad } from '@rdfjs/types';
|
|
3
|
+
export declare function createSubjectUriSpaceStage(): Promise<Stage>;
|
|
4
|
+
export declare function createClassPartitionStage(): Promise<Stage>;
|
|
5
|
+
export declare function createObjectLiteralsStage(): Promise<Stage>;
|
|
6
|
+
export declare function createObjectUrisStage(): Promise<Stage>;
|
|
7
|
+
export declare function createPropertiesStage(): Promise<Stage>;
|
|
8
|
+
export declare function createSubjectsStage(): Promise<Stage>;
|
|
9
|
+
export declare function createTriplesStage(): Promise<Stage>;
|
|
10
|
+
export declare function createClassPropertiesSubjectsStage(): Promise<Stage>;
|
|
11
|
+
export declare function createClassPropertiesObjectsStage(): Promise<Stage>;
|
|
12
|
+
export declare function createDatatypesStage(): Promise<Stage>;
|
|
13
|
+
export declare function createLicensesStage(): Promise<Stage>;
|
|
14
|
+
export declare function createPerClassObjectClassStage(): Promise<Stage>;
|
|
15
|
+
export declare function createPerClassDatatypeStage(): Promise<Stage>;
|
|
16
|
+
export declare function createPerClassLanguageStage(): Promise<Stage>;
|
|
17
|
+
export declare function createUriSpaceStage(uriSpaces: ReadonlyMap<string, readonly Quad[]>): Promise<Stage>;
|
|
18
|
+
export declare function createVocabularyStage(): Promise<Stage>;
|
|
19
|
+
//# sourceMappingURL=stage.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAMN,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAqDzC,wBAAgB,0BAA0B,IAAI,OAAO,CAAC,KAAK,CAAC,CAE3D;AAED,wBAAgB,yBAAyB,IAAI,OAAO,CAAC,KAAK,CAAC,CAE1D;AAED,wBAAgB,yBAAyB,IAAI,OAAO,CAAC,KAAK,CAAC,CAE1D;AAED,wBAAgB,qBAAqB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEtD;AAED,wBAAgB,qBAAqB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEtD;AAED,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAED,wBAAgB,kBAAkB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEnD;AAED,wBAAgB,kCAAkC,IAAI,OAAO,CAAC,KAAK,CAAC,CAEnE;AAED,wBAAgB,iCAAiC,IAAI,OAAO,CAAC,KAAK,CAAC,CAElE;AAED,wBAAgB,oBAAoB,IAAI,OAAO,CAAC,KAAK,CAAC,CAErD;AAED,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAID,wBAAgB,8BAA8B,IAAI,OAAO,CAAC,KAAK,CAAC,CAI/D;AAED,wBAAgB,2BAA2B,IAAI,OAAO,CAAC,KAAK,CAAC,CAI5D;AAED,wBAAgB,2BAA2B,IAAI,OAAO,CAAC,KAAK,CAAC,CAI5D;AAID,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,WAAW,CAAC,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC,GAC9C,OAAO,CAAC,KAAK,CAAC,CAKhB;AAED,wBAAgB,qBAAqB,IAAI,OAAO,CAAC,KAAK,CAAC,CAKtD"}
|
package/dist/stage.js
ADDED
|
@@ -0,0 +1,99 @@
|
|
|
1
|
+
import { Stage, SparqlConstructExecutor, SparqlItemSelector, readQueryFile, } from '@lde/pipeline';
|
|
2
|
+
import { resolve, dirname } from 'node:path';
|
|
3
|
+
import { fileURLToPath } from 'node:url';
|
|
4
|
+
import { VocabularyExecutor } from './vocabularyAnalyzer.js';
|
|
5
|
+
import { UriSpaceExecutor } from './uriSpaceExecutor.js';
|
|
6
|
+
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
7
|
+
async function createVoidStage(filename, options) {
|
|
8
|
+
const query = await readQueryFile(resolve(__dirname, 'queries', filename));
|
|
9
|
+
const executor = options?.executor?.(query) ?? new SparqlConstructExecutor({ query });
|
|
10
|
+
if (options?.selection === 'perClass') {
|
|
11
|
+
return new Stage({
|
|
12
|
+
name: filename,
|
|
13
|
+
itemSelector: classSelector(),
|
|
14
|
+
executors: executor,
|
|
15
|
+
});
|
|
16
|
+
}
|
|
17
|
+
return new Stage({ name: filename, executors: executor });
|
|
18
|
+
}
|
|
19
|
+
function classSelector() {
|
|
20
|
+
return {
|
|
21
|
+
select: (distribution) => {
|
|
22
|
+
const subjectFilter = distribution.subjectFilter ?? '';
|
|
23
|
+
const fromClause = distribution.namedGraph
|
|
24
|
+
? `FROM <${distribution.namedGraph}>`
|
|
25
|
+
: '';
|
|
26
|
+
const selectorQuery = [
|
|
27
|
+
'SELECT DISTINCT ?class',
|
|
28
|
+
fromClause,
|
|
29
|
+
`WHERE { ${subjectFilter} ?s a ?class . }`,
|
|
30
|
+
'LIMIT 1000',
|
|
31
|
+
].join('\n');
|
|
32
|
+
return new SparqlItemSelector({
|
|
33
|
+
query: selectorQuery,
|
|
34
|
+
pageSize: 1000,
|
|
35
|
+
}).select(distribution);
|
|
36
|
+
},
|
|
37
|
+
};
|
|
38
|
+
}
|
|
39
|
+
// Global stages
|
|
40
|
+
export function createSubjectUriSpaceStage() {
|
|
41
|
+
return createVoidStage('subject-uri-space.rq');
|
|
42
|
+
}
|
|
43
|
+
export function createClassPartitionStage() {
|
|
44
|
+
return createVoidStage('class-partition.rq');
|
|
45
|
+
}
|
|
46
|
+
export function createObjectLiteralsStage() {
|
|
47
|
+
return createVoidStage('object-literals.rq');
|
|
48
|
+
}
|
|
49
|
+
export function createObjectUrisStage() {
|
|
50
|
+
return createVoidStage('object-uris.rq');
|
|
51
|
+
}
|
|
52
|
+
export function createPropertiesStage() {
|
|
53
|
+
return createVoidStage('properties.rq');
|
|
54
|
+
}
|
|
55
|
+
export function createSubjectsStage() {
|
|
56
|
+
return createVoidStage('subjects.rq');
|
|
57
|
+
}
|
|
58
|
+
export function createTriplesStage() {
|
|
59
|
+
return createVoidStage('triples.rq');
|
|
60
|
+
}
|
|
61
|
+
export function createClassPropertiesSubjectsStage() {
|
|
62
|
+
return createVoidStage('class-properties-subjects.rq');
|
|
63
|
+
}
|
|
64
|
+
export function createClassPropertiesObjectsStage() {
|
|
65
|
+
return createVoidStage('class-properties-objects.rq');
|
|
66
|
+
}
|
|
67
|
+
export function createDatatypesStage() {
|
|
68
|
+
return createVoidStage('datatypes.rq');
|
|
69
|
+
}
|
|
70
|
+
export function createLicensesStage() {
|
|
71
|
+
return createVoidStage('licenses.rq');
|
|
72
|
+
}
|
|
73
|
+
// Per-class stages
|
|
74
|
+
export function createPerClassObjectClassStage() {
|
|
75
|
+
return createVoidStage('class-property-object-classes.rq', {
|
|
76
|
+
selection: 'perClass',
|
|
77
|
+
});
|
|
78
|
+
}
|
|
79
|
+
export function createPerClassDatatypeStage() {
|
|
80
|
+
return createVoidStage('class-property-datatypes.rq', {
|
|
81
|
+
selection: 'perClass',
|
|
82
|
+
});
|
|
83
|
+
}
|
|
84
|
+
export function createPerClassLanguageStage() {
|
|
85
|
+
return createVoidStage('class-property-languages.rq', {
|
|
86
|
+
selection: 'perClass',
|
|
87
|
+
});
|
|
88
|
+
}
|
|
89
|
+
// Domain-specific executor stages
|
|
90
|
+
export function createUriSpaceStage(uriSpaces) {
|
|
91
|
+
return createVoidStage('object-uri-space.rq', {
|
|
92
|
+
executor: (query) => new UriSpaceExecutor(new SparqlConstructExecutor({ query }), uriSpaces),
|
|
93
|
+
});
|
|
94
|
+
}
|
|
95
|
+
export function createVocabularyStage() {
|
|
96
|
+
return createVoidStage('entity-properties.rq', {
|
|
97
|
+
executor: (query) => new VocabularyExecutor(new SparqlConstructExecutor({ query })),
|
|
98
|
+
});
|
|
99
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline-void",
|
|
3
|
-
"version": "0.2.36",
|
|
3
|
+
"version": "0.3.0",
|
|
4
4
|
"description": "VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets",
|
|
5
5
|
"repository": {
|
|
6
6
|
"url": "https://github.com/ldengine/lde",
|
|
@@ -21,11 +21,12 @@
|
|
|
21
21
|
"types": "./dist/index.d.ts",
|
|
22
22
|
"files": [
|
|
23
23
|
"dist",
|
|
24
|
+
"src/queries",
|
|
24
25
|
"!**/*.tsbuildinfo"
|
|
25
26
|
],
|
|
26
27
|
"dependencies": {
|
|
27
|
-
"@lde/dataset": "0.6.
|
|
28
|
-
"@lde/pipeline": "0.
|
|
28
|
+
"@lde/dataset": "0.6.10",
|
|
29
|
+
"@lde/pipeline": "0.7.0",
|
|
29
30
|
"@rdfjs/types": "^2.0.1",
|
|
30
31
|
"@zazuko/prefixes": "^2.6.1",
|
|
31
32
|
"n3": "^1.17.0",
|
|
@@ -0,0 +1,19 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
|
|
3
|
+
CONSTRUCT {
|
|
4
|
+
?dataset a void:Dataset ;
|
|
5
|
+
void:classPartition ?classPartition .
|
|
6
|
+
?classPartition void:class ?type ;
|
|
7
|
+
void:entities ?entities .
|
|
8
|
+
}
|
|
9
|
+
WHERE {
|
|
10
|
+
{
|
|
11
|
+
SELECT (COUNT(?type) AS ?entities) ?type {
|
|
12
|
+
#subjectFilter#
|
|
13
|
+
?s a ?type .
|
|
14
|
+
}
|
|
15
|
+
GROUP BY ?type
|
|
16
|
+
}
|
|
17
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-", MD5(STR(?type)))) AS ?classPartition)
|
|
18
|
+
}
|
|
19
|
+
LIMIT 10000
|
|
@@ -0,0 +1,17 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
|
|
3
|
+
CONSTRUCT {
|
|
4
|
+
?propertyPartition void:distinctObjects ?objects .
|
|
5
|
+
}
|
|
6
|
+
WHERE {
|
|
7
|
+
# Object counts only. Subject counts in class-properties-subjects.rq.
|
|
8
|
+
{
|
|
9
|
+
SELECT ?type ?p (COUNT(DISTINCT ?o) AS ?objects) {
|
|
10
|
+
#subjectFilter#
|
|
11
|
+
?s a ?type ; ?p ?o .
|
|
12
|
+
}
|
|
13
|
+
GROUP BY ?type ?p
|
|
14
|
+
}
|
|
15
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?type), STR(?p))))) AS ?propertyPartition)
|
|
16
|
+
}
|
|
17
|
+
LIMIT 100000
|
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
|
|
3
|
+
CONSTRUCT {
|
|
4
|
+
?dataset a void:Dataset ;
|
|
5
|
+
void:classPartition ?classPartition .
|
|
6
|
+
?classPartition void:class ?type ;
|
|
7
|
+
void:propertyPartition ?propertyPartition .
|
|
8
|
+
?propertyPartition void:property ?p ;
|
|
9
|
+
void:entities ?subjects .
|
|
10
|
+
}
|
|
11
|
+
WHERE {
|
|
12
|
+
# Subject counts only. Object counts in class-properties-objects.rq.
|
|
13
|
+
{
|
|
14
|
+
SELECT ?type ?p (COUNT(DISTINCT ?s) AS ?subjects) {
|
|
15
|
+
#subjectFilter#
|
|
16
|
+
?s a ?type ; ?p [] .
|
|
17
|
+
}
|
|
18
|
+
GROUP BY ?type ?p
|
|
19
|
+
}
|
|
20
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-", MD5(STR(?type)))) AS ?classPartition)
|
|
21
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?type), STR(?p))))) AS ?propertyPartition)
|
|
22
|
+
}
|
|
23
|
+
LIMIT 100000
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
PREFIX void-ext: <http://ldf.fi/void-ext#>
|
|
3
|
+
|
|
4
|
+
CONSTRUCT {
|
|
5
|
+
?propertyPartition void-ext:datatypePartition ?datatypePartition .
|
|
6
|
+
?datatypePartition
|
|
7
|
+
void-ext:datatype ?dt ;
|
|
8
|
+
void:triples ?count .
|
|
9
|
+
}
|
|
10
|
+
WHERE {
|
|
11
|
+
{
|
|
12
|
+
SELECT ?p ?dt (COUNT(*) AS ?count) {
|
|
13
|
+
{
|
|
14
|
+
SELECT ?p ?o {
|
|
15
|
+
#subjectFilter#
|
|
16
|
+
?s a ?class ;
|
|
17
|
+
?p ?o .
|
|
18
|
+
FILTER (ISLITERAL(?o))
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
BIND(DATATYPE(?o) AS ?dt)
|
|
22
|
+
}
|
|
23
|
+
GROUP BY ?p ?dt
|
|
24
|
+
}
|
|
25
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
|
|
26
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#datatype-", MD5(CONCAT(STR(?class), STR(?p), STR(?dt))))) AS ?datatypePartition)
|
|
27
|
+
}
|
|
28
|
+
LIMIT 100000
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
PREFIX void-ext: <http://ldf.fi/void-ext#>
|
|
3
|
+
|
|
4
|
+
CONSTRUCT {
|
|
5
|
+
?propertyPartition void-ext:languagePartition ?languagePartition .
|
|
6
|
+
?languagePartition
|
|
7
|
+
void-ext:language ?lang ;
|
|
8
|
+
void:triples ?count .
|
|
9
|
+
}
|
|
10
|
+
WHERE {
|
|
11
|
+
{
|
|
12
|
+
SELECT ?p ?lang (COUNT(*) AS ?count) {
|
|
13
|
+
{
|
|
14
|
+
# Pre-filter to distinct literals to minimize LANG calls
|
|
15
|
+
SELECT DISTINCT ?p ?o {
|
|
16
|
+
#subjectFilter#
|
|
17
|
+
?s a ?class ;
|
|
18
|
+
?p ?o .
|
|
19
|
+
FILTER(ISLITERAL(?o))
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
BIND(LANG(?o) AS ?lang)
|
|
23
|
+
FILTER(?lang != "")
|
|
24
|
+
}
|
|
25
|
+
GROUP BY ?p ?lang
|
|
26
|
+
}
|
|
27
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
|
|
28
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#language-", MD5(CONCAT(STR(?class), STR(?p), ?lang)))) AS ?languagePartition)
|
|
29
|
+
}
|
|
30
|
+
LIMIT 100000
|
|
@@ -0,0 +1,28 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
PREFIX void-ext: <http://ldf.fi/void-ext#>
|
|
3
|
+
|
|
4
|
+
CONSTRUCT {
|
|
5
|
+
?propertyPartition void-ext:objectClassPartition ?objectClassPartition .
|
|
6
|
+
?objectClassPartition
|
|
7
|
+
void:class ?objectClass ;
|
|
8
|
+
void:triples ?count .
|
|
9
|
+
}
|
|
10
|
+
WHERE {
|
|
11
|
+
{
|
|
12
|
+
SELECT ?p ?objectClass (COUNT(*) AS ?count) {
|
|
13
|
+
{
|
|
14
|
+
SELECT ?p ?o {
|
|
15
|
+
#subjectFilter#
|
|
16
|
+
?s a ?class ;
|
|
17
|
+
?p ?o .
|
|
18
|
+
FILTER (ISIRI(?o))
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
?o a ?objectClass .
|
|
22
|
+
}
|
|
23
|
+
GROUP BY ?p ?objectClass
|
|
24
|
+
}
|
|
25
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
|
|
26
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#object-class-", MD5(CONCAT(STR(?class), STR(?p), STR(?objectClass))))) AS ?objectClassPartition)
|
|
27
|
+
}
|
|
28
|
+
LIMIT 100000
|
|
@@ -0,0 +1,37 @@
|
|
|
1
|
+
PREFIX void-ext: <http://ldf.fi/void-ext#>
|
|
2
|
+
|
|
3
|
+
CONSTRUCT {
|
|
4
|
+
?dataset void-ext:datatypes ?count ;
|
|
5
|
+
void-ext:datatype ?datatype .
|
|
6
|
+
}
|
|
7
|
+
WHERE {
|
|
8
|
+
{
|
|
9
|
+
# Count distinct datatypes
|
|
10
|
+
SELECT (COUNT(DISTINCT ?dt) AS ?count) WHERE {
|
|
11
|
+
{
|
|
12
|
+
# Pre-filter to distinct literals to minimize DATATYPE calls
|
|
13
|
+
SELECT DISTINCT ?o WHERE {
|
|
14
|
+
#subjectFilter#
|
|
15
|
+
?s ?p ?o .
|
|
16
|
+
FILTER(ISLITERAL(?o))
|
|
17
|
+
}
|
|
18
|
+
}
|
|
19
|
+
BIND(DATATYPE(?o) AS ?dt)
|
|
20
|
+
}
|
|
21
|
+
}
|
|
22
|
+
{
|
|
23
|
+
# Get distinct datatypes
|
|
24
|
+
SELECT DISTINCT ?datatype WHERE {
|
|
25
|
+
{
|
|
26
|
+
# Pre-filter to distinct literals to minimize DATATYPE calls
|
|
27
|
+
SELECT DISTINCT ?o WHERE {
|
|
28
|
+
#subjectFilter#
|
|
29
|
+
?s ?p ?o .
|
|
30
|
+
FILTER(ISLITERAL(?o))
|
|
31
|
+
}
|
|
32
|
+
}
|
|
33
|
+
BIND(DATATYPE(?o) AS ?datatype)
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
37
|
+
LIMIT 1000
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
|
|
3
|
+
CONSTRUCT {
|
|
4
|
+
?dataset a void:Dataset ;
|
|
5
|
+
void:propertyPartition ?propertyPartition .
|
|
6
|
+
?propertyPartition
|
|
7
|
+
void:property ?p ;
|
|
8
|
+
void:entities ?subjects ;
|
|
9
|
+
void:distinctObjects ?objects .
|
|
10
|
+
}
|
|
11
|
+
WHERE {
|
|
12
|
+
{
|
|
13
|
+
SELECT (COUNT(DISTINCT ?s) AS ?subjects) (COUNT(DISTINCT ?o) as ?objects) ?p {
|
|
14
|
+
#subjectFilter#
|
|
15
|
+
?s ?p ?o .
|
|
16
|
+
}
|
|
17
|
+
GROUP BY ?p
|
|
18
|
+
}
|
|
19
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#property-partition-", MD5(STR(?p)))) AS ?propertyPartition)
|
|
20
|
+
}
|
|
@@ -0,0 +1,24 @@
|
|
|
1
|
+
PREFIX schema: <http://schema.org/>
|
|
2
|
+
PREFIX dcterms: <http://purl.org/dc/terms/>
|
|
3
|
+
PREFIX dc: <http://purl.org/dc/elements/1.1/>
|
|
4
|
+
PREFIX rico: <https://www.ica.org/standards/RiC/ontology#>
|
|
5
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
6
|
+
|
|
7
|
+
CONSTRUCT {
|
|
8
|
+
?dataset a void:Dataset ;
|
|
9
|
+
void:subset ?licenseSubset .
|
|
10
|
+
?licenseSubset
|
|
11
|
+
dcterms:license ?license ;
|
|
12
|
+
void:triples ?count .
|
|
13
|
+
}
|
|
14
|
+
WHERE {
|
|
15
|
+
{
|
|
16
|
+
SELECT (IRI(?l) AS ?license) (COUNT(*) AS ?count) {
|
|
17
|
+
#subjectFilter#
|
|
18
|
+
?s ?p ?l .
|
|
19
|
+
VALUES ?p { schema:license <https://schema.org/license> dc:license rico:conditionsOfUse }
|
|
20
|
+
}
|
|
21
|
+
GROUP BY ?l
|
|
22
|
+
}
|
|
23
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#license-", MD5(STR(?license)))) AS ?licenseSubset)
|
|
24
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
PREFIX void-ext: <http://ldf.fi/void-ext#>
|
|
3
|
+
|
|
4
|
+
CONSTRUCT {
|
|
5
|
+
?dataset a void:Dataset ;
|
|
6
|
+
void-ext:distinctLiterals ?distinctLiterals .
|
|
7
|
+
}
|
|
8
|
+
WHERE {
|
|
9
|
+
SELECT (COUNT(DISTINCT ?o) AS ?distinctLiterals) {
|
|
10
|
+
#subjectFilter#
|
|
11
|
+
?s ?p ?o .
|
|
12
|
+
FILTER(ISLITERAL(?o))
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,25 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
|
|
3
|
+
CONSTRUCT {
|
|
4
|
+
?linkset a void:Linkset ;
|
|
5
|
+
void:subjectsTarget ?dataset ;
|
|
6
|
+
void:objectsTarget ?prefix ;
|
|
7
|
+
void:triples ?count .
|
|
8
|
+
}
|
|
9
|
+
WHERE {
|
|
10
|
+
{
|
|
11
|
+
SELECT ?prefix (SUM(?ocount) AS ?count) {
|
|
12
|
+
{
|
|
13
|
+
SELECT ?o (COUNT(*) AS ?ocount) {
|
|
14
|
+
#subjectFilter#
|
|
15
|
+
?s ?p ?o .
|
|
16
|
+
FILTER(ISIRI(?o))
|
|
17
|
+
}
|
|
18
|
+
GROUP BY ?o
|
|
19
|
+
}
|
|
20
|
+
BIND(REPLACE(STR(?o), "([^/]+$)", "") AS ?prefix)
|
|
21
|
+
}
|
|
22
|
+
GROUP BY ?prefix ORDER BY DESC(?count) LIMIT 1000
|
|
23
|
+
}
|
|
24
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#linkset-", MD5(CONCAT(STR(?dataset), ?prefix)))) AS ?linkset)
|
|
25
|
+
}
|
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
PREFIX void-ext: <http://ldf.fi/void-ext#>
|
|
3
|
+
|
|
4
|
+
CONSTRUCT {
|
|
5
|
+
?dataset a void:Dataset ;
|
|
6
|
+
void-ext:distinctIRIReferenceObjects ?distinctIRIReferenceObjects .
|
|
7
|
+
}
|
|
8
|
+
WHERE {
|
|
9
|
+
SELECT (COUNT(DISTINCT ?o) AS ?distinctIRIReferenceObjects) {
|
|
10
|
+
#subjectFilter#
|
|
11
|
+
?s ?p ?o .
|
|
12
|
+
FILTER(ISIRI(?o))
|
|
13
|
+
}
|
|
14
|
+
}
|
|
@@ -0,0 +1,30 @@
|
|
|
1
|
+
PREFIX void: <http://rdfs.org/ns/void#>
|
|
2
|
+
|
|
3
|
+
CONSTRUCT {
|
|
4
|
+
?dataset a void:Dataset ;
|
|
5
|
+
void:subset ?subset .
|
|
6
|
+
?subset void:uriSpace ?uriSpace ;
|
|
7
|
+
void:entities ?count .
|
|
8
|
+
}
|
|
9
|
+
WHERE {
|
|
10
|
+
{
|
|
11
|
+
# Pre-group distinct subjects before computing namespace to avoid regex on every triple.
|
|
12
|
+
SELECT ?uriSpace (COUNT(*) AS ?count) {
|
|
13
|
+
{
|
|
14
|
+
SELECT DISTINCT ?s {
|
|
15
|
+
#subjectFilter#
|
|
16
|
+
?s ?p ?o .
|
|
17
|
+
FILTER(ISIRI(?s))
|
|
18
|
+
}
|
|
19
|
+
}
|
|
20
|
+
# Extract namespace by removing the local name (everything after the last / or #)
|
|
21
|
+
BIND(REPLACE(STR(?s), "[^/#]+$", "") AS ?uriSpace)
|
|
22
|
+
}
|
|
23
|
+
GROUP BY ?uriSpace
|
|
24
|
+
ORDER BY DESC(?count)
|
|
25
|
+
LIMIT 10
|
|
26
|
+
}
|
|
27
|
+
FILTER(?count > 1) # Only include namespaces with more than 1 entity
|
|
28
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#subject-uri-space-", MD5(?uriSpace))) AS ?subset)
|
|
29
|
+
}
|
|
30
|
+
|
|
@@ -1,5 +0,0 @@
|
|
|
1
|
-
import { Stage } from '@lde/pipeline';
|
|
2
|
-
export declare function createDatatypeStage(): Promise<Stage>;
|
|
3
|
-
export declare function createLanguageStage(): Promise<Stage>;
|
|
4
|
-
export declare function createObjectClassStage(): Promise<Stage>;
|
|
5
|
-
//# sourceMappingURL=perClassAnalyzer.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"perClassAnalyzer.d.ts","sourceRoot":"","sources":["../src/perClassAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAKN,MAAM,eAAe,CAAC;AA4CvB,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAED,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAED,wBAAgB,sBAAsB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEvD"}
|
package/dist/perClassAnalyzer.js
DELETED
|
@@ -1,44 +0,0 @@
|
|
|
1
|
-
import { Stage, SparqlItemSelector, SparqlConstructExecutor, readQueryFile, } from '@lde/pipeline';
|
|
2
|
-
import { resolve, dirname } from 'node:path';
|
|
3
|
-
import { fileURLToPath } from 'node:url';
|
|
4
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
5
|
-
/**
|
|
6
|
-
* Create a Stage that first selects classes from the endpoint,
|
|
7
|
-
* then runs a per-class CONSTRUCT query with `?class` bound via VALUES.
|
|
8
|
-
*/
|
|
9
|
-
async function createPerClassStage(queryFilename) {
|
|
10
|
-
const rawQuery = await readQueryFile(resolve(__dirname, 'queries', queryFilename));
|
|
11
|
-
const executor = new SparqlConstructExecutor({ query: rawQuery });
|
|
12
|
-
const itemSelector = {
|
|
13
|
-
select: (distribution) => {
|
|
14
|
-
const subjectFilter = distribution.subjectFilter ?? '';
|
|
15
|
-
const fromClause = distribution.namedGraph
|
|
16
|
-
? `FROM <${distribution.namedGraph}>`
|
|
17
|
-
: '';
|
|
18
|
-
const selectorQuery = [
|
|
19
|
-
'SELECT DISTINCT ?class',
|
|
20
|
-
fromClause,
|
|
21
|
-
`WHERE { ${subjectFilter} ?s a ?class . }`,
|
|
22
|
-
'LIMIT 1000',
|
|
23
|
-
].join('\n');
|
|
24
|
-
return new SparqlItemSelector({
|
|
25
|
-
query: selectorQuery,
|
|
26
|
-
pageSize: 1000,
|
|
27
|
-
}).select(distribution);
|
|
28
|
-
},
|
|
29
|
-
};
|
|
30
|
-
return new Stage({
|
|
31
|
-
name: queryFilename,
|
|
32
|
-
itemSelector,
|
|
33
|
-
executors: executor,
|
|
34
|
-
});
|
|
35
|
-
}
|
|
36
|
-
export function createDatatypeStage() {
|
|
37
|
-
return createPerClassStage('class-property-datatypes.rq');
|
|
38
|
-
}
|
|
39
|
-
export function createLanguageStage() {
|
|
40
|
-
return createPerClassStage('class-property-languages.rq');
|
|
41
|
-
}
|
|
42
|
-
export function createObjectClassStage() {
|
|
43
|
-
return createPerClassStage('class-property-object-classes.rq');
|
|
44
|
-
}
|
package/dist/provenance.d.ts
DELETED
|
@@ -1,23 +0,0 @@
|
|
|
1
|
-
import { Dataset, Distribution } from '@lde/dataset';
|
|
2
|
-
import { NotSupported, type Executor, type ExecuteOptions } from '@lde/pipeline';
|
|
3
|
-
import type { Quad } from '@rdfjs/types';
|
|
4
|
-
/**
|
|
5
|
-
* Executor decorator that passes through all quads from the inner executor
|
|
6
|
-
* and appends PROV-O provenance metadata.
|
|
7
|
-
*
|
|
8
|
-
* Timestamps are captured automatically: `startedAt` when `execute()` is
|
|
9
|
-
* called, `endedAt` when the inner quad stream is fully consumed.
|
|
10
|
-
*
|
|
11
|
-
* Appended quads:
|
|
12
|
-
* - `<dataset> a prov:Entity`
|
|
13
|
-
* - `<dataset> prov:wasGeneratedBy _:activity`
|
|
14
|
-
* - `_:activity a prov:Activity`
|
|
15
|
-
* - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
|
|
16
|
-
* - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
|
|
17
|
-
*/
|
|
18
|
-
export declare class ProvenanceExecutor implements Executor {
|
|
19
|
-
private readonly inner;
|
|
20
|
-
constructor(inner: Executor);
|
|
21
|
-
execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
|
|
22
|
-
}
|
|
23
|
-
//# sourceMappingURL=provenance.d.ts.map
|
package/dist/provenance.d.ts.map
DELETED
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,YAAY,EACZ,KAAK,QAAQ,EACb,KAAK,cAAc,EACpB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC;;;;;;;;;;;;;GAaG;AACH,qBAAa,kBAAmB,YAAW,QAAQ;IACrC,OAAO,CAAC,QAAQ,CAAC,KAAK;gBAAL,KAAK,EAAE,QAAQ;IAEtC,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC;CAQ/C"}
|
package/dist/provenance.js
DELETED
|
@@ -1,51 +0,0 @@
|
|
|
1
|
-
import { NotSupported, } from '@lde/pipeline';
|
|
2
|
-
import { DataFactory } from 'n3';
|
|
3
|
-
// RDF term factories taken from n3's DataFactory.
const { namedNode, literal, blankNode, quad } = DataFactory;

// rdf:type predicate.
const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');

// PROV-O classes.
const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
const PROV_ACTIVITY = namedNode('http://www.w3.org/ns/prov#Activity');

// PROV-O properties.
const PROV_WAS_GENERATED_BY = namedNode('http://www.w3.org/ns/prov#wasGeneratedBy');
const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime');
const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');

// Datatype used for the timestamp literals.
const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
|
|
11
|
-
/**
 * Decorates another executor: every quad produced by the wrapped executor is
 * forwarded unchanged, after which PROV-O provenance quads are emitted.
 *
 * The start timestamp is taken when `execute()` is invoked; the end timestamp
 * is taken once the wrapped executor's quad stream has been fully consumed.
 *
 * Quads emitted after the wrapped stream:
 * - `<dataset> a prov:Entity`
 * - `<dataset> prov:wasGeneratedBy _:activity`
 * - `_:activity a prov:Activity`
 * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
 * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
 */
export class ProvenanceExecutor {
  inner;

  /**
   * @param inner The executor whose output is passed through and annotated.
   */
  constructor(inner) {
    this.inner = inner;
  }

  /**
   * Runs the wrapped executor and, unless it reports `NotSupported`, returns
   * its quads followed by the provenance quads for `dataset`.
   *
   * @param dataset Dataset whose IRI becomes the subject of the provenance quads.
   * @param distribution Forwarded unchanged to the wrapped executor.
   * @param options Forwarded unchanged to the wrapped executor.
   * @returns The wrapped executor's `NotSupported` result as-is, or an async
   *   iterable of quads with provenance appended.
   */
  async execute(dataset, distribution, options) {
    // Capture the start time before delegating so it covers the inner call.
    const startedAt = new Date();
    const innerResult = await this.inner.execute(dataset, distribution, options);
    return innerResult instanceof NotSupported
      ? innerResult
      : withProvenance(innerResult, dataset.iri.toString(), startedAt);
  }
}
|
|
39
|
-
/**
 * Re-yields every quad from `quads`, then yields the PROV-O quads describing
 * the activity that generated the dataset identified by `iri`.
 *
 * @param quads Async iterable of quads to pass through first.
 * @param iri IRI of the dataset; used as the subject of the entity quads.
 * @param startedAt Start time recorded as `prov:startedAtTime`.
 */
async function* withProvenance(quads, iri, startedAt) {
  for await (const innerQuad of quads) {
    yield innerQuad;
  }

  // The end time is only known once the inner stream has been drained.
  const endedAt = new Date();
  const datasetNode = namedNode(iri);
  const activityNode = blankNode();
  const asDateTime = (date) => literal(date.toISOString(), XSD_DATE_TIME);

  const provenanceQuads = [
    quad(datasetNode, RDF_TYPE, PROV_ENTITY),
    quad(datasetNode, PROV_WAS_GENERATED_BY, activityNode),
    quad(activityNode, RDF_TYPE, PROV_ACTIVITY),
    quad(activityNode, PROV_STARTED_AT_TIME, asDateTime(startedAt)),
    quad(activityNode, PROV_ENDED_AT_TIME, asDateTime(endedAt)),
  ];
  for (const provenanceQuad of provenanceQuads) {
    yield provenanceQuad;
  }
}
|
|
@@ -1,9 +0,0 @@
|
|
|
1
|
-
import { Stage } from '@lde/pipeline';
|
|
2
|
-
/**
 * Builds a Stage that runs a SPARQL CONSTRUCT query stored in the queries directory.
 *
 * The executor takes care of the query's placeholders: `#subjectFilter#` is
 * handled at runtime, while `?dataset` and `FROM <graph>` are handled at the
 * AST level.
 *
 * @param filename Name of the query file inside the queries directory.
 * @returns A promise for the Stage.
 */
export declare function createQueryStage(filename: string): Promise<Stage>;
|
|
9
|
-
//# sourceMappingURL=sparqlQueryAnalyzer.d.ts.map
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
{"version":3,"file":"sparqlQueryAnalyzer.d.ts","sourceRoot":"","sources":["../src/sparqlQueryAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAA0C,MAAM,eAAe,CAAC;AAM9E;;;;;GAKG;AACH,wBAAsB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAKvE"}
|
|
@@ -1,15 +0,0 @@
|
|
|
1
|
-
import { Stage, SparqlConstructExecutor, readQueryFile } from '@lde/pipeline';
|
|
2
|
-
import { resolve, dirname } from 'node:path';
|
|
3
|
-
import { fileURLToPath } from 'node:url';
|
|
4
|
-
const __dirname = dirname(fileURLToPath(import.meta.url));
|
|
5
|
-
/**
 * Builds a Stage that runs a SPARQL CONSTRUCT query stored in the queries directory.
 *
 * The executor takes care of the query's placeholders: `#subjectFilter#` is
 * handled at runtime, while `?dataset` and `FROM <graph>` are handled at the
 * AST level.
 *
 * @param filename Name of the query file inside the `queries` directory next
 *   to this module; it is also used as the stage name.
 * @returns A promise for the Stage.
 */
export async function createQueryStage(filename) {
  const queryPath = resolve(__dirname, 'queries', filename);
  const query = await readQueryFile(queryPath);
  return new Stage({
    name: filename,
    executors: new SparqlConstructExecutor({ query }),
  });
}
|