@lde/pipeline-void 0.2.37 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,71 +1,67 @@
1
- # Pipeline VOiD
1
+ # Pipeline VoID
2
2
 
3
- VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets.
3
+ Extensions to [@lde/pipeline](../pipeline) for VoID (Vocabulary of Interlinked Datasets) statistical analysis of RDF datasets.
4
4
 
5
- ## Query stages
5
+ ## Stage factories
6
6
 
7
- - `createQueryStage(filename, distribution)` Create a `Stage` from a SPARQL CONSTRUCT query file
8
- - `createDatatypeStage(distribution)` — Per-class datatype partitions
9
- - `createLanguageStage(distribution)` — Per-class language tags
10
- - `createObjectClassStage(distribution)` — Per-class object class partitions
7
+ ### Global stages (one CONSTRUCT query per dataset):
11
8
 
12
- ## Executor decorators
9
+ | Factory | Query |
10
+ | -------------------------------------- | ------------------------------------------------------------------------------------------------------------------ |
11
+ | `createClassPartitionStage()` | [`class-partition.rq`](src/queries/class-partition.rq) — Classes with entity counts |
12
+ | `createClassPropertiesSubjectsStage()` | [`class-properties-subjects.rq`](src/queries/class-properties-subjects.rq) — Properties per class (subject counts) |
13
+ | `createClassPropertiesObjectsStage()` | [`class-properties-objects.rq`](src/queries/class-properties-objects.rq) — Properties per class (object counts) |
14
+ | `createDatatypesStage()` | [`datatypes.rq`](src/queries/datatypes.rq) — Dataset-level datatypes |
15
+ | `createLicensesStage()` | [`licenses.rq`](src/queries/licenses.rq) — License detection |
16
+ | `createObjectLiteralsStage()` | [`object-literals.rq`](src/queries/object-literals.rq) — Literal object counts |
17
+ | `createObjectUrisStage()` | [`object-uris.rq`](src/queries/object-uris.rq) — URI object counts |
18
+ | `createPropertiesStage()` | [`properties.rq`](src/queries/properties.rq) — Distinct properties |
19
+ | `createSubjectsStage()` | [`subjects.rq`](src/queries/subjects.rq) — Distinct subjects |
20
+ | `createSubjectUriSpaceStage()` | [`subject-uri-space.rq`](src/queries/subject-uri-space.rq) — Subject URI namespaces |
21
+ | `createTriplesStage()` | [`triples.rq`](src/queries/triples.rq) — Total triple count |
22
+
23
+ ### Per-class stages (iterated with a class selector):
24
+
25
+ | Factory | Query |
26
+ | ---------------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
27
+ | `createPerClassDatatypeStage()` | [`class-property-datatypes.rq`](src/queries/class-property-datatypes.rq) — Per-class datatype partitions |
28
+ | `createPerClassLanguageStage()` | [`class-property-languages.rq`](src/queries/class-property-languages.rq) — Per-class language tags |
29
+ | `createPerClassObjectClassStage()` | [`class-property-object-classes.rq`](src/queries/class-property-object-classes.rq) — Per-class object class partitions |
13
30
 
14
- - `VocabularyExecutor` — Wraps an executor; detects and appends `void:vocabulary` triples
15
- - `ProvenanceExecutor` — Wraps an executor; appends PROV-O provenance metadata with automatic timing
31
+ ### Domain-specific stages:
16
32
 
17
- ## SPARQL Queries
33
+ | Factory | Description |
34
+ | -------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
35
+ | `createUriSpaceStage(uriSpaces)` | [`object-uri-space.rq`](src/queries/object-uri-space.rq) — Object URI namespace linksets, aggregated against a provided URI space map |
36
+ | `createVocabularyStage()` | [`entity-properties.rq`](src/queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection |
18
37
 
19
- Generic VOiD analysis queries included:
38
+ All factories return `Promise<Stage>`.
39
+
40
+ ## Executor decorators
20
41
 
21
- | Query | Description |
22
- | ---------------------------------- | ------------------------------------- |
23
- | `triples.rq` | Total triple count |
24
- | `subjects.rq` | Distinct subjects |
25
- | `properties.rq` | Distinct properties |
26
- | `class-partition.rq` | Classes with entity counts |
27
- | `class-properties-subjects.rq` | Properties per class (subject counts) |
28
- | `class-properties-objects.rq` | Properties per class (object counts) |
29
- | `class-property-datatypes.rq` | Per-class datatype partitions |
30
- | `class-property-languages.rq` | Per-class language tags |
31
- | `class-property-object-classes.rq` | Per-class object class partitions |
32
- | `object-literals.rq` | Literal object counts |
33
- | `object-uris.rq` | URI object counts |
34
- | `object-uri-space.rq` | Object URI namespaces |
35
- | `subject-uri-space.rq` | Subject URI namespaces |
36
- | `datatypes.rq` | Dataset-level datatypes |
37
- | `entity-properties.rq` | Property statistics |
38
- | `licenses.rq` | License detection |
42
+ - `VocabularyExecutor` — Wraps an executor; detects known vocabulary namespace prefixes in `void:property` quads and appends `void:vocabulary` triples.
43
+ - `UriSpaceExecutor` — Wraps an executor; consumes `void:Linkset` quads, matches `void:objectsTarget` against configured URI spaces, and emits aggregated linksets.
39
44
 
40
45
  ## Usage
41
46
 
42
47
  ```typescript
43
48
  import {
44
- createQueryStage,
45
- createDatatypeStage,
46
- VocabularyExecutor,
47
- Stage,
49
+ createTriplesStage,
50
+ createClassPartitionStage,
51
+ createVocabularyStage,
48
52
  } from '@lde/pipeline-void';
49
- import { SparqlConstructExecutor } from '@lde/pipeline';
50
-
51
- // Simple CONSTRUCT query stage
52
- const stage = await createQueryStage('triples.rq');
53
- await stage.run(dataset, distribution, writer);
54
-
55
- // Executor decorator: vocabulary detection wraps entity-properties executor
56
- const executor = await SparqlConstructExecutor.fromFile(
57
- 'queries/entity-properties.rq',
58
- );
59
- const entityPropertiesStage = new Stage({
60
- name: 'entity-properties',
61
- executors: new VocabularyExecutor(executor),
62
- });
63
- ```
64
-
65
- ## Validation
53
+ import { Pipeline, SparqlUpdateWriter, provenancePlugin } from '@lde/pipeline';
66
54
 
67
- ```sh
68
- npx nx test pipeline-void
69
- npx nx lint pipeline-void
70
- npx nx typecheck pipeline-void
55
+ await new Pipeline({
56
+ datasetSelector: selector,
57
+ stages: [
58
+ createTriplesStage(),
59
+ createClassPartitionStage(),
60
+ createVocabularyStage(),
61
+ ],
62
+ plugins: [provenancePlugin()],
63
+ writers: new SparqlUpdateWriter({
64
+ endpoint: new URL('http://localhost:7200/repositories/lde/statements'),
65
+ }),
66
+ }).run();
71
67
  ```
package/dist/index.d.ts CHANGED
@@ -1,7 +1,5 @@
1
1
  export { Stage, NotSupported } from '@lde/pipeline';
2
- export * from './sparqlQueryAnalyzer.js';
3
- export * from './perClassAnalyzer.js';
2
+ export * from './stage.js';
4
3
  export * from './vocabularyAnalyzer.js';
5
- export * from './provenance.js';
6
4
  export * from './uriSpaceExecutor.js';
7
5
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AACpD,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,iBAAiB,CAAC;AAChC,cAAc,uBAAuB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AACpD,cAAc,YAAY,CAAC;AAC3B,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC"}
package/dist/index.js CHANGED
@@ -1,6 +1,4 @@
1
1
  export { Stage, NotSupported } from '@lde/pipeline';
2
- export * from './sparqlQueryAnalyzer.js';
3
- export * from './perClassAnalyzer.js';
2
+ export * from './stage.js';
4
3
  export * from './vocabularyAnalyzer.js';
5
- export * from './provenance.js';
6
4
  export * from './uriSpaceExecutor.js';
@@ -0,0 +1,19 @@
1
+ import { Stage } from '@lde/pipeline';
2
+ import type { Quad } from '@rdfjs/types';
3
+ export declare function createSubjectUriSpaceStage(): Promise<Stage>;
4
+ export declare function createClassPartitionStage(): Promise<Stage>;
5
+ export declare function createObjectLiteralsStage(): Promise<Stage>;
6
+ export declare function createObjectUrisStage(): Promise<Stage>;
7
+ export declare function createPropertiesStage(): Promise<Stage>;
8
+ export declare function createSubjectsStage(): Promise<Stage>;
9
+ export declare function createTriplesStage(): Promise<Stage>;
10
+ export declare function createClassPropertiesSubjectsStage(): Promise<Stage>;
11
+ export declare function createClassPropertiesObjectsStage(): Promise<Stage>;
12
+ export declare function createDatatypesStage(): Promise<Stage>;
13
+ export declare function createLicensesStage(): Promise<Stage>;
14
+ export declare function createPerClassObjectClassStage(): Promise<Stage>;
15
+ export declare function createPerClassDatatypeStage(): Promise<Stage>;
16
+ export declare function createPerClassLanguageStage(): Promise<Stage>;
17
+ export declare function createUriSpaceStage(uriSpaces: ReadonlyMap<string, readonly Quad[]>): Promise<Stage>;
18
+ export declare function createVocabularyStage(): Promise<Stage>;
19
+ //# sourceMappingURL=stage.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAMN,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAqDzC,wBAAgB,0BAA0B,IAAI,OAAO,CAAC,KAAK,CAAC,CAE3D;AAED,wBAAgB,yBAAyB,IAAI,OAAO,CAAC,KAAK,CAAC,CAE1D;AAED,wBAAgB,yBAAyB,IAAI,OAAO,CAAC,KAAK,CAAC,CAE1D;AAED,wBAAgB,qBAAqB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEtD;AAED,wBAAgB,qBAAqB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEtD;AAED,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAED,wBAAgB,kBAAkB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEnD;AAED,wBAAgB,kCAAkC,IAAI,OAAO,CAAC,KAAK,CAAC,CAEnE;AAED,wBAAgB,iCAAiC,IAAI,OAAO,CAAC,KAAK,CAAC,CAElE;AAED,wBAAgB,oBAAoB,IAAI,OAAO,CAAC,KAAK,CAAC,CAErD;AAED,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAID,wBAAgB,8BAA8B,IAAI,OAAO,CAAC,KAAK,CAAC,CAI/D;AAED,wBAAgB,2BAA2B,IAAI,OAAO,CAAC,KAAK,CAAC,CAI5D;AAED,wBAAgB,2BAA2B,IAAI,OAAO,CAAC,KAAK,CAAC,CAI5D;AAID,wBAAgB,mBAAmB,CACjC,SAAS,EAAE,WAAW,CAAC,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC,GAC9C,OAAO,CAAC,KAAK,CAAC,CAKhB;AAED,wBAAgB,qBAAqB,IAAI,OAAO,CAAC,KAAK,CAAC,CAKtD"}
package/dist/stage.js ADDED
@@ -0,0 +1,99 @@
1
+ import { Stage, SparqlConstructExecutor, SparqlItemSelector, readQueryFile, } from '@lde/pipeline';
2
+ import { resolve, dirname } from 'node:path';
3
+ import { fileURLToPath } from 'node:url';
4
+ import { VocabularyExecutor } from './vocabularyAnalyzer.js';
5
+ import { UriSpaceExecutor } from './uriSpaceExecutor.js';
6
+ const __dirname = dirname(fileURLToPath(import.meta.url));
7
+ async function createVoidStage(filename, options) {
8
+ const query = await readQueryFile(resolve(__dirname, 'queries', filename));
9
+ const executor = options?.executor?.(query) ?? new SparqlConstructExecutor({ query });
10
+ if (options?.selection === 'perClass') {
11
+ return new Stage({
12
+ name: filename,
13
+ itemSelector: classSelector(),
14
+ executors: executor,
15
+ });
16
+ }
17
+ return new Stage({ name: filename, executors: executor });
18
+ }
19
+ function classSelector() {
20
+ return {
21
+ select: (distribution) => {
22
+ const subjectFilter = distribution.subjectFilter ?? '';
23
+ const fromClause = distribution.namedGraph
24
+ ? `FROM <${distribution.namedGraph}>`
25
+ : '';
26
+ const selectorQuery = [
27
+ 'SELECT DISTINCT ?class',
28
+ fromClause,
29
+ `WHERE { ${subjectFilter} ?s a ?class . }`,
30
+ 'LIMIT 1000',
31
+ ].join('\n');
32
+ return new SparqlItemSelector({
33
+ query: selectorQuery,
34
+ pageSize: 1000,
35
+ }).select(distribution);
36
+ },
37
+ };
38
+ }
39
+ // Global stages
40
+ export function createSubjectUriSpaceStage() {
41
+ return createVoidStage('subject-uri-space.rq');
42
+ }
43
+ export function createClassPartitionStage() {
44
+ return createVoidStage('class-partition.rq');
45
+ }
46
+ export function createObjectLiteralsStage() {
47
+ return createVoidStage('object-literals.rq');
48
+ }
49
+ export function createObjectUrisStage() {
50
+ return createVoidStage('object-uris.rq');
51
+ }
52
+ export function createPropertiesStage() {
53
+ return createVoidStage('properties.rq');
54
+ }
55
+ export function createSubjectsStage() {
56
+ return createVoidStage('subjects.rq');
57
+ }
58
+ export function createTriplesStage() {
59
+ return createVoidStage('triples.rq');
60
+ }
61
+ export function createClassPropertiesSubjectsStage() {
62
+ return createVoidStage('class-properties-subjects.rq');
63
+ }
64
+ export function createClassPropertiesObjectsStage() {
65
+ return createVoidStage('class-properties-objects.rq');
66
+ }
67
+ export function createDatatypesStage() {
68
+ return createVoidStage('datatypes.rq');
69
+ }
70
+ export function createLicensesStage() {
71
+ return createVoidStage('licenses.rq');
72
+ }
73
+ // Per-class stages
74
+ export function createPerClassObjectClassStage() {
75
+ return createVoidStage('class-property-object-classes.rq', {
76
+ selection: 'perClass',
77
+ });
78
+ }
79
+ export function createPerClassDatatypeStage() {
80
+ return createVoidStage('class-property-datatypes.rq', {
81
+ selection: 'perClass',
82
+ });
83
+ }
84
+ export function createPerClassLanguageStage() {
85
+ return createVoidStage('class-property-languages.rq', {
86
+ selection: 'perClass',
87
+ });
88
+ }
89
+ // Domain-specific executor stages
90
+ export function createUriSpaceStage(uriSpaces) {
91
+ return createVoidStage('object-uri-space.rq', {
92
+ executor: (query) => new UriSpaceExecutor(new SparqlConstructExecutor({ query }), uriSpaces),
93
+ });
94
+ }
95
+ export function createVocabularyStage() {
96
+ return createVoidStage('entity-properties.rq', {
97
+ executor: (query) => new VocabularyExecutor(new SparqlConstructExecutor({ query })),
98
+ });
99
+ }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline-void",
3
- "version": "0.2.37",
3
+ "version": "0.3.0",
4
4
  "description": "VoID (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets",
5
5
  "repository": {
6
6
  "url": "https://github.com/ldengine/lde",
@@ -21,11 +21,12 @@
21
21
  "types": "./dist/index.d.ts",
22
22
  "files": [
23
23
  "dist",
24
+ "src/queries",
24
25
  "!**/*.tsbuildinfo"
25
26
  ],
26
27
  "dependencies": {
27
28
  "@lde/dataset": "0.6.10",
28
- "@lde/pipeline": "0.6.32",
29
+ "@lde/pipeline": "0.7.0",
29
30
  "@rdfjs/types": "^2.0.1",
30
31
  "@zazuko/prefixes": "^2.6.1",
31
32
  "n3": "^1.17.0",
@@ -0,0 +1,19 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset a void:Dataset ;
5
+ void:classPartition ?classPartition .
6
+ ?classPartition void:class ?type ;
7
+ void:entities ?entities .
8
+ }
9
+ WHERE {
10
+ {
11
+ SELECT (COUNT(?type) AS ?entities) ?type {
12
+ #subjectFilter#
13
+ ?s a ?type .
14
+ }
15
+ GROUP BY ?type
16
+ }
17
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-", MD5(STR(?type)))) AS ?classPartition)
18
+ }
19
+ LIMIT 10000
@@ -0,0 +1,17 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?propertyPartition void:distinctObjects ?objects .
5
+ }
6
+ WHERE {
7
+ # Object counts only. Subject counts in class-properties-subjects.rq.
8
+ {
9
+ SELECT ?type ?p (COUNT(DISTINCT ?o) AS ?objects) {
10
+ #subjectFilter#
11
+ ?s a ?type ; ?p ?o .
12
+ }
13
+ GROUP BY ?type ?p
14
+ }
15
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?type), STR(?p))))) AS ?propertyPartition)
16
+ }
17
+ LIMIT 100000
@@ -0,0 +1,23 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset a void:Dataset ;
5
+ void:classPartition ?classPartition .
6
+ ?classPartition void:class ?type ;
7
+ void:propertyPartition ?propertyPartition .
8
+ ?propertyPartition void:property ?p ;
9
+ void:entities ?subjects .
10
+ }
11
+ WHERE {
12
+ # Subject counts only. Object counts in class-properties-objects.rq.
13
+ {
14
+ SELECT ?type ?p (COUNT(DISTINCT ?s) AS ?subjects) {
15
+ #subjectFilter#
16
+ ?s a ?type ; ?p [] .
17
+ }
18
+ GROUP BY ?type ?p
19
+ }
20
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-", MD5(STR(?type)))) AS ?classPartition)
21
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?type), STR(?p))))) AS ?propertyPartition)
22
+ }
23
+ LIMIT 100000
@@ -0,0 +1,28 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+ PREFIX void-ext: <http://ldf.fi/void-ext#>
3
+
4
+ CONSTRUCT {
5
+ ?propertyPartition void-ext:datatypePartition ?datatypePartition .
6
+ ?datatypePartition
7
+ void-ext:datatype ?dt ;
8
+ void:triples ?count .
9
+ }
10
+ WHERE {
11
+ {
12
+ SELECT ?p ?dt (COUNT(*) AS ?count) {
13
+ {
14
+ SELECT ?p ?o {
15
+ #subjectFilter#
16
+ ?s a ?class ;
17
+ ?p ?o .
18
+ FILTER (ISLITERAL(?o))
19
+ }
20
+ }
21
+ BIND(DATATYPE(?o) AS ?dt)
22
+ }
23
+ GROUP BY ?p ?dt
24
+ }
25
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
26
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#datatype-", MD5(CONCAT(STR(?class), STR(?p), STR(?dt))))) AS ?datatypePartition)
27
+ }
28
+ LIMIT 100000
@@ -0,0 +1,30 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+ PREFIX void-ext: <http://ldf.fi/void-ext#>
3
+
4
+ CONSTRUCT {
5
+ ?propertyPartition void-ext:languagePartition ?languagePartition .
6
+ ?languagePartition
7
+ void-ext:language ?lang ;
8
+ void:triples ?count .
9
+ }
10
+ WHERE {
11
+ {
12
+ SELECT ?p ?lang (COUNT(*) AS ?count) {
13
+ {
14
+ # Pre-filter to distinct literals to minimize LANG calls
15
+ SELECT DISTINCT ?p ?o {
16
+ #subjectFilter#
17
+ ?s a ?class ;
18
+ ?p ?o .
19
+ FILTER(ISLITERAL(?o))
20
+ }
21
+ }
22
+ BIND(LANG(?o) AS ?lang)
23
+ FILTER(?lang != "")
24
+ }
25
+ GROUP BY ?p ?lang
26
+ }
27
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
28
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#language-", MD5(CONCAT(STR(?class), STR(?p), ?lang)))) AS ?languagePartition)
29
+ }
30
+ LIMIT 100000
@@ -0,0 +1,28 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+ PREFIX void-ext: <http://ldf.fi/void-ext#>
3
+
4
+ CONSTRUCT {
5
+ ?propertyPartition void-ext:objectClassPartition ?objectClassPartition .
6
+ ?objectClassPartition
7
+ void:class ?objectClass ;
8
+ void:triples ?count .
9
+ }
10
+ WHERE {
11
+ {
12
+ SELECT ?p ?objectClass (COUNT(*) AS ?count) {
13
+ {
14
+ SELECT ?p ?o {
15
+ #subjectFilter#
16
+ ?s a ?class ;
17
+ ?p ?o .
18
+ FILTER (ISIRI(?o))
19
+ }
20
+ }
21
+ ?o a ?objectClass .
22
+ }
23
+ GROUP BY ?p ?objectClass
24
+ }
25
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
26
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#object-class-", MD5(CONCAT(STR(?class), STR(?p), STR(?objectClass))))) AS ?objectClassPartition)
27
+ }
28
+ LIMIT 100000
@@ -0,0 +1,37 @@
1
+ PREFIX void-ext: <http://ldf.fi/void-ext#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset void-ext:datatypes ?count ;
5
+ void-ext:datatype ?datatype .
6
+ }
7
+ WHERE {
8
+ {
9
+ # Count distinct datatypes
10
+ SELECT (COUNT(DISTINCT ?dt) AS ?count) WHERE {
11
+ {
12
+ # Pre-filter to distinct literals to minimize DATATYPE calls
13
+ SELECT DISTINCT ?o WHERE {
14
+ #subjectFilter#
15
+ ?s ?p ?o .
16
+ FILTER(ISLITERAL(?o))
17
+ }
18
+ }
19
+ BIND(DATATYPE(?o) AS ?dt)
20
+ }
21
+ }
22
+ {
23
+ # Get distinct datatypes
24
+ SELECT DISTINCT ?datatype WHERE {
25
+ {
26
+ # Pre-filter to distinct literals to minimize DATATYPE calls
27
+ SELECT DISTINCT ?o WHERE {
28
+ #subjectFilter#
29
+ ?s ?p ?o .
30
+ FILTER(ISLITERAL(?o))
31
+ }
32
+ }
33
+ BIND(DATATYPE(?o) AS ?datatype)
34
+ }
35
+ }
36
+ }
37
+ LIMIT 1000
@@ -0,0 +1,20 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset a void:Dataset ;
5
+ void:propertyPartition ?propertyPartition .
6
+ ?propertyPartition
7
+ void:property ?p ;
8
+ void:entities ?subjects ;
9
+ void:distinctObjects ?objects .
10
+ }
11
+ WHERE {
12
+ {
13
+ SELECT (COUNT(DISTINCT ?s) AS ?subjects) (COUNT(DISTINCT ?o) as ?objects) ?p {
14
+ #subjectFilter#
15
+ ?s ?p ?o .
16
+ }
17
+ GROUP BY ?p
18
+ }
19
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#property-partition-", MD5(STR(?p)))) AS ?propertyPartition)
20
+ }
@@ -0,0 +1,24 @@
1
+ PREFIX schema: <http://schema.org/>
2
+ PREFIX dcterms: <http://purl.org/dc/terms/>
3
+ PREFIX dc: <http://purl.org/dc/elements/1.1/>
4
+ PREFIX rico: <https://www.ica.org/standards/RiC/ontology#>
5
+ PREFIX void: <http://rdfs.org/ns/void#>
6
+
7
+ CONSTRUCT {
8
+ ?dataset a void:Dataset ;
9
+ void:subset ?licenseSubset .
10
+ ?licenseSubset
11
+ dcterms:license ?license ;
12
+ void:triples ?count .
13
+ }
14
+ WHERE {
15
+ {
16
+ SELECT (IRI(?l) AS ?license) (COUNT(*) AS ?count) {
17
+ #subjectFilter#
18
+ ?s ?p ?l .
19
+ VALUES ?p { schema:license <https://schema.org/license> dc:license rico:conditionsOfUse }
20
+ }
21
+ GROUP BY ?l
22
+ }
23
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#license-", MD5(STR(?license)))) AS ?licenseSubset)
24
+ }
@@ -0,0 +1,14 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+ PREFIX void-ext: <http://ldf.fi/void-ext#>
3
+
4
+ CONSTRUCT {
5
+ ?dataset a void:Dataset ;
6
+ void-ext:distinctLiterals ?distinctLiterals .
7
+ }
8
+ WHERE {
9
+ SELECT (COUNT(DISTINCT ?o) AS ?distinctLiterals) {
10
+ #subjectFilter#
11
+ ?s ?p ?o .
12
+ FILTER(ISLITERAL(?o))
13
+ }
14
+ }
@@ -0,0 +1,25 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?linkset a void:Linkset ;
5
+ void:subjectsTarget ?dataset ;
6
+ void:objectsTarget ?prefix ;
7
+ void:triples ?count .
8
+ }
9
+ WHERE {
10
+ {
11
+ SELECT ?prefix (SUM(?ocount) AS ?count) {
12
+ {
13
+ SELECT ?o (COUNT(*) AS ?ocount) {
14
+ #subjectFilter#
15
+ ?s ?p ?o .
16
+ FILTER(ISIRI(?o))
17
+ }
18
+ GROUP BY ?o
19
+ }
20
+ BIND(REPLACE(STR(?o), "([^/]+$)", "") AS ?prefix)
21
+ }
22
+ GROUP BY ?prefix ORDER BY DESC(?count) LIMIT 1000
23
+ }
24
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#linkset-", MD5(CONCAT(STR(?dataset), ?prefix)))) AS ?linkset)
25
+ }
@@ -0,0 +1,14 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+ PREFIX void-ext: <http://ldf.fi/void-ext#>
3
+
4
+ CONSTRUCT {
5
+ ?dataset a void:Dataset ;
6
+ void-ext:distinctIRIReferenceObjects ?distinctIRIReferenceObjects .
7
+ }
8
+ WHERE {
9
+ SELECT (COUNT(DISTINCT ?o) AS ?distinctIRIReferenceObjects) {
10
+ #subjectFilter#
11
+ ?s ?p ?o .
12
+ FILTER(ISIRI(?o))
13
+ }
14
+ }
@@ -0,0 +1,12 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset a void:Dataset ;
5
+ void:properties ?count .
6
+ }
7
+ WHERE {
8
+ SELECT (COUNT(DISTINCT ?p) as ?count) {
9
+ #subjectFilter#
10
+ ?s ?p ?o
11
+ }
12
+ }
@@ -0,0 +1,30 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset a void:Dataset ;
5
+ void:subset ?subset .
6
+ ?subset void:uriSpace ?uriSpace ;
7
+ void:entities ?count .
8
+ }
9
+ WHERE {
10
+ {
11
+ # Pre-group distinct subjects before computing namespace to avoid regex on every triple.
12
+ SELECT ?uriSpace (COUNT(*) AS ?count) {
13
+ {
14
+ SELECT DISTINCT ?s {
15
+ #subjectFilter#
16
+ ?s ?p ?o .
17
+ FILTER(ISIRI(?s))
18
+ }
19
+ }
20
+ # Extract namespace by removing the local name (everything after the last / or #)
21
+ BIND(REPLACE(STR(?s), "[^/#]+$", "") AS ?uriSpace)
22
+ }
23
+ GROUP BY ?uriSpace
24
+ ORDER BY DESC(?count)
25
+ LIMIT 10
26
+ }
27
+ FILTER(?count > 1) # Only include namespaces with more than 1 entity
28
+ BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#subject-uri-space-", MD5(?uriSpace))) AS ?subset)
29
+ }
30
+
@@ -0,0 +1,13 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset a void:Dataset ;
5
+ void:distinctSubjects ?count .
6
+ }
7
+ WHERE {
8
+ SELECT (COUNT(DISTINCT ?s) as ?count) {
9
+ #subjectFilter#
10
+ ?s ?p ?o .
11
+ FILTER(!ISBLANK(?s))
12
+ }
13
+ }
@@ -0,0 +1,12 @@
1
+ PREFIX void: <http://rdfs.org/ns/void#>
2
+
3
+ CONSTRUCT {
4
+ ?dataset a void:Dataset ;
5
+ void:triples ?count .
6
+ }
7
+ WHERE {
8
+ SELECT (COUNT(*) as ?count) {
9
+ #subjectFilter#
10
+ ?s ?p ?o
11
+ }
12
+ }
@@ -1,5 +0,0 @@
1
- import { Stage } from '@lde/pipeline';
2
- export declare function createDatatypeStage(): Promise<Stage>;
3
- export declare function createLanguageStage(): Promise<Stage>;
4
- export declare function createObjectClassStage(): Promise<Stage>;
5
- //# sourceMappingURL=perClassAnalyzer.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"perClassAnalyzer.d.ts","sourceRoot":"","sources":["../src/perClassAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAKN,MAAM,eAAe,CAAC;AA4CvB,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAED,wBAAgB,mBAAmB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEpD;AAED,wBAAgB,sBAAsB,IAAI,OAAO,CAAC,KAAK,CAAC,CAEvD"}
@@ -1,44 +0,0 @@
1
- import { Stage, SparqlItemSelector, SparqlConstructExecutor, readQueryFile, } from '@lde/pipeline';
2
- import { resolve, dirname } from 'node:path';
3
- import { fileURLToPath } from 'node:url';
4
- const __dirname = dirname(fileURLToPath(import.meta.url));
5
- /**
6
- * Create a Stage that first selects classes from the endpoint,
7
- * then runs a per-class CONSTRUCT query with `?class` bound via VALUES.
8
- */
9
- async function createPerClassStage(queryFilename) {
10
- const rawQuery = await readQueryFile(resolve(__dirname, 'queries', queryFilename));
11
- const executor = new SparqlConstructExecutor({ query: rawQuery });
12
- const itemSelector = {
13
- select: (distribution) => {
14
- const subjectFilter = distribution.subjectFilter ?? '';
15
- const fromClause = distribution.namedGraph
16
- ? `FROM <${distribution.namedGraph}>`
17
- : '';
18
- const selectorQuery = [
19
- 'SELECT DISTINCT ?class',
20
- fromClause,
21
- `WHERE { ${subjectFilter} ?s a ?class . }`,
22
- 'LIMIT 1000',
23
- ].join('\n');
24
- return new SparqlItemSelector({
25
- query: selectorQuery,
26
- pageSize: 1000,
27
- }).select(distribution);
28
- },
29
- };
30
- return new Stage({
31
- name: queryFilename,
32
- itemSelector,
33
- executors: executor,
34
- });
35
- }
36
- export function createDatatypeStage() {
37
- return createPerClassStage('class-property-datatypes.rq');
38
- }
39
- export function createLanguageStage() {
40
- return createPerClassStage('class-property-languages.rq');
41
- }
42
- export function createObjectClassStage() {
43
- return createPerClassStage('class-property-object-classes.rq');
44
- }
@@ -1,23 +0,0 @@
1
- import { Dataset, Distribution } from '@lde/dataset';
2
- import { NotSupported, type Executor, type ExecuteOptions } from '@lde/pipeline';
3
- import type { Quad } from '@rdfjs/types';
4
- /**
5
- * Executor decorator that passes through all quads from the inner executor
6
- * and appends PROV-O provenance metadata.
7
- *
8
- * Timestamps are captured automatically: `startedAt` when `execute()` is
9
- * called, `endedAt` when the inner quad stream is fully consumed.
10
- *
11
- * Appended quads:
12
- * - `<dataset> a prov:Entity`
13
- * - `<dataset> prov:wasGeneratedBy _:activity`
14
- * - `_:activity a prov:Activity`
15
- * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
16
- * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
17
- */
18
- export declare class ProvenanceExecutor implements Executor {
19
- private readonly inner;
20
- constructor(inner: Executor);
21
- execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
22
- }
23
- //# sourceMappingURL=provenance.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,YAAY,EACZ,KAAK,QAAQ,EACb,KAAK,cAAc,EACpB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC;;;;;;;;;;;;;GAaG;AACH,qBAAa,kBAAmB,YAAW,QAAQ;IACrC,OAAO,CAAC,QAAQ,CAAC,KAAK;gBAAL,KAAK,EAAE,QAAQ;IAEtC,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC;CAQ/C"}
@@ -1,51 +0,0 @@
1
- import { NotSupported, } from '@lde/pipeline';
2
- import { DataFactory } from 'n3';
3
- const { namedNode, literal, blankNode, quad } = DataFactory;
4
- const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
5
- const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
6
- const PROV_ACTIVITY = namedNode('http://www.w3.org/ns/prov#Activity');
7
- const PROV_WAS_GENERATED_BY = namedNode('http://www.w3.org/ns/prov#wasGeneratedBy');
8
- const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime');
9
- const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
10
- const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
11
- /**
12
- * Executor decorator that passes through all quads from the inner executor
13
- * and appends PROV-O provenance metadata.
14
- *
15
- * Timestamps are captured automatically: `startedAt` when `execute()` is
16
- * called, `endedAt` when the inner quad stream is fully consumed.
17
- *
18
- * Appended quads:
19
- * - `<dataset> a prov:Entity`
20
- * - `<dataset> prov:wasGeneratedBy _:activity`
21
- * - `_:activity a prov:Activity`
22
- * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
23
- * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
24
- */
25
- export class ProvenanceExecutor {
26
- inner;
27
- constructor(inner) {
28
- this.inner = inner;
29
- }
30
- async execute(dataset, distribution, options) {
31
- const startedAt = new Date();
32
- const result = await this.inner.execute(dataset, distribution, options);
33
- if (result instanceof NotSupported) {
34
- return result;
35
- }
36
- return withProvenance(result, dataset.iri.toString(), startedAt);
37
- }
38
- }
39
- async function* withProvenance(quads, iri, startedAt) {
40
- for await (const q of quads) {
41
- yield q;
42
- }
43
- const endedAt = new Date();
44
- const subject = namedNode(iri);
45
- const activity = blankNode();
46
- yield quad(subject, RDF_TYPE, PROV_ENTITY);
47
- yield quad(subject, PROV_WAS_GENERATED_BY, activity);
48
- yield quad(activity, RDF_TYPE, PROV_ACTIVITY);
49
- yield quad(activity, PROV_STARTED_AT_TIME, literal(startedAt.toISOString(), XSD_DATE_TIME));
50
- yield quad(activity, PROV_ENDED_AT_TIME, literal(endedAt.toISOString(), XSD_DATE_TIME));
51
- }
@@ -1,9 +0,0 @@
1
- import { Stage } from '@lde/pipeline';
2
- /**
3
- * Create a Stage that executes a SPARQL CONSTRUCT query from the queries directory.
4
- *
5
- * `#subjectFilter#` is handled at runtime by the executor;
6
- * `?dataset` and `FROM <graph>` are handled at the AST level by the executor.
7
- */
8
- export declare function createQueryStage(filename: string): Promise<Stage>;
9
- //# sourceMappingURL=sparqlQueryAnalyzer.d.ts.map
@@ -1 +0,0 @@
1
- {"version":3,"file":"sparqlQueryAnalyzer.d.ts","sourceRoot":"","sources":["../src/sparqlQueryAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAA0C,MAAM,eAAe,CAAC;AAM9E;;;;;GAKG;AACH,wBAAsB,gBAAgB,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,KAAK,CAAC,CAKvE"}
@@ -1,15 +0,0 @@
1
- import { Stage, SparqlConstructExecutor, readQueryFile } from '@lde/pipeline';
2
- import { resolve, dirname } from 'node:path';
3
- import { fileURLToPath } from 'node:url';
4
- const __dirname = dirname(fileURLToPath(import.meta.url));
5
- /**
6
- * Create a Stage that executes a SPARQL CONSTRUCT query from the queries directory.
7
- *
8
- * `#subjectFilter#` is handled at runtime by the executor;
9
- * `?dataset` and `FROM <graph>` are handled at the AST level by the executor.
10
- */
11
- export async function createQueryStage(filename) {
12
- const rawQuery = await readQueryFile(resolve(__dirname, 'queries', filename));
13
- const executor = new SparqlConstructExecutor({ query: rawQuery });
14
- return new Stage({ name: filename, executors: executor });
15
- }