@lde/pipeline-void 0.19.1 → 0.21.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +54 -42
- package/dist/stage.d.ts +53 -16
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +94 -35
- package/package.json +2 -2
- package/queries/class-properties-objects.rq +9 -5
- package/queries/class-properties-subjects.rq +11 -7
package/README.md
CHANGED
|
@@ -4,60 +4,72 @@ Extensions to [@lde/pipeline](../pipeline) for VoID (Vocabulary of Interlinked D
|
|
|
4
4
|
|
|
5
5
|
## Stage factories
|
|
6
6
|
|
|
7
|
-
###
|
|
7
|
+
### `voidStages(options?)`
|
|
8
8
|
|
|
9
|
-
|
|
10
|
-
| ------------------------- | ------------------------------------------------------------------------------------------------------------------ |
|
|
11
|
-
| `classPartitions()` | [`class-partition.rq`](src/queries/class-partition.rq) — Classes with entity counts |
|
|
12
|
-
| `classPropertySubjects()` | [`class-properties-subjects.rq`](src/queries/class-properties-subjects.rq) — Properties per class (subject counts) |
|
|
13
|
-
| `classPropertyObjects()` | [`class-properties-objects.rq`](src/queries/class-properties-objects.rq) — Properties per class (object counts) |
|
|
14
|
-
| `countDatatypes()` | [`datatypes.rq`](src/queries/datatypes.rq) — Dataset-level datatypes |
|
|
15
|
-
| `countObjectLiterals()` | [`object-literals.rq`](src/queries/object-literals.rq) — Literal object counts |
|
|
16
|
-
| `countObjectUris()` | [`object-uris.rq`](src/queries/object-uris.rq) — URI object counts |
|
|
17
|
-
| `countProperties()` | [`properties.rq`](src/queries/properties.rq) — Distinct properties |
|
|
18
|
-
| `countSubjects()` | [`subjects.rq`](src/queries/subjects.rq) — Distinct subjects |
|
|
19
|
-
| `countTriples()` | [`triples.rq`](src/queries/triples.rq) — Total triple count |
|
|
20
|
-
| `detectLicenses()` | [`licenses.rq`](src/queries/licenses.rq) — License detection |
|
|
21
|
-
| `subjectUriSpaces()` | [`subject-uri-space.rq`](src/queries/subject-uri-space.rq) — Subject URI namespaces |
|
|
22
|
-
|
|
23
|
-
### Per-class stages (iterated with a class selector):
|
|
24
|
-
|
|
25
|
-
| Factory | Query |
|
|
26
|
-
| ------------------------- | ---------------------------------------------------------------------------------------------------------------------- |
|
|
27
|
-
| `perClassDatatypes()` | [`class-property-datatypes.rq`](src/queries/class-property-datatypes.rq) — Per-class datatype partitions |
|
|
28
|
-
| `perClassLanguages()` | [`class-property-languages.rq`](src/queries/class-property-languages.rq) — Per-class language tags |
|
|
29
|
-
| `perClassObjectClasses()` | [`class-property-object-classes.rq`](src/queries/class-property-object-classes.rq) — Per-class object class partitions |
|
|
30
|
-
|
|
31
|
-
### Domain-specific stages:
|
|
32
|
-
|
|
33
|
-
| Factory | Description |
|
|
34
|
-
| ---------------------- | ------------------------------------------------------------------------------------------------------------------------------------- |
|
|
35
|
-
| `detectVocabularies()` | [`entity-properties.rq`](src/queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection |
|
|
36
|
-
| `uriSpaces(uriSpaces)` | [`object-uri-space.rq`](src/queries/object-uri-space.rq) — Object URI namespace linksets, aggregated against a provided URI space map |
|
|
37
|
-
|
|
38
|
-
All factories return `Promise<Stage>`.
|
|
39
|
-
|
|
40
|
-
## Executor decorators
|
|
9
|
+
Returns a promise that resolves to all VoID stages in their recommended execution order. The ordering is optimised for cache warming: `classPartitions()` runs before the per-class stages, so the `?s a ?class` pattern is already cached on the SPARQL endpoint when the heavier per-class queries execute, which prevents 504 timeouts on cold caches.
|
|
41
10
|
|
|
42
|
-
|
|
43
|
-
- `UriSpaceExecutor` — Wraps an executor; consumes `void:Linkset` quads, matches `void:objectsTarget` against configured URI spaces, and emits aggregated linksets.
|
|
11
|
+
Accepts an optional `VoidStagesOptions` object:
|
|
44
12
|
|
|
45
|
-
|
|
13
|
+
| Option | Default | Description |
|
|
14
|
+
| ---------------- | ------- | --------------------------------------------------------------------- |
|
|
15
|
+
| `timeout` | 60 000 | SPARQL query timeout in milliseconds |
|
|
16
|
+
| `batchSize` | 10 | Maximum class bindings per executor call (per-class stages only) |
|
|
17
|
+
| `maxConcurrency` | 10 | Maximum concurrent in-flight executor batches (per-class stages only) |
|
|
18
|
+
| `perClass`       | —       | Set `false` to run the five per-class stages as monolithic queries    |
|
|
19
|
+
| `uriSpaces` | — | When provided, includes the object URI space stage |
|
|
46
20
|
|
|
47
21
|
```typescript
|
|
48
|
-
import {
|
|
49
|
-
countTriples,
|
|
50
|
-
classPartitions,
|
|
51
|
-
detectVocabularies,
|
|
52
|
-
} from '@lde/pipeline-void';
|
|
22
|
+
import { voidStages } from '@lde/pipeline-void';
|
|
53
23
|
import { Pipeline, SparqlUpdateWriter, provenancePlugin } from '@lde/pipeline';
|
|
54
24
|
|
|
25
|
+
const stages = await voidStages({ uriSpaces: uriSpaceMap });
|
|
26
|
+
|
|
55
27
|
await new Pipeline({
|
|
56
28
|
datasetSelector: selector,
|
|
57
|
-
stages
|
|
29
|
+
stages,
|
|
58
30
|
plugins: [provenancePlugin()],
|
|
59
31
|
writers: new SparqlUpdateWriter({
|
|
60
32
|
endpoint: new URL('http://localhost:7200/repositories/lde/statements'),
|
|
61
33
|
}),
|
|
62
34
|
}).run();
|
|
63
35
|
```
|
|
36
|
+
|
|
37
|
+
### Individual stage factories
|
|
38
|
+
|
|
39
|
+
All factories return `Promise<Stage>`. Global and domain-specific factories accept `VoidStageOptions` (`timeout`). Per-class factories accept `PerClassVoidStageOptions` (`timeout`, `batchSize`, `maxConcurrency`, `perClass`); `perClass` defaults to `true`, and setting it to `false` runs the stage as a single monolithic query instead of iterating per class.
|
|
40
|
+
|
|
41
|
+
#### Global stages (one CONSTRUCT query per dataset):
|
|
42
|
+
|
|
43
|
+
| Factory | Query |
|
|
44
|
+
| ----------------------- | ------------------------------------------------------------------------------- |
|
|
45
|
+
| `classPartitions()` | [`class-partition.rq`](queries/class-partition.rq) — Classes with entity counts |
|
|
46
|
+
| `countDatatypes()` | [`datatypes.rq`](queries/datatypes.rq) — Dataset-level datatypes |
|
|
47
|
+
| `countObjectLiterals()` | [`object-literals.rq`](queries/object-literals.rq) — Literal object counts |
|
|
48
|
+
| `countObjectUris()` | [`object-uris.rq`](queries/object-uris.rq) — URI object counts |
|
|
49
|
+
| `countProperties()` | [`properties.rq`](queries/properties.rq) — Distinct properties |
|
|
50
|
+
| `countSubjects()` | [`subjects.rq`](queries/subjects.rq) — Distinct subjects |
|
|
51
|
+
| `countTriples()` | [`triples.rq`](queries/triples.rq) — Total triple count |
|
|
52
|
+
| `detectLicenses()` | [`licenses.rq`](queries/licenses.rq) — License detection |
|
|
53
|
+
| `subjectUriSpaces()` | [`subject-uri-space.rq`](queries/subject-uri-space.rq) — Subject URI namespaces |
|
|
54
|
+
|
|
55
|
+
#### Per-class stages (iterated with a class selector):
|
|
56
|
+
|
|
57
|
+
| Factory | Query |
|
|
58
|
+
| ------------------------- | ------------------------------------------------------------------------------------------------------------------ |
|
|
59
|
+
| `classPropertySubjects()` | [`class-properties-subjects.rq`](queries/class-properties-subjects.rq) — Properties per class (subject counts) |
|
|
60
|
+
| `classPropertyObjects()` | [`class-properties-objects.rq`](queries/class-properties-objects.rq) — Properties per class (object counts) |
|
|
61
|
+
| `perClassDatatypes()` | [`class-property-datatypes.rq`](queries/class-property-datatypes.rq) — Per-class datatype partitions |
|
|
62
|
+
| `perClassLanguages()` | [`class-property-languages.rq`](queries/class-property-languages.rq) — Per-class language tags |
|
|
63
|
+
| `perClassObjectClasses()` | [`class-property-object-classes.rq`](queries/class-property-object-classes.rq) — Per-class object class partitions |
|
|
64
|
+
|
|
65
|
+
#### Domain-specific stages:
|
|
66
|
+
|
|
67
|
+
| Factory | Description |
|
|
68
|
+
| ------------------------ | --------------------------------------------------------------------------------------------------------------------------------- |
|
|
69
|
+
| `detectVocabularies()` | [`entity-properties.rq`](queries/entity-properties.rq) — Entity properties with automatic `void:vocabulary` detection |
|
|
70
|
+
| `uriSpaces(uriSpaceMap)` | [`object-uri-space.rq`](queries/object-uri-space.rq) — Object URI namespace linksets, aggregated against a provided URI space map |
|
|
71
|
+
|
|
72
|
+
## Executor decorators
|
|
73
|
+
|
|
74
|
+
- `VocabularyExecutor` — Wraps an executor; detects known vocabulary namespace prefixes in `void:property` quads and appends `void:vocabulary` triples.
|
|
75
|
+
- `UriSpaceExecutor` — Wraps an executor; consumes `void:Linkset` quads, matches `void:objectsTarget` against configured URI spaces, and emits aggregated linksets.
|
package/dist/stage.d.ts
CHANGED
|
@@ -1,19 +1,56 @@
|
|
|
1
1
|
import { Stage } from '@lde/pipeline';
|
|
2
2
|
import type { Quad } from '@rdfjs/types';
|
|
3
|
-
|
|
4
|
-
|
|
5
|
-
|
|
6
|
-
export
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
export
|
|
17
|
-
|
|
18
|
-
|
|
3
|
+
/**
|
|
4
|
+
* Options for configuring VoID stage execution.
|
|
5
|
+
*/
|
|
6
|
+
export interface VoidStageOptions {
|
|
7
|
+
/** SPARQL query timeout in milliseconds. @default 60000 */
|
|
8
|
+
timeout?: number;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Options for per-class VoID stages that iterate over classes.
|
|
12
|
+
*
|
|
13
|
+
* `batchSize` and `maxConcurrency` control how class bindings are batched
|
|
14
|
+
* and processed concurrently — they have no effect on global (non-per-class) stages.
|
|
15
|
+
*/
|
|
16
|
+
export interface PerClassVoidStageOptions extends VoidStageOptions {
|
|
17
|
+
/** Maximum number of class bindings per executor call. @default 10 */
|
|
18
|
+
batchSize?: number;
|
|
19
|
+
/** Maximum concurrent in-flight executor batches. @default 10 */
|
|
20
|
+
maxConcurrency?: number;
|
|
21
|
+
/** When true, iterate queries per class using a class selector. @default true */
|
|
22
|
+
perClass?: boolean;
|
|
23
|
+
}
|
|
24
|
+
/**
|
|
25
|
+
* Options for the {@link voidStages} convenience function.
|
|
26
|
+
*/
|
|
27
|
+
export interface VoidStagesOptions extends PerClassVoidStageOptions {
|
|
28
|
+
/** When provided, includes the object URI space stage using this map. */
|
|
29
|
+
uriSpaces?: ReadonlyMap<string, readonly Quad[]>;
|
|
30
|
+
}
|
|
31
|
+
export declare function subjectUriSpaces(options?: VoidStageOptions): Promise<Stage>;
|
|
32
|
+
export declare function classPartitions(options?: VoidStageOptions): Promise<Stage>;
|
|
33
|
+
export declare function countObjectLiterals(options?: VoidStageOptions): Promise<Stage>;
|
|
34
|
+
export declare function countObjectUris(options?: VoidStageOptions): Promise<Stage>;
|
|
35
|
+
export declare function countProperties(options?: VoidStageOptions): Promise<Stage>;
|
|
36
|
+
export declare function countSubjects(options?: VoidStageOptions): Promise<Stage>;
|
|
37
|
+
export declare function countTriples(options?: VoidStageOptions): Promise<Stage>;
|
|
38
|
+
export declare function classPropertySubjects(options?: PerClassVoidStageOptions): Promise<Stage>;
|
|
39
|
+
export declare function classPropertyObjects(options?: PerClassVoidStageOptions): Promise<Stage>;
|
|
40
|
+
export declare function countDatatypes(options?: VoidStageOptions): Promise<Stage>;
|
|
41
|
+
export declare function detectLicenses(options?: VoidStageOptions): Promise<Stage>;
|
|
42
|
+
export declare function perClassObjectClasses(options?: PerClassVoidStageOptions): Promise<Stage>;
|
|
43
|
+
export declare function perClassDatatypes(options?: PerClassVoidStageOptions): Promise<Stage>;
|
|
44
|
+
export declare function perClassLanguages(options?: PerClassVoidStageOptions): Promise<Stage>;
|
|
45
|
+
export declare function uriSpaces(uriSpaceMap: ReadonlyMap<string, readonly Quad[]>, options?: VoidStageOptions): Promise<Stage>;
|
|
46
|
+
export declare function detectVocabularies(options?: VoidStageOptions): Promise<Stage>;
|
|
47
|
+
/**
|
|
48
|
+
* Create all VoID analysis stages in their recommended execution order.
|
|
49
|
+
*
|
|
50
|
+
* The stages are ordered so that {@link classPartitions} runs before the
|
|
51
|
+
* per-class stages. This warms up the `?s a ?class` pattern cache on the
|
|
52
|
+
* SPARQL endpoint, preventing 504 timeouts on the heavier per-class queries
|
|
53
|
+
* when the cache is cold.
|
|
54
|
+
*/
|
|
55
|
+
export declare function voidStages(options?: VoidStagesOptions): Promise<Stage[]>;
|
|
19
56
|
//# sourceMappingURL=stage.d.ts.map
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAMN,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAMN,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAYzC;;GAEG;AACH,MAAM,WAAW,gBAAgB;IAC/B,2DAA2D;IAC3D,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;GAKG;AACH,MAAM,WAAW,wBAAyB,SAAQ,gBAAgB;IAChE,sEAAsE;IACtE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,iFAAiF;IACjF,QAAQ,CAAC,EAAE,OAAO,CAAC;CACpB;AAED;;GAEG;AACH,MAAM,WAAW,iBAAkB,SAAQ,wBAAwB;IACjE,yEAAyE;IACzE,SAAS,CAAC,EAAE,WAAW,CAAC,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC,CAAC;CAClD;AA0DD,wBAAgB,gBAAgB,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAE3E;AAED,wBAAgB,eAAe,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAE1E;AAED,wBAAgB,mBAAmB,CACjC,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,KAAK,CAAC,CAEhB;AAED,wBAAgB,eAAe,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAE1E;AAED,wBAAgB,eAAe,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAE1E;AAED,wBAAgB,aAAa,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAExE;AAED,wBAAgB,YAAY,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAEvE;AAED,wBAAgB,qBAAqB,CACnC,OAAO,CAAC,EAAE,wBAAwB,GACjC,OAAO,CAAC,KAAK,CAAC,CAKhB;AAED,wBAAgB,oBAAoB,CAClC,OAAO,CAAC,EAAE,wBAAwB,GACjC,OAAO,CAAC,KAAK,CAAC,CAKhB;AAED,wBAAgB,cAAc,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAEzE;AAED,wBAAgB,cAAc,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAEzE;AAID,wBAAgB,qBAAqB,CACnC,OAAO,CAAC,EAAE,wBAAwB,GACjC,OAAO,CAAC,KAAK,CAAC,CAKhB;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,CAAC,EAAE,wBAAwB,GACjC,OAAO,CAAC,KAAK,CAAC,CAKhB;AAED,wBAAgB,iBAAiB,CAC/B,OAAO,CAAC,EAAE,wBAAwB,GACjC,OAAO,CAAC,KAAK,CAAC,CAKhB;AAID,wBAAgB,SAAS,CACvB,WAAW,EAAE,WAAW,CAAC,MAAM,EAAE,SAAS,IAAI,EAAE,CAAC,EACjD,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,KAAK,CAAC,CAYhB;AAED,wBAAgB,kBAAkB,CAAC,OAAO,CAAC,EAAE,gBAAgB,GAAG,OAAO,CAAC,KAAK,CAAC,CAW7E;AAED;;;;;;;GAOG;AACH,wBAAsB,UAAU,CAC9B,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,KAAK,EAAE,CAAC,CA4BlB"}
|
package/dist/stage.js
CHANGED
|
@@ -6,15 +6,24 @@ import { UriSpaceExecutor } from './uriSpaceExecutor.js';
|
|
|
6
6
|
const queriesDir = resolve(dirname(fileURLToPath(import.meta.url)), '..', 'queries');
|
|
7
7
|
async function createVoidStage(filename, options) {
|
|
8
8
|
const query = await readQueryFile(resolve(queriesDir, filename));
|
|
9
|
-
const executor = options?.executor?.(query) ??
|
|
10
|
-
|
|
9
|
+
const executor = options?.executor?.(query) ??
|
|
10
|
+
new SparqlConstructExecutor({
|
|
11
|
+
query,
|
|
12
|
+
timeout: options?.timeout ?? 60_000,
|
|
13
|
+
});
|
|
14
|
+
if (options?.perClass) {
|
|
11
15
|
return new Stage({
|
|
12
16
|
name: filename,
|
|
13
17
|
itemSelector: classSelector(),
|
|
14
18
|
executors: executor,
|
|
19
|
+
batchSize: options?.batchSize,
|
|
20
|
+
maxConcurrency: options?.maxConcurrency,
|
|
15
21
|
});
|
|
16
22
|
}
|
|
17
|
-
return new Stage({
|
|
23
|
+
return new Stage({
|
|
24
|
+
name: filename,
|
|
25
|
+
executors: executor,
|
|
26
|
+
});
|
|
18
27
|
}
|
|
19
28
|
function classSelector() {
|
|
20
29
|
return {
|
|
@@ -37,63 +46,113 @@ function classSelector() {
|
|
|
37
46
|
};
|
|
38
47
|
}
|
|
39
48
|
// Global stages
|
|
40
|
-
export function subjectUriSpaces() {
|
|
41
|
-
return createVoidStage('subject-uri-space.rq');
|
|
49
|
+
export function subjectUriSpaces(options) {
|
|
50
|
+
return createVoidStage('subject-uri-space.rq', options);
|
|
42
51
|
}
|
|
43
|
-
export function classPartitions() {
|
|
44
|
-
return createVoidStage('class-partition.rq');
|
|
52
|
+
export function classPartitions(options) {
|
|
53
|
+
return createVoidStage('class-partition.rq', options);
|
|
45
54
|
}
|
|
46
|
-
export function countObjectLiterals() {
|
|
47
|
-
return createVoidStage('object-literals.rq');
|
|
55
|
+
export function countObjectLiterals(options) {
|
|
56
|
+
return createVoidStage('object-literals.rq', options);
|
|
48
57
|
}
|
|
49
|
-
export function countObjectUris() {
|
|
50
|
-
return createVoidStage('object-uris.rq');
|
|
58
|
+
export function countObjectUris(options) {
|
|
59
|
+
return createVoidStage('object-uris.rq', options);
|
|
51
60
|
}
|
|
52
|
-
export function countProperties() {
|
|
53
|
-
return createVoidStage('properties.rq');
|
|
61
|
+
export function countProperties(options) {
|
|
62
|
+
return createVoidStage('properties.rq', options);
|
|
54
63
|
}
|
|
55
|
-
export function countSubjects() {
|
|
56
|
-
return createVoidStage('subjects.rq');
|
|
64
|
+
export function countSubjects(options) {
|
|
65
|
+
return createVoidStage('subjects.rq', options);
|
|
57
66
|
}
|
|
58
|
-
export function countTriples() {
|
|
59
|
-
return createVoidStage('triples.rq');
|
|
67
|
+
export function countTriples(options) {
|
|
68
|
+
return createVoidStage('triples.rq', options);
|
|
60
69
|
}
|
|
61
|
-
export function classPropertySubjects() {
|
|
62
|
-
return createVoidStage('class-properties-subjects.rq'
|
|
70
|
+
export function classPropertySubjects(options) {
|
|
71
|
+
return createVoidStage('class-properties-subjects.rq', {
|
|
72
|
+
...options,
|
|
73
|
+
perClass: options?.perClass ?? true,
|
|
74
|
+
});
|
|
63
75
|
}
|
|
64
|
-
export function classPropertyObjects() {
|
|
65
|
-
return createVoidStage('class-properties-objects.rq'
|
|
76
|
+
export function classPropertyObjects(options) {
|
|
77
|
+
return createVoidStage('class-properties-objects.rq', {
|
|
78
|
+
...options,
|
|
79
|
+
perClass: options?.perClass ?? true,
|
|
80
|
+
});
|
|
66
81
|
}
|
|
67
|
-
export function countDatatypes() {
|
|
68
|
-
return createVoidStage('datatypes.rq');
|
|
82
|
+
export function countDatatypes(options) {
|
|
83
|
+
return createVoidStage('datatypes.rq', options);
|
|
69
84
|
}
|
|
70
|
-
export function detectLicenses() {
|
|
71
|
-
return createVoidStage('licenses.rq');
|
|
85
|
+
export function detectLicenses(options) {
|
|
86
|
+
return createVoidStage('licenses.rq', options);
|
|
72
87
|
}
|
|
73
88
|
// Per-class stages
|
|
74
|
-
export function perClassObjectClasses() {
|
|
89
|
+
export function perClassObjectClasses(options) {
|
|
75
90
|
return createVoidStage('class-property-object-classes.rq', {
|
|
76
|
-
|
|
91
|
+
...options,
|
|
92
|
+
perClass: options?.perClass ?? true,
|
|
77
93
|
});
|
|
78
94
|
}
|
|
79
|
-
export function perClassDatatypes() {
|
|
95
|
+
export function perClassDatatypes(options) {
|
|
80
96
|
return createVoidStage('class-property-datatypes.rq', {
|
|
81
|
-
|
|
97
|
+
...options,
|
|
98
|
+
perClass: options?.perClass ?? true,
|
|
82
99
|
});
|
|
83
100
|
}
|
|
84
|
-
export function perClassLanguages() {
|
|
101
|
+
export function perClassLanguages(options) {
|
|
85
102
|
return createVoidStage('class-property-languages.rq', {
|
|
86
|
-
|
|
103
|
+
...options,
|
|
104
|
+
perClass: options?.perClass ?? true,
|
|
87
105
|
});
|
|
88
106
|
}
|
|
89
107
|
// Domain-specific executor stages
|
|
90
|
-
export function uriSpaces(
|
|
108
|
+
export function uriSpaces(uriSpaceMap, options) {
|
|
91
109
|
return createVoidStage('object-uri-space.rq', {
|
|
92
|
-
|
|
110
|
+
...options,
|
|
111
|
+
executor: (query) => new UriSpaceExecutor(new SparqlConstructExecutor({
|
|
112
|
+
query,
|
|
113
|
+
timeout: options?.timeout ?? 60_000,
|
|
114
|
+
}), uriSpaceMap),
|
|
93
115
|
});
|
|
94
116
|
}
|
|
95
|
-
export function detectVocabularies() {
|
|
117
|
+
export function detectVocabularies(options) {
|
|
96
118
|
return createVoidStage('entity-properties.rq', {
|
|
97
|
-
|
|
119
|
+
...options,
|
|
120
|
+
executor: (query) => new VocabularyExecutor(new SparqlConstructExecutor({
|
|
121
|
+
query,
|
|
122
|
+
timeout: options?.timeout ?? 60_000,
|
|
123
|
+
})),
|
|
98
124
|
});
|
|
99
125
|
}
|
|
126
|
+
/**
|
|
127
|
+
* Create all VoID analysis stages in their recommended execution order.
|
|
128
|
+
*
|
|
129
|
+
* The stages are ordered so that {@link classPartitions} runs before the
|
|
130
|
+
* per-class stages. This warms up the `?s a ?class` pattern cache on the
|
|
131
|
+
* SPARQL endpoint, preventing 504 timeouts on the heavier per-class queries
|
|
132
|
+
* when the cache is cold.
|
|
133
|
+
*/
|
|
134
|
+
export async function voidStages(options) {
|
|
135
|
+
const { uriSpaces: uriSpaceMap, ...stageOptions } = options ?? {};
|
|
136
|
+
return Promise.all([
|
|
137
|
+
// Global counting stages.
|
|
138
|
+
countSubjects(stageOptions),
|
|
139
|
+
countProperties(stageOptions),
|
|
140
|
+
countObjectLiterals(stageOptions),
|
|
141
|
+
countObjectUris(stageOptions),
|
|
142
|
+
countDatatypes(stageOptions),
|
|
143
|
+
countTriples(stageOptions),
|
|
144
|
+
// Cache warming — must precede per-class stages.
|
|
145
|
+
classPartitions(stageOptions),
|
|
146
|
+
// Per-class stages.
|
|
147
|
+
classPropertySubjects(stageOptions),
|
|
148
|
+
classPropertyObjects(stageOptions),
|
|
149
|
+
perClassDatatypes(stageOptions),
|
|
150
|
+
perClassObjectClasses(stageOptions),
|
|
151
|
+
perClassLanguages(stageOptions),
|
|
152
|
+
// Other stages.
|
|
153
|
+
detectLicenses(stageOptions),
|
|
154
|
+
detectVocabularies(stageOptions),
|
|
155
|
+
subjectUriSpaces(stageOptions),
|
|
156
|
+
...(uriSpaceMap ? [uriSpaces(uriSpaceMap, stageOptions)] : []),
|
|
157
|
+
]);
|
|
158
|
+
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline-void",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.21.0",
|
|
4
4
|
"description": "VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets",
|
|
5
5
|
"repository": {
|
|
6
6
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
@@ -32,6 +32,6 @@
|
|
|
32
32
|
},
|
|
33
33
|
"peerDependencies": {
|
|
34
34
|
"@lde/dataset": "0.7.1",
|
|
35
|
-
"@lde/pipeline": "0.
|
|
35
|
+
"@lde/pipeline": "0.23.0"
|
|
36
36
|
}
|
|
37
37
|
}
|
|
@@ -6,12 +6,16 @@ CONSTRUCT {
|
|
|
6
6
|
WHERE {
|
|
7
7
|
# Object counts only. Subject counts in class-properties-subjects.rq.
|
|
8
8
|
{
|
|
9
|
-
SELECT ?
|
|
10
|
-
|
|
11
|
-
|
|
9
|
+
SELECT ?class ?p (COUNT(DISTINCT ?o) AS ?objects) {
|
|
10
|
+
{
|
|
11
|
+
SELECT ?class ?p ?o {
|
|
12
|
+
#subjectFilter#
|
|
13
|
+
?s a ?class ; ?p ?o .
|
|
14
|
+
}
|
|
15
|
+
}
|
|
12
16
|
}
|
|
13
|
-
GROUP BY ?
|
|
17
|
+
GROUP BY ?class ?p
|
|
14
18
|
}
|
|
15
|
-
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?
|
|
19
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
|
|
16
20
|
}
|
|
17
21
|
LIMIT 100000
|
|
@@ -3,7 +3,7 @@ PREFIX void: <http://rdfs.org/ns/void#>
|
|
|
3
3
|
CONSTRUCT {
|
|
4
4
|
?dataset a void:Dataset ;
|
|
5
5
|
void:classPartition ?classPartition .
|
|
6
|
-
?classPartition void:class ?
|
|
6
|
+
?classPartition void:class ?class ;
|
|
7
7
|
void:propertyPartition ?propertyPartition .
|
|
8
8
|
?propertyPartition void:property ?p ;
|
|
9
9
|
void:entities ?subjects .
|
|
@@ -11,13 +11,17 @@ CONSTRUCT {
|
|
|
11
11
|
WHERE {
|
|
12
12
|
# Subject counts only. Object counts in class-properties-objects.rq.
|
|
13
13
|
{
|
|
14
|
-
SELECT ?
|
|
15
|
-
|
|
16
|
-
|
|
14
|
+
SELECT ?class ?p (COUNT(DISTINCT ?s) AS ?subjects) {
|
|
15
|
+
{
|
|
16
|
+
SELECT ?class ?p ?s {
|
|
17
|
+
#subjectFilter#
|
|
18
|
+
?s a ?class ; ?p [] .
|
|
19
|
+
}
|
|
20
|
+
}
|
|
17
21
|
}
|
|
18
|
-
GROUP BY ?
|
|
22
|
+
GROUP BY ?class ?p
|
|
19
23
|
}
|
|
20
|
-
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-", MD5(STR(?
|
|
21
|
-
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?
|
|
24
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-", MD5(STR(?class)))) AS ?classPartition)
|
|
25
|
+
BIND(URI(CONCAT(STR(?dataset), "/.well-known/void#class-property-", MD5(CONCAT(STR(?class), STR(?p))))) AS ?propertyPartition)
|
|
22
26
|
}
|
|
23
27
|
LIMIT 100000
|