@lde/pipeline-shacl-sampler 0.4.11 → 0.4.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +40 -0
- package/dist/sampleStages.d.ts +22 -1
- package/dist/sampleStages.d.ts.map +1 -1
- package/dist/sampleStages.js +5 -4
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -50,6 +50,7 @@ await new Pipeline({ /* … */, stages }).run();
|
|
|
50
50
|
| `validator` | — | Optional [`Validator`](../pipeline/src/validator.ts) attached to every generated stage (typically a `ShaclValidator`). |
|
|
51
51
|
| `onInvalid` | `'write'` | Behaviour when a sampled batch fails validation: `'write'` \| `'skip'` \| `'halt'`. Only used when `validator` is set. |
|
|
52
52
|
| `namespaceAliases` | `[]` | Namespaces to treat as equivalent when matching `sh:targetClass` and when handing quads to the validator. See [Namespace aliases](#namespace-aliases). |
|
|
53
|
+
| `excludeResources` | — | Hook returning a SPARQL fragment that subtracts resources from a target class’s sample. See [Excluding resources](#excluding-resources). |
|
|
53
54
|
|
|
54
55
|
## Namespace aliases
|
|
55
56
|
|
|
@@ -83,6 +84,45 @@ const stages = await shaclSampleStages({
|
|
|
83
84
|
});
|
|
84
85
|
```
|
|
85
86
|
|
|
87
|
+
## Excluding resources
|
|
88
|
+
|
|
89
|
+
A profile may target a class that also describes a dataset’s own
|
|
90
|
+
administrative metadata rather than its collection content. For example,
|
|
91
|
+
SCHEMA-AP-NDE targets `schema:Organization`, so a dump containing nothing
|
|
92
|
+
but a `schema:Dataset` and the publisher `Organization` that supports it
|
|
93
|
+
would get that publisher sampled, found trivially conformant (only `name`
|
|
94
|
+
is required), and reported as “tested and passed” — even though the
|
|
95
|
+
dataset holds no real content.
|
|
96
|
+
|
|
97
|
+
`excludeResources` lets the caller subtract such resources from a target
|
|
98
|
+
class’s sample **without weakening the SHACL shapes**. It is a hook called
|
|
99
|
+
once per `sh:targetClass`; the SPARQL fragment it returns is inlined
|
|
100
|
+
verbatim into the subject-selector `WHERE` clause **after** the type
|
|
101
|
+
pattern, so it can reference the bound `?s` (e.g. a `MINUS { … }` clause
|
|
102
|
+
that removes `?s` when it is the dataset’s publisher). Return `''` to
|
|
103
|
+
sample that class unchanged; omitting the option entirely leaves every
|
|
104
|
+
generated query semantically identical (only insignificant whitespace differs).
|
|
105
|
+
|
|
106
|
+
The fragment is interpolated verbatim and therefore caller-trusted — the
|
|
107
|
+
same trust model as a distribution’s `subjectFilter`.
|
|
108
|
+
|
|
109
|
+
```ts
|
|
110
|
+
const SCHEMA = 'https://schema.org/';
|
|
111
|
+
const stages = await shaclSampleStages({
|
|
112
|
+
shapesFile,
|
|
113
|
+
validator,
|
|
114
|
+
excludeResources: (targetClass) =>
|
|
115
|
+
targetClass.value === `${SCHEMA}Organization`
|
|
116
|
+
? `MINUS { ?dataset a <${SCHEMA}Dataset> ; <${SCHEMA}publisher> ?s . }`
|
|
117
|
+
: '',
|
|
118
|
+
});
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
Anchoring the exclusion to the dataset node (rather than to the predicate
|
|
122
|
+
alone) keeps genuine content resources in the sample: an `Organization`
|
|
123
|
+
reached as the `creator` of a `CreativeWork`, say, is never the dataset’s
|
|
124
|
+
publisher, so it is still sampled and validated.
|
|
125
|
+
|
|
86
126
|
## Limitations
|
|
87
127
|
|
|
88
128
|
- Only plain-IRI `sh:path` values are supported. Sequence, alternative
|
package/dist/sampleStages.d.ts
CHANGED
|
@@ -69,6 +69,21 @@ export interface ShaclSampleStagesOptions {
|
|
|
69
69
|
* `[{ canonical: 'https://schema.org/', alias: 'http://schema.org/' }]`.
|
|
70
70
|
*/
|
|
71
71
|
namespaceAliases?: NamespaceAlias[];
|
|
72
|
+
/**
|
|
73
|
+
* Optional hook returning a SPARQL graph-pattern fragment that subtracts
|
|
74
|
+
* resources from the per-target-class sample. Called once per
|
|
75
|
+
* `sh:targetClass`; the returned fragment is inlined verbatim into the
|
|
76
|
+
* subject-selector `WHERE` clause after the type pattern (so it can
|
|
77
|
+
* reference the bound `?s`), alongside the per-distribution
|
|
78
|
+
* {@link SubjectSelectorQueryOptions.subjectFilter}. Return `''` (the
|
|
79
|
+
* default behaviour when the option is omitted) to sample that class
|
|
80
|
+
* unchanged.
|
|
81
|
+
*
|
|
82
|
+
* Typical use: a caller that knows some resources are administrative
|
|
83
|
+
* metadata rather than collection content (e.g. a dataset’s own publisher)
|
|
84
|
+
* returns a `MINUS { … }` fragment to keep them out of validation.
|
|
85
|
+
*/
|
|
86
|
+
excludeResources?: (targetClass: NamedNode) => string;
|
|
72
87
|
}
|
|
73
88
|
/**
|
|
74
89
|
* Build one sampling {@link Stage} per `sh:targetClass` declared in the SHACL
|
|
@@ -96,8 +111,14 @@ export interface SubjectSelectorQueryOptions {
|
|
|
96
111
|
namedGraph?: string;
|
|
97
112
|
/** Equivalent namespaces to broaden the type match across. @default [] */
|
|
98
113
|
namespaceAliases?: NamespaceAlias[];
|
|
114
|
+
/**
|
|
115
|
+
* Optional fragment subtracting resources from the sample, inlined verbatim
|
|
116
|
+
* after the type pattern so it can reference the bound `?s` (e.g. a
|
|
117
|
+
* `MINUS { … }` clause). @default ''
|
|
118
|
+
*/
|
|
119
|
+
excludeFilter?: string;
|
|
99
120
|
}
|
|
100
|
-
export declare function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases, }: SubjectSelectorQueryOptions): string;
|
|
121
|
+
export declare function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases, excludeFilter, }: SubjectSelectorQueryOptions): string;
|
|
101
122
|
export declare function buildSampleQuery(shape: TargetShape): string;
|
|
102
123
|
/**
|
|
103
124
|
* Decorate a {@link Validator} so every quad it receives has any IRI in an
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"sampleStages.d.ts","sourceRoot":"","sources":["../src/sampleStages.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAIL,KAAK,YAAY,EAGjB,KAAK,SAAS,EACf,MAAM,eAAe,CAAC;AAEvB,OAAO,KAAK,EAAE,SAAS,EAAQ,MAAM,cAAc,CAAC;AAEpD,OAAO,EAAuB,KAAK,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAI3E,KAAK,SAAS,GAAG,WAAW,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;AAEtE;;;;;;;;;;GAUG;AACH,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,KAAK,EAAE,MAAM,CAAC;CACf;AAED,6CAA6C;AAC7C,MAAM,WAAW,wBAAwB;IACvC,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,EAAE,cAAc,EAAE,CAAC;
|
|
1
|
+
{"version":3,"file":"sampleStages.d.ts","sourceRoot":"","sources":["../src/sampleStages.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAIL,KAAK,YAAY,EAGjB,KAAK,SAAS,EACf,MAAM,eAAe,CAAC;AAEvB,OAAO,KAAK,EAAE,SAAS,EAAQ,MAAM,cAAc,CAAC;AAEpD,OAAO,EAAuB,KAAK,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAI3E,KAAK,SAAS,GAAG,WAAW,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;AAEtE;;;;;;;;;;GAUG;AACH,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,KAAK,EAAE,MAAM,CAAC;CACf;AAED,6CAA6C;AAC7C,MAAM,WAAW,wBAAwB;IACvC,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,EAAE,cAAc,EAAE,CAAC;IACpC;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,EAAE,CAAC,WAAW,EAAE,SAAS,KAAK,MAAM,CAAC;CACvD;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,KAAK,EAAE,CAAC,CAkClB;AA6BD,qDAAqD;AACrD,MAAM,WAAW,2BAA2B;IAC1C,uCAAuC;IACvC,WAAW,EAAE,SAAS,CAAC;IACvB,kFAAkF;IAClF,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0EAA0E;IAC1E,gBAAgB,CAAC,EAAE,cAAc,EAAE,CAAC;IACpC;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,wBAAgB,yBAAyB,CAAC,EACxC,WAAW,EACX,aAAa,EACb,UAAU,EACV,gBAAqB,EACrB,aAAa,GACd,EAAE,2BAA2B,GAAG,MAAM,CAYtC;AA+BD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,WAAW,GAAG,MAAM,CAuB3D;AAED;;;;;;GAMG;AACH,wBAAgB,mCAAmC,CACjD,KAAK,EAAE,SAAS,EAChB,gBAAgB,EAAE,cAAc,EAAE,GACjC,SAAS,CAeX"}
|
package/dist/sampleStages.js
CHANGED
|
@@ -32,7 +32,7 @@ export async function shaclSampleStages(options) {
|
|
|
32
32
|
const shapes = await extractTargetShapes(options.shapesFile);
|
|
33
33
|
return shapes.map((shape) => new Stage({
|
|
34
34
|
name: `shacl-sample-${localName(shape.targetClass.value)}`,
|
|
35
|
-
itemSelector: subjectSelector(shape.targetClass, samplesPerClass, namespaceAliases),
|
|
35
|
+
itemSelector: subjectSelector(shape.targetClass, samplesPerClass, namespaceAliases, options.excludeResources?.(shape.targetClass)),
|
|
36
36
|
executors: new SparqlConstructExecutor({
|
|
37
37
|
query: buildSampleQuery(shape),
|
|
38
38
|
}),
|
|
@@ -41,7 +41,7 @@ export async function shaclSampleStages(options) {
|
|
|
41
41
|
validation,
|
|
42
42
|
}));
|
|
43
43
|
}
|
|
44
|
-
function subjectSelector(targetClass, limit, namespaceAliases) {
|
|
44
|
+
function subjectSelector(targetClass, limit, namespaceAliases, excludeFilter) {
|
|
45
45
|
assertSafeIri(targetClass.value);
|
|
46
46
|
return {
|
|
47
47
|
// Forward `options` so the Pipeline’s per-dataset TimeoutPolicy
|
|
@@ -53,6 +53,7 @@ function subjectSelector(targetClass, limit, namespaceAliases) {
|
|
|
53
53
|
subjectFilter: distribution.subjectFilter,
|
|
54
54
|
namedGraph: distribution.namedGraph,
|
|
55
55
|
namespaceAliases,
|
|
56
|
+
excludeFilter,
|
|
56
57
|
});
|
|
57
58
|
return new SparqlItemSelector({
|
|
58
59
|
query,
|
|
@@ -61,7 +62,7 @@ function subjectSelector(targetClass, limit, namespaceAliases) {
|
|
|
61
62
|
},
|
|
62
63
|
};
|
|
63
64
|
}
|
|
64
|
-
export function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases = [], }) {
|
|
65
|
+
export function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases = [], excludeFilter, }) {
|
|
65
66
|
let fromClause = '';
|
|
66
67
|
if (namedGraph) {
|
|
67
68
|
assertSafeIri(namedGraph);
|
|
@@ -71,7 +72,7 @@ export function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGra
|
|
|
71
72
|
return [
|
|
72
73
|
'SELECT DISTINCT ?s',
|
|
73
74
|
fromClause,
|
|
74
|
-
`WHERE { ${subjectFilter ?? ''} ${typePattern} }`,
|
|
75
|
+
`WHERE { ${subjectFilter ?? ''} ${typePattern} ${excludeFilter ?? ''} }`,
|
|
75
76
|
].join('\n');
|
|
76
77
|
}
|
|
77
78
|
function buildTypePattern(targetClass, namespaceAliases) {
|
package/package.json
CHANGED