@lde/pipeline-shacl-sampler 0.4.11 → 0.4.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -50,6 +50,7 @@ await new Pipeline({ /* … */, stages }).run();
50
50
  | `validator` | — | Optional [`Validator`](../pipeline/src/validator.ts) attached to every generated stage (typically a `ShaclValidator`). |
51
51
  | `onInvalid` | `'write'` | Behaviour when a sampled batch fails validation: `'write'` \| `'skip'` \| `'halt'`. Only used when `validator` is set. |
52
52
  | `namespaceAliases` | `[]` | Namespaces to treat as equivalent when matching `sh:targetClass` and when handing quads to the validator. See [Namespace aliases](#namespace-aliases). |
53
+ | `excludeResources` | — | Hook returning a SPARQL fragment that subtracts resources from a target class’s sample. See [Excluding resources](#excluding-resources). |
53
54
 
54
55
  ## Namespace aliases
55
56
 
@@ -83,6 +84,45 @@ const stages = await shaclSampleStages({
83
84
  });
84
85
  ```
85
86
 
87
+ ## Excluding resources
88
+
89
+ A profile may target a class that also describes a dataset’s own
90
+ administrative metadata rather than its collection content. For example,
91
+ SCHEMA-AP-NDE targets `schema:Organization`, so a dump containing nothing
92
+ but a `schema:Dataset` and the publisher `Organization` that supports it
93
+ would get that publisher sampled, found trivially conformant (only `name`
94
+ is required), and reported as “tested and passed” — even though the
95
+ dataset holds no real content.
96
+
97
+ `excludeResources` lets the caller subtract such resources from a target
98
+ class’s sample **without weakening the SHACL shapes**. It is a hook called
99
+ once per `sh:targetClass`; the SPARQL fragment it returns is inlined
100
+ verbatim into the subject-selector `WHERE` clause **after** the type
101
+ pattern, so it can reference the bound `?s` (e.g. a `MINUS { … }` clause
102
+ that removes `?s` when it is the dataset’s publisher). Return `''` to
103
+ sample that class unchanged; omitting the option entirely leaves every
104
+ generated query semantically unchanged (only insignificant whitespace differs).
105
+
106
+ The fragment is interpolated verbatim and therefore caller-trusted — the
107
+ same trust model as a distribution’s `subjectFilter`.
108
+
109
+ ```ts
110
+ const SCHEMA = 'https://schema.org/';
111
+ const stages = await shaclSampleStages({
112
+ shapesFile,
113
+ validator,
114
+ excludeResources: (targetClass) =>
115
+ targetClass.value === `${SCHEMA}Organization`
116
+ ? `MINUS { ?dataset a <${SCHEMA}Dataset> ; <${SCHEMA}publisher> ?s . }`
117
+ : '',
118
+ });
119
+ ```
120
+
121
+ Anchoring the exclusion to the dataset node (rather than to the predicate
122
+ alone) keeps genuine content resources in the sample: an `Organization`
123
+ reached as the `creator` of a `CreativeWork`, say, is not excluded unless it
124
+ is also the dataset’s publisher, so it is still sampled and validated.
125
+
86
126
  ## Limitations
87
127
 
88
128
  - Only plain-IRI `sh:path` values are supported. Sequence, alternative
@@ -69,6 +69,21 @@ export interface ShaclSampleStagesOptions {
69
69
  * `[{ canonical: 'https://schema.org/', alias: 'http://schema.org/' }]`.
70
70
  */
71
71
  namespaceAliases?: NamespaceAlias[];
72
+ /**
73
+ * Optional hook returning a SPARQL graph-pattern fragment that subtracts
74
+ * resources from the per-target-class sample. Called once per
75
+ * `sh:targetClass`; the returned fragment is inlined verbatim into the
76
+ * subject-selector `WHERE` clause after the type pattern (so it can
77
+ * reference the bound `?s`), alongside the per-distribution
78
+ * {@link SubjectSelectorQueryOptions.subjectFilter}. Return `''` (the
79
+ * default behaviour when the option is omitted) to sample that class
80
+ * unchanged.
81
+ *
82
+ * Typical use: a caller that knows some resources are administrative
83
+ * metadata rather than collection content (e.g. a dataset’s own publisher)
84
+ * returns a `MINUS { … }` fragment to keep them out of validation.
85
+ */
86
+ excludeResources?: (targetClass: NamedNode) => string;
72
87
  }
73
88
  /**
74
89
  * Build one sampling {@link Stage} per `sh:targetClass` declared in the SHACL
@@ -96,8 +111,14 @@ export interface SubjectSelectorQueryOptions {
96
111
  namedGraph?: string;
97
112
  /** Equivalent namespaces to broaden the type match across. @default [] */
98
113
  namespaceAliases?: NamespaceAlias[];
114
+ /**
115
+ * Optional fragment subtracting resources from the sample, inlined verbatim
116
+ * after the type pattern so it can reference the bound `?s` (e.g. a
117
+ * `MINUS { … }` clause). @default ''
118
+ */
119
+ excludeFilter?: string;
99
120
  }
100
- export declare function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases, }: SubjectSelectorQueryOptions): string;
121
+ export declare function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases, excludeFilter, }: SubjectSelectorQueryOptions): string;
101
122
  export declare function buildSampleQuery(shape: TargetShape): string;
102
123
  /**
103
124
  * Decorate a {@link Validator} so every quad it receives has any IRI in an
@@ -1 +1 @@
1
- {"version":3,"file":"sampleStages.d.ts","sourceRoot":"","sources":["../src/sampleStages.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAIL,KAAK,YAAY,EAGjB,KAAK,SAAS,EACf,MAAM,eAAe,CAAC;AAEvB,OAAO,KAAK,EAAE,SAAS,EAAQ,MAAM,cAAc,CAAC;AAEpD,OAAO,EAAuB,KAAK,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAI3E,KAAK,SAAS,GAAG,WAAW,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;AAEtE;;;;;;;;;;GAUG;AACH,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,KAAK,EAAE,MAAM,CAAC;CACf;AAED,6CAA6C;AAC7C,MAAM,WAAW,wBAAwB;IACvC,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,EAAE,cAAc,EAAE,CAAC;CACrC;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,KAAK,EAAE,CAAC,CAiClB;AA2BD,qDAAqD;AACrD,MAAM,WAAW,2BAA2B;IAC1C,uCAAuC;IACvC,WAAW,EAAE,SAAS,CAAC;IACvB,kFAAkF;IAClF,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0EAA0E;IAC1E,gBAAgB,CAAC,EAAE,cAAc,EAAE,CAAC;CACrC;AAED,wBAAgB,yBAAyB,CAAC,EACxC,WAAW,EACX,aAAa,EACb,UAAU,EACV,gBAAqB,GACtB,EAAE,2BAA2B,GAAG,MAAM,CAYtC;AA+BD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,WAAW,GAAG,MAAM,CAuB3D;AAED;;;;;;GAMG;AACH,wBAAgB,mCAAmC,CACjD,KAAK,EAAE,SAAS,EAChB,gBAAgB,EAAE,cAAc,EAAE,GACjC,SAAS,CAeX"}
1
+ {"version":3,"file":"sampleStages.d.ts","sourceRoot":"","sources":["../src/sampleStages.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EAIL,KAAK,YAAY,EAGjB,KAAK,SAAS,EACf,MAAM,eAAe,CAAC;AAEvB,OAAO,KAAK,EAAE,SAAS,EAAQ,MAAM,cAAc,CAAC;AAEpD,OAAO,EAAuB,KAAK,WAAW,EAAE,MAAM,oBAAoB,CAAC;AAI3E,KAAK,SAAS,GAAG,WAAW,CAAC,YAAY,CAAC,YAAY,CAAC,CAAC,CAAC,WAAW,CAAC,CAAC;AAEtE;;;;;;;;;;GAUG;AACH,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,KAAK,EAAE,MAAM,CAAC;CACf;AAED,6CAA6C;AAC7C,MAAM,WAAW,wBAAwB;IACvC,kDAAkD;IAClD,UAAU,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;OAEG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;OAIG;IACH,SAAS,CAAC,EAAE,SAAS,CAAC;IACtB;;;;;;;;;;;OAWG;IACH,gBAAgB,CAAC,EAAE,cAAc,EAAE,CAAC;IACpC;;;;;;;;;;;;;OAaG;IACH,gBAAgB,CAAC,EAAE,CAAC,WAAW,EAAE,SAAS,KAAK,MAAM,CAAC;CACvD;AAED;;;;;;;;;;;;;;GAcG;AACH,wBAAsB,iBAAiB,CACrC,OAAO,EAAE,wBAAwB,GAChC,OAAO,CAAC,KAAK,EAAE,CAAC,CAkClB;AA6BD,qDAAqD;AACrD,MAAM,WAAW,2BAA2B;IAC1C,uCAAuC;IACvC,WAAW,EAAE,SAAS,CAAC;IACvB,kFAAkF;IAClF,aAAa,CAAC,EAAE,MAAM,CAAC;IACvB,uDAAuD;IACvD,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,0EAA0E;IAC1E,gBAAgB,CAAC,EAAE,cAAc,EAAE,CAAC;IACpC;;;;OAIG;IACH,aAAa,CAAC,EAAE,MAAM,CAAC;CACxB;AAED,wBAAgB,yBAAyB,CAAC,EACxC,WAAW,EACX,aAAa,EACb,UAAU,EACV,gBAAqB,EACrB,aAAa,GACd,EAAE,2BAA2B,GAAG,MAAM,CAYtC;AA+BD,wBAAgB,gBAAgB,CAAC,KAAK,EAAE,WAAW,GAAG,MAAM,CAuB3D;AAED;;;;;;GAMG;AACH,wBAAgB,mCAAmC,CACjD,KAAK,EAAE,SAAS,EAChB,gBAAgB,EAAE,cAAc,EAAE,GACjC,SAAS,CAeX"}
@@ -32,7 +32,7 @@ export async function shaclSampleStages(options) {
32
32
  const shapes = await extractTargetShapes(options.shapesFile);
33
33
  return shapes.map((shape) => new Stage({
34
34
  name: `shacl-sample-${localName(shape.targetClass.value)}`,
35
- itemSelector: subjectSelector(shape.targetClass, samplesPerClass, namespaceAliases),
35
+ itemSelector: subjectSelector(shape.targetClass, samplesPerClass, namespaceAliases, options.excludeResources?.(shape.targetClass)),
36
36
  executors: new SparqlConstructExecutor({
37
37
  query: buildSampleQuery(shape),
38
38
  }),
@@ -41,7 +41,7 @@ export async function shaclSampleStages(options) {
41
41
  validation,
42
42
  }));
43
43
  }
44
- function subjectSelector(targetClass, limit, namespaceAliases) {
44
+ function subjectSelector(targetClass, limit, namespaceAliases, excludeFilter) {
45
45
  assertSafeIri(targetClass.value);
46
46
  return {
47
47
  // Forward `options` so the Pipeline’s per-dataset TimeoutPolicy
@@ -53,6 +53,7 @@ function subjectSelector(targetClass, limit, namespaceAliases) {
53
53
  subjectFilter: distribution.subjectFilter,
54
54
  namedGraph: distribution.namedGraph,
55
55
  namespaceAliases,
56
+ excludeFilter,
56
57
  });
57
58
  return new SparqlItemSelector({
58
59
  query,
@@ -61,7 +62,7 @@ function subjectSelector(targetClass, limit, namespaceAliases) {
61
62
  },
62
63
  };
63
64
  }
64
- export function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases = [], }) {
65
+ export function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGraph, namespaceAliases = [], excludeFilter, }) {
65
66
  let fromClause = '';
66
67
  if (namedGraph) {
67
68
  assertSafeIri(namedGraph);
@@ -71,7 +72,7 @@ export function buildSubjectSelectorQuery({ targetClass, subjectFilter, namedGra
71
72
  return [
72
73
  'SELECT DISTINCT ?s',
73
74
  fromClause,
74
- `WHERE { ${subjectFilter ?? ''} ${typePattern} }`,
75
+ `WHERE { ${subjectFilter ?? ''} ${typePattern} ${excludeFilter ?? ''} }`,
75
76
  ].join('\n');
76
77
  }
77
78
  function buildTypePattern(targetClass, namespaceAliases) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline-shacl-sampler",
3
- "version": "0.4.11",
3
+ "version": "0.4.12",
4
4
  "description": "Per-class sampling stages for @lde/pipeline, derived from SHACL shapes",
5
5
  "repository": {
6
6
  "url": "git+https://github.com/ldelements/lde.git",