@lde/pipeline-shacl-validator 0.11.2 → 0.12.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +61 -13
- package/dist/shacl-validator.d.ts +14 -12
- package/dist/shacl-validator.d.ts.map +1 -1
- package/dist/shacl-validator.js +22 -40
- package/package.json +2 -3
package/README.md
CHANGED
|
@@ -3,18 +3,28 @@
|
|
|
3
3
|
SHACL validation for [`@lde/pipeline`](../pipeline).
|
|
4
4
|
|
|
5
5
|
Validates RDF quads produced by pipeline stages against [SHACL shapes](https://www.w3.org/TR/shacl/),
|
|
6
|
-
|
|
7
|
-
Shapes can be provided in any
|
|
6
|
+
streaming the per-dataset SHACL validation report to any number of configured
|
|
7
|
+
[`Writer`](../pipeline/src/writer/writer.ts)s. Shapes can be provided in any
|
|
8
|
+
RDF serialization (Turtle, JSON-LD, N-Triples etc.).
|
|
8
9
|
|
|
9
10
|
## Usage
|
|
10
11
|
|
|
11
12
|
```typescript
|
|
12
|
-
import {
|
|
13
|
+
import {
|
|
14
|
+
Pipeline,
|
|
15
|
+
Stage,
|
|
16
|
+
SparqlConstructExecutor,
|
|
17
|
+
FileWriter,
|
|
18
|
+
SparqlUpdateWriter,
|
|
19
|
+
} from '@lde/pipeline';
|
|
13
20
|
import { ShaclValidator } from '@lde/pipeline-shacl-validator';
|
|
14
21
|
|
|
15
22
|
const validator = new ShaclValidator({
|
|
16
23
|
shapesFile: './shapes.ttl',
|
|
17
|
-
|
|
24
|
+
reportWriters: [
|
|
25
|
+
new FileWriter({ outputDir: './validation', format: 'turtle' }),
|
|
26
|
+
new SparqlUpdateWriter({ endpoint: new URL('http://store/update') }),
|
|
27
|
+
],
|
|
18
28
|
});
|
|
19
29
|
|
|
20
30
|
const pipeline = new Pipeline({
|
|
@@ -42,16 +52,54 @@ await pipeline.run();
|
|
|
42
52
|
| `'skip'` | Discard invalid quads silently |
|
|
43
53
|
| `'halt'` | Throw an error, stopping the pipeline |
|
|
44
54
|
|
|
45
|
-
### Report
|
|
55
|
+
### Report writers
|
|
46
56
|
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
57
|
+
Each `validate()` call that produces violations fans the SHACL report quads
|
|
58
|
+
(`sh:ValidationResult` triples, etc.) out to every configured `reportWriter`
|
|
59
|
+
via `Writer.write(dataset, quads)`. Each writer's `Writer.flush(dataset)` is
|
|
60
|
+
invoked from `ShaclValidator.report(dataset)` — i.e. once the pipeline
|
|
61
|
+
finishes a dataset.
|
|
50
62
|
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
63
|
+
Validators with no `reportWriters` only produce aggregate counts
|
|
64
|
+
(`{ conforms, violations, quadsValidated }`); the report quads themselves are
|
|
65
|
+
discarded. This is deliberate — callers who only need pass/fail metrics
|
|
66
|
+
don't have to wire up a sink — but it does mean misconfiguring (passing
|
|
67
|
+
`reportWriters: []` while expecting persistence) silently loses violation
|
|
68
|
+
detail. Configure at least one writer in production pipelines.
|
|
69
|
+
|
|
70
|
+
The bundled `FileWriter` and `SparqlUpdateWriter` already implement the
|
|
71
|
+
`Writer` contract; bring your own for custom destinations.
|
|
72
|
+
|
|
73
|
+
#### Filesystem collisions with `FileWriter`
|
|
74
|
+
|
|
75
|
+
`FileWriter` derives its filename from `dataset.iri` only. If the pipeline's
|
|
76
|
+
main writer and a report writer both target the same `outputDir` with the
|
|
77
|
+
same format, they will collide on the same path and the second open will
|
|
78
|
+
truncate the first. Use a separate `outputDir` for validation reports:
|
|
79
|
+
|
|
80
|
+
```ts
|
|
81
|
+
new ShaclValidator({
|
|
82
|
+
shapesFile,
|
|
83
|
+
reportWriters: [new FileWriter({ outputDir: './output/validation' })],
|
|
84
|
+
});
|
|
85
|
+
```
|
|
86
|
+
|
|
87
|
+
#### Named graphs with `SparqlUpdateWriter`
|
|
88
|
+
|
|
89
|
+
`SparqlUpdateWriter` defaults to `dataset.iri.toString()` as the named graph
|
|
90
|
+
URI. A report writer that shares the endpoint with the pipeline's main
|
|
91
|
+
writer would otherwise land the SHACL report in the same graph as the
|
|
92
|
+
dataset's data — and `CLEAR GRAPH` on first write per dataset would erase
|
|
93
|
+
it. To keep validation results in a separate graph, pass `graphIri` to
|
|
94
|
+
derive the target graph from the dataset:
|
|
95
|
+
|
|
96
|
+
```ts
|
|
97
|
+
new SparqlUpdateWriter({
|
|
98
|
+
endpoint,
|
|
99
|
+
auth,
|
|
100
|
+
graphIri: (dataset) =>
|
|
101
|
+
new URL(
|
|
102
|
+
`https://example.org/shacl-validation/${encodeURIComponent(dataset.iri.toString())}`,
|
|
103
|
+
),
|
|
56
104
|
});
|
|
57
105
|
```
|
package/dist/shacl-validator.d.ts
CHANGED
|
@@ -1,34 +1,36 @@
|
|
|
1
1
|
import type { Quad } from '@rdfjs/types';
|
|
2
2
|
import type { Dataset } from '@lde/dataset';
|
|
3
|
-
import type { Validator, ValidationResult, ValidationReport } from '@lde/pipeline';
|
|
4
|
-
import { type SerializationFormat } from '@lde/pipeline';
|
|
3
|
+
import type { Validator, ValidationResult, ValidationReport, Writer } from '@lde/pipeline';
|
|
5
4
|
/** Options for {@link ShaclValidator}. */
|
|
6
5
|
export interface ShaclValidatorOptions {
|
|
7
6
|
/** Path to an RDF file containing SHACL shapes (any format supported by rdf-dereference). */
|
|
8
7
|
shapesFile: string;
|
|
9
|
-
/**
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
8
|
+
/**
|
|
9
|
+
* Writers that receive the per-dataset SHACL validation report quads. Each
|
|
10
|
+
* batch with violations is streamed to every writer via {@link Writer.write};
|
|
11
|
+
* each writer's {@link Writer.flush} is called from {@link ShaclValidator.report}.
|
|
12
|
+
*
|
|
13
|
+
* Pass a {@link FileWriter} to mirror the previous on-disk behaviour, a
|
|
14
|
+
* {@link SparqlUpdateWriter} to land reports in a named graph, or any custom
|
|
15
|
+
* writer. Validators with no `reportWriters` only produce aggregate counts.
|
|
16
|
+
*/
|
|
17
|
+
reportWriters?: Writer[];
|
|
13
18
|
}
|
|
14
19
|
/**
|
|
15
20
|
* SHACL-based {@link Validator} for `@lde/pipeline`.
|
|
16
21
|
*
|
|
17
22
|
* Validates quads against shapes loaded from an RDF file (any format
|
|
18
|
-
* supported by rdf-dereference) and
|
|
19
|
-
*
|
|
23
|
+
* supported by rdf-dereference) and streams the per-dataset SHACL validation
|
|
24
|
+
* report to any number of configured {@link Writer}s.
|
|
20
25
|
*/
|
|
21
26
|
export declare class ShaclValidator implements Validator {
|
|
22
27
|
private readonly shapesFile;
|
|
23
|
-
private readonly
|
|
24
|
-
private readonly reportFormat;
|
|
28
|
+
private readonly reportWriters;
|
|
25
29
|
private shapesDataset;
|
|
26
30
|
private readonly accumulators;
|
|
27
|
-
private readonly initializedFiles;
|
|
28
31
|
constructor(options: ShaclValidatorOptions);
|
|
29
32
|
validate(quads: Quad[], dataset: Dataset): Promise<ValidationResult>;
|
|
30
33
|
report(dataset: Dataset): Promise<ValidationReport>;
|
|
31
34
|
private getShapes;
|
|
32
|
-
private writeReportFile;
|
|
33
35
|
}
|
|
34
36
|
//# sourceMappingURL=shacl-validator.d.ts.map
|
package/dist/shacl-validator.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"shacl-validator.d.ts","sourceRoot":"","sources":["../src/shacl-validator.ts"],"names":[],"mappings":"
|
|
1
|
+
{"version":3,"file":"shacl-validator.d.ts","sourceRoot":"","sources":["../src/shacl-validator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,KAAK,EACV,SAAS,EACT,gBAAgB,EAChB,gBAAgB,EAChB,MAAM,EACP,MAAM,eAAe,CAAC;AAOvB,0CAA0C;AAC1C,MAAM,WAAW,qBAAqB;IACpC,6FAA6F;IAC7F,UAAU,EAAE,MAAM,CAAC;IACnB;;;;;;;;OAQG;IACH,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAQD;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,SAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAW;IAEzC,OAAO,CAAC,aAAa,CAAkB;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAyC;gBAE1D,OAAO,EAAE,qBAAqB;IAKpC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA2CpE,MAAM,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAkB3C,SAAS;CASxB"}
|
package/dist/shacl-validator.js
CHANGED
|
@@ -1,37 +1,24 @@
|
|
|
1
|
-
import { mkdir, appendFile, writeFile } from 'node:fs/promises';
|
|
2
|
-
import { join } from 'node:path';
|
|
3
|
-
import { serializeQuads } from '@lde/pipeline';
|
|
4
1
|
// @ts-expect-error -- shacl-engine has no type declarations.
|
|
5
2
|
import ShaclEngine from 'shacl-engine/Validator.js';
|
|
6
3
|
// @ts-expect-error -- rdf-ext has no type declarations.
|
|
7
4
|
import rdf from 'rdf-ext';
|
|
8
5
|
import { rdfDereferencer } from 'rdf-dereference';
|
|
9
|
-
import filenamifyUrl from 'filenamify-url';
|
|
10
|
-
/** File extension per serialization format. */
|
|
11
|
-
const formatExtensions = {
|
|
12
|
-
Turtle: '.ttl',
|
|
13
|
-
'N-Triples': '.nt',
|
|
14
|
-
'N-Quads': '.nq',
|
|
15
|
-
};
|
|
16
6
|
/**
|
|
17
7
|
* SHACL-based {@link Validator} for `@lde/pipeline`.
|
|
18
8
|
*
|
|
19
9
|
* Validates quads against shapes loaded from an RDF file (any format
|
|
20
|
-
* supported by rdf-dereference) and
|
|
21
|
-
*
|
|
10
|
+
* supported by rdf-dereference) and streams the per-dataset SHACL validation
|
|
11
|
+
* report to any number of configured {@link Writer}s.
|
|
22
12
|
*/
|
|
23
13
|
export class ShaclValidator {
|
|
24
14
|
shapesFile;
|
|
25
|
-
|
|
26
|
-
reportFormat;
|
|
15
|
+
reportWriters;
|
|
27
16
|
// eslint-disable-next-line @typescript-eslint/no-explicit-any
|
|
28
17
|
shapesDataset;
|
|
29
18
|
accumulators = new Map();
|
|
30
|
-
initializedFiles = new Set();
|
|
31
19
|
constructor(options) {
|
|
32
20
|
this.shapesFile = options.shapesFile;
|
|
33
|
-
this.
|
|
34
|
-
this.reportFormat = options.reportFormat ?? 'Turtle';
|
|
21
|
+
this.reportWriters = options.reportWriters ?? [];
|
|
35
22
|
}
|
|
36
23
|
async validate(quads, dataset) {
|
|
37
24
|
if (quads.length === 0) {
|
|
@@ -55,14 +42,23 @@ export class ShaclValidator {
|
|
|
55
42
|
if (!conforms)
|
|
56
43
|
acc.conforms = false;
|
|
57
44
|
this.accumulators.set(key, acc);
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
const
|
|
61
|
-
|
|
45
|
+
if (violations > 0 && this.reportWriters.length > 0) {
|
|
46
|
+
const reportQuads = [...report.dataset];
|
|
47
|
+
for (const writer of this.reportWriters) {
|
|
48
|
+
await writer.write(dataset, asyncIterableOf(reportQuads));
|
|
49
|
+
}
|
|
62
50
|
}
|
|
63
|
-
|
|
51
|
+
// Surface where to look for the report in halt-mode error messages
|
|
52
|
+
// (read by @lde/pipeline's Stage.validateBuffer when onInvalid:'halt').
|
|
53
|
+
const message = violations > 0 && this.reportWriters.length > 0
|
|
54
|
+
? `Report sent to ${this.reportWriters.length} writer(s)`
|
|
55
|
+
: undefined;
|
|
56
|
+
return { conforms, violations, ...(message !== undefined && { message }) };
|
|
64
57
|
}
|
|
65
58
|
async report(dataset) {
|
|
59
|
+
for (const writer of this.reportWriters) {
|
|
60
|
+
await writer.flush?.(dataset);
|
|
61
|
+
}
|
|
66
62
|
const key = dataset.iri.toString();
|
|
67
63
|
const acc = this.accumulators.get(key);
|
|
68
64
|
if (!acc) {
|
|
@@ -84,22 +80,8 @@ export class ShaclValidator {
|
|
|
84
80
|
}
|
|
85
81
|
return this.shapesDataset;
|
|
86
82
|
}
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
const datasetName = filenamifyUrl(dataset.iri.toString());
|
|
92
|
-
const extension = formatExtensions[this.reportFormat];
|
|
93
|
-
const filePath = join(this.reportDir, `${datasetName}.validation${extension}`);
|
|
94
|
-
const reportQuads = [...report.dataset];
|
|
95
|
-
const serialized = await serializeQuads(reportQuads, this.reportFormat);
|
|
96
|
-
if (this.initializedFiles.has(filePath)) {
|
|
97
|
-
await appendFile(filePath, '\n' + serialized);
|
|
98
|
-
}
|
|
99
|
-
else {
|
|
100
|
-
await writeFile(filePath, serialized);
|
|
101
|
-
this.initializedFiles.add(filePath);
|
|
102
|
-
}
|
|
103
|
-
return filePath;
|
|
104
|
-
}
|
|
83
|
+
}
|
|
84
|
+
async function* asyncIterableOf(items) {
|
|
85
|
+
for (const item of items)
|
|
86
|
+
yield item;
|
|
105
87
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline-shacl-validator",
|
|
3
|
-
"version": "0.11.2",
|
|
3
|
+
"version": "0.12.1",
|
|
4
4
|
"description": "SHACL validation for @lde/pipeline",
|
|
5
5
|
"repository": {
|
|
6
6
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
@@ -26,7 +26,6 @@
|
|
|
26
26
|
],
|
|
27
27
|
"dependencies": {
|
|
28
28
|
"@rdfjs/types": "^2.0.1",
|
|
29
|
-
"filenamify-url": "^4.0.0",
|
|
30
29
|
"rdf-dereference": "^5.0.0",
|
|
31
30
|
"rdf-ext": "^2.5.2",
|
|
32
31
|
"shacl-engine": "^1.1.0",
|
|
@@ -37,6 +36,6 @@
|
|
|
37
36
|
},
|
|
38
37
|
"peerDependencies": {
|
|
39
38
|
"@lde/dataset": "0.7.4",
|
|
40
|
-
"@lde/pipeline": "0.
|
|
39
|
+
"@lde/pipeline": "0.30.1"
|
|
41
40
|
}
|
|
42
41
|
}
|