@lde/pipeline-shacl-validator 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md ADDED
@@ -0,0 +1,57 @@
1
+ # @lde/pipeline-shacl-validator
2
+
3
+ SHACL validation for [`@lde/pipeline`](../pipeline).
4
+
5
+ Validates RDF quads produced by pipeline stages against [SHACL shapes](https://www.w3.org/TR/shacl/),
6
+ writing per-dataset report files in SHACL validation report format.
7
+ Shapes can be provided in any RDF serialization (Turtle, JSON-LD, N-Triples, etc.).
8
+
9
+ ## Usage
10
+
11
+ ```typescript
12
+ import { Pipeline, Stage, SparqlConstructExecutor } from '@lde/pipeline';
13
+ import { ShaclValidator } from '@lde/pipeline-shacl-validator';
14
+
15
+ const validator = new ShaclValidator({
16
+ shapesFile: './shapes.ttl',
17
+ reportDir: './validation',
18
+ });
19
+
20
+ const pipeline = new Pipeline({
21
+ // ...
22
+ stages: [
23
+ new Stage({
24
+ name: 'transform',
25
+ executors: new SparqlConstructExecutor({ query: '...' }),
26
+ validation: {
27
+ validator,
28
+ onInvalid: 'write', // 'write' | 'skip' | 'halt'
29
+ },
30
+ }),
31
+ ],
32
+ });
33
+
34
+ await pipeline.run();
35
+ ```
36
+
37
+ ### `onInvalid` options
38
+
39
+ | Value | Behaviour |
40
+ | --------- | ---------------------------------------------------------------- |
41
+ | `'write'` | Write quads to the output even if validation fails **(default)** |
42
+ | `'skip'` | Discard invalid quads silently |
43
+ | `'halt'` | Throw an error, stopping the pipeline |
44
+
45
+ ### Report files
46
+
47
+ Validation violations are appended to `<reportDir>/<dataset-iri>.validation.<ext>`
48
+ (the dataset IRI is converted to a safe file name) as SHACL validation report triples.
49
+ The output format defaults to Turtle (`.ttl`) and can be changed with the `reportFormat` option:
50
+
51
+ ```typescript
52
+ const validator = new ShaclValidator({
53
+ shapesFile: './shapes.ttl',
54
+ reportDir: './validation',
55
+ reportFormat: 'N-Triples', // 'Turtle' (default) | 'N-Triples' | 'N-Quads'
56
+ });
57
+ ```
@@ -0,0 +1,3 @@
1
+ export type { Validator, ValidationReport, ValidationResult, } from '@lde/pipeline';
2
+ export { ShaclValidator, type ShaclValidatorOptions, } from './shacl-validator.js';
3
+ //# sourceMappingURL=index.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,YAAY,EACV,SAAS,EACT,gBAAgB,EAChB,gBAAgB,GACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,cAAc,EACd,KAAK,qBAAqB,GAC3B,MAAM,sBAAsB,CAAC"}
package/dist/index.js ADDED
@@ -0,0 +1 @@
1
+ export { ShaclValidator, } from './shacl-validator.js';
@@ -0,0 +1,29 @@
1
+ import type { Quad } from '@rdfjs/types';
2
+ import type { Dataset } from '@lde/dataset';
3
+ import type { Validator, ValidationResult, ValidationReport } from '@lde/pipeline';
4
/** Options for {@link ShaclValidator}. */
export interface ShaclValidatorOptions {
    /** Path to an RDF file containing SHACL shapes (any format supported by rdf-dereference). */
    shapesFile: string;
    /** Directory for validation report files. */
    reportDir: string;
    /**
     * Serialization of the report files, as listed in the package README.
     * Optional; Turtle is used when omitted.
     */
    reportFormat?: 'Turtle' | 'N-Triples' | 'N-Quads';
}
11
/**
 * {@link Validator} implementation for `@lde/pipeline` that checks quads
 * against SHACL shapes.
 *
 * The shapes are read from an RDF file (any format supported by
 * rdf-dereference); violations are written to per-dataset report files
 * in SHACL validation report format.
 */
export declare class ShaclValidator implements Validator {
    /** Location of the RDF file holding the SHACL shapes. */
    private readonly shapesFile;
    /** Directory that receives the report files. */
    private readonly reportDir;
    /** Shapes dataset, kept after the first load. */
    private shapesDataset;
    /** Running totals per dataset, keyed by dataset IRI. */
    private readonly accumulators;
    constructor(options: ShaclValidatorOptions);
    /** Validates one batch of quads for `dataset` and adds the outcome to its totals. */
    validate(quads: Quad[], dataset: Dataset): Promise<ValidationResult>;
    /** Returns the totals accumulated for `dataset` so far. */
    report(dataset: Dataset): Promise<ValidationReport>;
    /** Loads the shapes file on first use and returns the cached dataset afterwards. */
    private getShapes;
    /** Serializes a SHACL report and writes it to the report file of the dataset. */
    private writeReportFile;
}
29
+ //# sourceMappingURL=shacl-validator.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"shacl-validator.d.ts","sourceRoot":"","sources":["../src/shacl-validator.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,KAAK,EACV,SAAS,EACT,gBAAgB,EAChB,gBAAgB,EACjB,MAAM,eAAe,CAAC;AAQvB,0CAA0C;AAC1C,MAAM,WAAW,qBAAqB;IACpC,6FAA6F;IAC7F,UAAU,EAAE,MAAM,CAAC;IACnB,6CAA6C;IAC7C,SAAS,EAAE,MAAM,CAAC;CACnB;AAQD;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,SAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IAEnC,OAAO,CAAC,aAAa,CAAkB;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAyC;gBAE1D,OAAO,EAAE,qBAAqB;IAKpC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAmCpE,MAAM,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAc3C,SAAS;YAcT,eAAe;CAoC9B"}
@@ -0,0 +1,115 @@
1
+ import { mkdir, appendFile, writeFile } from 'node:fs/promises';
2
+ import { join } from 'node:path';
3
+ import { Writer } from 'n3';
4
+ // @ts-expect-error -- shacl-engine has no type declarations.
5
+ import ShaclEngine from 'shacl-engine/Validator.js';
6
+ // @ts-expect-error -- rdf-ext has no type declarations.
7
+ import rdf from 'rdf-ext';
8
+ import { rdfDereferencer } from 'rdf-dereference';
9
+ import filenamifyUrl from 'filenamify-url';
10
/**
 * Supported report serializations (as listed in the package README) mapped
 * to the file extension used for the report file.
 */
const REPORT_FILE_EXTENSIONS = new Map([
    ['Turtle', 'ttl'],
    ['N-Triples', 'nt'],
    ['N-Quads', 'nq'],
]);
/**
 * SHACL-based {@link Validator} for `@lde/pipeline`.
 *
 * Validates quads against shapes loaded from an RDF file (any format
 * supported by rdf-dereference) and writes per-dataset report files
 * in SHACL validation report format.
 */
export class ShaclValidator {
    shapesFile;
    reportDir;
    reportFormat;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    shapesDataset;
    accumulators = new Map();
    /**
     * @param options `shapesFile` (path of the SHACL shapes file), `reportDir`
     *   (directory for report files) and the optional `reportFormat`
     *   (`'Turtle'`, `'N-Triples'` or `'N-Quads'`; `'Turtle'` when omitted).
     * @throws {Error} When `reportFormat` is given but is not a supported value.
     */
    constructor(options) {
        const reportFormat = options.reportFormat ?? 'Turtle';
        if (!REPORT_FILE_EXTENSIONS.has(reportFormat)) {
            const supported = [...REPORT_FILE_EXTENSIONS.keys()].join(', ');
            throw new Error(`Unsupported reportFormat "${reportFormat}"; expected one of: ${supported}`);
        }
        this.shapesFile = options.shapesFile;
        this.reportDir = options.reportDir;
        this.reportFormat = reportFormat;
    }
    /**
     * Validates one batch of quads against the shapes and adds the outcome to
     * the running totals of `dataset`.
     *
     * @param quads Quads to validate; an empty batch conforms trivially and is
     *   not counted.
     * @param dataset Dataset the quads belong to; its IRI keys the totals and
     *   names the report file.
     * @returns `conforms` and `violations` for this batch, plus a `message`
     *   pointing at the report file when there is at least one violation.
     */
    async validate(quads, dataset) {
        if (quads.length === 0) {
            return { conforms: true, violations: 0 };
        }
        const shapes = await this.getShapes();
        const engine = new ShaclEngine(shapes, { factory: rdf });
        const report = await engine.validate({ dataset: rdf.dataset(quads) });
        const violations = report.results.length;
        const conforms = report.conforms;
        // Fold this batch into the per-dataset totals.
        const key = dataset.iri.toString();
        const totals = this.accumulators.get(key) ?? {
            quadsValidated: 0,
            violations: 0,
            conforms: true,
        };
        totals.quadsValidated += quads.length;
        totals.violations += violations;
        // A single non-conforming batch marks the whole dataset as non-conforming.
        totals.conforms = totals.conforms && conforms;
        this.accumulators.set(key, totals);
        if (violations === 0) {
            return { conforms, violations };
        }
        // Only batches with violations produce report output.
        const reportFile = await this.writeReportFile(dataset, report);
        return { conforms, violations, message: `See ${reportFile}` };
    }
    /**
     * Returns the totals accumulated by {@link ShaclValidator.validate} for
     * `dataset`, or an all-clear result when nothing was validated for it.
     */
    async report(dataset) {
        const totals = this.accumulators.get(dataset.iri.toString());
        if (!totals) {
            return { conforms: true, violations: 0, quadsValidated: 0 };
        }
        return {
            conforms: totals.conforms,
            violations: totals.violations,
            quadsValidated: totals.quadsValidated,
        };
    }
    /** Loads the shapes file on first use and returns the cached dataset afterwards. */
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    async getShapes() {
        if (this.shapesDataset)
            return this.shapesDataset;
        const { data } = await rdfDereferencer.dereference(this.shapesFile, {
            localFiles: true,
        });
        const shapes = rdf.dataset();
        for await (const quad of data) {
            shapes.add(quad);
        }
        this.shapesDataset = shapes;
        return this.shapesDataset;
    }
    /**
     * Serializes the SHACL report in the configured format and appends it to
     * `<reportDir>/<dataset name>.validation.<ext>`.
     *
     * @returns The path of the report file.
     * @throws Any file-system or serialization error; errors are not swallowed.
     */
    async writeReportFile(dataset,
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    report) {
        await mkdir(this.reportDir, { recursive: true });
        const datasetName = filenamifyUrl(dataset.iri.toString());
        const extension = REPORT_FILE_EXTENSIONS.get(this.reportFormat);
        const filePath = join(this.reportDir, `${datasetName}.validation.${extension}`);
        const serialized = await new Promise((resolve, reject) => {
            const writer = new Writer({
                format: this.reportFormat,
                prefixes: {
                    sh: 'http://www.w3.org/ns/shacl#',
                },
            });
            for (const quad of report.dataset) {
                writer.addQuad(quad);
            }
            writer.end((error, result) => {
                if (error)
                    reject(error);
                else
                    resolve(result);
            });
        });
        // appendFile creates the file when it does not exist yet, so no
        // fallback is needed; a failure here is a real I/O error and must
        // reach the caller instead of being retried as an overwrite.
        // NOTE(review): reports from earlier runs stay in the file because
        // every write appends -- confirm that this is intended.
        await appendFile(filePath, '\n' + serialized);
        return filePath;
    }
}
package/package.json ADDED
@@ -0,0 +1,39 @@
1
+ {
2
+ "name": "@lde/pipeline-shacl-validator",
3
+ "version": "0.2.0",
4
+ "description": "SHACL validation for @lde/pipeline",
5
+ "repository": {
6
+ "url": "git+https://github.com/ldelements/lde.git",
7
+ "directory": "packages/pipeline-shacl-validator"
8
+ },
9
+ "type": "module",
10
+ "exports": {
11
+ "./package.json": "./package.json",
12
+ ".": {
13
+ "types": "./dist/index.d.ts",
14
+ "import": "./dist/index.js",
15
+ "development": "./src/index.ts",
16
+ "default": "./dist/index.js"
17
+ }
18
+ },
19
+ "main": "./dist/index.js",
20
+ "module": "./dist/index.js",
21
+ "types": "./dist/index.d.ts",
22
+ "files": [
23
+ "dist",
24
+ "!**/*.tsbuildinfo"
25
+ ],
26
+ "dependencies": {
27
+ "@rdfjs/types": "^2.0.1",
28
+ "filenamify-url": "^4.0.0",
29
+ "n3": "^2.0.1",
30
+ "rdf-dereference": "^5.0.0",
31
+ "rdf-ext": "^2.5.2",
32
+ "shacl-engine": "^1.1.0",
33
+ "tslib": "^2.3.0"
34
+ },
35
+ "peerDependencies": {
36
+ "@lde/dataset": "0.7.1",
37
+ "@lde/pipeline": "0.20.0"
38
+ }
39
+ }