@lde/pipeline-shacl-validator 0.11.2 → 0.12.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -3,18 +3,28 @@
3
3
  SHACL validation for [`@lde/pipeline`](../pipeline).
4
4
 
5
5
  Validates RDF quads produced by pipeline stages against [SHACL shapes](https://www.w3.org/TR/shacl/),
6
- writing per-dataset report files in SHACL validation report format.
7
- Shapes can be provided in any RDF serialization (Turtle, JSON-LD, N-Triples etc.).
6
+ streaming the per-dataset SHACL validation report to any number of configured
7
+ [`Writer`](../pipeline/src/writer/writer.ts)s. Shapes can be provided in any
8
+ RDF serialization (Turtle, JSON-LD, N-Triples etc.).
8
9
 
9
10
  ## Usage
10
11
 
11
12
  ```typescript
12
- import { Pipeline, Stage, SparqlConstructExecutor } from '@lde/pipeline';
13
+ import {
14
+ Pipeline,
15
+ Stage,
16
+ SparqlConstructExecutor,
17
+ FileWriter,
18
+ SparqlUpdateWriter,
19
+ } from '@lde/pipeline';
13
20
  import { ShaclValidator } from '@lde/pipeline-shacl-validator';
14
21
 
15
22
  const validator = new ShaclValidator({
16
23
  shapesFile: './shapes.ttl',
17
- reportDir: './validation',
24
+ reportWriters: [
25
+ new FileWriter({ outputDir: './validation', format: 'turtle' }),
26
+ new SparqlUpdateWriter({ endpoint: new URL('http://store/update') }),
27
+ ],
18
28
  });
19
29
 
20
30
  const pipeline = new Pipeline({
@@ -42,16 +52,54 @@ await pipeline.run();
42
52
  | `'skip'` | Discard invalid quads silently |
43
53
  | `'halt'` | Throw an error, stopping the pipeline |
44
54
 
45
- ### Report files
55
+ ### Report writers
46
56
 
47
- Validation violations are written to `<reportDir>/<dataset-iri>.validation.<ext>`
48
- as SHACL validation report triples. The output format defaults to Turtle (`.ttl`)
49
- and can be changed with the `reportFormat` option:
57
+ Each `validate()` call that produces violations fans the SHACL report quads
58
+ (`sh:ValidationResult` triples, etc.) out to every writer in `reportWriters`
59
+ via `Writer.write(dataset, quads)`. Each writer's optional `Writer.flush(dataset)` is
60
+ invoked from `ShaclValidator.report(dataset)` — i.e. once the pipeline
61
+ finishes a dataset.
50
62
 
51
- ```typescript
52
- const validator = new ShaclValidator({
53
- shapesFile: './shapes.ttl',
54
- reportDir: './validation',
55
- reportFormat: 'N-Triples', // 'Turtle' (default) | 'N-Triples' | 'N-Quads'
63
+ Validators with no `reportWriters` only produce aggregate counts
64
+ (`{ conforms, violations, quadsValidated }`); the report quads themselves are
65
+ discarded. This is deliberate — callers who only need pass/fail metrics
66
+ don't have to wire up a sink — but it does mean misconfiguring (passing
67
+ `reportWriters: []` while expecting persistence) silently loses violation
68
+ detail. Configure at least one writer in production pipelines.
69
+
70
+ The bundled `FileWriter` and `SparqlUpdateWriter` already implement the
71
+ `Writer` contract; bring your own for custom destinations.
72
+
73
+ #### Filesystem collisions with `FileWriter`
74
+
75
+ `FileWriter` derives its filename from `dataset.iri` only. If the pipeline's
76
+ main writer and a report writer both target the same `outputDir` with the
77
+ same format, they will collide on the same path and the second open will
78
+ truncate the first. Use a separate `outputDir` for validation reports:
79
+
80
+ ```ts
81
+ new ShaclValidator({
82
+ shapesFile,
83
+ reportWriters: [new FileWriter({ outputDir: './output/validation' })],
84
+ });
85
+ ```
86
+
87
+ #### Named graphs with `SparqlUpdateWriter`
88
+
89
+ `SparqlUpdateWriter` defaults to `dataset.iri.toString()` as the named graph
90
+ URI. A report writer that shares the endpoint with the pipeline's main
91
+ writer would therefore land the SHACL report in the same graph as the
92
+ dataset's data — and `CLEAR GRAPH` on first write per dataset would erase
93
+ it. To keep validation results in a separate graph, pass `graphIri` to
94
+ derive the target graph from the dataset:
95
+
96
+ ```ts
97
+ new SparqlUpdateWriter({
98
+ endpoint,
99
+ auth,
100
+ graphIri: (dataset) =>
101
+ new URL(
102
+ `https://example.org/shacl-validation/${encodeURIComponent(dataset.iri.toString())}`,
103
+ ),
56
104
  });
57
105
  ```
@@ -1,34 +1,36 @@
1
1
  import type { Quad } from '@rdfjs/types';
2
2
  import type { Dataset } from '@lde/dataset';
3
- import type { Validator, ValidationResult, ValidationReport } from '@lde/pipeline';
4
- import { type SerializationFormat } from '@lde/pipeline';
3
+ import type { Validator, ValidationResult, ValidationReport, Writer } from '@lde/pipeline';
5
4
  /** Options for {@link ShaclValidator}. */
6
5
  export interface ShaclValidatorOptions {
7
6
  /** Path to an RDF file containing SHACL shapes (any format supported by rdf-dereference). */
8
7
  shapesFile: string;
9
- /** Directory for validation report files. */
10
- reportDir: string;
11
- /** Serialization format for report files. @default 'Turtle' */
12
- reportFormat?: SerializationFormat;
8
+ /**
9
+ * Writers that receive the per-dataset SHACL validation report quads. Each
10
+ * batch with violations is streamed to every writer via {@link Writer.write};
11
+ * each writer's {@link Writer.flush} is called from {@link ShaclValidator.report}.
12
+ *
13
+ * Pass a {@link FileWriter} to mirror the previous on-disk behaviour, a
14
+ * {@link SparqlUpdateWriter} to land reports in a named graph, or any custom
15
+ * writer. Validators with no `reportWriters` only produce aggregate counts.
16
+ */
17
+ reportWriters?: Writer[];
13
18
  }
14
19
  /**
15
20
  * SHACL-based {@link Validator} for `@lde/pipeline`.
16
21
  *
17
22
  * Validates quads against shapes loaded from an RDF file (any format
18
- * supported by rdf-dereference) and writes per-dataset report files
19
- * in SHACL validation report format.
23
+ * supported by rdf-dereference) and streams the per-dataset SHACL validation
24
+ * report to any number of configured {@link Writer}s.
20
25
  */
21
26
  export declare class ShaclValidator implements Validator {
22
27
  private readonly shapesFile;
23
- private readonly reportDir;
24
- private readonly reportFormat;
28
+ private readonly reportWriters;
25
29
  private shapesDataset;
26
30
  private readonly accumulators;
27
- private readonly initializedFiles;
28
31
  constructor(options: ShaclValidatorOptions);
29
32
  validate(quads: Quad[], dataset: Dataset): Promise<ValidationResult>;
30
33
  report(dataset: Dataset): Promise<ValidationReport>;
31
34
  private getShapes;
32
- private writeReportFile;
33
35
  }
34
36
  //# sourceMappingURL=shacl-validator.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"shacl-validator.d.ts","sourceRoot":"","sources":["../src/shacl-validator.ts"],"names":[],"mappings":"AAEA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,KAAK,EACV,SAAS,EACT,gBAAgB,EAChB,gBAAgB,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAkB,KAAK,mBAAmB,EAAE,MAAM,eAAe,CAAC;AAezE,0CAA0C;AAC1C,MAAM,WAAW,qBAAqB;IACpC,6FAA6F;IAC7F,UAAU,EAAE,MAAM,CAAC;IACnB,6CAA6C;IAC7C,SAAS,EAAE,MAAM,CAAC;IAClB,+DAA+D;IAC/D,YAAY,CAAC,EAAE,mBAAmB,CAAC;CACpC;AAQD;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,SAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAsB;IAEnD,OAAO,CAAC,aAAa,CAAkB;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAyC;IACtE,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAqB;gBAE1C,OAAO,EAAE,qBAAqB;IAMpC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;IAmCpE,MAAM,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAc3C,SAAS;YAUT,eAAe;CA0B9B"}
1
+ {"version":3,"file":"shacl-validator.d.ts","sourceRoot":"","sources":["../src/shacl-validator.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAE5C,OAAO,KAAK,EACV,SAAS,EACT,gBAAgB,EAChB,gBAAgB,EAChB,MAAM,EACP,MAAM,eAAe,CAAC;AAOvB,0CAA0C;AAC1C,MAAM,WAAW,qBAAqB;IACpC,6FAA6F;IAC7F,UAAU,EAAE,MAAM,CAAC;IACnB;;;;;;;;OAQG;IACH,aAAa,CAAC,EAAE,MAAM,EAAE,CAAC;CAC1B;AAQD;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,SAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;IACpC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAW;IAEzC,OAAO,CAAC,aAAa,CAAkB;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAyC;gBAE1D,OAAO,EAAE,qBAAqB;IAKpC,QAAQ,CAAC,KAAK,EAAE,IAAI,EAAE,EAAE,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;IA2CpE,MAAM,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,gBAAgB,CAAC;YAkB3C,SAAS;CASxB"}
@@ -1,37 +1,24 @@
1
- import { mkdir, appendFile, writeFile } from 'node:fs/promises';
2
- import { join } from 'node:path';
3
- import { serializeQuads } from '@lde/pipeline';
4
1
  // @ts-expect-error -- shacl-engine has no type declarations.
5
2
  import ShaclEngine from 'shacl-engine/Validator.js';
6
3
  // @ts-expect-error -- rdf-ext has no type declarations.
7
4
  import rdf from 'rdf-ext';
8
5
  import { rdfDereferencer } from 'rdf-dereference';
9
- import filenamifyUrl from 'filenamify-url';
10
- /** File extension per serialization format. */
11
- const formatExtensions = {
12
- Turtle: '.ttl',
13
- 'N-Triples': '.nt',
14
- 'N-Quads': '.nq',
15
- };
16
6
  /**
17
7
  * SHACL-based {@link Validator} for `@lde/pipeline`.
18
8
  *
19
9
  * Validates quads against shapes loaded from an RDF file (any format
20
- * supported by rdf-dereference) and writes per-dataset report files
21
- * in SHACL validation report format.
10
+ * supported by rdf-dereference) and streams the per-dataset SHACL validation
11
+ * report to any number of configured {@link Writer}s.
22
12
  */
23
13
  export class ShaclValidator {
24
14
  shapesFile;
25
- reportDir;
26
- reportFormat;
15
+ reportWriters;
27
16
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
28
17
  shapesDataset;
29
18
  accumulators = new Map();
30
- initializedFiles = new Set();
31
19
  constructor(options) {
32
20
  this.shapesFile = options.shapesFile;
33
- this.reportDir = options.reportDir;
34
- this.reportFormat = options.reportFormat ?? 'Turtle';
21
+ this.reportWriters = options.reportWriters ?? [];
35
22
  }
36
23
  async validate(quads, dataset) {
37
24
  if (quads.length === 0) {
@@ -55,14 +42,23 @@ export class ShaclValidator {
55
42
  if (!conforms)
56
43
  acc.conforms = false;
57
44
  this.accumulators.set(key, acc);
58
- // Write violations to report file.
59
- if (violations > 0) {
60
- const reportFile = await this.writeReportFile(dataset, report);
61
- return { conforms, violations, message: `See ${reportFile}` };
45
+ if (violations > 0 && this.reportWriters.length > 0) {
46
+ const reportQuads = [...report.dataset];
47
+ for (const writer of this.reportWriters) {
48
+ await writer.write(dataset, asyncIterableOf(reportQuads));
49
+ }
62
50
  }
63
- return { conforms, violations };
51
+ // Surface where to look for the report in halt-mode error messages
52
+ // (read by @lde/pipeline's Stage.validateBuffer when onInvalid:'halt').
53
+ const message = violations > 0 && this.reportWriters.length > 0
54
+ ? `Report sent to ${this.reportWriters.length} writer(s)`
55
+ : undefined;
56
+ return { conforms, violations, ...(message !== undefined && { message }) };
64
57
  }
65
58
  async report(dataset) {
59
+ for (const writer of this.reportWriters) {
60
+ await writer.flush?.(dataset);
61
+ }
66
62
  const key = dataset.iri.toString();
67
63
  const acc = this.accumulators.get(key);
68
64
  if (!acc) {
@@ -84,22 +80,8 @@ export class ShaclValidator {
84
80
  }
85
81
  return this.shapesDataset;
86
82
  }
87
- async writeReportFile(dataset,
88
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
89
- report) {
90
- await mkdir(this.reportDir, { recursive: true });
91
- const datasetName = filenamifyUrl(dataset.iri.toString());
92
- const extension = formatExtensions[this.reportFormat];
93
- const filePath = join(this.reportDir, `${datasetName}.validation${extension}`);
94
- const reportQuads = [...report.dataset];
95
- const serialized = await serializeQuads(reportQuads, this.reportFormat);
96
- if (this.initializedFiles.has(filePath)) {
97
- await appendFile(filePath, '\n' + serialized);
98
- }
99
- else {
100
- await writeFile(filePath, serialized);
101
- this.initializedFiles.add(filePath);
102
- }
103
- return filePath;
104
- }
83
+ }
84
+ async function* asyncIterableOf(items) {
85
+ for (const item of items)
86
+ yield item;
105
87
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline-shacl-validator",
3
- "version": "0.11.2",
3
+ "version": "0.12.1",
4
4
  "description": "SHACL validation for @lde/pipeline",
5
5
  "repository": {
6
6
  "url": "git+https://github.com/ldelements/lde.git",
@@ -26,7 +26,6 @@
26
26
  ],
27
27
  "dependencies": {
28
28
  "@rdfjs/types": "^2.0.1",
29
- "filenamify-url": "^4.0.0",
30
29
  "rdf-dereference": "^5.0.0",
31
30
  "rdf-ext": "^2.5.2",
32
31
  "shacl-engine": "^1.1.0",
@@ -37,6 +36,6 @@
37
36
  },
38
37
  "peerDependencies": {
39
38
  "@lde/dataset": "0.7.4",
40
- "@lde/pipeline": "0.29.2"
39
+ "@lde/pipeline": "0.30.1"
41
40
  }
42
41
  }