@lde/pipeline 0.30.14 → 0.30.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,38 @@
1
+ import type { Quad } from '@rdfjs/types';
2
+ import type { QuadTransform } from '../stage.js';
3
+ /**
4
+ * Why this guard exists.
5
+ *
6
+ * A file-based served store (e.g. the Dataset Knowledge Graph) rebuilds its
7
+ * index by concatenating every per-dataset n-quads file and parsing the
8
+ * concatenation as ONE RDF document (`qlever index` over
9
+ * `find … -exec cat {} +`). Blank-node labels are only document-scoped, and the
10
+ * pipeline emits deterministic labels (n3 `DataFactory.blankNode()` → `n3-N`,
11
+ * the counter resets per dataset/run), so the same label recurs across files and
12
+ * the indexer fuses those nodes into one — merging unrelated provenance,
13
+ * measurements and linksets across datasets and runs. Named nodes never fuse.
14
+ *
15
+ * The invariant for any quads the pipeline writes into such a store is therefore:
16
+ * NO blank nodes. Mint stable (skolem) IRIs instead — see `skolemIri` in
17
+ * `@lde/dataset`. These helpers make that invariant testable and enforceable.
18
+ *
19
+ * See ldelements/lde#474 and netwerk-digitaal-erfgoed/dataset-knowledge-graph#352.
20
+ */
21
+ /**
+ * Collect the distinct blank-node labels that appear in subject, object, or
+ * graph position across `quads`. The predicate position is not inspected.
+ *
+ * @param quads - The quads to inspect.
+ * @returns One entry per distinct blank-node label; an empty array when the
+ * quads are blank-node-free.
+ */
25
+ export declare function blankNodes(quads: Iterable<Quad>): string[];
26
+ /**
+ * Throw if any quad carries a blank node. Use in producer tests to lock in the
+ * no-blank-nodes invariant (see module docs).
+ *
+ * @param quads - The quads to check.
+ * @throws Error when at least one blank node is found; the message reports how
+ * many distinct labels were found and lists up to the first ten of them.
+ */
30
+ export declare function assertNoBlankNodes(quads: Iterable<Quad>): void;
31
+ /**
+ * A {@link QuadTransform} that passes quads through unchanged but throws on the
+ * first blank node it sees. Insert it just before the writer to turn the
+ * no-blank-nodes invariant into a hard pipeline failure (e.g. in a CI/staging
+ * run) rather than a per-test opt-in.
+ *
+ * @returns A transform that yields each incoming quad as-is and throws an Error
+ * as soon as a quad has a blank node in subject, object, or graph position.
+ */
37
+ export declare function failOnBlankNodes<Context>(): QuadTransform<Context>;
38
+ //# sourceMappingURL=blankNodes.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"blankNodes.d.ts","sourceRoot":"","sources":["../../src/guard/blankNodes.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAEjD;;;;;;;;;;;;;;;;;GAiBG;AAEH;;;GAGG;AACH,wBAAgB,UAAU,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,MAAM,EAAE,CAU1D;AAED;;;GAGG;AACH,wBAAgB,kBAAkB,CAAC,KAAK,EAAE,QAAQ,CAAC,IAAI,CAAC,GAAG,IAAI,CAU9D;AAED;;;;;GAKG;AACH,wBAAgB,gBAAgB,CAAC,OAAO,KAAK,aAAa,CAAC,OAAO,CAAC,CAelE"}
@@ -0,0 +1,66 @@
1
+ /**
2
+ * Why this guard exists.
3
+ *
4
+ * A file-based served store (e.g. the Dataset Knowledge Graph) rebuilds its
5
+ * index by concatenating every per-dataset n-quads file and parsing the
6
+ * concatenation as ONE RDF document (`qlever index` over
7
+ * `find … -exec cat {} +`). Blank-node labels are only document-scoped, and the
8
+ * pipeline emits deterministic labels (n3 `DataFactory.blankNode()` → `n3-N`,
9
+ * the counter resets per dataset/run), so the same label recurs across files and
10
+ * the indexer fuses those nodes into one — merging unrelated provenance,
11
+ * measurements and linksets across datasets and runs. Named nodes never fuse.
12
+ *
13
+ * The invariant for any quads the pipeline writes into such a store is therefore:
14
+ * NO blank nodes. Mint stable (skolem) IRIs instead — see `skolemIri` in
15
+ * `@lde/dataset`. These helpers make that invariant testable and enforceable.
16
+ *
17
+ * See ldelements/lde#474 and netwerk-digitaal-erfgoed/dataset-knowledge-graph#352.
18
+ */
19
+ /**
20
+ * The distinct blank-node labels appearing in subject, object, or graph position
21
+ * across `quads`. Empty when the quads are blank-node-free.
22
+ */
23
+ export function blankNodes(quads) {
24
+ const offenders = new Set();
25
+ for (const quad of quads) {
26
+ for (const term of [quad.subject, quad.object, quad.graph]) {
27
+ if (term.termType === 'BlankNode') {
28
+ offenders.add(term.value);
29
+ }
30
+ }
31
+ }
32
+ return [...offenders];
33
+ }
34
/**
 * Guard for producer tests: raise an Error when the given quads contain any
 * blank node, so the no-blank-nodes invariant (see module docs) stays locked in.
 * The error reports the number of distinct labels and names up to ten of them.
 */
export function assertNoBlankNodes(quads) {
    const found = blankNodes(quads);
    if (found.length === 0) {
        return;
    }
    // Cap the listing so a badly broken producer does not flood the message.
    const sample = found.slice(0, 10).join(', ');
    const message = [
        `Output contains ${found.length} blank node(s), which fuse across `,
        `datasets when a file-based store cat-indexes per-dataset files. `,
        `Mint skolem IRIs instead (see skolemIri in @lde/dataset; ldelements/lde#474). `,
        `First: ${sample}`,
    ].join('');
    throw new Error(message);
}
47
+ /**
48
+ * A {@link QuadTransform} that passes quads through unchanged but throws on the
49
+ * first blank node it sees. Insert it just before the writer to turn the
50
+ * no-blank-nodes invariant into a hard pipeline failure (e.g. in a CI/staging
51
+ * run) rather than a per-test opt-in.
52
+ */
53
+ export function failOnBlankNodes() {
54
+ return async function* (quads) {
55
+ for await (const quad of quads) {
56
+ for (const term of [quad.subject, quad.object, quad.graph]) {
57
+ if (term.termType === 'BlankNode') {
58
+ throw new Error(`Blank node reached the writer (${term.value}); it would fuse ` +
59
+ `across datasets in a cat-built index. Mint a skolem IRI instead ` +
60
+ `(ldelements/lde#474): ${quad.subject.value} ${quad.predicate.value} …`);
61
+ }
62
+ }
63
+ yield quad;
64
+ }
65
+ };
66
+ }
package/dist/index.d.ts CHANGED
@@ -6,6 +6,7 @@ export * from './progressReporter.js';
6
6
  export * from './selector.js';
7
7
  export * from './stage.js';
8
8
  export * from './stageOutputResolver.js';
9
+ export * from './guard/blankNodes.js';
9
10
  export * from './sparql/index.js';
10
11
  export * from './distribution/index.js';
11
12
  export * from './provenance/index.js';
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,gBAAgB,CAAC;AAC/B,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oCAAoC,CAAC;AACnD,cAAc,wBAAwB,CAAC;AACvC,cAAc,oCAAoC,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,gBAAgB,CAAC;AAC/B,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AACtC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,uBAAuB,CAAC;AACtC,cAAc,mBAAmB,CAAC;AAClC,cAAc,oCAAoC,CAAC;AACnD,cAAc,wBAAwB,CAAC;AACvC,cAAc,oCAAoC,CAAC"}
package/dist/index.js CHANGED
@@ -6,6 +6,7 @@ export * from './progressReporter.js';
6
6
  export * from './selector.js';
7
7
  export * from './stage.js';
8
8
  export * from './stageOutputResolver.js';
9
+ export * from './guard/blankNodes.js';
9
10
  export * from './sparql/index.js';
10
11
  export * from './distribution/index.js';
11
12
  export * from './provenance/index.js';
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.30.14",
3
+ "version": "0.30.15",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"