@lde/pipeline 0.30.17 → 0.30.19

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -276,6 +276,21 @@ Writes generated quads to a destination:
276
276
  - `SparqlUpdateWriter` — writes to a SPARQL endpoint via UPDATE queries
277
277
  - `FileWriter` — writes to local files
278
278
 
279
+ ### Reporter
280
+
281
+ A `ProgressReporter` observes the run, receiving lifecycle events such as `pipelineStart`, `stageComplete`, `datasetValidated` and `pipelineComplete`. Every method is optional, so a reporter implements only the events it cares about.
282
+
283
+ Pass a single reporter, or an array to have several observe the same run — for example a console reporter alongside one that collects validation verdicts:
284
+
285
+ ```typescript
286
+ new Pipeline({
287
+ // …
288
+ reporter: [new ConsoleReporter(), verdictCollector],
289
+ });
290
+ ```
291
+
292
+ Each reporter receives every event, in array order; a reporter that does not implement a given event is skipped for it.
293
+
279
294
  ### Provenance store
280
295
 
281
296
  A `ProvenanceStore` gives the pipeline a small per-dataset memory, so a future run can skip datasets that are genuinely unchanged. It is purely a storage seam: the framework owns the skip decision (see [`sourceFingerprint`](#source-change-fingerprint) and `shouldReprocess`), the store owns only how each record is persisted.
@@ -0,0 +1,16 @@
1
+ import type { ProgressReporter } from './progressReporter.js';
2
+ /**
3
+ * Combine several {@link ProgressReporter}s into one that forwards every
4
+ * lifecycle call to each child that implements it. Lets a single run be
5
+ * observed by more than one reporter – e.g. a console reporter alongside a
6
+ * verdict-collecting one.
7
+ *
8
+ * Each method is dispatched to the children in array order; a child that does
9
+ * not implement a given (optional) method is skipped for that call.
10
+ *
11
+ * Internal to the package: not re-exported from `index.ts`. {@link Pipeline}
12
+ * uses it to normalise a `reporter` array into the single reporter its call
13
+ * sites expect, so the broader API need not grow a new public symbol.
14
+ */
15
+ export declare function combineReporters(reporters: readonly ProgressReporter[]): ProgressReporter;
16
+ //# sourceMappingURL=combineReporters.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"combineReporters.d.ts","sourceRoot":"","sources":["../src/combineReporters.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAU9D;;;;;;;;;;;;GAYG;AACH,wBAAgB,gBAAgB,CAC9B,SAAS,EAAE,SAAS,gBAAgB,EAAE,GACrC,gBAAgB,CA4ClB"}
@@ -0,0 +1,51 @@
1
/**
 * Combine several {@link ProgressReporter}s into one that forwards every
 * lifecycle call to each child that implements it. Lets a single run be
 * observed by more than one reporter – e.g. a console reporter alongside a
 * verdict-collecting one.
 *
 * Each method is dispatched to the children in array order; a child that does
 * not implement a given (optional) method is skipped for that call. Every
 * child method is invoked with the child itself as `this`, so class-based
 * reporters that keep state on the instance work when combined.
 *
 * Internal to the package: not re-exported from `index.ts`. {@link Pipeline}
 * uses it to normalise a `reporter` array into the single reporter its call
 * sites expect, so the broader API need not grow a new public symbol.
 *
 * @param reporters Child reporters to notify, in dispatch order.
 * @returns A reporter that defines every lifecycle method and fans each call
 * out to the children.
 */
export function combineReporters(reporters) {
  const forward = (method, ...args) => {
    for (const reporter of reporters) {
      // Every method is optional; notify only the children that implement it.
      // Call through the reporter (not through a detached local) so it stays
      // the receiver: a bare call would run the method with `this` undefined
      // in strict-mode module code and break reporters that use `this`.
      reporter[method]?.call(reporter, ...args);
    }
  };
  // Listing every method explicitly (rather than a Proxy) keeps the forwarding
  // type-safe in the TypeScript source: there the result is typed as
  // `Required<ProgressReporter>`, which forces a new entry here whenever the
  // interface grows, so a forgotten method fails to compile instead of
  // silently going unforwarded.
  const combined = {
    pipelineStart: (...args) => forward('pipelineStart', ...args),
    datasetsSelected: (...args) => forward('datasetsSelected', ...args),
    datasetStart: (...args) => forward('datasetStart', ...args),
    distributionProbed: (...args) => forward('distributionProbed', ...args),
    importStarted: (...args) => forward('importStarted', ...args),
    importFailed: (...args) => forward('importFailed', ...args),
    distributionValidated: (...args) => forward('distributionValidated', ...args),
    distributionSelected: (...args) => forward('distributionSelected', ...args),
    stageStart: (...args) => forward('stageStart', ...args),
    stageProgress: (...args) => forward('stageProgress', ...args),
    stageComplete: (...args) => forward('stageComplete', ...args),
    stageFailed: (...args) => forward('stageFailed', ...args),
    stageSkipped: (...args) => forward('stageSkipped', ...args),
    datasetValidated: (...args) => forward('datasetValidated', ...args),
    datasetComplete: (...args) => forward('datasetComplete', ...args),
    datasetSkipped: (...args) => forward('datasetSkipped', ...args),
    pipelineComplete: (...args) => forward('pipelineComplete', ...args),
    timeoutTightened: (...args) => forward('timeoutTightened', ...args),
    timeoutRelaxed: (...args) => forward('timeoutRelaxed', ...args),
  };
  return combined;
}
@@ -1 +1 @@
1
- {"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/distribution/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,yBAAyB,CAAC;AAUjC;;;;;;;;;GASG;AACH,wBAAuB,mBAAmB,CACxC,YAAY,EAAE,eAAe,EAAE,EAC/B,UAAU,EAAE,MAAM,EAClB,YAAY,CAAC,EAAE,YAAY,GAC1B,aAAa,CAAC,IAAI,CAAC,CAiDrB"}
1
+ {"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/distribution/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,yBAAyB,CAAC;AAYjC;;;;;;;;;GASG;AACH,wBAAuB,mBAAmB,CACxC,YAAY,EAAE,eAAe,EAAE,EAC/B,UAAU,EAAE,MAAM,EAClB,YAAY,CAAC,EAAE,YAAY,GAC1B,aAAa,CAAC,IAAI,CAAC,CA4CrB"}
@@ -1,12 +1,14 @@
1
1
  import { hashSuffix, skolemIri } from '@lde/dataset';
2
2
  import { DataFactory } from 'n3';
3
3
  import { NetworkError, SparqlProbeResult, } from '@lde/distribution-probe';
4
+ import { rdf, _void, xsd } from '@tpluscode/rdf-ns-builders';
5
+ import namespace from '@rdfjs/namespace';
4
6
  const { quad, namedNode, literal } = DataFactory;
5
- const RDF = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#';
6
- const SCHEMA = 'https://schema.org/';
7
- const VOID = 'http://rdfs.org/ns/void#';
8
- const XSD = 'http://www.w3.org/2001/XMLSchema#';
9
- const HTTP_STATUS = 'https://www.w3.org/2011/http-statusCodes#';
7
+ // Custom namespaces not covered by the bundled builders: the bundled `schema`
8
+ // builder is `http://schema.org/`, but this output normalises to the `https://`
9
+ // scheme, and the HTTP status-codes vocabulary is not bundled at all.
10
+ const schema = namespace('https://schema.org/');
11
+ const httpStatus = namespace('https://www.w3.org/2011/http-statusCodes#');
10
12
  /**
11
13
  * Convert probe results into RDF quads describing each probe as a `schema:Action`.
12
14
  *
@@ -29,49 +31,48 @@ export async function* probeResultsToQuads(probeResults, datasetIri, importResul
29
31
  for (const result of probeResults) {
30
32
  const action = namedNode(skolemIri(actionBase, hashSuffix(result.url)));
31
33
  actionsByUrl.set(result.url, action);
32
- yield quad(action, namedNode(`${RDF}type`), namedNode(`${SCHEMA}Action`));
33
- yield quad(action, namedNode(`${SCHEMA}target`), namedNode(result.url));
34
+ yield quad(action, rdf.type, schema.Action);
35
+ yield quad(action, schema.target, namedNode(result.url));
34
36
  if (result instanceof NetworkError) {
35
- yield quad(action, namedNode(`${SCHEMA}error`), literal(result.message));
37
+ yield quad(action, schema.error, literal(result.message));
36
38
  }
37
39
  else if (result.isSuccess()) {
38
40
  yield* successQuads(action, result, datasetIri);
39
41
  for (const warning of result.warnings) {
40
- yield quad(action, namedNode(`${SCHEMA}error`), literal(warning));
42
+ yield quad(action, schema.error, literal(warning));
41
43
  }
42
44
  }
43
45
  else if (result.failureReason) {
44
- yield quad(action, namedNode(`${SCHEMA}error`), literal(result.failureReason));
46
+ yield quad(action, schema.error, literal(result.failureReason));
45
47
  }
46
48
  else {
47
49
  // HTTP error
48
- const statusUri = `${HTTP_STATUS}${result.statusText.replace(/ /g, '')}`;
49
- yield quad(action, namedNode(`${SCHEMA}error`), namedNode(statusUri));
50
+ yield quad(action, schema.error, httpStatus[result.statusText.replace(/ /g, '')]);
50
51
  }
51
52
  }
52
53
  if (importResult) {
53
54
  const action = actionsByUrl.get(importResult.distribution.accessUrl.toString());
54
55
  if (action) {
55
- yield quad(action, namedNode(`${SCHEMA}error`), literal(importResult.error));
56
+ yield quad(action, schema.error, literal(importResult.error));
56
57
  }
57
58
  }
58
59
  }
59
60
  function* successQuads(action, result, datasetIri) {
60
61
  const distributionUrl = namedNode(result.url);
61
- yield quad(action, namedNode(`${SCHEMA}result`), distributionUrl);
62
+ yield quad(action, schema.result, distributionUrl);
62
63
  if (result.lastModified) {
63
- yield quad(distributionUrl, namedNode(`${SCHEMA}dateModified`), literal(result.lastModified.toISOString(), namedNode(`${XSD}dateTime`)));
64
+ yield quad(distributionUrl, schema.dateModified, literal(result.lastModified.toISOString(), xsd.dateTime));
64
65
  }
65
66
  if (result instanceof SparqlProbeResult) {
66
- yield quad(namedNode(datasetIri), namedNode(`${VOID}sparqlEndpoint`), distributionUrl);
67
+ yield quad(namedNode(datasetIri), _void.sparqlEndpoint, distributionUrl);
67
68
  }
68
69
  else {
69
- yield quad(namedNode(datasetIri), namedNode(`${VOID}dataDump`), distributionUrl);
70
+ yield quad(namedNode(datasetIri), _void.dataDump, distributionUrl);
70
71
  if (result.contentSize) {
71
- yield quad(distributionUrl, namedNode(`${SCHEMA}contentSize`), literal(result.contentSize.toString()));
72
+ yield quad(distributionUrl, schema.contentSize, literal(result.contentSize.toString()));
72
73
  }
73
74
  if (result.contentType) {
74
- yield quad(distributionUrl, namedNode(`${SCHEMA}encodingFormat`), literal(result.contentType));
75
+ yield quad(distributionUrl, schema.encodingFormat, literal(result.contentType));
75
76
  }
76
77
  }
77
78
  }
@@ -42,7 +42,13 @@ export interface PipelineOptions {
42
42
  stageOutputResolver: StageOutputResolver;
43
43
  outputDir: string;
44
44
  };
45
- reporter?: ProgressReporter;
45
+ /**
46
+ * Observer(s) notified of pipeline lifecycle events. Pass an array to have
47
+ * several reporters observe the same run – e.g. a console reporter alongside
48
+ * a verdict-collecting one; every reporter receives each event in array
49
+ * order. A single reporter may be passed directly.
50
+ */
51
+ reporter?: ProgressReporter | readonly ProgressReporter[];
46
52
  /**
47
53
  * Optional per-dataset processing memory. When set, the pipeline skips a
48
54
  * dataset whose source-change fingerprint and {@link pipelineVersion} both
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAG1B,MAAM,4BAA4B,CAAC;AAKpC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAY7D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC;;;;;;;GAOG;AACH,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC,uBAAuB,CAAC,CAAC;CAC3D;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAkFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAyC;IAC3E,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;IACrD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAkB;IACnD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAS;gBAE9B,OAAO,EAAE,eAAe;IA2C9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;IA+K5B,+EAA+E;YACjE,aAAa;YAmBb,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;;;OAKG;IACH,OAAO,CAAC,WAAW;IAMnB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA+DP,SAAS;CAczB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAG1B,MAAM,4BAA4B,CAAC;AAKpC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAY7D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAG/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC;;;;;;;GAOG;AACH,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC,uBAAuB,CAAC,CAAC;CAC3D;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,gBAAgB,GAAG,SAAS,gBAAgB,EAAE,CAAC;IAC1D;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAkFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAyC;IAC3E,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;IACrD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAkB;IACnD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAS;gBAE9B,OAAO,EAAE,eAAe;IA+C9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;IA+K5B,+EAA+E;YACjE,aAAa;YAmBb,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;;;OAKG;IACH,OAAO,CAAC,WAAW;IAMnB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QA
AQ;YA+DP,SAAS;CAczB"}
package/dist/pipeline.js CHANGED
@@ -9,6 +9,7 @@ import { NetworkError, SparqlProbeResult, } from '@lde/distribution-probe';
9
9
  import { ImportSuccessful } from '@lde/sparql-importer';
10
10
  import { importOutcomeToVerdict, probeResultToVerdict, } from '@lde/distribution-health';
11
11
  import { NotSupported } from './sparql/executor.js';
12
+ import { combineReporters } from './combineReporters.js';
12
13
  import { ConstantTimeoutPolicy, } from './sparql/timeoutPolicy.js';
13
14
  /**
14
15
  * Split an async iterable into `count` branches that can be consumed
@@ -113,7 +114,11 @@ export class Pipeline {
113
114
  this.distributionResolver =
114
115
  options.distributionResolver ?? new SparqlDistributionResolver();
115
116
  this.chaining = options.chaining;
116
- this.reporter = options.reporter;
117
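+ // Collapse a reporter array into one combined reporter; a single reporter
118
+ // (or undefined) passes through unchanged. In the TypeScript source,
+ // `Array.isArray` narrows the array branch but does not exclude the readonly
+ // array from the else branch, so the single-reporter case is cast explicitly there.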
119
+ this.reporter = Array.isArray(options.reporter)
120
+ ? combineReporters(options.reporter)
121
+ : options.reporter;
117
122
  this.timeoutFactory =
118
123
  options.timeout ?? (() => new ConstantTimeoutPolicy(300_000));
119
124
  this.provenanceStore = options.provenanceStore;
@@ -1 +1 @@
1
- {"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAS5C,MAAM,WAAW,6BAA6B;IAC5C,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;GAOG;AACH,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,6BAA6B,GACrC,aAAa,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,CAErC;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,6BAA6B,GACrC,cAAc,CAKhB"}
1
+ {"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAO5C,MAAM,WAAW,6BAA6B;IAC5C,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;GAOG;AACH,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,6BAA6B,GACrC,aAAa,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,CAErC;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,6BAA6B,GACrC,cAAc,CAKhB"}
@@ -1,7 +1,6 @@
1
1
  import { DataFactory } from 'n3';
2
+ import { _void } from '@tpluscode/rdf-ns-builders';
2
3
  const { namedNode, quad } = DataFactory;
3
- const VOID_CLASS = namedNode('http://rdfs.org/ns/void#class');
4
- const VOID_PROPERTY = namedNode('http://rdfs.org/ns/void#property');
5
4
  /**
6
5
  * Creates a QuadTransform that rewrites namespace prefixes in `void:class` and
7
6
  * `void:property` quad objects from {@link NamespaceNormalizationOptions.from}
@@ -28,7 +27,7 @@ export function namespaceNormalizationPlugin(options) {
28
27
  }
29
28
  async function* normalizeNamespace(quads, { from, to }) {
30
29
  for await (const q of quads) {
31
- if ((q.predicate.equals(VOID_CLASS) || q.predicate.equals(VOID_PROPERTY)) &&
30
+ if ((q.predicate.equals(_void.class) || q.predicate.equals(_void.property)) &&
32
31
  q.object.termType === 'NamedNode' &&
33
32
  q.object.value.startsWith(from)) {
34
33
  yield quad(q.subject, q.predicate, namedNode(to + q.object.value.slice(from.length)), q.graph);
@@ -1 +1 @@
1
- {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAmB9E,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aAAa,CAAC,uBAAuB,CAGK,CAAC;AAE7E,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
1
+ {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAQ9E,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aAAa,CAAC,uBAAuB,CAGK,CAAC;AAE7E,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
@@ -1,13 +1,7 @@
1
1
  import { hashSuffix, skolemIri } from '@lde/dataset';
2
2
  import { DataFactory } from 'n3';
3
+ import { prov, rdf, xsd } from '@tpluscode/rdf-ns-builders';
3
4
  const { namedNode, literal, quad } = DataFactory;
4
- const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
5
- const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
6
- const PROV_ACTIVITY = namedNode('http://www.w3.org/ns/prov#Activity');
7
- const PROV_WAS_GENERATED_BY = namedNode('http://www.w3.org/ns/prov#wasGeneratedBy');
8
- const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime');
9
- const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
10
- const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
11
5
  /** QuadTransform that appends PROV-O provenance quads. */
12
6
  export const provenanceTransform = (quads, { dataset, stage }) => appendProvenanceQuads(quads, dataset.iri.toString(), stage, new Date());
13
7
  /** Pipeline plugin that appends PROV-O provenance to every stage's output. */
@@ -31,9 +25,9 @@ async function* appendProvenanceQuads(quads, iri, stage, startedAt) {
31
25
  // The IRI also makes a re-run idempotent: same (dataset, stage) → same node.
32
26
  // The `.well-known/prov#activity-<hash>` shape mirrors the linkset skolem.
33
27
  const activity = namedNode(skolemIri(`${iri}/.well-known/prov#activity`, hashSuffix(stage)));
34
- yield quad(subject, RDF_TYPE, PROV_ENTITY);
35
- yield quad(subject, PROV_WAS_GENERATED_BY, activity);
36
- yield quad(activity, RDF_TYPE, PROV_ACTIVITY);
37
- yield quad(activity, PROV_STARTED_AT_TIME, literal(startedAt.toISOString(), XSD_DATE_TIME));
38
- yield quad(activity, PROV_ENDED_AT_TIME, literal(endedAt.toISOString(), XSD_DATE_TIME));
28
+ yield quad(subject, rdf.type, prov.Entity);
29
+ yield quad(subject, prov.wasGeneratedBy, activity);
30
+ yield quad(activity, rdf.type, prov.Activity);
31
+ yield quad(activity, prov.startedAtTime, literal(startedAt.toISOString(), xsd.dateTime));
32
+ yield quad(activity, prov.endedAtTime, literal(endedAt.toISOString(), xsd.dateTime));
39
33
  }
@@ -1 +1 @@
1
- {"version":3,"file":"fileLoadedSparqlProvenanceStore.d.ts","sourceRoot":"","sources":["../../src/provenance/fileLoadedSparqlProvenanceStore.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AASlD,MAAM,WAAW,sCAAsC;IACrD,wEAAwE;IACxE,aAAa,EAAE,GAAG,CAAC;IACnB;;;OAGG;IACH,WAAW,EAAE,GAAG,CAAC;IACjB;;;;OAIG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;GAQG;AACH,qBAAa,+BAAgC,YAAW,eAAe;IACrE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAM;IACpC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAM;IAClC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,sCAAsC;IAWrD,GAAG,CAAC,UAAU,EAAE,GAAG,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAoB5D,OAAO,CAAC,WAAW;IAmBb,GAAG,CAAC,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;YAMpD,OAAO;CA0BvB"}
1
+ {"version":3,"file":"fileLoadedSparqlProvenanceStore.d.ts","sourceRoot":"","sources":["../../src/provenance/fileLoadedSparqlProvenanceStore.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAI9D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAgBlD,MAAM,WAAW,sCAAsC;IACrD,wEAAwE;IACxE,aAAa,EAAE,GAAG,CAAC;IACnB;;;OAGG;IACH,WAAW,EAAE,GAAG,CAAC;IACjB;;;;OAIG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;GAQG;AACH,qBAAa,+BAAgC,YAAW,eAAe;IACrE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAM;IACpC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAM;IAClC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,sCAAsC;IAWrD,GAAG,CAAC,UAAU,EAAE,GAAG,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAoB5D,OAAO,CAAC,WAAW;IAmBb,GAAG,CAAC,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;YAMpD,OAAO;CAsBvB"}
@@ -1,12 +1,15 @@
1
1
  import { Dataset, assertSafeIri } from '@lde/dataset';
2
2
  import { DataFactory } from 'n3';
3
3
  import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
4
+ import { prov, rdf, xsd } from '@tpluscode/rdf-ns-builders';
5
+ import namespace from '@rdfjs/namespace';
4
6
  import { FileWriter } from '../writer/fileWriter.js';
5
7
  const { namedNode, literal, quad } = DataFactory;
6
- const RDF_TYPE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#type';
7
- const PROV = 'http://www.w3.org/ns/prov#';
8
- const LDE = 'https://w3id.org/lde/provenance#';
9
- const XSD_DATE_TIME = 'http://www.w3.org/2001/XMLSchema#dateTime';
8
+ // Custom namespace for this pipeline’s flat PROV-O records; not a standard
9
+ // vocabulary, so it has no bundled builder. Constraining the term names makes it
10
+ // typo-safe and autocompleting like the bundled builders — `lde.valeu` is a
11
+ // compile error — while the base IRI stays the single source of truth.
12
+ const lde = namespace('https://w3id.org/lde/provenance#');
10
13
  /**
11
14
  * A {@link ProvenanceStore} for a triplestore that is served read-only and
12
15
  * rebuilt by bulk-loading files (e.g. QLever).
@@ -56,10 +59,10 @@ export class FileLoadedSparqlProvenanceStore {
56
59
  const dataset = `<${datasetIri}>`;
57
60
  return `SELECT ?fingerprint ?version ?status ?generatedAt WHERE {
58
61
  GRAPH <${pipelineIri}> {
59
- ${dataset} <${LDE}pipelineVersion> ?version ;
60
- <${LDE}status> ?status ;
61
- <${PROV}generatedAtTime> ?generatedAt .
62
- OPTIONAL { ${dataset} <${LDE}sourceFingerprint> ?fingerprint }
62
+ ${dataset} <${lde.pipelineVersion.value}> ?version ;
63
+ <${lde.status.value}> ?status ;
64
+ <${prov.generatedAtTime.value}> ?generatedAt .
65
+ OPTIONAL { ${dataset} <${lde.sourceFingerprint.value}> ?fingerprint }
63
66
  }
64
67
  } LIMIT 1`;
65
68
  }
@@ -70,12 +73,12 @@ export class FileLoadedSparqlProvenanceStore {
70
73
  }
71
74
  async *toQuads(datasetUri, record) {
72
75
  const subject = namedNode(datasetUri.toString());
73
- yield quad(subject, namedNode(RDF_TYPE), namedNode(`${PROV}Entity`));
74
- yield quad(subject, namedNode(`${PROV}generatedAtTime`), literal(record.generatedAt, namedNode(XSD_DATE_TIME)));
76
+ yield quad(subject, rdf.type, prov.Entity);
77
+ yield quad(subject, prov.generatedAtTime, literal(record.generatedAt, xsd.dateTime));
75
78
  if (record.sourceFingerprint !== null) {
76
- yield quad(subject, namedNode(`${LDE}sourceFingerprint`), literal(record.sourceFingerprint));
79
+ yield quad(subject, lde.sourceFingerprint, literal(record.sourceFingerprint));
77
80
  }
78
- yield quad(subject, namedNode(`${LDE}pipelineVersion`), literal(record.pipelineVersion));
79
- yield quad(subject, namedNode(`${LDE}status`), literal(record.status));
81
+ yield quad(subject, lde.pipelineVersion, literal(record.pipelineVersion));
82
+ yield quad(subject, lde.status, literal(record.status));
80
83
  }
81
84
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.30.17",
3
+ "version": "0.30.19",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"
@@ -26,11 +26,13 @@
26
26
  "dependencies": {
27
27
  "@lde/dataset": "0.7.7",
28
28
  "@lde/dataset-registry-client": "0.8.3",
29
- "@lde/distribution-health": "0.1.2",
30
- "@lde/distribution-probe": "0.1.10",
29
+ "@lde/distribution-health": "0.1.3",
30
+ "@lde/distribution-probe": "0.1.11",
31
31
  "@lde/sparql-importer": "0.6.5",
32
32
  "@lde/sparql-server": "0.4.11",
33
+ "@rdfjs/namespace": "^2.0.1",
33
34
  "@rdfjs/types": "^2.0.1",
35
+ "@tpluscode/rdf-ns-builders": "^5.0.0",
34
36
  "@traqula/generator-sparql-1-1": "^1.1.4",
35
37
  "@traqula/parser-sparql-1-1": "^1.1.4",
36
38
  "@traqula/rules-sparql-1-1": "^1.1.0",