@lde/pipeline 0.30.17 → 0.30.19
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +15 -0
- package/dist/combineReporters.d.ts +16 -0
- package/dist/combineReporters.d.ts.map +1 -0
- package/dist/combineReporters.js +51 -0
- package/dist/distribution/report.d.ts.map +1 -1
- package/dist/distribution/report.js +20 -19
- package/dist/pipeline.d.ts +7 -1
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +6 -1
- package/dist/plugin/namespaceNormalization.d.ts.map +1 -1
- package/dist/plugin/namespaceNormalization.js +2 -3
- package/dist/plugin/provenance.d.ts.map +1 -1
- package/dist/plugin/provenance.js +6 -12
- package/dist/provenance/fileLoadedSparqlProvenanceStore.d.ts.map +1 -1
- package/dist/provenance/fileLoadedSparqlProvenanceStore.js +16 -13
- package/package.json +5 -3
package/README.md
CHANGED
|
@@ -276,6 +276,21 @@ Writes generated quads to a destination:
|
|
|
276
276
|
- `SparqlUpdateWriter` — writes to a SPARQL endpoint via UPDATE queries
|
|
277
277
|
- `FileWriter` — writes to local files
|
|
278
278
|
|
|
279
|
+
### Reporter
|
|
280
|
+
|
|
281
|
+
A `ProgressReporter` observes the run, receiving lifecycle events such as `pipelineStart`, `stageComplete`, `datasetValidated` and `pipelineComplete`. Every method is optional, so a reporter implements only the events it cares about.
|
|
282
|
+
|
|
283
|
+
Pass a single reporter, or an array to have several observe the same run — for example a console reporter alongside one that collects validation verdicts:
|
|
284
|
+
|
|
285
|
+
```typescript
|
|
286
|
+
new Pipeline({
|
|
287
|
+
// …
|
|
288
|
+
reporter: [new ConsoleReporter(), verdictCollector],
|
|
289
|
+
});
|
|
290
|
+
```
|
|
291
|
+
|
|
292
|
+
Each reporter receives every event, in array order; a reporter that does not implement a given event is skipped for it.
|
|
293
|
+
|
|
279
294
|
### Provenance store
|
|
280
295
|
|
|
281
296
|
A `ProvenanceStore` gives the pipeline a small per-dataset memory, so a future run can skip datasets that are genuinely unchanged. It is purely a storage seam: the framework owns the skip decision (see [`sourceFingerprint`](#source-change-fingerprint) and `shouldReprocess`), the store owns only how each record is persisted.
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
import type { ProgressReporter } from './progressReporter.js';
|
|
2
|
+
/**
|
|
3
|
+
* Combine several {@link ProgressReporter}s into one that forwards every
|
|
4
|
+
* lifecycle call to each child that implements it. Lets a single run be
|
|
5
|
+
* observed by more than one reporter – e.g. a console reporter alongside a
|
|
6
|
+
* verdict-collecting one.
|
|
7
|
+
*
|
|
8
|
+
* Each method is dispatched to the children in array order; a child that does
|
|
9
|
+
* not implement a given (optional) method is skipped for that call.
|
|
10
|
+
*
|
|
11
|
+
* Internal to the package: not re-exported from `index.ts`. {@link Pipeline}
|
|
12
|
+
* uses it to normalise a `reporter` array into the single reporter its call
|
|
13
|
+
* sites expect, so the broader API need not grow a new public symbol.
|
|
14
|
+
*/
|
|
15
|
+
export declare function combineReporters(reporters: readonly ProgressReporter[]): ProgressReporter;
|
|
16
|
+
//# sourceMappingURL=combineReporters.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"combineReporters.d.ts","sourceRoot":"","sources":["../src/combineReporters.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAU9D;;;;;;;;;;;;GAYG;AACH,wBAAgB,gBAAgB,CAC9B,SAAS,EAAE,SAAS,gBAAgB,EAAE,GACrC,gBAAgB,CA4ClB"}
|
|
@@ -0,0 +1,51 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Combine several {@link ProgressReporter}s into one that forwards every
|
|
3
|
+
* lifecycle call to each child that implements it. Lets a single run be
|
|
4
|
+
* observed by more than one reporter – e.g. a console reporter alongside a
|
|
5
|
+
* verdict-collecting one.
|
|
6
|
+
*
|
|
7
|
+
* Each method is dispatched to the children in array order; a child that does
|
|
8
|
+
* not implement a given (optional) method is skipped for that call.
|
|
9
|
+
*
|
|
10
|
+
* Internal to the package: not re-exported from `index.ts`. {@link Pipeline}
|
|
11
|
+
* uses it to normalise a `reporter` array into the single reporter its call
|
|
12
|
+
* sites expect, so the broader API need not grow a new public symbol.
|
|
13
|
+
*/
|
|
14
|
+
export function combineReporters(reporters) {
|
|
15
|
+
const forward = (method, ...args) => {
|
|
16
|
+
for (const reporter of reporters) {
|
|
17
|
+
// Cast to the concrete signature for `method`: indexing by a generic key
|
|
18
|
+
// yields a union of method types TS won't call directly, even though the
|
|
19
|
+
// arguments are correlated.
|
|
20
|
+
const handler = reporter[method];
|
|
21
|
+
// Every method is optional; notify only the children that implement it.
|
|
22
|
+
handler?.(...args);
|
|
23
|
+
}
|
|
24
|
+
};
|
|
25
|
+
// Listing every method explicitly (rather than a Proxy) keeps the forwarding
|
|
26
|
+
// type-safe: typing the result as `Required<ProgressReporter>` forces a new
|
|
27
|
+
// entry here whenever the interface grows, so a forgotten method fails to
|
|
28
|
+
// compile instead of silently going unforwarded.
|
|
29
|
+
const combined = {
|
|
30
|
+
pipelineStart: (...args) => forward('pipelineStart', ...args),
|
|
31
|
+
datasetsSelected: (...args) => forward('datasetsSelected', ...args),
|
|
32
|
+
datasetStart: (...args) => forward('datasetStart', ...args),
|
|
33
|
+
distributionProbed: (...args) => forward('distributionProbed', ...args),
|
|
34
|
+
importStarted: (...args) => forward('importStarted', ...args),
|
|
35
|
+
importFailed: (...args) => forward('importFailed', ...args),
|
|
36
|
+
distributionValidated: (...args) => forward('distributionValidated', ...args),
|
|
37
|
+
distributionSelected: (...args) => forward('distributionSelected', ...args),
|
|
38
|
+
stageStart: (...args) => forward('stageStart', ...args),
|
|
39
|
+
stageProgress: (...args) => forward('stageProgress', ...args),
|
|
40
|
+
stageComplete: (...args) => forward('stageComplete', ...args),
|
|
41
|
+
stageFailed: (...args) => forward('stageFailed', ...args),
|
|
42
|
+
stageSkipped: (...args) => forward('stageSkipped', ...args),
|
|
43
|
+
datasetValidated: (...args) => forward('datasetValidated', ...args),
|
|
44
|
+
datasetComplete: (...args) => forward('datasetComplete', ...args),
|
|
45
|
+
datasetSkipped: (...args) => forward('datasetSkipped', ...args),
|
|
46
|
+
pipelineComplete: (...args) => forward('pipelineComplete', ...args),
|
|
47
|
+
timeoutTightened: (...args) => forward('timeoutTightened', ...args),
|
|
48
|
+
timeoutRelaxed: (...args) => forward('timeoutRelaxed', ...args),
|
|
49
|
+
};
|
|
50
|
+
return combined;
|
|
51
|
+
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/distribution/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,yBAAyB,CAAC;
|
|
1
|
+
{"version":3,"file":"report.d.ts","sourceRoot":"","sources":["../../src/distribution/report.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEzD,OAAO,EAAe,KAAK,IAAI,EAAE,MAAM,IAAI,CAAC;AAC5C,OAAO,EAIL,KAAK,eAAe,EACrB,MAAM,yBAAyB,CAAC;AAYjC;;;;;;;;;GASG;AACH,wBAAuB,mBAAmB,CACxC,YAAY,EAAE,eAAe,EAAE,EAC/B,UAAU,EAAE,MAAM,EAClB,YAAY,CAAC,EAAE,YAAY,GAC1B,aAAa,CAAC,IAAI,CAAC,CA4CrB"}
|
|
@@ -1,12 +1,14 @@
|
|
|
1
1
|
import { hashSuffix, skolemIri } from '@lde/dataset';
|
|
2
2
|
import { DataFactory } from 'n3';
|
|
3
3
|
import { NetworkError, SparqlProbeResult, } from '@lde/distribution-probe';
|
|
4
|
+
import { rdf, _void, xsd } from '@tpluscode/rdf-ns-builders';
|
|
5
|
+
import namespace from '@rdfjs/namespace';
|
|
4
6
|
const { quad, namedNode, literal } = DataFactory;
|
|
5
|
-
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
const
|
|
9
|
-
const
|
|
7
|
+
// Custom namespaces not covered by the bundled builders: the bundled `schema`
|
|
8
|
+
// builder is `http://schema.org/`, but this output normalises to the `https://`
|
|
9
|
+
// scheme, and the HTTP status-codes vocabulary is not bundled at all.
|
|
10
|
+
const schema = namespace('https://schema.org/');
|
|
11
|
+
const httpStatus = namespace('https://www.w3.org/2011/http-statusCodes#');
|
|
10
12
|
/**
|
|
11
13
|
* Convert probe results into RDF quads describing each probe as a `schema:Action`.
|
|
12
14
|
*
|
|
@@ -29,49 +31,48 @@ export async function* probeResultsToQuads(probeResults, datasetIri, importResul
|
|
|
29
31
|
for (const result of probeResults) {
|
|
30
32
|
const action = namedNode(skolemIri(actionBase, hashSuffix(result.url)));
|
|
31
33
|
actionsByUrl.set(result.url, action);
|
|
32
|
-
yield quad(action,
|
|
33
|
-
yield quad(action,
|
|
34
|
+
yield quad(action, rdf.type, schema.Action);
|
|
35
|
+
yield quad(action, schema.target, namedNode(result.url));
|
|
34
36
|
if (result instanceof NetworkError) {
|
|
35
|
-
yield quad(action,
|
|
37
|
+
yield quad(action, schema.error, literal(result.message));
|
|
36
38
|
}
|
|
37
39
|
else if (result.isSuccess()) {
|
|
38
40
|
yield* successQuads(action, result, datasetIri);
|
|
39
41
|
for (const warning of result.warnings) {
|
|
40
|
-
yield quad(action,
|
|
42
|
+
yield quad(action, schema.error, literal(warning));
|
|
41
43
|
}
|
|
42
44
|
}
|
|
43
45
|
else if (result.failureReason) {
|
|
44
|
-
yield quad(action,
|
|
46
|
+
yield quad(action, schema.error, literal(result.failureReason));
|
|
45
47
|
}
|
|
46
48
|
else {
|
|
47
49
|
// HTTP error
|
|
48
|
-
|
|
49
|
-
yield quad(action, namedNode(`${SCHEMA}error`), namedNode(statusUri));
|
|
50
|
+
yield quad(action, schema.error, httpStatus[result.statusText.replace(/ /g, '')]);
|
|
50
51
|
}
|
|
51
52
|
}
|
|
52
53
|
if (importResult) {
|
|
53
54
|
const action = actionsByUrl.get(importResult.distribution.accessUrl.toString());
|
|
54
55
|
if (action) {
|
|
55
|
-
yield quad(action,
|
|
56
|
+
yield quad(action, schema.error, literal(importResult.error));
|
|
56
57
|
}
|
|
57
58
|
}
|
|
58
59
|
}
|
|
59
60
|
function* successQuads(action, result, datasetIri) {
|
|
60
61
|
const distributionUrl = namedNode(result.url);
|
|
61
|
-
yield quad(action,
|
|
62
|
+
yield quad(action, schema.result, distributionUrl);
|
|
62
63
|
if (result.lastModified) {
|
|
63
|
-
yield quad(distributionUrl,
|
|
64
|
+
yield quad(distributionUrl, schema.dateModified, literal(result.lastModified.toISOString(), xsd.dateTime));
|
|
64
65
|
}
|
|
65
66
|
if (result instanceof SparqlProbeResult) {
|
|
66
|
-
yield quad(namedNode(datasetIri),
|
|
67
|
+
yield quad(namedNode(datasetIri), _void.sparqlEndpoint, distributionUrl);
|
|
67
68
|
}
|
|
68
69
|
else {
|
|
69
|
-
yield quad(namedNode(datasetIri),
|
|
70
|
+
yield quad(namedNode(datasetIri), _void.dataDump, distributionUrl);
|
|
70
71
|
if (result.contentSize) {
|
|
71
|
-
yield quad(distributionUrl,
|
|
72
|
+
yield quad(distributionUrl, schema.contentSize, literal(result.contentSize.toString()));
|
|
72
73
|
}
|
|
73
74
|
if (result.contentType) {
|
|
74
|
-
yield quad(distributionUrl,
|
|
75
|
+
yield quad(distributionUrl, schema.encodingFormat, literal(result.contentType));
|
|
75
76
|
}
|
|
76
77
|
}
|
|
77
78
|
}
|
package/dist/pipeline.d.ts
CHANGED
|
@@ -42,7 +42,13 @@ export interface PipelineOptions {
|
|
|
42
42
|
stageOutputResolver: StageOutputResolver;
|
|
43
43
|
outputDir: string;
|
|
44
44
|
};
|
|
45
|
-
|
|
45
|
+
/**
|
|
46
|
+
* Observer(s) notified of pipeline lifecycle events. Pass an array to have
|
|
47
|
+
* several reporters observe the same run – e.g. a console reporter alongside
|
|
48
|
+
* a verdict-collecting one; every reporter receives each event in array
|
|
49
|
+
* order. A single reporter may be passed directly.
|
|
50
|
+
*/
|
|
51
|
+
reporter?: ProgressReporter | readonly ProgressReporter[];
|
|
46
52
|
/**
|
|
47
53
|
* Optional per-dataset processing memory. When set, the pipeline skips a
|
|
48
54
|
* dataset whose source-change fingerprint and {@link pipelineVersion} both
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAG1B,MAAM,4BAA4B,CAAC;AAKpC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAY7D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAG1B,MAAM,4BAA4B,CAAC;AAKpC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAY7D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAG/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC;;;;;;;GAOG;AACH,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC,uBAAuB,CAAC,CAAC;CAC3D;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,gBAAgB,GAAG,SAAS,gBAAgB,EAAE,CAAC;IAC1D;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAkFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAyC;IAC3E,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;IACrD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAkB;IACnD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAS;gBAE9B,OAAO,EAAE,eAAe;IA+C9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;IA+K5B,+EAA+E;YACjE,aAAa;YAmBb,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;;;OAKG;IACH,OAAO,CAAC,WAAW;IAMnB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA+DP,SAAS;CAczB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -9,6 +9,7 @@ import { NetworkError, SparqlProbeResult, } from '@lde/distribution-probe';
|
|
|
9
9
|
import { ImportSuccessful } from '@lde/sparql-importer';
|
|
10
10
|
import { importOutcomeToVerdict, probeResultToVerdict, } from '@lde/distribution-health';
|
|
11
11
|
import { NotSupported } from './sparql/executor.js';
|
|
12
|
+
import { combineReporters } from './combineReporters.js';
|
|
12
13
|
import { ConstantTimeoutPolicy, } from './sparql/timeoutPolicy.js';
|
|
13
14
|
/**
|
|
14
15
|
* Split an async iterable into `count` branches that can be consumed
|
|
@@ -113,7 +114,11 @@ export class Pipeline {
|
|
|
113
114
|
this.distributionResolver =
|
|
114
115
|
options.distributionResolver ?? new SparqlDistributionResolver();
|
|
115
116
|
this.chaining = options.chaining;
|
|
116
|
-
|
|
117
|
+
// `Array.isArray` narrows the array branch but not the readonly-array out of
|
|
118
|
+
// the else branch, so cast the single-reporter case explicitly.
|
|
119
|
+
this.reporter = Array.isArray(options.reporter)
|
|
120
|
+
? combineReporters(options.reporter)
|
|
121
|
+
: options.reporter;
|
|
117
122
|
this.timeoutFactory =
|
|
118
123
|
options.timeout ?? (() => new ConstantTimeoutPolicy(300_000));
|
|
119
124
|
this.provenanceStore = options.provenanceStore;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"namespaceNormalization.d.ts","sourceRoot":"","sources":["../../src/plugin/namespaceNormalization.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AACrD,OAAO,KAAK,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAO5C,MAAM,WAAW,6BAA6B;IAC5C,iEAAiE;IACjE,IAAI,EAAE,MAAM,CAAC;IACb,yEAAyE;IACzE,EAAE,EAAE,MAAM,CAAC;CACZ;AAED;;;;;;;GAOG;AACH,wBAAgB,+BAA+B,CAC7C,OAAO,EAAE,6BAA6B,GACrC,aAAa,CAAC;IAAE,OAAO,EAAE,OAAO,CAAA;CAAE,CAAC,CAErC;AAED;;;;;;GAMG;AACH,wBAAgB,4BAA4B,CAC1C,OAAO,EAAE,6BAA6B,GACrC,cAAc,CAKhB"}
|
|
@@ -1,7 +1,6 @@
|
|
|
1
1
|
import { DataFactory } from 'n3';
|
|
2
|
+
import { _void } from '@tpluscode/rdf-ns-builders';
|
|
2
3
|
const { namedNode, quad } = DataFactory;
|
|
3
|
-
const VOID_CLASS = namedNode('http://rdfs.org/ns/void#class');
|
|
4
|
-
const VOID_PROPERTY = namedNode('http://rdfs.org/ns/void#property');
|
|
5
4
|
/**
|
|
6
5
|
* Creates a QuadTransform that rewrites namespace prefixes in `void:class` and
|
|
7
6
|
* `void:property` quad objects from {@link NamespaceNormalizationOptions.from}
|
|
@@ -28,7 +27,7 @@ export function namespaceNormalizationPlugin(options) {
|
|
|
28
27
|
}
|
|
29
28
|
async function* normalizeNamespace(quads, { from, to }) {
|
|
30
29
|
for await (const q of quads) {
|
|
31
|
-
if ((q.predicate.equals(
|
|
30
|
+
if ((q.predicate.equals(_void.class) || q.predicate.equals(_void.property)) &&
|
|
32
31
|
q.object.termType === 'NamedNode' &&
|
|
33
32
|
q.object.value.startsWith(from)) {
|
|
34
33
|
yield quad(q.subject, q.predicate, namedNode(to + q.object.value.slice(from.length)), q.graph);
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../../src/plugin/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AACjD,OAAO,KAAK,EAAE,uBAAuB,EAAE,cAAc,EAAE,MAAM,gBAAgB,CAAC;AAQ9E,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aAAa,CAAC,uBAAuB,CAGK,CAAC;AAE7E,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
|
|
@@ -1,13 +1,7 @@
|
|
|
1
1
|
import { hashSuffix, skolemIri } from '@lde/dataset';
|
|
2
2
|
import { DataFactory } from 'n3';
|
|
3
|
+
import { prov, rdf, xsd } from '@tpluscode/rdf-ns-builders';
|
|
3
4
|
const { namedNode, literal, quad } = DataFactory;
|
|
4
|
-
const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
|
|
5
|
-
const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
|
|
6
|
-
const PROV_ACTIVITY = namedNode('http://www.w3.org/ns/prov#Activity');
|
|
7
|
-
const PROV_WAS_GENERATED_BY = namedNode('http://www.w3.org/ns/prov#wasGeneratedBy');
|
|
8
|
-
const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime');
|
|
9
|
-
const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
|
|
10
|
-
const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
|
|
11
5
|
/** QuadTransform that appends PROV-O provenance quads. */
|
|
12
6
|
export const provenanceTransform = (quads, { dataset, stage }) => appendProvenanceQuads(quads, dataset.iri.toString(), stage, new Date());
|
|
13
7
|
/** Pipeline plugin that appends PROV-O provenance to every stage's output. */
|
|
@@ -31,9 +25,9 @@ async function* appendProvenanceQuads(quads, iri, stage, startedAt) {
|
|
|
31
25
|
// The IRI also makes a re-run idempotent: same (dataset, stage) → same node.
|
|
32
26
|
// The `.well-known/prov#activity-<hash>` shape mirrors the linkset skolem.
|
|
33
27
|
const activity = namedNode(skolemIri(`${iri}/.well-known/prov#activity`, hashSuffix(stage)));
|
|
34
|
-
yield quad(subject,
|
|
35
|
-
yield quad(subject,
|
|
36
|
-
yield quad(activity,
|
|
37
|
-
yield quad(activity,
|
|
38
|
-
yield quad(activity,
|
|
28
|
+
yield quad(subject, rdf.type, prov.Entity);
|
|
29
|
+
yield quad(subject, prov.wasGeneratedBy, activity);
|
|
30
|
+
yield quad(activity, rdf.type, prov.Activity);
|
|
31
|
+
yield quad(activity, prov.startedAtTime, literal(startedAt.toISOString(), xsd.dateTime));
|
|
32
|
+
yield quad(activity, prov.endedAtTime, literal(endedAt.toISOString(), xsd.dateTime));
|
|
39
33
|
}
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"fileLoadedSparqlProvenanceStore.d.ts","sourceRoot":"","sources":["../../src/provenance/fileLoadedSparqlProvenanceStore.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"fileLoadedSparqlProvenanceStore.d.ts","sourceRoot":"","sources":["../../src/provenance/fileLoadedSparqlProvenanceStore.ts"],"names":[],"mappings":"AAGA,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAI9D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,aAAa,CAAC;AACpD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,YAAY,CAAC;AAgBlD,MAAM,WAAW,sCAAsC;IACrD,wEAAwE;IACxE,aAAa,EAAE,GAAG,CAAC;IACnB;;;OAGG;IACH,WAAW,EAAE,GAAG,CAAC;IACjB;;;;OAIG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;GAQG;AACH,qBAAa,+BAAgC,YAAW,eAAe;IACrE,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAM;IACpC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAM;IAClC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAa;IACpC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,sCAAsC;IAWrD,GAAG,CAAC,UAAU,EAAE,GAAG,GAAG,OAAO,CAAC,gBAAgB,GAAG,IAAI,CAAC;IAoB5D,OAAO,CAAC,WAAW;IAmBb,GAAG,CAAC,UAAU,EAAE,GAAG,EAAE,MAAM,EAAE,gBAAgB,GAAG,OAAO,CAAC,IAAI,CAAC;YAMpD,OAAO;CAsBvB"}
|
|
@@ -1,12 +1,15 @@
|
|
|
1
1
|
import { Dataset, assertSafeIri } from '@lde/dataset';
|
|
2
2
|
import { DataFactory } from 'n3';
|
|
3
3
|
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
4
|
+
import { prov, rdf, xsd } from '@tpluscode/rdf-ns-builders';
|
|
5
|
+
import namespace from '@rdfjs/namespace';
|
|
4
6
|
import { FileWriter } from '../writer/fileWriter.js';
|
|
5
7
|
const { namedNode, literal, quad } = DataFactory;
|
|
6
|
-
|
|
7
|
-
|
|
8
|
-
|
|
9
|
-
|
|
8
|
+
// Custom namespace for this pipeline’s flat PROV-O records; not a standard
|
|
9
|
+
// vocabulary, so it has no bundled builder. Constraining the term names makes it
|
|
10
|
+
// typo-safe and autocompleting like the bundled builders — `lde.valeu` is a
|
|
11
|
+
// compile error — while the base IRI stays the single source of truth.
|
|
12
|
+
const lde = namespace('https://w3id.org/lde/provenance#');
|
|
10
13
|
/**
|
|
11
14
|
* A {@link ProvenanceStore} for a triplestore that is served read-only and
|
|
12
15
|
* rebuilt by bulk-loading files (e.g. QLever).
|
|
@@ -56,10 +59,10 @@ export class FileLoadedSparqlProvenanceStore {
|
|
|
56
59
|
const dataset = `<${datasetIri}>`;
|
|
57
60
|
return `SELECT ?fingerprint ?version ?status ?generatedAt WHERE {
|
|
58
61
|
GRAPH <${pipelineIri}> {
|
|
59
|
-
${dataset} <${
|
|
60
|
-
<${
|
|
61
|
-
<${
|
|
62
|
-
OPTIONAL { ${dataset} <${
|
|
62
|
+
${dataset} <${lde.pipelineVersion.value}> ?version ;
|
|
63
|
+
<${lde.status.value}> ?status ;
|
|
64
|
+
<${prov.generatedAtTime.value}> ?generatedAt .
|
|
65
|
+
OPTIONAL { ${dataset} <${lde.sourceFingerprint.value}> ?fingerprint }
|
|
63
66
|
}
|
|
64
67
|
} LIMIT 1`;
|
|
65
68
|
}
|
|
@@ -70,12 +73,12 @@ export class FileLoadedSparqlProvenanceStore {
|
|
|
70
73
|
}
|
|
71
74
|
async *toQuads(datasetUri, record) {
|
|
72
75
|
const subject = namedNode(datasetUri.toString());
|
|
73
|
-
yield quad(subject,
|
|
74
|
-
yield quad(subject,
|
|
76
|
+
yield quad(subject, rdf.type, prov.Entity);
|
|
77
|
+
yield quad(subject, prov.generatedAtTime, literal(record.generatedAt, xsd.dateTime));
|
|
75
78
|
if (record.sourceFingerprint !== null) {
|
|
76
|
-
yield quad(subject,
|
|
79
|
+
yield quad(subject, lde.sourceFingerprint, literal(record.sourceFingerprint));
|
|
77
80
|
}
|
|
78
|
-
yield quad(subject,
|
|
79
|
-
yield quad(subject,
|
|
81
|
+
yield quad(subject, lde.pipelineVersion, literal(record.pipelineVersion));
|
|
82
|
+
yield quad(subject, lde.status, literal(record.status));
|
|
80
83
|
}
|
|
81
84
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.30.
|
|
3
|
+
"version": "0.30.19",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -26,11 +26,13 @@
|
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.7",
|
|
28
28
|
"@lde/dataset-registry-client": "0.8.3",
|
|
29
|
-
"@lde/distribution-health": "0.1.
|
|
30
|
-
"@lde/distribution-probe": "0.1.
|
|
29
|
+
"@lde/distribution-health": "0.1.3",
|
|
30
|
+
"@lde/distribution-probe": "0.1.11",
|
|
31
31
|
"@lde/sparql-importer": "0.6.5",
|
|
32
32
|
"@lde/sparql-server": "0.4.11",
|
|
33
|
+
"@rdfjs/namespace": "^2.0.1",
|
|
33
34
|
"@rdfjs/types": "^2.0.1",
|
|
35
|
+
"@tpluscode/rdf-ns-builders": "^5.0.0",
|
|
34
36
|
"@traqula/generator-sparql-1-1": "^1.1.4",
|
|
35
37
|
"@traqula/parser-sparql-1-1": "^1.1.4",
|
|
36
38
|
"@traqula/rules-sparql-1-1": "^1.1.0",
|