@lde/pipeline 0.6.32 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +5 -8
- package/dist/distribution/importResolver.d.ts +22 -0
- package/dist/distribution/importResolver.d.ts.map +1 -0
- package/dist/distribution/importResolver.js +36 -0
- package/dist/distribution/index.d.ts +1 -0
- package/dist/distribution/index.d.ts.map +1 -1
- package/dist/distribution/index.js +1 -0
- package/dist/distribution/resolver.d.ts +2 -10
- package/dist/distribution/resolver.d.ts.map +1 -1
- package/dist/distribution/resolver.js +2 -29
- package/dist/index.d.ts +1 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +1 -0
- package/dist/pipeline.d.ts +8 -1
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +23 -4
- package/dist/provenance.d.ts +7 -0
- package/dist/provenance.d.ts.map +1 -0
- package/dist/provenance.js +31 -0
- package/dist/stage.d.ts +3 -0
- package/dist/stage.d.ts.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
@@ -1,6 +1,10 @@
 # Pipeline
 
-
+A framework for transforming large RDF datasets using pure [SPARQL](https://www.w3.org/TR/sparql11-query/) queries.
+
+- **SPARQL-native.** Data transformations are plain SPARQL query files — portable, transparent, testable and version-controlled.
+- **Composable.** Decorators wrap executors and resolvers to add behaviour (provenance, vocabulary detection, data import) without subclassing.
+- **Extensible.** A plugin system lets packages like [@lde/pipeline-void](../pipeline-void) (or your own plugins) hook into the pipeline lifecycle.
 
 ## Components
 
@@ -82,7 +86,6 @@ import {
   SparqlItemSelector,
   SparqlUpdateWriter,
   ManualDatasetSelection,
-  SparqlDistributionResolver,
 } from '@lde/pipeline';
 
 const pipeline = new Pipeline({
@@ -106,9 +109,3 @@ const pipeline = new Pipeline({
 
 await pipeline.run();
 ```
-
-## Validation
-
-```sh
-npx nx run-many -t lint test typecheck build --projects=@lde/pipeline
-```
package/dist/distribution/importResolver.d.ts
ADDED
@@ -0,0 +1,22 @@
+import type { Importer } from '@lde/sparql-importer';
+import type { SparqlServer } from '@lde/sparql-server';
+import { type DistributionResolver, NoDistributionAvailable, ResolvedDistribution } from './resolver.js';
+export interface ImportResolverOptions {
+    importer: Importer;
+    server?: SparqlServer;
+}
+/**
+ * A {@link DistributionResolver} decorator that adds import-as-fallback logic.
+ *
+ * Delegates to an inner resolver first. If the inner resolver returns
+ * {@link NoDistributionAvailable}, tries importing the dataset and optionally
+ * starts a SPARQL server.
+ */
+export declare class ImportResolver implements DistributionResolver {
+    private readonly inner;
+    private readonly options;
+    constructor(inner: DistributionResolver, options: ImportResolverOptions);
+    resolve(...args: Parameters<DistributionResolver['resolve']>): Promise<ResolvedDistribution | NoDistributionAvailable>;
+    cleanup(): Promise<void>;
+}
+//# sourceMappingURL=importResolver.d.ts.map
package/dist/distribution/importResolver.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,OAAO,CACX,GAAG,IAAI,EAAE,UAAU,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,GACnD,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IAoCpD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
package/dist/distribution/importResolver.js
ADDED
@@ -0,0 +1,36 @@
+import { Distribution } from '@lde/dataset';
+import { ImportFailed, ImportSuccessful } from '@lde/sparql-importer';
+import { NoDistributionAvailable, ResolvedDistribution, } from './resolver.js';
+/**
+ * A {@link DistributionResolver} decorator that adds import-as-fallback logic.
+ *
+ * Delegates to an inner resolver first. If the inner resolver returns
+ * {@link NoDistributionAvailable}, tries importing the dataset and optionally
+ * starts a SPARQL server.
+ */
+export class ImportResolver {
+    inner;
+    options;
+    constructor(inner, options) {
+        this.inner = inner;
+        this.options = options;
+    }
+    async resolve(...args) {
+        const result = await this.inner.resolve(...args);
+        if (result instanceof ResolvedDistribution)
+            return result;
+        const [dataset] = args;
+        const importResult = await this.options.importer.import(dataset);
+        if (importResult instanceof ImportSuccessful) {
+            if (this.options.server) {
+                await this.options.server.start();
+                return new ResolvedDistribution(Distribution.sparql(this.options.server.queryEndpoint, importResult.identifier), result.probeResults);
+            }
+            return new ResolvedDistribution(Distribution.sparql(importResult.distribution.accessUrl, importResult.identifier), result.probeResults);
+        }
+        return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', result.probeResults, importResult instanceof ImportFailed ? importResult : undefined);
+    }
+    async cleanup() {
+        await this.options.server?.stop();
+    }
+}
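The new decorator replaces the `importer`/`server` options that 0.6.x accepted on `SparqlDistributionResolver` (removed further down in resolver.d.ts and resolver.js). A minimal migration sketch, assuming you bring your own `Importer` implementation and optionally a local `SparqlServer`; `myImporter` and `myServer` are hypothetical placeholders, not exports of this package:

```ts
import { ImportResolver, SparqlDistributionResolver } from '@lde/pipeline';
import type { Importer } from '@lde/sparql-importer';
import type { SparqlServer } from '@lde/sparql-server';

declare const myImporter: Importer; // hypothetical: your Importer implementation
declare const myServer: SparqlServer; // hypothetical: serves the imported data dump

// 0.6.x: new SparqlDistributionResolver({ importer: myImporter, server: myServer })
// 0.7.0: wrap the plain resolver in the ImportResolver decorator instead.
const resolver = new ImportResolver(new SparqlDistributionResolver(), {
  importer: myImporter,
  server: myServer,
});
```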
package/dist/distribution/index.d.ts
CHANGED
@@ -1,5 +1,6 @@
 export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeResultType, } from './probe.js';
 export { probeResultsToQuads } from './report.js';
+export { ImportResolver, type ImportResolverOptions, } from './importResolver.js';
 export { ResolvedDistribution, NoDistributionAvailable, SparqlDistributionResolver, type DistributionResolver, type SparqlDistributionResolverOptions, } from './resolver.js';
 export { resolveDistributions, type DistributionStageResult, } from './resolveDistributions.js';
 //# sourceMappingURL=index.d.ts.map
package/dist/distribution/index.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/distribution/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAElD,OAAO,EACL,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,KAAK,oBAAoB,EACzB,KAAK,iCAAiC,GACvC,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,oBAAoB,EACpB,KAAK,uBAAuB,GAC7B,MAAM,2BAA2B,CAAC"}
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/distribution/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAElD,OAAO,EACL,cAAc,EACd,KAAK,qBAAqB,GAC3B,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,KAAK,oBAAoB,EACzB,KAAK,iCAAiC,GACvC,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,oBAAoB,EACpB,KAAK,uBAAuB,GAC7B,MAAM,2BAA2B,CAAC"}
package/dist/distribution/index.js
CHANGED
@@ -1,4 +1,5 @@
 export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
 export { probeResultsToQuads } from './report.js';
+export { ImportResolver, } from './importResolver.js';
 export { ResolvedDistribution, NoDistributionAvailable, SparqlDistributionResolver, } from './resolver.js';
 export { resolveDistributions, } from './resolveDistributions.js';
package/dist/distribution/resolver.d.ts
CHANGED
@@ -1,7 +1,5 @@
 import { Dataset, Distribution } from '@lde/dataset';
-import type {
-import { ImportFailed } from '@lde/sparql-importer';
-import type { SparqlServer } from '@lde/sparql-server';
+import type { ImportFailed } from '@lde/sparql-importer';
 import { type ProbeResultType } from './probe.js';
 export declare class ResolvedDistribution {
     readonly distribution: Distribution;
@@ -20,8 +18,6 @@ export interface DistributionResolver {
     cleanup?(): Promise<void>;
 }
 export interface SparqlDistributionResolverOptions {
-    importer?: Importer;
-    server?: SparqlServer;
     timeout?: number;
 }
 /**
@@ -29,17 +25,13 @@ export interface SparqlDistributionResolverOptions {
  *
  * 1. Probes all distributions in parallel.
  * 2. Returns the first valid SPARQL endpoint as a `ResolvedDistribution`.
- * 3. If none:
- * 4. If nothing works: returns `NoDistributionAvailable`.
+ * 3. If none: returns `NoDistributionAvailable`.
  *
  * Does not mutate `dataset.distributions`.
  */
 export declare class SparqlDistributionResolver implements DistributionResolver {
-    private readonly importer?;
-    private readonly server?;
     private readonly timeout;
     constructor(options?: SparqlDistributionResolverOptions);
     resolve(dataset: Dataset): Promise<ResolvedDistribution | NoDistributionAvailable>;
-    cleanup(): Promise<void>;
 }
 //# sourceMappingURL=resolver.d.ts.map
package/dist/distribution/resolver.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,
+{"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAA4B,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAE5E,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;gBAD/B,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE;CAE3C;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;GAQG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAIjD,OAAO,CACX,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CA2B3D"}
package/dist/distribution/resolver.js
CHANGED
@@ -1,5 +1,3 @@
-import { Distribution } from '@lde/dataset';
-import { ImportFailed, ImportSuccessful } from '@lde/sparql-importer';
 import { probe, SparqlProbeResult } from './probe.js';
 export class ResolvedDistribution {
     distribution;
@@ -26,18 +24,13 @@ export class NoDistributionAvailable {
  *
  * 1. Probes all distributions in parallel.
  * 2. Returns the first valid SPARQL endpoint as a `ResolvedDistribution`.
- * 3. If none:
- * 4. If nothing works: returns `NoDistributionAvailable`.
+ * 3. If none: returns `NoDistributionAvailable`.
  *
  * Does not mutate `dataset.distributions`.
  */
 export class SparqlDistributionResolver {
-    importer;
-    server;
     timeout;
     constructor(options) {
-        this.importer = options?.importer;
-        this.server = options?.server;
         this.timeout = options?.timeout ?? 5000;
     }
     async resolve(dataset) {
@@ -52,26 +45,6 @@ export class SparqlDistributionResolver {
                 return new ResolvedDistribution(distribution, results);
             }
         }
-
-        if (this.importer) {
-            const importResult = await this.importer.import(dataset);
-            if (importResult instanceof ImportSuccessful) {
-                // Start server if provided, using its query endpoint.
-                if (this.server) {
-                    await this.server.start();
-                    const distribution = Distribution.sparql(this.server.queryEndpoint, importResult.identifier);
-                    return new ResolvedDistribution(distribution, results);
-                }
-                const distribution = Distribution.sparql(importResult.distribution.accessUrl, importResult.identifier);
-                return new ResolvedDistribution(distribution, results);
-            }
-            if (importResult instanceof ImportFailed) {
-                return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', results, importResult);
-            }
-        }
-        return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', results);
-    }
-    async cleanup() {
-        await this.server?.stop();
+        return new NoDistributionAvailable(dataset, 'No SPARQL endpoint available', results);
     }
 }
package/dist/index.d.ts
CHANGED
package/dist/index.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC;AAClC,cAAc,iBAAiB,CAAC"}
package/dist/index.js
CHANGED
package/dist/pipeline.d.ts
CHANGED
@@ -1,19 +1,26 @@
 import type { DatasetSelector } from './selector.js';
 import { Stage } from './stage.js';
+import type { QuadTransform } from './stage.js';
 import type { Writer } from './writer/writer.js';
 import { type DistributionResolver } from './distribution/resolver.js';
 import type { StageOutputResolver } from './stageOutputResolver.js';
 import type { ProgressReporter } from './progressReporter.js';
+/** Plugin that hooks into pipeline lifecycle events. */
+export interface PipelinePlugin {
+    name: string;
+    /** Transform the quad stream before writing. */
+    beforeStageWrite?: QuadTransform;
+}
 export interface PipelineOptions {
     datasetSelector: DatasetSelector;
     stages: Stage[];
     writers: Writer | Writer[];
+    plugins?: PipelinePlugin[];
     name?: string;
     distributionResolver?: DistributionResolver;
     chaining?: {
         stageOutputResolver: StageOutputResolver;
         outputDir: string;
-        outputFormat?: 'turtle' | 'n-triples' | 'n-quads';
     };
     reporter?: ProgressReporter;
 }
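A `beforeStageWrite` hook is just a `QuadTransform` (declared in stage.d.ts below): a function from one async iterable of quads to another. As an illustrative sketch, assuming the package root re-exports `PipelinePlugin` the way it does the other pipeline types, a plugin that keeps only default-graph quads might look like this (`onlyDefaultGraph` is hypothetical, not part of the package):

```ts
import type { Quad } from '@rdfjs/types';
import type { PipelinePlugin } from '@lde/pipeline';

// Hypothetical plugin: drop named-graph quads before each stage's writer runs.
function onlyDefaultGraph(): PipelinePlugin {
  return {
    name: 'only-default-graph',
    beforeStageWrite: async function* (quads: AsyncIterable<Quad>) {
      for await (const quad of quads) {
        if (quad.graph.termType === 'DefaultGraph') yield quad;
      }
    },
  };
}
```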
package/dist/pipeline.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAGpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;
+{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAGpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AA8BD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA4Bd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
package/dist/pipeline.js
CHANGED
@@ -20,6 +20,17 @@ class FanOutWriter {
         }
     }
 }
+class TransformWriter {
+    inner;
+    transform;
+    constructor(inner, transform) {
+        this.inner = inner;
+        this.transform = transform;
+    }
+    async write(dataset, quads) {
+        await this.inner.write(dataset, this.transform(quads, dataset));
+    }
+}
 export class Pipeline {
     name;
     datasetSelector;
@@ -36,9 +47,17 @@ export class Pipeline {
         this.name = options.name ?? '';
         this.datasetSelector = options.datasetSelector;
         this.stages = options.stages;
-        this.writer = Array.isArray(options.writers)
+        let writer = Array.isArray(options.writers)
             ? new FanOutWriter(options.writers)
            : options.writers;
+        const transforms = options.plugins
+            ?.map((p) => p.beforeStageWrite)
+            .filter((t) => t !== undefined);
+        if (transforms?.length) {
+            const composed = (quads, dataset) => transforms.reduce((q, fn) => fn(q, dataset), quads);
+            writer = new TransformWriter(writer, composed);
+        }
+        this.writer = writer;
         this.distributionResolver =
             options.distributionResolver ?? new SparqlDistributionResolver();
         this.chaining = options.chaining;
@@ -103,13 +122,13 @@ export class Pipeline {
         }
     }
     async runChain(dataset, distribution, stage) {
-        const { stageOutputResolver, outputDir
+        const { stageOutputResolver, outputDir } = this.chaining;
         const outputFiles = [];
         try {
            // 1. Run parent stage → FileWriter.
            const parentWriter = new FileWriter({
                outputDir: `${outputDir}/${stage.name}`,
-                format:
+                format: 'n-triples',
            });
            await this.runChainedStage(dataset, distribution, stage, parentWriter);
            outputFiles.push(parentWriter.getOutputPath(dataset));
@@ -119,7 +138,7 @@
            const child = stage.stages[i];
            const childWriter = new FileWriter({
                outputDir: `${outputDir}/${child.name}`,
-                format:
+                format: 'n-triples',
            });
            await this.runChainedStage(dataset, currentDistribution, child, childWriter);
            outputFiles.push(childWriter.getOutputPath(dataset));
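Note the composition order in the constructor above: the `reduce` threads the quad stream through the hooks in plugin-array order, so the first plugin's transform sees the original stream and each later plugin sees the previous one's output. A standalone toy sketch of the same reduce pattern (names here are illustrative, not part of the package):

```ts
type Transform = (xs: AsyncIterable<number>) => AsyncIterable<number>;

const double: Transform = async function* (xs) {
  for await (const x of xs) yield x * 2;
};
const addOne: Transform = async function* (xs) {
  for await (const x of xs) yield x + 1;
};

async function* source() {
  yield 1;
  yield 2;
}

// Same shape as the pipeline's composed transform: double runs first,
// then addOne wraps its output, so 1 → 3 and 2 → 5.
const composed = [double, addOne].reduce<AsyncIterable<number>>(
  (stream, fn) => fn(stream),
  source(),
);

(async () => {
  for await (const n of composed) console.log(n); // 3, 5
})();
```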
package/dist/provenance.d.ts
ADDED
@@ -0,0 +1,7 @@
+import type { QuadTransform } from './stage.js';
+import type { PipelinePlugin } from './pipeline.js';
+/** QuadTransform that appends PROV-O provenance quads. */
+export declare const provenanceTransform: QuadTransform;
+/** Pipeline plugin that appends PROV-O provenance to every stage's output. */
+export declare function provenancePlugin(): PipelinePlugin;
+//# sourceMappingURL=provenance.d.ts.map
package/dist/provenance.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAkBpD,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aACgC,CAAC;AAEnE,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
package/dist/provenance.js
ADDED
@@ -0,0 +1,31 @@
+import { DataFactory } from 'n3';
+const { namedNode, literal, blankNode, quad } = DataFactory;
+const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
+const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
+const PROV_ACTIVITY = namedNode('http://www.w3.org/ns/prov#Activity');
+const PROV_WAS_GENERATED_BY = namedNode('http://www.w3.org/ns/prov#wasGeneratedBy');
+const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime');
+const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
+const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
+/** QuadTransform that appends PROV-O provenance quads. */
+export const provenanceTransform = (quads, dataset) => appendProvenanceQuads(quads, dataset.iri.toString(), new Date());
+/** Pipeline plugin that appends PROV-O provenance to every stage's output. */
+export function provenancePlugin() {
+    return {
+        name: 'provenance',
+        beforeStageWrite: provenanceTransform,
+    };
+}
+async function* appendProvenanceQuads(quads, iri, startedAt) {
+    for await (const q of quads) {
+        yield q;
+    }
+    const endedAt = new Date();
+    const subject = namedNode(iri);
+    const activity = blankNode();
+    yield quad(subject, RDF_TYPE, PROV_ENTITY);
+    yield quad(subject, PROV_WAS_GENERATED_BY, activity);
+    yield quad(activity, RDF_TYPE, PROV_ACTIVITY);
+    yield quad(activity, PROV_STARTED_AT_TIME, literal(startedAt.toISOString(), XSD_DATE_TIME));
+    yield quad(activity, PROV_ENDED_AT_TIME, literal(endedAt.toISOString(), XSD_DATE_TIME));
+}
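The one-line additions to dist/index.js and dist/index.d.ts (their diffs are not shown above) most plausibly re-export this new provenance module from the package root. Assuming so, and assuming the root also re-exports the `DatasetSelector`, `Stage`, and `Writer` types as the README's own example suggests, enabling provenance is a single pipeline option; a sketch with hypothetical placeholder values:

```ts
import {
  Pipeline,
  provenancePlugin,
  type DatasetSelector,
  type Stage,
  type Writer,
} from '@lde/pipeline';

declare const selector: DatasetSelector; // hypothetical
declare const stages: Stage[]; // hypothetical
declare const writer: Writer; // hypothetical

const pipeline = new Pipeline({
  datasetSelector: selector,
  stages,
  writers: writer,
  // Every stage's output stream now ends with prov:Entity/prov:Activity
  // quads recording when the stage ran.
  plugins: [provenancePlugin()],
});

await pipeline.run();
```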
package/dist/stage.d.ts
CHANGED
@@ -1,7 +1,10 @@
 import { Dataset, Distribution } from '@lde/dataset';
+import type { Quad } from '@rdfjs/types';
 import type { Executor, VariableBindings } from './sparql/executor.js';
 import { NotSupported } from './sparql/executor.js';
 import type { Writer } from './writer/writer.js';
+/** Transforms a quad stream, optionally using dataset metadata. */
+export type QuadTransform = (quads: AsyncIterable<Quad>, dataset: Dataset) => AsyncIterable<Quad>;
 export interface StageOptions {
     name: string;
     executors: Executor | Executor[];
package/dist/stage.d.ts.map
CHANGED
@@ -1 +1 @@
-{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;
+{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAmBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACrE"}