@lde/pipeline 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/analyzer.d.ts +18 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +34 -0
- package/dist/import.d.ts +30 -0
- package/dist/import.d.ts.map +1 -0
- package/dist/import.js +44 -0
- package/dist/index.d.ts +5 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +4 -0
- package/dist/pipeline.d.ts +11 -0
- package/dist/pipeline.d.ts.map +1 -0
- package/dist/pipeline.js +41 -0
- package/dist/selector.d.ts +40 -0
- package/dist/selector.d.ts.map +1 -0
- package/dist/selector.js +43 -0
- package/dist/step/sparqlQuery.d.ts +28 -0
- package/dist/step/sparqlQuery.d.ts.map +1 -0
- package/dist/step/sparqlQuery.js +37 -0
- package/dist/step.d.ts +55 -0
- package/dist/step.d.ts.map +1 -0
- package/dist/step.js +39 -0
- package/dist/writer/fileWriter.d.ts +21 -0
- package/dist/writer/fileWriter.d.ts.map +1 -0
- package/dist/writer/fileWriter.js +29 -0
- package/dist/writer/mergeWriter.d.ts +12 -0
- package/dist/writer/mergeWriter.d.ts.map +1 -0
- package/dist/writer/mergeWriter.js +10 -0
- package/dist/writer.d.ts +6 -0
- package/dist/writer.d.ts.map +1 -0
- package/dist/writer.js +1 -0
- package/package.json +30 -0

package/dist/analyzer.d.ts
ADDED
@@ -0,0 +1,18 @@
+import { DataEmittingStep, NotSupported } from './step.js';
+import { Dataset } from '@lde/dataset';
+import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
+export interface Args {
+    identifier: string;
+    query: string;
+    fetcher?: SparqlEndpointFetcher;
+}
+export declare class SparqlQueryAnalyzer implements DataEmittingStep {
+    readonly identifier: string;
+    private readonly query;
+    private readonly fetcher;
+    constructor({ identifier, query, fetcher }: Args);
+    execute(dataset: Dataset): Promise<NotSupported | (import("readable-stream").Readable & import("@rdfjs/types").Stream<import("@rdfjs/types").Quad>)>;
+    static fromFile(filename: string): Promise<SparqlQueryAnalyzer>;
+}
+export declare function fromFile(filename: string): Promise<string>;
+//# sourceMappingURL=analyzer.d.ts.map

package/dist/analyzer.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../src/analyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAC3D,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAI9D,MAAM,WAAW,IAAI;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED,qBAAa,mBAAoB,YAAW,gBAAgB;IAC1D,SAAgB,UAAU,SAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;gBAEb,EAAE,UAAU,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI;IAM1C,OAAO,CAAC,OAAO,EAAE,OAAO;WAqBV,QAAQ,CAAC,QAAQ,EAAE,MAAM;CAM9C;AAED,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,mBAE9C"}

package/dist/analyzer.js
ADDED
@@ -0,0 +1,34 @@
+import { NotSupported } from './step.js';
+import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+export class SparqlQueryAnalyzer {
+    identifier;
+    query;
+    fetcher;
+    constructor({ identifier, query, fetcher }) {
+        this.identifier = identifier;
+        this.query = query;
+        this.fetcher = fetcher ?? new SparqlEndpointFetcher();
+    }
+    async execute(dataset) {
+        const distribution = dataset.getSparqlDistribution();
+        if (null === distribution) {
+            return new NotSupported('No SPARQL distribution available');
+        }
+        const query = this.query
+            .replace('#subjectFilter#', distribution.subjectFilter ?? '')
+            .replace('?dataset', `<${dataset.iri}>`)
+            .replace('#namedGraph#', distribution.namedGraph ? `FROM <${distribution.namedGraph}>` : '');
+        return await this.fetcher.fetchTriples(distribution.accessUrl.toString(), query);
+    }
+    static async fromFile(filename) {
+        return new SparqlQueryAnalyzer({
+            identifier: filename,
+            query: await fromFile(filename),
+        });
+    }
+}
+export async function fromFile(filename) {
+    return (await readFile(resolve(filename))).toString();
+}
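
For illustration, a minimal usage sketch of the analyzer above (not part of the diff). It assumes the package root re-exports SparqlQueryAnalyzer and NotSupported, and that a Dataset with a SPARQL distribution is obtained elsewhere; the query filename is hypothetical.

// TypeScript usage sketch (assumptions noted above).
import { Dataset } from '@lde/dataset';
import { NotSupported, SparqlQueryAnalyzer } from '@lde/pipeline';

declare const dataset: Dataset; // e.g. produced by a Selector (see selector.js below)

const analyzer = await SparqlQueryAnalyzer.fromFile('queries/analyze.rq');
const result = await analyzer.execute(dataset);
if (result instanceof NotSupported) {
  console.error(result.message); // dataset has no SPARQL distribution
} else {
  result.on('data', quad => console.log(quad)); // RDF/JS quad stream
  result.on('end', () => console.log('done'));
}
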
package/dist/import.d.ts
ADDED
@@ -0,0 +1,30 @@
+import { Dataset } from '@lde/dataset';
+import { Failure, Finishable, NotSupported, SingleStep, Success } from './step.js';
+import { Importer } from '@lde/sparql-importer';
+import { SparqlServer } from '@lde/sparql-server';
+/**
+ * A pipeline step that imports a database using an {@link Importer} and makes
+ * the import available at a local SPARQL endpoint.
+ */
+export declare class Import implements SingleStep, Finishable {
+    readonly identifier = "import";
+    private readonly importer;
+    private readonly server;
+    private readonly forceImport;
+    /**
+     * Create a Pipeline ImportStep.
+     *
+     * @param {object} args
+     * @param args.importer A concrete importer that will import the distribution if needed.
+     * @param args.server SPARQL server that will be started to serve the imported data.
+     * @param args.forceImport Whether to force an import even if the dataset already has a SPARQL distribution.
+     */
+    constructor({ importer, server, forceImport, }: {
+        importer: Importer;
+        server: SparqlServer;
+        forceImport?: boolean;
+    });
+    execute(dataset: Dataset): Promise<NotSupported | Failure | Success>;
+    finish(): Promise<void>;
+}
+//# sourceMappingURL=import.d.ts.map

package/dist/import.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"import.d.ts","sourceRoot":"","sources":["../src/import.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,OAAO,EACP,UAAU,EACV,YAAY,EACZ,UAAU,EACV,OAAO,EACR,MAAM,WAAW,CAAC;AACnB,OAAO,EACL,QAAQ,EAGT,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AAElD;;;GAGG;AACH,qBAAa,MAAO,YAAW,UAAU,EAAE,UAAU;IACnD,SAAgB,UAAU,YAAY;IACtC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAW;IACpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAe;IACtC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IAEtC;;;;;;;OAOG;gBACS,EACV,QAAQ,EACR,MAAM,EACN,WAAW,GACZ,EAAE;QACD,QAAQ,EAAE,QAAQ,CAAC;QACnB,MAAM,EAAE,YAAY,CAAC;QACrB,WAAW,CAAC,EAAE,OAAO,CAAC;KACvB;IAMY,OAAO,CAClB,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,YAAY,GAAG,OAAO,GAAG,OAAO,CAAC;IAuB/B,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAGrC"}

package/dist/import.js
ADDED
@@ -0,0 +1,44 @@
+import { Distribution } from '@lde/dataset';
+import { Failure, NotSupported, Success, } from './step.js';
+import { ImportFailed, NotSupported as ImporterNotSupported, } from '@lde/sparql-importer';
+/**
+ * A pipeline step that imports a database using an {@link Importer} and makes
+ * the import available at a local SPARQL endpoint.
+ */
+export class Import {
+    identifier = 'import';
+    importer;
+    server;
+    forceImport;
+    /**
+     * Create a Pipeline ImportStep.
+     *
+     * @param {object} args
+     * @param args.importer A concrete importer that will import the distribution if needed.
+     * @param args.server SPARQL server that will be started to serve the imported data.
+     * @param args.forceImport Whether to force an import even if the dataset already has a SPARQL distribution.
+     */
+    constructor({ importer, server, forceImport, }) {
+        this.importer = importer;
+        this.server = server;
+        this.forceImport = forceImport ?? false;
+    }
+    async execute(dataset) {
+        if (dataset.getSparqlDistribution()?.isValid && !this.forceImport) {
+            return new NotSupported('A valid SPARQL distribution is available so no import needed');
+        }
+        const result = await this.importer.import(dataset);
+        if (result instanceof ImporterNotSupported) {
+            return new NotSupported('No download distribution available');
+        }
+        if (result instanceof ImportFailed) {
+            return new Failure(result.distribution, result.error);
+        }
+        await this.server.start();
+        dataset.distributions.push(Distribution.sparql(this.server.queryEndpoint));
+        return new Success(dataset, result.distribution);
+    }
+    async finish() {
+        await this.server.stop();
+    }
+}
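
A wiring sketch for the Import step (illustrative only): Importer and SparqlServer come from @lde/sparql-importer and @lde/sparql-server, and only the members exercised above (import(), start(), stop(), queryEndpoint) matter here; the concrete instances are assumed.

// TypeScript sketch; `importer` and `server` stand in for real implementations.
import { Import } from '@lde/pipeline'; // assuming a root re-export
import { Importer } from '@lde/sparql-importer';
import { SparqlServer } from '@lde/sparql-server';

declare const importer: Importer;   // imports a download distribution when needed
declare const server: SparqlServer; // serves the imported data locally

const step = new Import({ importer, server, forceImport: false });
// execute() returns NotSupported when a valid SPARQL distribution already
// exists, Failure when the import fails, and Success after starting `server`
// and pushing a SPARQL distribution onto the dataset. Pipeline.run() calls
// finish() after each dataset's steps have run, which stops the server.
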
package/dist/index.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,eAAe,CAAC;AAC9B,cAAc,eAAe,CAAC;AAC9B,cAAc,WAAW,CAAC;AAC1B,cAAc,uBAAuB,CAAC"}

package/dist/pipeline.d.ts
ADDED
@@ -0,0 +1,11 @@
+import { Selector } from './selector.js';
+import { Step } from './step.js';
+export declare class Pipeline {
+    private readonly config;
+    constructor(config: {
+        selector: Selector;
+        steps: Step[];
+    });
+    run(): Promise<void>;
+}
+//# sourceMappingURL=pipeline.d.ts.map

package/dist/pipeline.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,QAAQ,EAAE,MAAM,eAAe,CAAC;AACzC,OAAO,EAAc,IAAI,EAAE,MAAM,WAAW,CAAC;AAI7C,qBAAa,QAAQ;IACP,OAAO,CAAC,QAAQ,CAAC,MAAM;gBAAN,MAAM,EAAE;QAAE,QAAQ,EAAE,QAAQ,CAAC;QAAC,KAAK,EAAE,IAAI,EAAE,CAAA;KAAE;IAE7D,GAAG;CAgCjB"}

package/dist/pipeline.js
ADDED
@@ -0,0 +1,41 @@
+import { NotSupported } from './step.js';
+import { Readable } from 'node:stream';
+export class Pipeline {
+    config;
+    constructor(config) {
+        this.config = config;
+    }
+    async run() {
+        const datasets = await this.config.selector.select();
+        for await (const dataset of datasets) {
+            for (const step of this.config.steps) {
+                const result = await step.execute(dataset);
+                if (result instanceof NotSupported) {
+                    console.error(result);
+                }
+                else if (result instanceof Readable) {
+                    const promise = new Promise((resolve, reject) => {
+                        result.on('data', (data) => {
+                            // TODO: pipe to writers.
+                            console.log('Data:', data);
+                        });
+                        result.on('error', (error) => {
+                            console.error('rejecting');
+                            reject(error);
+                        });
+                        result.on('end', resolve);
+                    });
+                    await promise;
+                }
+            }
+            for (const step of this.config.steps) {
+                if (isFinishable(step)) {
+                    await step.finish();
+                }
+            }
+        }
+    }
+}
+const isFinishable = (step) => {
+    return typeof step.finish === 'function';
+};
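
A runnable sketch tying the pieces together (illustrative; assumes the package root re-exports these names, and that the Dataset array is constructed elsewhere):

// TypeScript sketch of a small pipeline: select datasets, run one step each.
import { Dataset } from '@lde/dataset';
import { ManualDatasetSelection, Pipeline, SparqlQueryAnalyzer } from '@lde/pipeline';

declare const datasets: Dataset[]; // constructed elsewhere

const pipeline = new Pipeline({
  selector: new ManualDatasetSelection(datasets),
  steps: [await SparqlQueryAnalyzer.fromFile('queries/analyze.rq')], // hypothetical query file
});
await pipeline.run(); // quad streams are currently logged to the console (see the TODO above)
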

package/dist/selector.d.ts
ADDED
@@ -0,0 +1,40 @@
+import { Dataset } from '@lde/dataset';
+import { Client, Paginator } from '@lde/dataset-registry-client';
+/**
+ * Select {@link Dataset}s for processing in a pipeline.
+ */
+export interface Selector {
+    select(): Promise<Paginator<Dataset>>;
+}
+export declare class ManualDatasetSelection implements Selector {
+    private readonly datasets;
+    constructor(datasets: Dataset[]);
+    select(): Promise<Paginator<Dataset>>;
+}
+/**
+ * Select Datasets from a Dataset Registry.
+ *
+ *
+ *
+ * @example
+ * ```typescript
+ *
+ * ```
+ *
+ * @param {object} options
+ * @param Client options.registry The Dataset Registry Client to query for datasets.
+ * @param string options.query Optional custom SPARQL query to select datasets.
+ * @param object options.criteria Optional search criteria to select datasets.
+ */
+export declare class RegistrySelector implements Selector {
+    private readonly registry;
+    private readonly query?;
+    private readonly criteria?;
+    constructor({ registry, query, criteria, }: {
+        registry: Client;
+        query?: string;
+        criteria?: object;
+    });
+    select(): Promise<Paginator<Dataset>>;
+}
+//# sourceMappingURL=selector.d.ts.map

package/dist/selector.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../src/selector.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,MAAM,EAAE,SAAS,EAAE,MAAM,8BAA8B,CAAC;AAEjE;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC,CAAC;CACvC;AAED,qBAAa,sBAAuB,YAAW,QAAQ;IACzC,OAAO,CAAC,QAAQ,CAAC,QAAQ;gBAAR,QAAQ,EAAE,OAAO,EAAE;IAE1C,MAAM,IAAI,OAAO,CAAC,SAAS,CAAC,OAAO,CAAC,CAAC;CAG5C;AAED;;;;;;;;;;;;;;GAcG;AACH,qBAAa,gBAAiB,YAAW,QAAQ;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAS;gBAEvB,EACV,QAAQ,EACR,KAAK,EACL,QAAQ,GACT,EAAE;QACD,QAAQ,EAAE,MAAM,CAAC;QACjB,KAAK,CAAC,EAAE,MAAM,CAAC;QACf,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB;IAMK,MAAM;CAOb"}

package/dist/selector.js
ADDED
@@ -0,0 +1,43 @@
+import { Paginator } from '@lde/dataset-registry-client';
+export class ManualDatasetSelection {
+    datasets;
+    constructor(datasets) {
+        this.datasets = datasets;
+    }
+    async select() {
+        return new Paginator(async () => this.datasets, this.datasets.length);
+    }
+}
+/**
+ * Select Datasets from a Dataset Registry.
+ *
+ *
+ *
+ * @example
+ * ```typescript
+ *
+ * ```
+ *
+ * @param {object} options
+ * @param Client options.registry The Dataset Registry Client to query for datasets.
+ * @param string options.query Optional custom SPARQL query to select datasets.
+ * @param object options.criteria Optional search criteria to select datasets.
+ */
+export class RegistrySelector {
+    registry;
+    query;
+    criteria;
+    constructor({ registry, query, criteria, }) {
+        this.registry = registry;
+        this.query = query;
+        this.criteria = criteria;
+    }
+    async select() {
+        if (this.query) {
+            return this.registry.query(this.query);
+        }
+        else {
+            return this.registry.query(this.criteria ?? {});
+        }
+    }
+}
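
A selector sketch (illustrative; the registry client configuration is assumed): RegistrySelector forwards either the custom SPARQL query or the criteria object to Client.query(), falling back to empty criteria.

// TypeScript sketch; `registry` stands in for a configured client.
import { Client } from '@lde/dataset-registry-client';
import { RegistrySelector } from '@lde/pipeline'; // assuming a root re-export

declare const registry: Client;

const selector = new RegistrySelector({ registry, criteria: {} });
const paginator = await selector.select(); // Paginator<Dataset>; Pipeline.run() iterates it with for await
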

package/dist/step/sparqlQuery.d.ts
ADDED
@@ -0,0 +1,28 @@
+import { DataEmittingStep, NotSupported } from './../step.js';
+import { Dataset } from '@lde/dataset';
+import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
+/**
+ * Arguments for the SparqlQuery step.
+ *
+ * @param identifier Unique identifier for the step.
+ * @param query: SPARQL CONSTRUCT query to execute.
+ * @param fetcher Optional SPARQL endpoint fetcher; defaults to SparqlEndpointFetcher.
+ */
+export interface Args {
+    identifier: string;
+    query: string;
+    fetcher?: SparqlEndpointFetcher;
+}
+/**
+ * Executes a SPARQL CONSTRUCT query and emits the resulting
+ */
+export declare class SparqlQuery implements DataEmittingStep {
+    readonly identifier: string;
+    private readonly query;
+    private readonly fetcher;
+    constructor({ identifier, query, fetcher }: Args);
+    execute(dataset: Dataset): Promise<NotSupported | (import("readable-stream").Readable & import("@rdfjs/types").Stream<import("@rdfjs/types").Quad>)>;
+    static fromFile(filename: string): Promise<SparqlQuery>;
+}
+export declare function fromFile(filename: string): Promise<string>;
+//# sourceMappingURL=sparqlQuery.d.ts.map

package/dist/step/sparqlQuery.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"sparqlQuery.d.ts","sourceRoot":"","sources":["../../src/step/sparqlQuery.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,gBAAgB,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC9D,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAI9D;;;;;;GAMG;AACH,MAAM,WAAW,IAAI;IACnB,UAAU,EAAE,MAAM,CAAC;IACnB,KAAK,EAAE,MAAM,CAAC;IACd,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;GAEG;AACH,qBAAa,WAAY,YAAW,gBAAgB;IAClD,SAAgB,UAAU,SAAC;IAC3B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAC;IACvB,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAC;gBAEb,EAAE,UAAU,EAAE,KAAK,EAAE,OAAO,EAAE,EAAE,IAAI;IAM1C,OAAO,CAAC,OAAO,EAAE,OAAO;WAqBV,QAAQ,CAAC,QAAQ,EAAE,MAAM;CAM9C;AAED,wBAAsB,QAAQ,CAAC,QAAQ,EAAE,MAAM,mBAE9C"}

package/dist/step/sparqlQuery.js
ADDED
@@ -0,0 +1,37 @@
+import { NotSupported } from './../step.js';
+import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
+import { readFile } from 'node:fs/promises';
+import { resolve } from 'node:path';
+/**
+ * Executes a SPARQL CONSTRUCT query and emits the resulting
+ */
+export class SparqlQuery {
+    identifier;
+    query;
+    fetcher;
+    constructor({ identifier, query, fetcher }) {
+        this.identifier = identifier;
+        this.query = query;
+        this.fetcher = fetcher ?? new SparqlEndpointFetcher();
+    }
+    async execute(dataset) {
+        const distribution = dataset.getSparqlDistribution();
+        if (null === distribution || !distribution.isValid) {
+            return new NotSupported('No SPARQL distribution available');
+        }
+        const query = this.query
+            .replace('#subjectFilter#', distribution.subjectFilter ?? '')
+            .replace('?dataset', `<${dataset.iri}>`)
+            .replace('#namedGraph#', distribution.namedGraph ? `FROM <${distribution.namedGraph}>` : '');
+        return await this.fetcher.fetchTriples(distribution.accessUrl.toString(), query);
+    }
+    static async fromFile(filename) {
+        return new this({
+            identifier: filename,
+            query: await fromFile(filename),
+        });
+    }
+}
+export async function fromFile(filename) {
+    return (await readFile(resolve(filename))).toString();
+}
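
The placeholder conventions used by execute() above, shown in a hypothetical query file. Note that String.prototype.replace with a string pattern substitutes only the first occurrence, so each marker should appear once.

// Hypothetical queries/analyze.rq content, as a TypeScript template string.
// execute() rewrites:
//   '?dataset'        -> the dataset IRI, e.g. <https://example.org/d1>
//   '#namedGraph#'    -> 'FROM <graph>' when the distribution names a graph, else ''
//   '#subjectFilter#' -> the distribution's subject filter, else ''
const query = `
CONSTRUCT { ?dataset <http://rdfs.org/ns/void#triples> ?count }
#namedGraph#
WHERE {
  #subjectFilter#
  { SELECT (COUNT(*) AS ?count) WHERE { ?s ?p ?o } }
}`;
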
package/dist/step.d.ts
ADDED
@@ -0,0 +1,55 @@
+import { Dataset, Distribution } from '@lde/dataset';
+import type { Stream } from '@rdfjs/types';
+interface AbstractStep {
+    readonly identifier: string;
+}
+export type Step = DataEmittingStep | SingleStep;
+/**
+ * A pipeline step that returns a data-emitting stream of RDF quads.
+ * Failure is expressed by emitting an error event; success by the end event.
+ */
+export interface DataEmittingStep extends AbstractStep {
+    execute(dataset: Dataset): Promise<Stream | NotSupported>;
+}
+/**
+ * A pipeline step that executes an operation without emitting data.
+ */
+export interface SingleStep extends AbstractStep {
+    execute(dataset: Dataset): Promise<NotSupported | Failure | Success>;
+}
+export interface Finishable {
+    finish(): Promise<void>;
+}
+/**
+ * A pipeline step failed to run.
+ *
+ * @param distribution The distribution that was processed.
+ * @param message Optional error message.
+ */
+export declare class Failure {
+    readonly distribution: Distribution;
+    readonly message?: string | undefined;
+    constructor(distribution: Distribution, message?: string | undefined);
+}
+/**
+ * A pipeline ran successfully.
+ *
+ * @param dataset: The dataset, with possible modifications, that was processed.
+ * @param distribution The distribution that was processed.
+ */
+export declare class Success {
+    readonly dataset: Dataset;
+    readonly distribution: Distribution;
+    constructor(dataset: Dataset, distribution: Distribution);
+}
+/**
+ * A pipeline step could not be run because the dataset lacks a distribution supported by the step.
+ *
+ * @param message: A message explaining why the step is not supported.
+ */
+export declare class NotSupported {
+    readonly message: string;
+    constructor(message: string);
+}
+export {};
+//# sourceMappingURL=step.d.ts.map

package/dist/step.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"step.d.ts","sourceRoot":"","sources":["../src/step.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE3C,UAAU,YAAY;IACpB,QAAQ,CAAC,UAAU,EAAE,MAAM,CAAC;CAC7B;AAED,MAAM,MAAM,IAAI,GAAG,gBAAgB,GAAG,UAAU,CAAC;AAEjD;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,MAAM,GAAG,YAAY,CAAC,CAAC;CAC3D;AAED;;GAEG;AACH,MAAM,WAAW,UAAW,SAAQ,YAAY;IAC9C,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,YAAY,GAAG,OAAO,GAAG,OAAO,CAAC,CAAC;CACtE;AAED,MAAM,WAAW,UAAU;IACzB,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CACzB;AAED;;;;;GAKG;AACH,qBAAa,OAAO;aAEA,YAAY,EAAE,YAAY;aAC1B,OAAO,CAAC,EAAE,MAAM;gBADhB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED;;;;;GAKG;AACH,qBAAa,OAAO;aAEA,OAAO,EAAE,OAAO;aAChB,YAAY,EAAE,YAAY;gBAD1B,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY;CAE7C;AAED;;;;GAIG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C"}

package/dist/step.js
ADDED
@@ -0,0 +1,39 @@
+/**
+ * A pipeline step failed to run.
+ *
+ * @param distribution The distribution that was processed.
+ * @param message Optional error message.
+ */
+export class Failure {
+    distribution;
+    message;
+    constructor(distribution, message) {
+        this.distribution = distribution;
+        this.message = message;
+    }
+}
+/**
+ * A pipeline ran successfully.
+ *
+ * @param dataset: The dataset, with possible modifications, that was processed.
+ * @param distribution The distribution that was processed.
+ */
+export class Success {
+    dataset;
+    distribution;
+    constructor(dataset, distribution) {
+        this.dataset = dataset;
+        this.distribution = distribution;
+    }
+}
+/**
+ * A pipeline step could not be run because the dataset lacks a distribution supported by the step.
+ *
+ * @param message: A message explaining why the step is not supported.
+ */
+export class NotSupported {
+    message;
+    constructor(message) {
+        this.message = message;
+    }
+}
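
To illustrate the step contract, a minimal custom SingleStep (a sketch; the 'touch' step is invented, and root re-exports are assumed):

import { Dataset } from '@lde/dataset';
import { Failure, NotSupported, SingleStep, Success } from '@lde/pipeline';

class TouchStep implements SingleStep {
  public readonly identifier = 'touch';

  public async execute(dataset: Dataset): Promise<NotSupported | Failure | Success> {
    const distribution = dataset.getSparqlDistribution();
    if (null === distribution) {
      return new NotSupported('No SPARQL distribution available');
    }
    try {
      // ...side-effecting work against the distribution goes here...
      return new Success(dataset, distribution);
    } catch (e) {
      return new Failure(distribution, e instanceof Error ? e.message : String(e));
    }
  }
}
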

package/dist/writer/fileWriter.d.ts
ADDED
@@ -0,0 +1,21 @@
+import { Writer } from './../writer.js';
+import { Dataset, RdfFormat } from '@lde/dataset';
+import { Readable } from 'node:stream';
+import { Quad, Stream } from '@rdfjs/types';
+export declare class FileWriter implements Writer {
+    private readonly directory;
+    private readonly datasetToFilename;
+    private readonly format;
+    private readonly prefixes;
+    constructor({ directory, datasetToFilename, format, prefixes, }: {
+        directory?: string;
+        datasetToFilename: (identifier: string, dataset: Dataset, format: RdfFormat) => string;
+        format: RdfFormat;
+        prefixes?: {
+            [key: string]: string;
+        };
+    });
+    write(identifier: string, dataset: Dataset, data: Stream<Quad> & Readable): Promise<void>;
+    private writeStream;
+}
+//# sourceMappingURL=fileWriter.d.ts.map

package/dist/writer/fileWriter.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,SAAS,EAA4B,MAAM,cAAc,CAAC;AAE5E,OAAO,EAAE,QAAQ,EAAE,MAAM,aAAa,CAAC;AAKvC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE5C,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,iBAAiB,CAItB;IACZ,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAY;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA4B;gBAEzC,EACV,SAAoB,EACpB,iBAG0C,EAC1C,MAA+B,EAC/B,QAAa,GACd,EAAE;QACD,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,iBAAiB,EAAE,CACjB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE,SAAS,KACd,OAAO,CAAC;QACb,MAAM,EAAE,SAAS,CAAC;QAClB,QAAQ,CAAC,EAAE;YAAE,CAAC,GAAG,EAAE,MAAM,GAAG,MAAM,CAAA;SAAE,CAAC;KACtC;IAOY,KAAK,CAChB,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,OAAO,EAChB,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,QAAQ;IAY/B,OAAO,CAAC,WAAW;CASpB"}

package/dist/writer/fileWriter.js
ADDED
@@ -0,0 +1,29 @@
+import { RdfFormat, rdfFormatToFileExtension } from '@lde/dataset';
+import fs from 'node:fs';
+import path from 'node:path';
+import { pipeline } from 'node:stream/promises';
+import filenamyifyUrl from 'filenamify-url';
+import { StreamWriter } from 'n3';
+export class FileWriter {
+    directory;
+    datasetToFilename;
+    format;
+    prefixes;
+    constructor({ directory = 'output', datasetToFilename = (identifier, dataset, format) => `${filenamyifyUrl(dataset.iri.toString(), {
+        replacement: '-',
+    })}.${rdfFormatToFileExtension(format)}`, format = RdfFormat['N-Triples'], prefixes = {}, }) {
+        this.directory = directory;
+        this.datasetToFilename = datasetToFilename;
+        this.format = format;
+        this.prefixes = prefixes;
+    }
+    async write(identifier, dataset, data) {
+        await pipeline(data, new StreamWriter({
+            prefixes: this.prefixes,
+            format: this.format,
+        }), this.writeStream(identifier, dataset));
+    }
+    writeStream(identifier, dataset) {
+        return fs.createWriteStream(path.join(this.directory, this.datasetToFilename(identifier, dataset, this.format)), { flags: 'a' });
+    }
+}
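
FileWriter appends serialized quads to one file per dataset under `directory`; a usage sketch (illustrative; the declared types require datasetToFilename and format even though the implementation has defaults, and the output directory must already exist because fs.createWriteStream does not create it):

// TypeScript sketch; the filename scheme here is simplistic and hypothetical.
import { Dataset, RdfFormat } from '@lde/dataset';
import { FileWriter } from '@lde/pipeline'; // assuming a root re-export
import type { Quad, Stream } from '@rdfjs/types';
import type { Readable } from 'node:stream';

declare const dataset: Dataset;
declare const quads: Stream<Quad> & Readable; // e.g. output of a DataEmittingStep

const writer = new FileWriter({
  directory: 'output',
  format: RdfFormat['N-Triples'],
  datasetToFilename: (identifier, dataset, format) => `${identifier}.nt`,
  prefixes: { void: 'http://rdfs.org/ns/void#' },
});
await writer.write('analyze', dataset, quads); // appends to output/analyze.nt
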

package/dist/writer/mergeWriter.d.ts
ADDED
@@ -0,0 +1,12 @@
+import { Writer } from './../writer.js';
+import { Dataset } from '@lde/dataset';
+import { Quad, Stream } from '@rdfjs/types';
+/**
+ * Buffers output from steps and merges it so you get a single output per dataset.
+ */
+export declare class MergeWriter implements Writer {
+    private readonly decorated;
+    constructor(decorated: Writer);
+    write(dataset: Dataset, data: Stream<Quad>): Promise<void>;
+}
+//# sourceMappingURL=mergeWriter.d.ts.map

package/dist/writer/mergeWriter.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"mergeWriter.d.ts","sourceRoot":"","sources":["../../src/writer/mergeWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,MAAM,EAAE,MAAM,gBAAgB,CAAC;AACxC,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAE5C;;GAEG;AACH,qBAAa,WAAY,YAAW,MAAM;IAC5B,OAAO,CAAC,QAAQ,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IAEjC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;CACxE"}

package/dist/writer.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../src/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,IAAI,EAAE,MAAM,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAEvC,MAAM,WAAW,MAAM;IACrB,KAAK,CACH,UAAU,EAAE,MAAM,EAClB,OAAO,EAAE,OAAO,EAChB,IAAI,EAAE,MAAM,CAAC,IAAI,CAAC,GACjB,OAAO,CAAC,IAAI,CAAC,CAAC;CAClB"}

package/dist/writer.js
ADDED
@@ -0,0 +1 @@
+export {};

package/package.json
ADDED
@@ -0,0 +1,30 @@
+{
+  "name": "@lde/pipeline",
+  "version": "0.2.0",
+  "type": "module",
+  "main": "./dist/index.js",
+  "module": "./dist/index.js",
+  "types": "./dist/index.d.ts",
+  "exports": {
+    "./package.json": "./package.json",
+    ".": {
+      "development": "./src/index.ts",
+      "types": "./dist/index.d.ts",
+      "import": "./dist/index.js",
+      "default": "./dist/index.js"
+    }
+  },
+  "files": [
+    "dist",
+    "!**/*.tsbuildinfo"
+  ],
+  "dependencies": {
+    "@lde/dataset": "0.2.0",
+    "@lde/dataset-registry-client": "0.2.0",
+    "@lde/sparql-importer": "0.0.3",
+    "@lde/sparql-server": "0.2.0",
+    "fetch-sparql-endpoint": "^6.0.0",
+    "tslib": "^2.3.0",
+    "@rdfjs/types": "^2.0.1"
+  }
+}
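
The conditional `exports` map above resolves the bare specifier in a few ways; for instance (illustrative, assuming the root entry point re-exports Pipeline):

// Regular consumers get the built ESM entry point:
import { Pipeline } from '@lde/pipeline'; // -> ./dist/index.js via the "import"/"default" conditions

// Tooling that enables the "development" condition (e.g. `node --conditions=development`
// with a TypeScript loader) resolves the same specifier to ./src/index.ts instead.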