@lde/pipeline 0.5.1 → 0.6.0
- package/README.md +44 -0
- package/dist/analyzer.d.ts +36 -0
- package/dist/analyzer.d.ts.map +1 -0
- package/dist/analyzer.js +29 -0
- package/dist/builder.d.ts +114 -0
- package/dist/builder.d.ts.map +1 -0
- package/dist/builder.js +115 -0
- package/dist/config.d.ts +70 -0
- package/dist/config.d.ts.map +1 -0
- package/dist/config.js +110 -0
- package/dist/distribution/analyzer.d.ts +58 -0
- package/dist/distribution/analyzer.d.ts.map +1 -0
- package/dist/distribution/analyzer.js +120 -0
- package/dist/distribution/index.d.ts +3 -0
- package/dist/distribution/index.d.ts.map +1 -0
- package/dist/distribution/index.js +2 -0
- package/dist/distribution/probe.d.ts +47 -0
- package/dist/distribution/probe.d.ts.map +1 -0
- package/dist/distribution/probe.js +120 -0
- package/dist/index.d.ts +4 -0
- package/dist/index.d.ts.map +1 -1
- package/dist/index.js +4 -0
- package/dist/sparql/collect.d.ts +19 -0
- package/dist/sparql/collect.d.ts.map +1 -0
- package/dist/sparql/collect.js +23 -0
- package/dist/sparql/executor.d.ts +121 -0
- package/dist/sparql/executor.d.ts.map +1 -0
- package/dist/sparql/executor.js +107 -0
- package/dist/sparql/index.d.ts +3 -0
- package/dist/sparql/index.d.ts.map +1 -0
- package/dist/sparql/index.js +2 -0
- package/dist/step/sparqlQuery.d.ts +10 -5
- package/dist/step/sparqlQuery.d.ts.map +1 -1
- package/dist/step/sparqlQuery.js +16 -20
- package/dist/writer/fileWriter.d.ts +23 -0
- package/dist/writer/fileWriter.d.ts.map +1 -0
- package/dist/writer/fileWriter.js +51 -0
- package/dist/writer/index.d.ts +5 -0
- package/dist/writer/index.d.ts.map +1 -0
- package/dist/writer/index.js +4 -0
- package/dist/writer/serialize.d.ts +7 -0
- package/dist/writer/serialize.d.ts.map +1 -0
- package/dist/writer/serialize.js +20 -0
- package/dist/writer/sparqlUpdateWriter.d.ts +34 -0
- package/dist/writer/sparqlUpdateWriter.d.ts.map +1 -0
- package/dist/writer/sparqlUpdateWriter.js +43 -0
- package/dist/writer/writer.d.ts +15 -0
- package/dist/writer/writer.d.ts.map +1 -0
- package/dist/writer/writer.js +1 -0
- package/package.json +20 -5
package/README.md
ADDED
@@ -0,0 +1,44 @@
+# Pipeline
+
+Framework for building RDF data processing pipelines with SPARQL.
+
+## Features
+
+- **Pipeline** — orchestrates steps that process DCAT datasets
+- **PipelineBuilder** — fluent API for constructing pipelines from steps and selectors
+- **PipelineConfig** — load pipeline configuration from YAML/JSON files
+- **SparqlConstructExecutor** — streaming SPARQL CONSTRUCT with template substitution and variable bindings
+- **Distribution analysis** — probe and analyze dataset distributions
+
+## Subpath exports
+
+| Export                   | Description                                                   |
+| ------------------------ | ------------------------------------------------------------- |
+| `@lde/pipeline`          | Steps, pipeline, builder, config, SPARQL                      |
+| `@lde/pipeline/analyzer` | Analyzer contracts (`Analyzer`, `BaseAnalyzer`, result types) |
+| `@lde/pipeline/writer`   | Write RDF data to files or SPARQL endpoints                   |
+
+## Usage
+
+```typescript
+import {
+  PipelineBuilder,
+  SparqlConstructExecutor,
+  collect,
+} from '@lde/pipeline';
+
+// Build a pipeline from steps
+const pipeline = new PipelineBuilder().addStep(myStep).build();
+
+// Or use the SPARQL executor directly
+const executor = new SparqlConstructExecutor({
+  query: 'CONSTRUCT { ?dataset ?p ?o } WHERE { ?s ?p ?o }',
+});
+const result = await executor.execute(dataset);
+```
+
+## Validation
+
+```sh
+npx nx run-many -t lint test typecheck build --projects=@lde/pipeline
+```
package/dist/analyzer.d.ts
ADDED
@@ -0,0 +1,36 @@
+import { Dataset } from '@lde/dataset';
+import type { DatasetCore } from '@rdfjs/types';
+import { NotSupported } from './step.js';
+export { NotSupported } from './step.js';
+/**
+ * Result of a successful analysis.
+ */
+export declare class Success {
+    readonly data: DatasetCore;
+    constructor(data: DatasetCore);
+}
+/**
+ * Analysis failed.
+ */
+export declare class Failure {
+    readonly endpoint: URL;
+    readonly message?: string | undefined;
+    constructor(endpoint: URL, message?: string | undefined);
+}
+/**
+ * Interface for analyzers.
+ */
+export interface Analyzer {
+    readonly name: string;
+    execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
+    finish?(): Promise<void>;
+}
+/**
+ * Base class for analyzers with default implementations.
+ */
+export declare abstract class BaseAnalyzer implements Analyzer {
+    abstract readonly name: string;
+    abstract execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
+    finish(): Promise<void>;
+}
+//# sourceMappingURL=analyzer.d.ts.map
package/dist/analyzer.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../src/analyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC;;GAEG;AACH,qBAAa,OAAO;aACU,IAAI,EAAE,WAAW;gBAAjB,IAAI,EAAE,WAAW;CAC9C;AAED;;GAEG;AACH,qBAAa,OAAO;aAEA,QAAQ,EAAE,GAAG;aACb,OAAO,CAAC,EAAE,MAAM;gBADhB,QAAQ,EAAE,GAAG,EACb,OAAO,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC,CAAC;IACrE,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;GAEG;AACH,8BAAsB,YAAa,YAAW,QAAQ;IACpD,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC;IAEvE,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAG9B"}
package/dist/analyzer.js
ADDED
@@ -0,0 +1,29 @@
+export { NotSupported } from './step.js';
+/**
+ * Result of a successful analysis.
+ */
+export class Success {
+  data;
+  constructor(data) {
+    this.data = data;
+  }
+}
+/**
+ * Analysis failed.
+ */
+export class Failure {
+  endpoint;
+  message;
+  constructor(endpoint, message) {
+    this.endpoint = endpoint;
+    this.message = message;
+  }
+}
+/**
+ * Base class for analyzers with default implementations.
+ */
+export class BaseAnalyzer {
+  async finish() {
+    // Default no-op implementation.
+  }
+}
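The `Analyzer` contract above pairs a `name` with an `execute` method that resolves to `Success`, `Failure`, or `NotSupported`, and `BaseAnalyzer` contributes a no-op `finish()`. A minimal sketch of a custom analyzer against this contract: the `TripleCountAnalyzer` class and its example predicate are hypothetical, the assumption that `Dataset` exposes an `iri` property is taken from the `manual()` helper further down, and `@lde/pipeline/analyzer` is the subpath listed in the README.

```typescript
import { BaseAnalyzer, Success, Failure } from '@lde/pipeline/analyzer';
import type { Dataset } from '@lde/dataset';
import { DataFactory, Store } from 'n3';

const { namedNode, literal } = DataFactory;

// Hypothetical analyzer: records one triple describing the dataset.
class TripleCountAnalyzer extends BaseAnalyzer {
  readonly name = 'triple-count';

  async execute(dataset: Dataset): Promise<Success | Failure> {
    const data = new Store(); // n3 Store implements RDF/JS DatasetCore
    data.addQuad(
      namedNode(dataset.iri.toString()), // assumes Dataset exposes its IRI as `iri`
      namedNode('http://example.com/analyzedBy'),
      literal(this.name),
    );
    return new Success(data); // Success wraps any DatasetCore
  }
}
```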
package/dist/builder.d.ts
ADDED
@@ -0,0 +1,114 @@
+import { Selector, ManualDatasetSelection, RegistrySelector } from './selector.js';
+import { Step } from './step.js';
+/**
+ * Configuration for QLever SPARQL server.
+ */
+export interface QleverConfig {
+    /**
+     * Execution mode: 'docker' for containerized, 'native' for local binary.
+     */
+    mode: 'docker' | 'native';
+    /**
+     * Docker image to use (for docker mode).
+     * @default 'adfreiburg/qlever'
+     */
+    image?: string;
+    /**
+     * Port for the SPARQL endpoint.
+     * @default 7001
+     */
+    port?: number;
+    /**
+     * Working directory for imports.
+     */
+    workingDir?: string;
+}
+/**
+ * Writer configuration.
+ */
+export interface WriterConfig {
+    type: 'file' | 'sparql';
+    outputDir?: string;
+    endpoint?: URL;
+}
+/**
+ * Complete pipeline configuration.
+ */
+export interface PipelineConfig {
+    selector: Selector;
+    steps: Step[];
+    writers?: WriterConfig[];
+    qlever?: QleverConfig;
+}
+/**
+ * Fluent builder for creating pipeline configurations.
+ *
+ * @example
+ * ```typescript
+ * const config = PipelineBuilder.create()
+ *   .withSelector(registry('https://example.com/sparql'))
+ *   .addStep(sparqlQuery('queries/triples.rq'))
+ *   .addWriter(fileWriter({ outputDir: 'output' }))
+ *   .build();
+ * ```
+ */
+export declare class PipelineBuilder {
+    private selector?;
+    private steps;
+    private writers;
+    private qleverConfig?;
+    /**
+     * Create a new PipelineBuilder instance.
+     */
+    static create(): PipelineBuilder;
+    /**
+     * Set the dataset selector.
+     */
+    withSelector(selector: Selector): this;
+    /**
+     * Configure QLever for local SPARQL imports.
+     */
+    withQlever(config: QleverConfig): this;
+    /**
+     * Add a single step to the pipeline.
+     */
+    addStep(step: Step): this;
+    /**
+     * Add multiple steps to the pipeline.
+     */
+    addSteps(...steps: Step[]): this;
+    /**
+     * Add a writer for pipeline output.
+     */
+    addWriter(writer: WriterConfig): this;
+    /**
+     * Build the final pipeline configuration.
+     * @throws Error if selector is not set
+     */
+    build(): PipelineConfig;
+}
+/**
+ * Create a selector that queries a Dataset Registry.
+ *
+ * @param endpoint SPARQL endpoint URL of the registry
+ */
+export declare function registry(endpoint: string | URL): RegistrySelector;
+/**
+ * Create a selector for manually specified datasets.
+ *
+ * @param datasets Array of dataset IRIs
+ */
+export declare function manual(...datasetIris: URL[]): ManualDatasetSelection;
+/**
+ * Create a file writer configuration.
+ */
+export declare function fileWriter(options: {
+    outputDir: string;
+}): WriterConfig;
+/**
+ * Create a SPARQL UPDATE writer configuration.
+ */
+export declare function sparqlWriter(options: {
+    endpoint: URL;
+}): WriterConfig;
+//# sourceMappingURL=builder.d.ts.map
package/dist/builder.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"builder.d.ts","sourceRoot":"","sources":["../src/builder.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,IAAI,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC1B;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,GAAG,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;IACzB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,CAAW;IAC5B,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,YAAY,CAAC,CAAe;IAEpC;;OAEG;IACH,MAAM,CAAC,MAAM,IAAI,eAAe;IAIhC;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKtC;;OAEG;IACH,UAAU,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKtC;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;IAKzB;;OAEG;IACH,QAAQ,CAAC,GAAG,KAAK,EAAE,IAAI,EAAE,GAAG,IAAI;IAKhC;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKrC;;;OAGG;IACH,KAAK,IAAI,cAAc;CAYxB;AAID;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,GAAG,GAAG,gBAAgB,CAMjE;AAED;;;;GAIG;AACH,wBAAgB,MAAM,CAAC,GAAG,WAAW,EAAE,GAAG,EAAE,GAAG,sBAAsB,CAKpE;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GAAG,YAAY,CAKvE;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE;IAAE,QAAQ,EAAE,GAAG,CAAA;CAAE,GAAG,YAAY,CAKrE"}
package/dist/builder.js
ADDED
@@ -0,0 +1,115 @@
+import { Dataset } from '@lde/dataset';
+import { ManualDatasetSelection, RegistrySelector, } from './selector.js';
+import { Client } from '@lde/dataset-registry-client';
+/**
+ * Fluent builder for creating pipeline configurations.
+ *
+ * @example
+ * ```typescript
+ * const config = PipelineBuilder.create()
+ *   .withSelector(registry('https://example.com/sparql'))
+ *   .addStep(sparqlQuery('queries/triples.rq'))
+ *   .addWriter(fileWriter({ outputDir: 'output' }))
+ *   .build();
+ * ```
+ */
+export class PipelineBuilder {
+  selector;
+  steps = [];
+  writers = [];
+  qleverConfig;
+  /**
+   * Create a new PipelineBuilder instance.
+   */
+  static create() {
+    return new PipelineBuilder();
+  }
+  /**
+   * Set the dataset selector.
+   */
+  withSelector(selector) {
+    this.selector = selector;
+    return this;
+  }
+  /**
+   * Configure QLever for local SPARQL imports.
+   */
+  withQlever(config) {
+    this.qleverConfig = config;
+    return this;
+  }
+  /**
+   * Add a single step to the pipeline.
+   */
+  addStep(step) {
+    this.steps.push(step);
+    return this;
+  }
+  /**
+   * Add multiple steps to the pipeline.
+   */
+  addSteps(...steps) {
+    this.steps.push(...steps);
+    return this;
+  }
+  /**
+   * Add a writer for pipeline output.
+   */
+  addWriter(writer) {
+    this.writers.push(writer);
+    return this;
+  }
+  /**
+   * Build the final pipeline configuration.
+   * @throws Error if selector is not set
+   */
+  build() {
+    if (!this.selector) {
+      throw new Error('Selector is required. Use withSelector() to set it.');
+    }
+    return {
+      selector: this.selector,
+      steps: this.steps,
+      writers: this.writers.length > 0 ? this.writers : undefined,
+      qlever: this.qleverConfig,
+    };
+  }
+}
+// Helper functions for fluent construction.
+/**
+ * Create a selector that queries a Dataset Registry.
+ *
+ * @param endpoint SPARQL endpoint URL of the registry
+ */
+export function registry(endpoint) {
+  return new RegistrySelector({
+    registry: new Client(typeof endpoint === 'string' ? new URL(endpoint) : endpoint),
+  });
+}
+/**
+ * Create a selector for manually specified datasets.
+ *
+ * @param datasets Array of dataset IRIs
+ */
+export function manual(...datasetIris) {
+  const datasets = datasetIris.map((iri) => new Dataset({ iri, distributions: [] }));
+  return new ManualDatasetSelection(datasets);
+}
+/**
+ * Create a file writer configuration.
+ */
+export function fileWriter(options) {
+  return {
+    type: 'file',
+    outputDir: options.outputDir,
+  };
+}
+/**
+ * Create a SPARQL UPDATE writer configuration.
+ */
+export function sparqlWriter(options) {
+  return {
+    type: 'sparql',
+    endpoint: options.endpoint,
+  };
+}
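Assembled, the builder and its helpers read like this. A sketch following the `@example` in `builder.d.ts`; the endpoint URLs and QLever settings are illustrative values, and the helpers are assumed to be re-exported from the package root.

```typescript
import {
  PipelineBuilder,
  fileWriter,
  registry,
  sparqlWriter,
} from '@lde/pipeline';

const config = PipelineBuilder.create()
  .withSelector(registry('https://example.com/sparql'))
  .withQlever({ mode: 'docker', port: 7001 }) // image defaults to 'adfreiburg/qlever'
  .addWriter(fileWriter({ outputDir: 'output' }))
  .addWriter(sparqlWriter({ endpoint: new URL('https://example.com/update') }))
  .build(); // throws if withSelector() was never called
```

As the compiled `build()` shows, the `writers` key is omitted from the resulting `PipelineConfig` when no writers were added.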
package/dist/config.d.ts
ADDED
@@ -0,0 +1,70 @@
+import { PipelineConfig, QleverConfig } from './builder.js';
+/**
+ * Raw configuration schema from YAML/JSON files.
+ */
+export interface RawPipelineConfig {
+    selector?: {
+        type: 'registry' | 'manual';
+        endpoint?: string;
+        datasets?: string[];
+    };
+    qlever?: QleverConfig;
+    steps?: Array<{
+        type: 'sparql-query';
+        query: string;
+    }>;
+    writers?: Array<{
+        type: 'file' | 'sparql';
+        outputDir?: string;
+        endpoint?: string;
+    }>;
+}
+/**
+ * Options for loading pipeline configuration.
+ */
+export interface LoadConfigOptions {
+    /**
+     * Configuration file name (without extension).
+     * @default 'pipeline.config'
+     */
+    name?: string;
+    /**
+     * Working directory to search for config files.
+     * @default process.cwd()
+     */
+    cwd?: string;
+}
+/**
+ * Define a pipeline configuration with TypeScript type checking.
+ *
+ * @example
+ * ```typescript
+ * // pipeline.config.ts
+ * import { defineConfig } from '@lde/pipeline';
+ *
+ * export default defineConfig({
+ *   selector: { type: 'registry', endpoint: 'https://example.com/sparql' },
+ *   steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
+ * });
+ * ```
+ */
+export declare function defineConfig(config: RawPipelineConfig): RawPipelineConfig;
+/**
+ * Load pipeline configuration from files.
+ *
+ * Searches for configuration files in the following order:
+ * - pipeline.config.ts
+ * - pipeline.config.js
+ * - pipeline.config.yaml
+ * - pipeline.config.yml
+ * - pipeline.config.json
+ *
+ * @param options Load options
+ * @returns Resolved pipeline configuration
+ */
+export declare function loadPipelineConfig(options?: LoadConfigOptions): Promise<PipelineConfig>;
+/**
+ * Normalize raw configuration into a typed PipelineConfig.
+ */
+export declare function normalizeConfig(raw: RawPipelineConfig): PipelineConfig;
+//# sourceMappingURL=config.d.ts.map
package/dist/config.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AACA,OAAO,EACL,cAAc,EACd,YAAY,EAIb,MAAM,cAAc,CAAC;AAKtB;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE;QACT,IAAI,EAAE,UAAU,GAAG,QAAQ,CAAC;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;IACF,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,cAAc,CAAC;QACrB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;QACxB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,iBAAiB,GAAG,iBAAiB,CAEzE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,cAAc,CAAC,CAWzB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,iBAAiB,GAAG,cAAc,CAOtE"}
package/dist/config.js
ADDED
@@ -0,0 +1,110 @@
+import { loadConfig } from 'c12';
+import { registry, manual, } from './builder.js';
+import { SparqlQuery } from './step/sparqlQuery.js';
+/**
+ * Define a pipeline configuration with TypeScript type checking.
+ *
+ * @example
+ * ```typescript
+ * // pipeline.config.ts
+ * import { defineConfig } from '@lde/pipeline';
+ *
+ * export default defineConfig({
+ *   selector: { type: 'registry', endpoint: 'https://example.com/sparql' },
+ *   steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
+ * });
+ * ```
+ */
+export function defineConfig(config) {
+  return config;
+}
+/**
+ * Load pipeline configuration from files.
+ *
+ * Searches for configuration files in the following order:
+ * - pipeline.config.ts
+ * - pipeline.config.js
+ * - pipeline.config.yaml
+ * - pipeline.config.yml
+ * - pipeline.config.json
+ *
+ * @param options Load options
+ * @returns Resolved pipeline configuration
+ */
+export async function loadPipelineConfig(options) {
+  const { config } = await loadConfig({
+    name: options?.name ?? 'pipeline.config',
+    cwd: options?.cwd,
+  });
+  if (!config) {
+    throw new Error('No pipeline configuration found');
+  }
+  return normalizeConfig(config);
+}
+/**
+ * Normalize raw configuration into a typed PipelineConfig.
+ */
+export function normalizeConfig(raw) {
+  return {
+    selector: normalizeSelector(raw.selector),
+    steps: normalizeSteps(raw.steps),
+    writers: normalizeWriters(raw.writers),
+    qlever: raw.qlever,
+  };
+}
+function normalizeSelector(raw) {
+  if (!raw) {
+    throw new Error('Selector configuration is required');
+  }
+  switch (raw.type) {
+    case 'registry':
+      if (!raw.endpoint) {
+        throw new Error('Registry selector requires endpoint');
+      }
+      return registry(raw.endpoint);
+    case 'manual':
+      if (!raw.datasets || raw.datasets.length === 0) {
+        throw new Error('Manual selector requires datasets');
+      }
+      return manual(...raw.datasets.map((d) => new URL(d)));
+    default:
+      throw new Error(`Unknown selector type: ${raw.type}`);
+  }
+}
+function normalizeSteps(raw) {
+  if (!raw) {
+    return [];
+  }
+  return raw.map((step) => {
+    switch (step.type) {
+      case 'sparql-query':
+        return new SparqlQuery({
+          identifier: step.query,
+          query: step.query, // Will be loaded from file by SparqlQuery.fromFile if path
+        });
+      default:
+        throw new Error(`Unknown step type: ${step.type}`);
+    }
+  });
+}
+function normalizeWriters(raw) {
+  if (!raw || raw.length === 0) {
+    return undefined;
+  }
+  return raw.map((writer) => {
+    switch (writer.type) {
+      case 'file':
+        if (!writer.outputDir) {
+          throw new Error('File writer requires outputDir');
+        }
+        return { type: 'file', outputDir: writer.outputDir };
+      case 'sparql':
+        if (!writer.endpoint) {
+          throw new Error('SPARQL writer requires endpoint');
+        }
+        return { type: 'sparql', endpoint: new URL(writer.endpoint) };
+      default:
+        throw new Error(`Unknown writer type: ${writer.type}`);
+    }
+  });
+}
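The config module ties `defineConfig` and `loadPipelineConfig` together. A sketch following the `@example` in `config.d.ts`; the `writers` entry is an extra illustration drawn from the `RawPipelineConfig` shape, not from the example itself.

```typescript
// pipeline.config.ts
import { defineConfig } from '@lde/pipeline';

export default defineConfig({
  selector: { type: 'registry', endpoint: 'https://example.com/sparql' },
  steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
  writers: [{ type: 'file', outputDir: 'output' }],
});
```

Loading then resolves the file via c12 and normalizes the raw shape into a typed `PipelineConfig`:

```typescript
import { loadPipelineConfig } from '@lde/pipeline';

// Searches pipeline.config.{ts,js,yaml,yml,json} in the working directory.
const config = await loadPipelineConfig({ cwd: process.cwd() });
```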
package/dist/distribution/analyzer.d.ts
ADDED
@@ -0,0 +1,58 @@
+import { Dataset } from '@lde/dataset';
+import { Importer, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
+import { Store } from 'n3';
+export type { Importer };
+export { ImportFailed, ImportSuccessful, NotSupported };
+/**
+ * Extended importer interface with optional cleanup method.
+ */
+export interface ImporterWithFinish extends Importer {
+    finish?(): Promise<void>;
+}
+export interface DistributionAnalyzerOptions {
+    /**
+     * Optional importer for loading data dumps when no SPARQL endpoint is available.
+     */
+    importer?: ImporterWithFinish;
+    /**
+     * Timeout for probe requests in milliseconds.
+     * @default 5000
+     */
+    timeout?: number;
+}
+/**
+ * Result indicating the analyzer could not find a usable distribution.
+ */
+export declare class NoDistributionAvailable {
+    readonly message: string;
+    constructor(message: string);
+}
+/**
+ * Analyzes dataset distributions by probing their availability.
+ *
+ * - Probes SPARQL endpoints with a simple SELECT query
+ * - Probes data dumps with HEAD/GET requests
+ * - Records probe results as RDF (schema:Action)
+ * - Updates distribution metadata (isValid, lastModified, byteSize)
+ * - Optionally imports data dumps if no SPARQL endpoint is available
+ */
+export declare class DistributionAnalyzer {
+    readonly name = "distribution";
+    private readonly importer?;
+    private readonly timeout;
+    constructor(options?: DistributionAnalyzerOptions);
+    /**
+     * Analyze all distributions of a dataset.
+     *
+     * @returns Store with probe results as RDF, or NoDistributionAvailable if no usable distribution found
+     */
+    execute(dataset: Dataset): Promise<Store | NoDistributionAvailable>;
+    /**
+     * Cleanup resources (e.g., importer connections).
+     */
+    finish(): Promise<void>;
+    private buildProbeResultsRdf;
+    private addSuccessTriples;
+    private addImportError;
+}
+//# sourceMappingURL=analyzer.d.ts.map
package/dist/distribution/analyzer.d.ts.map
ADDED
@@ -0,0 +1 @@
+{"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../../src/distribution/analyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAe,KAAK,EAAE,MAAM,IAAI,CAAC;AASxC,YAAY,EAAE,QAAQ,EAAE,CAAC;AACzB,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,CAAC;AAWxD;;GAEG;AACH,MAAM,WAAW,kBAAmB,SAAQ,QAAQ;IAClD,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED,MAAM,WAAW,2BAA2B;IAC1C;;OAEG;IACH,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAE9B;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,uBAAuB;aACN,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED;;;;;;;;GAQG;AACH,qBAAa,oBAAoB;IAC/B,SAAgB,IAAI,kBAAkB;IACtC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAqB;IAC/C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,2BAA2B;IAKjD;;;;OAIG;IACG,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,KAAK,GAAG,uBAAuB,CAAC;IAmCzE;;OAEG;IACG,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,OAAO,CAAC,oBAAoB;IAwC5B,OAAO,CAAC,iBAAiB;IAiDzB,OAAO,CAAC,cAAc;CAevB"}
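Typical use of the new `DistributionAnalyzer`, as a sketch: it assumes the class is re-exported from the package root (a `dist/distribution/index.js` barrel is part of this release) and that `dataset` comes from `@lde/dataset`.

```typescript
import { DistributionAnalyzer, NoDistributionAvailable } from '@lde/pipeline';
import type { Dataset } from '@lde/dataset';

async function probeDistributions(dataset: Dataset): Promise<void> {
  // Probe timeout defaults to 5000 ms per the options declaration.
  const analyzer = new DistributionAnalyzer({ timeout: 10_000 });
  const result = await analyzer.execute(dataset);
  if (result instanceof NoDistributionAvailable) {
    console.warn(`No usable distribution: ${result.message}`);
  } else {
    // An n3 Store holding the probe results as RDF (schema:Action).
    console.log(`Recorded ${result.size} probe triples`);
  }
  await analyzer.finish(); // releases importer resources, if any
}
```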