@lde/pipeline 0.5.1 → 0.6.1

Files changed (50)
  1. package/README.md +44 -0
  2. package/dist/analyzer.d.ts +36 -0
  3. package/dist/analyzer.d.ts.map +1 -0
  4. package/dist/analyzer.js +29 -0
  5. package/dist/builder.d.ts +114 -0
  6. package/dist/builder.d.ts.map +1 -0
  7. package/dist/builder.js +115 -0
  8. package/dist/config.d.ts +70 -0
  9. package/dist/config.d.ts.map +1 -0
  10. package/dist/config.js +110 -0
  11. package/dist/distribution/analyzer.d.ts +58 -0
  12. package/dist/distribution/analyzer.d.ts.map +1 -0
  13. package/dist/distribution/analyzer.js +120 -0
  14. package/dist/distribution/index.d.ts +3 -0
  15. package/dist/distribution/index.d.ts.map +1 -0
  16. package/dist/distribution/index.js +2 -0
  17. package/dist/distribution/probe.d.ts +47 -0
  18. package/dist/distribution/probe.d.ts.map +1 -0
  19. package/dist/distribution/probe.js +120 -0
  20. package/dist/index.d.ts +4 -0
  21. package/dist/index.d.ts.map +1 -1
  22. package/dist/index.js +4 -0
  23. package/dist/sparql/collect.d.ts +19 -0
  24. package/dist/sparql/collect.d.ts.map +1 -0
  25. package/dist/sparql/collect.js +23 -0
  26. package/dist/sparql/executor.d.ts +121 -0
  27. package/dist/sparql/executor.d.ts.map +1 -0
  28. package/dist/sparql/executor.js +107 -0
  29. package/dist/sparql/index.d.ts +3 -0
  30. package/dist/sparql/index.d.ts.map +1 -0
  31. package/dist/sparql/index.js +2 -0
  32. package/dist/step/sparqlQuery.d.ts +10 -5
  33. package/dist/step/sparqlQuery.d.ts.map +1 -1
  34. package/dist/step/sparqlQuery.js +16 -20
  35. package/dist/writer/fileWriter.d.ts +23 -0
  36. package/dist/writer/fileWriter.d.ts.map +1 -0
  37. package/dist/writer/fileWriter.js +51 -0
  38. package/dist/writer/index.d.ts +5 -0
  39. package/dist/writer/index.d.ts.map +1 -0
  40. package/dist/writer/index.js +4 -0
  41. package/dist/writer/serialize.d.ts +7 -0
  42. package/dist/writer/serialize.d.ts.map +1 -0
  43. package/dist/writer/serialize.js +20 -0
  44. package/dist/writer/sparqlUpdateWriter.d.ts +34 -0
  45. package/dist/writer/sparqlUpdateWriter.d.ts.map +1 -0
  46. package/dist/writer/sparqlUpdateWriter.js +43 -0
  47. package/dist/writer/writer.d.ts +15 -0
  48. package/dist/writer/writer.d.ts.map +1 -0
  49. package/dist/writer/writer.js +1 -0
  50. package/package.json +20 -5
package/README.md ADDED
@@ -0,0 +1,44 @@
+ # Pipeline
+
+ Framework for building RDF data processing pipelines with SPARQL.
+
+ ## Features
+
+ - **Pipeline** — orchestrates steps that process DCAT datasets
+ - **PipelineBuilder** — fluent API for constructing pipelines from steps and selectors
+ - **PipelineConfig** — load pipeline configuration from YAML/JSON files
+ - **SparqlConstructExecutor** — streaming SPARQL CONSTRUCT with template substitution and variable bindings
+ - **Distribution analysis** — probe and analyze dataset distributions
+
+ ## Subpath exports
+
+ | Export                   | Description                                                   |
+ | ------------------------ | ------------------------------------------------------------- |
+ | `@lde/pipeline`          | Steps, pipeline, builder, config, SPARQL                      |
+ | `@lde/pipeline/analyzer` | Analyzer contracts (`Analyzer`, `BaseAnalyzer`, result types) |
+ | `@lde/pipeline/writer`   | Write RDF data to files or SPARQL endpoints                   |
+
+ ## Usage
+
+ ```typescript
+ import {
+   PipelineBuilder,
+   SparqlConstructExecutor,
+   collect,
+ } from '@lde/pipeline';
+
+ // Build a pipeline from steps
+ const pipeline = new PipelineBuilder().addStep(myStep).build();
+
+ // Or use the SPARQL executor directly
+ const executor = new SparqlConstructExecutor({
+   query: 'CONSTRUCT { ?dataset ?p ?o } WHERE { ?s ?p ?o }',
+ });
+ const result = await executor.execute(dataset);
+ ```
+
+ ## Validation
+
+ ```sh
+ npx nx run-many -t lint test typecheck build --projects=@lde/pipeline
+ ```
package/dist/analyzer.d.ts ADDED
@@ -0,0 +1,36 @@
+ import { Dataset } from '@lde/dataset';
+ import type { DatasetCore } from '@rdfjs/types';
+ import { NotSupported } from './step.js';
+ export { NotSupported } from './step.js';
+ /**
+  * Result of a successful analysis.
+  */
+ export declare class Success {
+     readonly data: DatasetCore;
+     constructor(data: DatasetCore);
+ }
+ /**
+  * Analysis failed.
+  */
+ export declare class Failure {
+     readonly endpoint: URL;
+     readonly message?: string | undefined;
+     constructor(endpoint: URL, message?: string | undefined);
+ }
+ /**
+  * Interface for analyzers.
+  */
+ export interface Analyzer {
+     readonly name: string;
+     execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
+     finish?(): Promise<void>;
+ }
+ /**
+  * Base class for analyzers with default implementations.
+  */
+ export declare abstract class BaseAnalyzer implements Analyzer {
+     abstract readonly name: string;
+     abstract execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
+     finish(): Promise<void>;
+ }
+ //# sourceMappingURL=analyzer.d.ts.map
package/dist/analyzer.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../src/analyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAChD,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC,OAAO,EAAE,YAAY,EAAE,MAAM,WAAW,CAAC;AAEzC;;GAEG;AACH,qBAAa,OAAO;aACU,IAAI,EAAE,WAAW;gBAAjB,IAAI,EAAE,WAAW;CAC9C;AAED;;GAEG;AACH,qBAAa,OAAO;aAEA,QAAQ,EAAE,GAAG;aACb,OAAO,CAAC,EAAE,MAAM;gBADhB,QAAQ,EAAE,GAAG,EACb,OAAO,CAAC,EAAE,MAAM,YAAA;CAEnC;AAED;;GAEG;AACH,MAAM,WAAW,QAAQ;IACvB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC,CAAC;IACrE,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED;;GAEG;AACH,8BAAsB,YAAa,YAAW,QAAQ;IACpD,QAAQ,CAAC,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IAC/B,QAAQ,CAAC,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC;IAEvE,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAG9B"}
package/dist/analyzer.js ADDED
@@ -0,0 +1,29 @@
+ export { NotSupported } from './step.js';
+ /**
+  * Result of a successful analysis.
+  */
+ export class Success {
+     data;
+     constructor(data) {
+         this.data = data;
+     }
+ }
+ /**
+  * Analysis failed.
+  */
+ export class Failure {
+     endpoint;
+     message;
+     constructor(endpoint, message) {
+         this.endpoint = endpoint;
+         this.message = message;
+     }
+ }
+ /**
+  * Base class for analyzers with default implementations.
+  */
+ export class BaseAnalyzer {
+     async finish() {
+         // Default no-op implementation.
+     }
+ }
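The `analyzer` module above defines the analysis contract: implement `name` and `execute()`, returning `Success`, `Failure`, or `NotSupported`. A minimal sketch of a custom analyzer against these declarations; the `LabelAnalyzer` name and its triple are hypothetical, and n3's `Store` (used elsewhere in this package) stands in for any RDF/JS `DatasetCore` implementation:

```typescript
import { BaseAnalyzer, Success, Failure } from '@lde/pipeline/analyzer';
import type { Dataset } from '@lde/dataset';
import { DataFactory, Store } from 'n3';

// Hypothetical analyzer that records a single triple about the dataset.
class LabelAnalyzer extends BaseAnalyzer {
  readonly name = 'label';

  async execute(dataset: Dataset): Promise<Success | Failure> {
    const data = new Store();
    data.addQuad(
      DataFactory.namedNode(dataset.iri.toString()),
      DataFactory.namedNode('http://www.w3.org/2000/01/rdf-schema#label'),
      DataFactory.literal('Example label')
    );
    return new Success(data); // Success wraps any RDF/JS DatasetCore
  }
}
```

The inherited `finish()` is a no-op, so only analyzers that hold resources need to override it.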
package/dist/builder.d.ts ADDED
@@ -0,0 +1,114 @@
+ import { Selector, ManualDatasetSelection, RegistrySelector } from './selector.js';
+ import { Step } from './step.js';
+ /**
+  * Configuration for QLever SPARQL server.
+  */
+ export interface QleverConfig {
+     /**
+      * Execution mode: 'docker' for containerized, 'native' for local binary.
+      */
+     mode: 'docker' | 'native';
+     /**
+      * Docker image to use (for docker mode).
+      * @default 'adfreiburg/qlever'
+      */
+     image?: string;
+     /**
+      * Port for the SPARQL endpoint.
+      * @default 7001
+      */
+     port?: number;
+     /**
+      * Working directory for imports.
+      */
+     workingDir?: string;
+ }
+ /**
+  * Writer configuration.
+  */
+ export interface WriterConfig {
+     type: 'file' | 'sparql';
+     outputDir?: string;
+     endpoint?: URL;
+ }
+ /**
+  * Complete pipeline configuration.
+  */
+ export interface PipelineConfig {
+     selector: Selector;
+     steps: Step[];
+     writers?: WriterConfig[];
+     qlever?: QleverConfig;
+ }
+ /**
+  * Fluent builder for creating pipeline configurations.
+  *
+  * @example
+  * ```typescript
+  * const config = PipelineBuilder.create()
+  *   .withSelector(registry('https://example.com/sparql'))
+  *   .addStep(sparqlQuery('queries/triples.rq'))
+  *   .addWriter(fileWriter({ outputDir: 'output' }))
+  *   .build();
+  * ```
+  */
+ export declare class PipelineBuilder {
+     private selector?;
+     private steps;
+     private writers;
+     private qleverConfig?;
+     /**
+      * Create a new PipelineBuilder instance.
+      */
+     static create(): PipelineBuilder;
+     /**
+      * Set the dataset selector.
+      */
+     withSelector(selector: Selector): this;
+     /**
+      * Configure QLever for local SPARQL imports.
+      */
+     withQlever(config: QleverConfig): this;
+     /**
+      * Add a single step to the pipeline.
+      */
+     addStep(step: Step): this;
+     /**
+      * Add multiple steps to the pipeline.
+      */
+     addSteps(...steps: Step[]): this;
+     /**
+      * Add a writer for pipeline output.
+      */
+     addWriter(writer: WriterConfig): this;
+     /**
+      * Build the final pipeline configuration.
+      * @throws Error if selector is not set
+      */
+     build(): PipelineConfig;
+ }
+ /**
+  * Create a selector that queries a Dataset Registry.
+  *
+  * @param endpoint SPARQL endpoint URL of the registry
+  */
+ export declare function registry(endpoint: string | URL): RegistrySelector;
+ /**
+  * Create a selector for manually specified datasets.
+  *
+  * @param datasetIris Array of dataset IRIs
+  */
+ export declare function manual(...datasetIris: URL[]): ManualDatasetSelection;
+ /**
+  * Create a file writer configuration.
+  */
+ export declare function fileWriter(options: {
+     outputDir: string;
+ }): WriterConfig;
+ /**
+  * Create a SPARQL UPDATE writer configuration.
+  */
+ export declare function sparqlWriter(options: {
+     endpoint: URL;
+ }): WriterConfig;
+ //# sourceMappingURL=builder.d.ts.map
package/dist/builder.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"builder.d.ts","sourceRoot":"","sources":["../src/builder.ts"],"names":[],"mappings":"AACA,OAAO,EACL,QAAQ,EACR,sBAAsB,EACtB,gBAAgB,EACjB,MAAM,eAAe,CAAC;AACvB,OAAO,EAAE,IAAI,EAAE,MAAM,WAAW,CAAC;AAGjC;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B;;OAEG;IACH,IAAI,EAAE,QAAQ,GAAG,QAAQ,CAAC;IAC1B;;;OAGG;IACH,KAAK,CAAC,EAAE,MAAM,CAAC;IACf;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;OAEG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;IACxB,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,QAAQ,CAAC,EAAE,GAAG,CAAC;CAChB;AAED;;GAEG;AACH,MAAM,WAAW,cAAc;IAC7B,QAAQ,EAAE,QAAQ,CAAC;IACnB,KAAK,EAAE,IAAI,EAAE,CAAC;IACd,OAAO,CAAC,EAAE,YAAY,EAAE,CAAC;IACzB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,eAAe;IAC1B,OAAO,CAAC,QAAQ,CAAC,CAAW;IAC5B,OAAO,CAAC,KAAK,CAAc;IAC3B,OAAO,CAAC,OAAO,CAAsB;IACrC,OAAO,CAAC,YAAY,CAAC,CAAe;IAEpC;;OAEG;IACH,MAAM,CAAC,MAAM,IAAI,eAAe;IAIhC;;OAEG;IACH,YAAY,CAAC,QAAQ,EAAE,QAAQ,GAAG,IAAI;IAKtC;;OAEG;IACH,UAAU,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKtC;;OAEG;IACH,OAAO,CAAC,IAAI,EAAE,IAAI,GAAG,IAAI;IAKzB;;OAEG;IACH,QAAQ,CAAC,GAAG,KAAK,EAAE,IAAI,EAAE,GAAG,IAAI;IAKhC;;OAEG;IACH,SAAS,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;IAKrC;;;OAGG;IACH,KAAK,IAAI,cAAc;CAYxB;AAID;;;;GAIG;AACH,wBAAgB,QAAQ,CAAC,QAAQ,EAAE,MAAM,GAAG,GAAG,GAAG,gBAAgB,CAMjE;AAED;;;;GAIG;AACH,wBAAgB,MAAM,CAAC,GAAG,WAAW,EAAE,GAAG,EAAE,GAAG,sBAAsB,CAKpE;AAED;;GAEG;AACH,wBAAgB,UAAU,CAAC,OAAO,EAAE;IAAE,SAAS,EAAE,MAAM,CAAA;CAAE,GAAG,YAAY,CAKvE;AAED;;GAEG;AACH,wBAAgB,YAAY,CAAC,OAAO,EAAE;IAAE,QAAQ,EAAE,GAAG,CAAA;CAAE,GAAG,YAAY,CAKrE"}
package/dist/builder.js ADDED
@@ -0,0 +1,115 @@
+ import { Dataset } from '@lde/dataset';
+ import { ManualDatasetSelection, RegistrySelector, } from './selector.js';
+ import { Client } from '@lde/dataset-registry-client';
+ /**
+  * Fluent builder for creating pipeline configurations.
+  *
+  * @example
+  * ```typescript
+  * const config = PipelineBuilder.create()
+  *   .withSelector(registry('https://example.com/sparql'))
+  *   .addStep(sparqlQuery('queries/triples.rq'))
+  *   .addWriter(fileWriter({ outputDir: 'output' }))
+  *   .build();
+  * ```
+  */
+ export class PipelineBuilder {
+     selector;
+     steps = [];
+     writers = [];
+     qleverConfig;
+     /**
+      * Create a new PipelineBuilder instance.
+      */
+     static create() {
+         return new PipelineBuilder();
+     }
+     /**
+      * Set the dataset selector.
+      */
+     withSelector(selector) {
+         this.selector = selector;
+         return this;
+     }
+     /**
+      * Configure QLever for local SPARQL imports.
+      */
+     withQlever(config) {
+         this.qleverConfig = config;
+         return this;
+     }
+     /**
+      * Add a single step to the pipeline.
+      */
+     addStep(step) {
+         this.steps.push(step);
+         return this;
+     }
+     /**
+      * Add multiple steps to the pipeline.
+      */
+     addSteps(...steps) {
+         this.steps.push(...steps);
+         return this;
+     }
+     /**
+      * Add a writer for pipeline output.
+      */
+     addWriter(writer) {
+         this.writers.push(writer);
+         return this;
+     }
+     /**
+      * Build the final pipeline configuration.
+      * @throws Error if selector is not set
+      */
+     build() {
+         if (!this.selector) {
+             throw new Error('Selector is required. Use withSelector() to set it.');
+         }
+         return {
+             selector: this.selector,
+             steps: this.steps,
+             writers: this.writers.length > 0 ? this.writers : undefined,
+             qlever: this.qleverConfig,
+         };
+     }
+ }
+ // Helper functions for fluent construction.
+ /**
+  * Create a selector that queries a Dataset Registry.
+  *
+  * @param endpoint SPARQL endpoint URL of the registry
+  */
+ export function registry(endpoint) {
+     return new RegistrySelector({
+         registry: new Client(typeof endpoint === 'string' ? new URL(endpoint) : endpoint),
+     });
+ }
+ /**
+  * Create a selector for manually specified datasets.
+  *
+  * @param datasetIris Array of dataset IRIs
+  */
+ export function manual(...datasetIris) {
+     const datasets = datasetIris.map((iri) => new Dataset({ iri, distributions: [] }));
+     return new ManualDatasetSelection(datasets);
+ }
+ /**
+  * Create a file writer configuration.
+  */
+ export function fileWriter(options) {
+     return {
+         type: 'file',
+         outputDir: options.outputDir,
+     };
+ }
+ /**
+  * Create a SPARQL UPDATE writer configuration.
+  */
+ export function sparqlWriter(options) {
+     return {
+         type: 'sparql',
+         endpoint: options.endpoint,
+     };
+ }
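The builder and its helper functions also support configurations beyond the registry example in the jsdoc. A hedged sketch combining `manual()`, `withQlever()`, and `sparqlWriter()`; it assumes these helpers are re-exported from the package root (the diff adds four lines to `dist/index.js`, but their content is not shown here), and all URLs are placeholders:

```typescript
import { PipelineBuilder, manual, sparqlWriter } from '@lde/pipeline';

const config = PipelineBuilder.create()
  // Select datasets by IRI instead of querying a registry
  .withSelector(manual(new URL('https://example.org/dataset/1')))
  // Docker-mode QLever; image defaults to 'adfreiburg/qlever', port to 7001
  .withQlever({ mode: 'docker', port: 7001 })
  // Write results via SPARQL UPDATE
  .addWriter(sparqlWriter({ endpoint: new URL('https://example.org/update') }))
  .build(); // throws if no selector was set
```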
package/dist/config.d.ts ADDED
@@ -0,0 +1,70 @@
+ import { PipelineConfig, QleverConfig } from './builder.js';
+ /**
+  * Raw configuration schema from YAML/JSON files.
+  */
+ export interface RawPipelineConfig {
+     selector?: {
+         type: 'registry' | 'manual';
+         endpoint?: string;
+         datasets?: string[];
+     };
+     qlever?: QleverConfig;
+     steps?: Array<{
+         type: 'sparql-query';
+         query: string;
+     }>;
+     writers?: Array<{
+         type: 'file' | 'sparql';
+         outputDir?: string;
+         endpoint?: string;
+     }>;
+ }
+ /**
+  * Options for loading pipeline configuration.
+  */
+ export interface LoadConfigOptions {
+     /**
+      * Configuration file name (without extension).
+      * @default 'pipeline.config'
+      */
+     name?: string;
+     /**
+      * Working directory to search for config files.
+      * @default process.cwd()
+      */
+     cwd?: string;
+ }
+ /**
+  * Define a pipeline configuration with TypeScript type checking.
+  *
+  * @example
+  * ```typescript
+  * // pipeline.config.ts
+  * import { defineConfig } from '@lde/pipeline';
+  *
+  * export default defineConfig({
+  *   selector: { type: 'registry', endpoint: 'https://example.com/sparql' },
+  *   steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
+  * });
+  * ```
+  */
+ export declare function defineConfig(config: RawPipelineConfig): RawPipelineConfig;
+ /**
+  * Load pipeline configuration from files.
+  *
+  * Searches for configuration files in the following order:
+  * - pipeline.config.ts
+  * - pipeline.config.js
+  * - pipeline.config.yaml
+  * - pipeline.config.yml
+  * - pipeline.config.json
+  *
+  * @param options Load options
+  * @returns Resolved pipeline configuration
+  */
+ export declare function loadPipelineConfig(options?: LoadConfigOptions): Promise<PipelineConfig>;
+ /**
+  * Normalize raw configuration into a typed PipelineConfig.
+  */
+ export declare function normalizeConfig(raw: RawPipelineConfig): PipelineConfig;
+ //# sourceMappingURL=config.d.ts.map
package/dist/config.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"config.d.ts","sourceRoot":"","sources":["../src/config.ts"],"names":[],"mappings":"AACA,OAAO,EACL,cAAc,EACd,YAAY,EAIb,MAAM,cAAc,CAAC;AAKtB;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC,QAAQ,CAAC,EAAE;QACT,IAAI,EAAE,UAAU,GAAG,QAAQ,CAAC;QAC5B,QAAQ,CAAC,EAAE,MAAM,CAAC;QAClB,QAAQ,CAAC,EAAE,MAAM,EAAE,CAAC;KACrB,CAAC;IACF,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,KAAK,CAAC,EAAE,KAAK,CAAC;QACZ,IAAI,EAAE,cAAc,CAAC;QACrB,KAAK,EAAE,MAAM,CAAC;KACf,CAAC,CAAC;IACH,OAAO,CAAC,EAAE,KAAK,CAAC;QACd,IAAI,EAAE,MAAM,GAAG,QAAQ,CAAC;QACxB,SAAS,CAAC,EAAE,MAAM,CAAC;QACnB,QAAQ,CAAC,EAAE,MAAM,CAAC;KACnB,CAAC,CAAC;CACJ;AAED;;GAEG;AACH,MAAM,WAAW,iBAAiB;IAChC;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,GAAG,CAAC,EAAE,MAAM,CAAC;CACd;AAED;;;;;;;;;;;;;GAaG;AACH,wBAAgB,YAAY,CAAC,MAAM,EAAE,iBAAiB,GAAG,iBAAiB,CAEzE;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,kBAAkB,CACtC,OAAO,CAAC,EAAE,iBAAiB,GAC1B,OAAO,CAAC,cAAc,CAAC,CAWzB;AAED;;GAEG;AACH,wBAAgB,eAAe,CAAC,GAAG,EAAE,iBAAiB,GAAG,cAAc,CAOtE"}
package/dist/config.js ADDED
@@ -0,0 +1,110 @@
+ import { loadConfig } from 'c12';
+ import { registry, manual, } from './builder.js';
+ import { SparqlQuery } from './step/sparqlQuery.js';
+ /**
+  * Define a pipeline configuration with TypeScript type checking.
+  *
+  * @example
+  * ```typescript
+  * // pipeline.config.ts
+  * import { defineConfig } from '@lde/pipeline';
+  *
+  * export default defineConfig({
+  *   selector: { type: 'registry', endpoint: 'https://example.com/sparql' },
+  *   steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
+  * });
+  * ```
+  */
+ export function defineConfig(config) {
+     return config;
+ }
+ /**
+  * Load pipeline configuration from files.
+  *
+  * Searches for configuration files in the following order:
+  * - pipeline.config.ts
+  * - pipeline.config.js
+  * - pipeline.config.yaml
+  * - pipeline.config.yml
+  * - pipeline.config.json
+  *
+  * @param options Load options
+  * @returns Resolved pipeline configuration
+  */
+ export async function loadPipelineConfig(options) {
+     const { config } = await loadConfig({
+         name: options?.name ?? 'pipeline.config',
+         cwd: options?.cwd,
+     });
+     if (!config) {
+         throw new Error('No pipeline configuration found');
+     }
+     return normalizeConfig(config);
+ }
+ /**
+  * Normalize raw configuration into a typed PipelineConfig.
+  */
+ export function normalizeConfig(raw) {
+     return {
+         selector: normalizeSelector(raw.selector),
+         steps: normalizeSteps(raw.steps),
+         writers: normalizeWriters(raw.writers),
+         qlever: raw.qlever,
+     };
+ }
+ function normalizeSelector(raw) {
+     if (!raw) {
+         throw new Error('Selector configuration is required');
+     }
+     switch (raw.type) {
+         case 'registry':
+             if (!raw.endpoint) {
+                 throw new Error('Registry selector requires endpoint');
+             }
+             return registry(raw.endpoint);
+         case 'manual':
+             if (!raw.datasets || raw.datasets.length === 0) {
+                 throw new Error('Manual selector requires datasets');
+             }
+             return manual(...raw.datasets.map((d) => new URL(d)));
+         default:
+             throw new Error(`Unknown selector type: ${raw.type}`);
+     }
+ }
+ function normalizeSteps(raw) {
+     if (!raw) {
+         return [];
+     }
+     return raw.map((step) => {
+         switch (step.type) {
+             case 'sparql-query':
+                 return new SparqlQuery({
+                     identifier: step.query,
+                     query: step.query, // Will be loaded from file by SparqlQuery.fromFile if path
+                 });
+             default:
+                 throw new Error(`Unknown step type: ${step.type}`);
+         }
+     });
+ }
+ function normalizeWriters(raw) {
+     if (!raw || raw.length === 0) {
+         return undefined;
+     }
+     return raw.map((writer) => {
+         switch (writer.type) {
+             case 'file':
+                 if (!writer.outputDir) {
+                     throw new Error('File writer requires outputDir');
+                 }
+                 return { type: 'file', outputDir: writer.outputDir };
+             case 'sparql':
+                 if (!writer.endpoint) {
+                     throw new Error('SPARQL writer requires endpoint');
+                 }
+                 return { type: 'sparql', endpoint: new URL(writer.endpoint) };
+             default:
+                 throw new Error(`Unknown writer type: ${writer.type}`);
+         }
+     });
+ }
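Taken together, `defineConfig()` and the normalization above imply a config file shape like the following sketch, which exercises every `RawPipelineConfig` field; the IRI, query path, and output directory are placeholders. `loadPipelineConfig()` would pick this file up as `pipeline.config.ts` and convert the selector into a `ManualDatasetSelection`, throwing if a file writer lacks `outputDir` or a SPARQL writer lacks `endpoint`:

```typescript
// pipeline.config.ts (placeholder values throughout)
import { defineConfig } from '@lde/pipeline';

export default defineConfig({
  selector: { type: 'manual', datasets: ['https://example.org/dataset/1'] },
  steps: [{ type: 'sparql-query', query: 'queries/triples.rq' }],
  writers: [{ type: 'file', outputDir: 'output' }],
  qlever: { mode: 'docker' },
});
```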
package/dist/distribution/analyzer.d.ts ADDED
@@ -0,0 +1,58 @@
+ import { Dataset } from '@lde/dataset';
+ import { Importer, ImportFailed, ImportSuccessful, NotSupported } from '@lde/sparql-importer';
+ import { Store } from 'n3';
+ export type { Importer };
+ export { ImportFailed, ImportSuccessful, NotSupported };
+ /**
+  * Extended importer interface with optional cleanup method.
+  */
+ export interface ImporterWithFinish extends Importer {
+     finish?(): Promise<void>;
+ }
+ export interface DistributionAnalyzerOptions {
+     /**
+      * Optional importer for loading data dumps when no SPARQL endpoint is available.
+      */
+     importer?: ImporterWithFinish;
+     /**
+      * Timeout for probe requests in milliseconds.
+      * @default 5000
+      */
+     timeout?: number;
+ }
+ /**
+  * Result indicating the analyzer could not find a usable distribution.
+  */
+ export declare class NoDistributionAvailable {
+     readonly message: string;
+     constructor(message: string);
+ }
+ /**
+  * Analyzes dataset distributions by probing their availability.
+  *
+  * - Probes SPARQL endpoints with a simple SELECT query
+  * - Probes data dumps with HEAD/GET requests
+  * - Records probe results as RDF (schema:Action)
+  * - Updates distribution metadata (isValid, lastModified, byteSize)
+  * - Optionally imports data dumps if no SPARQL endpoint is available
+  */
+ export declare class DistributionAnalyzer {
+     readonly name = "distribution";
+     private readonly importer?;
+     private readonly timeout;
+     constructor(options?: DistributionAnalyzerOptions);
+     /**
+      * Analyze all distributions of a dataset.
+      *
+      * @returns Store with probe results as RDF, or NoDistributionAvailable if no usable distribution found
+      */
+     execute(dataset: Dataset): Promise<Store | NoDistributionAvailable>;
+     /**
+      * Cleanup resources (e.g., importer connections).
+      */
+     finish(): Promise<void>;
+     private buildProbeResultsRdf;
+     private addSuccessTriples;
+     private addImportError;
+ }
+ //# sourceMappingURL=analyzer.d.ts.map
package/dist/distribution/analyzer.d.ts.map ADDED
@@ -0,0 +1 @@
+ {"version":3,"file":"analyzer.d.ts","sourceRoot":"","sources":["../../src/distribution/analyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,QAAQ,EACR,YAAY,EACZ,gBAAgB,EAChB,YAAY,EACb,MAAM,sBAAsB,CAAC;AAC9B,OAAO,EAAe,KAAK,EAAE,MAAM,IAAI,CAAC;AASxC,YAAY,EAAE,QAAQ,EAAE,CAAC;AACzB,OAAO,EAAE,YAAY,EAAE,gBAAgB,EAAE,YAAY,EAAE,CAAC;AAWxD;;GAEG;AACH,MAAM,WAAW,kBAAmB,SAAQ,QAAQ;IAClD,MAAM,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC1B;AAED,MAAM,WAAW,2BAA2B;IAC1C;;OAEG;IACH,QAAQ,CAAC,EAAE,kBAAkB,CAAC;IAE9B;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;GAEG;AACH,qBAAa,uBAAuB;aACN,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED;;;;;;;;GAQG;AACH,qBAAa,oBAAoB;IAC/B,SAAgB,IAAI,kBAAkB;IACtC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAqB;IAC/C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,2BAA2B;IAKjD;;;;OAIG;IACG,OAAO,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,KAAK,GAAG,uBAAuB,CAAC;IAmCzE;;OAEG;IACG,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;IAI7B,OAAO,CAAC,oBAAoB;IAwC5B,OAAO,CAAC,iBAAiB;IAiDzB,OAAO,CAAC,cAAc;CAevB"}