@lde/pipeline-void 0.2.15 → 0.2.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,10 +2,17 @@
 
  VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets.
 
- ## Analyzers
+ ## Query stages
 
- - **SparqlQueryAnalyzer** — Execute SPARQL CONSTRUCT queries with template substitution
- - **PerClassAnalyzer** — Two-phase analyzer that iterates over classes to avoid timeouts
+ - `createQueryStage(filename, distribution)` — Create a `Stage` from a SPARQL CONSTRUCT query file
+ - `createDatatypeStage(distribution)` — Per-class datatype partitions
+ - `createLanguageStage(distribution)` — Per-class language tags
+ - `createObjectClassStage(distribution)` — Per-class object class partitions
+
+ ## Streaming transformers
+
+ - `withVocabularies(quads, datasetIri)` — Detect and append `void:vocabulary` triples
+ - `withProvenance(quads, iri, startedAt, endedAt)` — Append PROV-O provenance metadata
 
  ## SPARQL Queries
 
@@ -33,14 +40,37 @@ Generic VOiD analysis queries included:
  ## Usage
 
  ```typescript
- import { SparqlQueryAnalyzer } from '@lde/pipeline-void';
+ import {
+   createQueryStage,
+   createDatatypeStage,
+   withVocabularies,
+   withProvenance,
+ } from '@lde/pipeline-void';
+ import { Distribution } from '@lde/dataset';
+
+ const distribution = Distribution.sparql(new URL('http://example.com/sparql'));
+
+ // Simple CONSTRUCT query stage
+ const stage = await createQueryStage('triples.rq', distribution);
+ const quads = await stage.run(dataset, distribution);
+
+ // Per-class stage (streaming)
+ const datatypeStage = await createDatatypeStage(distribution);
+ const datatypeQuads = await datatypeStage.run(dataset, distribution);
+
+ // Enrich with vocabulary detection and provenance
+ const enriched = withProvenance(
+   withVocabularies(quads, dataset.iri.toString()),
+   dataset.iri.toString(),
+   startedAt,
+   endedAt
+ );
+ ```
 
- // Load a query from file
- const analyzer = await SparqlQueryAnalyzer.fromFile('triples.rq');
+ ## Validation
 
- // Execute against a dataset
- const result = await analyzer.execute(dataset);
- if (result instanceof Success) {
-   // result.data contains the VOiD statistics as RDF
- }
+ ```sh
+ npx nx test pipeline-void
+ npx nx lint pipeline-void
+ npx nx typecheck pipeline-void
  ```
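For reference, a minimal sketch of draining the `enriched` stream from the usage example above into an in-memory store. It assumes the `dataset`, `startedAt`, and `endedAt` values from that snippet, and that `Stage.run` resolves to an async iterable of RDF/JS quads, as the `withVocabularies`/`withProvenance` signatures suggest; `collectQuads` is a hypothetical helper, not part of the package:

```typescript
import { Store } from 'n3';
import type { Quad } from '@rdfjs/types';

// Hypothetical helper: drain an AsyncIterable<Quad> into an N3 Store.
async function collectQuads(quads: AsyncIterable<Quad>): Promise<Store> {
  const store = new Store();
  for await (const quad of quads) {
    store.addQuad(quad);
  }
  return store;
}

// `enriched` is the AsyncIterable<Quad> built in the usage example above.
const statistics = await collectQuads(enriched);
console.log(`Collected ${statistics.size} VOiD quads`);
```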
package/dist/index.d.ts CHANGED
@@ -1,4 +1,4 @@
- export { type Analyzer, BaseAnalyzer, Success, Failure, NotSupported, } from '@lde/pipeline/analyzer';
+ export { Stage, NotSupported } from '@lde/pipeline';
  export * from './sparqlQueryAnalyzer.js';
  export * from './perClassAnalyzer.js';
  export * from './vocabularyAnalyzer.js';
package/dist/index.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,QAAQ,EACb,YAAY,EACZ,OAAO,EACP,OAAO,EACP,YAAY,GACb,MAAM,wBAAwB,CAAC;AAChC,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,iBAAiB,CAAC"}
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,EAAE,YAAY,EAAE,MAAM,eAAe,CAAC;AACpD,cAAc,0BAA0B,CAAC;AACzC,cAAc,uBAAuB,CAAC;AACtC,cAAc,yBAAyB,CAAC;AACxC,cAAc,iBAAiB,CAAC"}
package/dist/index.js CHANGED
@@ -1,4 +1,4 @@
- export { BaseAnalyzer, Success, Failure, NotSupported, } from '@lde/pipeline/analyzer';
+ export { Stage, NotSupported } from '@lde/pipeline';
  export * from './sparqlQueryAnalyzer.js';
  export * from './perClassAnalyzer.js';
  export * from './vocabularyAnalyzer.js';
package/dist/perClassAnalyzer.d.ts CHANGED
@@ -1,51 +1,6 @@
- import { Dataset } from '@lde/dataset';
- import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
- import { BaseAnalyzer, Success, Failure, NotSupported } from '@lde/pipeline/analyzer';
- export interface PerClassAnalyzerOptions {
- /**
- * Timeout for SPARQL queries in milliseconds.
- * @default 300000 (5 minutes)
- */
- timeout?: number;
- /**
- * Custom SparqlEndpointFetcher instance.
- */
- fetcher?: SparqlEndpointFetcher;
- /**
- * Maximum number of classes to analyze.
- * @default 1000
- */
- maxClasses?: number;
- }
- /**
- * Two-phase analyzer that first retrieves classes, then runs a query for each class.
- *
- * This approach prevents timeouts and OOM errors on large datasets by splitting
- * the analysis into smaller queries per class.
- *
- * Supports legacy template substitution:
- * - `#subjectFilter#` — replaced with the dataset's subject filter (if any)
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
- * - `?dataset` — replaced with the dataset IRI
- * - `<#class#>` — replaced with the current class IRI
- */
- export declare class PerClassAnalyzer extends BaseAnalyzer {
- readonly name: string;
- private readonly fetcher;
- private readonly query;
- private readonly maxClasses;
- constructor(name: string, query: string, options?: PerClassAnalyzerOptions);
- /**
- * Create an analyzer from a query file in the queries directory.
- *
- * @param filename Query filename (e.g., 'class-property-datatypes.rq')
- * @param options Optional analyzer options
- */
- static fromFile(filename: string, options?: PerClassAnalyzerOptions): Promise<PerClassAnalyzer>;
- execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
- private getClasses;
- }
- export declare function createDatatypeAnalyzer(options?: PerClassAnalyzerOptions): Promise<PerClassAnalyzer>;
- export declare function createLanguageAnalyzer(options?: PerClassAnalyzerOptions): Promise<PerClassAnalyzer>;
- export declare function createObjectClassAnalyzer(options?: PerClassAnalyzerOptions): Promise<PerClassAnalyzer>;
+ import { Distribution } from '@lde/dataset';
+ import { Stage } from '@lde/pipeline';
+ export declare function createDatatypeStage(distribution: Distribution): Promise<Stage>;
+ export declare function createLanguageStage(distribution: Distribution): Promise<Stage>;
+ export declare function createObjectClassStage(distribution: Distribution): Promise<Stage>;
  //# sourceMappingURL=perClassAnalyzer.d.ts.map
package/dist/perClassAnalyzer.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"perClassAnalyzer.d.ts","sourceRoot":"","sources":["../src/perClassAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAQrD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAG9D,OAAO,EACL,YAAY,EACZ,OAAO,EACP,OAAO,EACP,YAAY,EACb,MAAM,wBAAwB,CAAC;AAIhC,MAAM,WAAW,uBAAuB;IACtC;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;IAChC;;;OAGG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;CACrB;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,gBAAiB,SAAQ,YAAY;aAM9B,IAAI,EAAE,MAAM;IAL9B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAS;gBAGlB,IAAI,EAAE,MAAM,EAC5B,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,uBAAuB;IAYnC;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,uBAAuB,GAChC,OAAO,CAAC,gBAAgB,CAAC;IAKf,OAAO,CAClB,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC;YAsC9B,UAAU;CAkCzB;AAED,wBAAgB,sBAAsB,CACpC,OAAO,CAAC,EAAE,uBAAuB,GAChC,OAAO,CAAC,gBAAgB,CAAC,CAE3B;AAED,wBAAgB,sBAAsB,CACpC,OAAO,CAAC,EAAE,uBAAuB,GAChC,OAAO,CAAC,gBAAgB,CAAC,CAE3B;AAED,wBAAgB,yBAAyB,CACvC,OAAO,CAAC,EAAE,uBAAuB,GAChC,OAAO,CAAC,gBAAgB,CAAC,CAE3B"}
+ {"version":3,"file":"perClassAnalyzer.d.ts","sourceRoot":"","sources":["../src/perClassAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EACL,KAAK,EAIN,MAAM,eAAe,CAAC;AAkDvB,wBAAgB,mBAAmB,CACjC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,KAAK,CAAC,CAEhB;AAED,wBAAgB,mBAAmB,CACjC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,KAAK,CAAC,CAEhB;AAED,wBAAgB,sBAAsB,CACpC,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,KAAK,CAAC,CAEhB"}
package/dist/perClassAnalyzer.js CHANGED
@@ -1,102 +1,46 @@
- import { SparqlConstructExecutor, substituteQueryTemplates, readQueryFile, collect, } from '@lde/pipeline';
- import { Store } from 'n3';
- import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
+ import { Stage, SparqlSelector, SparqlConstructExecutor, readQueryFile, } from '@lde/pipeline';
  import { resolve, dirname } from 'node:path';
  import { fileURLToPath } from 'node:url';
- import { BaseAnalyzer, Success, Failure, NotSupported, } from '@lde/pipeline/analyzer';
  const __dirname = dirname(fileURLToPath(import.meta.url));
  /**
- * Two-phase analyzer that first retrieves classes, then runs a query for each class.
+ * Create a Stage that first selects classes from the endpoint,
+ * then runs a per-class CONSTRUCT query with `?class` bound via VALUES.
  *
- * This approach prevents timeouts and OOM errors on large datasets by splitting
- * the analysis into smaller queries per class.
- *
- * Supports legacy template substitution:
- * - `#subjectFilter#` — replaced with the dataset's subject filter (if any)
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
- * - `?dataset` — replaced with the dataset IRI
- * - `<#class#>` — replaced with the current class IRI
+ * Replaces the legacy `PerClassAnalyzer` two-phase loop with streaming.
  */
- export class PerClassAnalyzer extends BaseAnalyzer {
- name;
- fetcher;
- query;
- maxClasses;
- constructor(name, query, options) {
- super();
- this.name = name;
- this.query = query;
- this.fetcher =
- options?.fetcher ??
- new SparqlEndpointFetcher({
- timeout: options?.timeout ?? 300_000,
- });
- this.maxClasses = options?.maxClasses ?? 1000;
- }
- /**
- * Create an analyzer from a query file in the queries directory.
- *
- * @param filename Query filename (e.g., 'class-property-datatypes.rq')
- * @param options Optional analyzer options
- */
- static async fromFile(filename, options) {
- const query = await readQueryFile(resolve(__dirname, 'queries', filename));
- return new PerClassAnalyzer(filename, query, options);
- }
- async execute(dataset) {
- const sparqlDistribution = dataset.getSparqlDistribution();
- if (sparqlDistribution === null) {
- return new NotSupported('No SPARQL distribution available');
- }
- const store = new Store();
- try {
- // Phase 1: Get all classes.
- const classes = await this.getClasses(sparqlDistribution, dataset);
- // Phase 2: Run query for each class.
- for (const classIri of classes) {
- const substituted = substituteQueryTemplates(this.query.replaceAll('<#class#>', `<${classIri}>`), sparqlDistribution, dataset);
- const executor = new SparqlConstructExecutor({
- query: substituted,
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- fetcher: this.fetcher,
- });
- const stream = await executor.execute(dataset, sparqlDistribution);
- store.addQuads([...(await collect(stream))]);
- }
- }
- catch (e) {
- const accessUrl = sparqlDistribution.accessUrl;
- return new Failure(accessUrl ?? new URL('unknown://'), e instanceof Error ? e.message : undefined);
- }
- return new Success(store);
- }
- async getClasses(distribution, dataset) {
- const classQuery = substituteQueryTemplates(`SELECT DISTINCT ?class
- #namedGraph#
- WHERE {
- #subjectFilter#
- ?s a ?class .
- }
- LIMIT ${this.maxClasses}`, distribution, dataset);
- const bindings = await this.fetcher.fetchBindings(distribution.accessUrl.toString(), classQuery);
- const classes = [];
- for await (const binding of bindings) {
- // Bindings are Record<string, RDF.Term>.
- const bindingRecord = binding;
- const classValue = bindingRecord['class'];
- if (classValue && classValue.termType === 'NamedNode') {
- classes.push(classValue.value);
- }
- }
- return classes;
- }
+ async function createPerClassStage(queryFilename, distribution) {
+ const rawQuery = await readQueryFile(resolve(__dirname, 'queries', queryFilename));
+ // Pre-process #subjectFilter# before the query is parsed as SPARQL.
+ const subjectFilter = distribution.subjectFilter ?? '';
+ const query = rawQuery.replace('#subjectFilter#', subjectFilter);
+ // Build the selector SELECT query (same substitution for subjectFilter).
+ const fromClause = distribution.namedGraph
+ ? `FROM <${distribution.namedGraph}>`
+ : '';
+ const selectorQuery = [
+ 'SELECT DISTINCT ?class',
+ fromClause,
+ `WHERE { ${subjectFilter} ?s a ?class . }`,
+ 'LIMIT 1000',
+ ].join('\n');
+ const selector = new SparqlSelector({
+ query: selectorQuery,
+ endpoint: distribution.accessUrl,
+ pageSize: 1000,
+ });
+ const executor = new SparqlConstructExecutor({ query });
+ return new Stage({
+ name: queryFilename,
+ selector,
+ executors: executor,
+ });
  }
- export function createDatatypeAnalyzer(options) {
- return PerClassAnalyzer.fromFile('class-property-datatypes.rq', options);
+ export function createDatatypeStage(distribution) {
+ return createPerClassStage('class-property-datatypes.rq', distribution);
  }
- export function createLanguageAnalyzer(options) {
- return PerClassAnalyzer.fromFile('class-property-languages.rq', options);
+ export function createLanguageStage(distribution) {
+ return createPerClassStage('class-property-languages.rq', distribution);
  }
- export function createObjectClassAnalyzer(options) {
- return PerClassAnalyzer.fromFile('class-property-object-classes.rq', options);
+ export function createObjectClassStage(distribution) {
+ return createPerClassStage('class-property-object-classes.rq', distribution);
  }
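To make the selector construction above concrete, here is a small sketch that assembles the same SELECT string for example `subjectFilter` and `namedGraph` values; the IRIs are invented for illustration, and an empty filter or missing graph simply drops the corresponding part:

```typescript
// Example Distribution fields; both are optional in the code above.
const subjectFilter = '?s <http://purl.org/dc/terms/isPartOf> <http://example.com/catalog> .';
const namedGraph = 'http://example.com/graph';

const selectorQuery = [
  'SELECT DISTINCT ?class',
  namedGraph ? `FROM <${namedGraph}>` : '',
  `WHERE { ${subjectFilter} ?s a ?class . }`,
  'LIMIT 1000',
].join('\n');

console.log(selectorQuery);
// SELECT DISTINCT ?class
// FROM <http://example.com/graph>
// WHERE { ?s <http://purl.org/dc/terms/isPartOf> <http://example.com/catalog> . ?s a ?class . }
// LIMIT 1000
```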
package/dist/provenance.d.ts CHANGED
@@ -1,18 +1,14 @@
- import type { DatasetCore } from '@rdfjs/types';
+ import type { Quad } from '@rdfjs/types';
  /**
- * Add PROV-O provenance metadata to a dataset.
+ * Streaming transformer that passes through all quads and appends
+ * PROV-O provenance metadata.
  *
- * Adds:
+ * Appended quads:
  * - `<iri> a prov:Entity`
  * - `<iri> prov:wasGeneratedBy _:activity`
  * - `_:activity a prov:Activity`
  * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
  * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
- *
- * @param data The dataset to add provenance to
- * @param iri The IRI of the entity
- * @param startedAt Start time of the activity
- * @param endedAt End time of the activity
  */
- export declare function withProvenance(data: DatasetCore, iri: string, startedAt: Date, endedAt: Date): DatasetCore;
+ export declare function withProvenance(quads: AsyncIterable<Quad>, iri: string, startedAt: Date, endedAt: Date): AsyncIterable<Quad>;
  //# sourceMappingURL=provenance.d.ts.map
package/dist/provenance.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,WAAW,EAAE,MAAM,cAAc,CAAC;AAiBhD;;;;;;;;;;;;;;GAcG;AACH,wBAAgB,cAAc,CAC5B,IAAI,EAAE,WAAW,EACjB,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,IAAI,EACf,OAAO,EAAE,IAAI,GACZ,WAAW,CAwBb"}
+ {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC;;;;;;;;;;GAUG;AACH,wBAAuB,cAAc,CACnC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,IAAI,EACf,OAAO,EAAE,IAAI,GACZ,aAAa,CAAC,IAAI,CAAC,CAqBrB"}
package/dist/provenance.js CHANGED
@@ -1,4 +1,4 @@
- import { DataFactory, Store } from 'n3';
+ import { DataFactory } from 'n3';
  const { namedNode, literal, blankNode, quad } = DataFactory;
  const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
  const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
@@ -8,28 +8,25 @@ const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime'
  const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
  const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
  /**
- * Add PROV-O provenance metadata to a dataset.
+ * Streaming transformer that passes through all quads and appends
+ * PROV-O provenance metadata.
  *
- * Adds:
+ * Appended quads:
  * - `<iri> a prov:Entity`
  * - `<iri> prov:wasGeneratedBy _:activity`
  * - `_:activity a prov:Activity`
  * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
  * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
- *
- * @param data The dataset to add provenance to
- * @param iri The IRI of the entity
- * @param startedAt Start time of the activity
- * @param endedAt End time of the activity
  */
- export function withProvenance(data, iri, startedAt, endedAt) {
- const store = new Store([...data]);
+ export async function* withProvenance(quads, iri, startedAt, endedAt) {
+ for await (const q of quads) {
+ yield q;
+ }
  const subject = namedNode(iri);
  const activity = blankNode();
- store.addQuad(quad(subject, RDF_TYPE, PROV_ENTITY));
- store.addQuad(quad(subject, PROV_WAS_GENERATED_BY, activity));
- store.addQuad(quad(activity, RDF_TYPE, PROV_ACTIVITY));
- store.addQuad(quad(activity, PROV_STARTED_AT_TIME, literal(startedAt.toISOString(), XSD_DATE_TIME)));
- store.addQuad(quad(activity, PROV_ENDED_AT_TIME, literal(endedAt.toISOString(), XSD_DATE_TIME)));
- return store;
+ yield quad(subject, RDF_TYPE, PROV_ENTITY);
+ yield quad(subject, PROV_WAS_GENERATED_BY, activity);
+ yield quad(activity, RDF_TYPE, PROV_ACTIVITY);
+ yield quad(activity, PROV_STARTED_AT_TIME, literal(startedAt.toISOString(), XSD_DATE_TIME));
+ yield quad(activity, PROV_ENDED_AT_TIME, literal(endedAt.toISOString(), XSD_DATE_TIME));
  }
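A self-contained sketch of the transformer's behaviour: input quads are passed through first, then the five PROV-O quads documented above are appended. The input generator and timestamps are invented for illustration:

```typescript
import { DataFactory } from 'n3';
import type { Quad } from '@rdfjs/types';
import { withProvenance } from '@lde/pipeline-void';

const { namedNode, literal, quad } = DataFactory;

// Stand-in for a stage's output stream.
async function* statisticsQuads(): AsyncIterable<Quad> {
  yield quad(
    namedNode('http://example.com/dataset'),
    namedNode('http://rdfs.org/ns/void#triples'),
    literal('42')
  );
}

const startedAt = new Date('2024-01-01T00:00:00Z');
const endedAt = new Date();

for await (const q of withProvenance(
  statisticsQuads(),
  'http://example.com/dataset',
  startedAt,
  endedAt
)) {
  console.log(`${q.subject.value} ${q.predicate.value} ${q.object.value}`);
}
```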
package/dist/sparqlQueryAnalyzer.d.ts CHANGED
@@ -1,39 +1,10 @@
- import { Dataset } from '@lde/dataset';
- import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
- import { BaseAnalyzer, Success, Failure, NotSupported } from '@lde/pipeline/analyzer';
- export interface SparqlQueryAnalyzerOptions {
- /**
- * Timeout for SPARQL queries in milliseconds.
- * @default 300000 (5 minutes)
- */
- timeout?: number;
- /**
- * Custom SparqlEndpointFetcher instance.
- */
- fetcher?: SparqlEndpointFetcher;
- }
+ import { Distribution } from '@lde/dataset';
+ import { Stage } from '@lde/pipeline';
  /**
- * Analyzer that executes a SPARQL CONSTRUCT query against a dataset's SPARQL endpoint.
+ * Create a Stage that executes a SPARQL CONSTRUCT query from the queries directory.
  *
- * Supports legacy template substitution:
- * - `#subjectFilter#` replaced with the dataset's subject filter (if any)
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
- * - `?dataset` — replaced with the dataset IRI
- *
- * This class wraps the SparqlConstructExecutor from @lde/pipeline.
+ * Pre-processes `#subjectFilter#` before the query is parsed as SPARQL;
+ * `?dataset` and `FROM <graph>` are handled at the AST level by the executor.
  */
- export declare class SparqlQueryAnalyzer extends BaseAnalyzer {
- readonly name: string;
- private readonly query;
- private readonly fetcher;
- constructor(name: string, query: string, options?: SparqlQueryAnalyzerOptions);
- /**
- * Create an analyzer from a query file in the queries directory.
- *
- * @param filename Query filename (e.g., 'triples.rq')
- * @param options Optional analyzer options
- */
- static fromFile(filename: string, options?: SparqlQueryAnalyzerOptions): Promise<SparqlQueryAnalyzer>;
- execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
- }
+ export declare function createQueryStage(filename: string, distribution: Distribution): Promise<Stage>;
  //# sourceMappingURL=sparqlQueryAnalyzer.d.ts.map
package/dist/sparqlQueryAnalyzer.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"sparqlQueryAnalyzer.d.ts","sourceRoot":"","sources":["../src/sparqlQueryAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAOvC,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAG9D,OAAO,EACL,YAAY,EACZ,OAAO,EACP,OAAO,EACP,YAAY,EACb,MAAM,wBAAwB,CAAC;AAIhC,MAAM,WAAW,0BAA0B;IACzC;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;GASG;AACH,qBAAa,mBAAoB,SAAQ,YAAY;aAKjC,IAAI,EAAE,MAAM;IAJ9B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAG9B,IAAI,EAAE,MAAM,EAC5B,KAAK,EAAE,MAAM,EACb,OAAO,CAAC,EAAE,0BAA0B;IAWtC;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,0BAA0B,GACnC,OAAO,CAAC,mBAAmB,CAAC;IAKlB,OAAO,CAClB,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC;CA2B7C"}
+ {"version":3,"file":"sparqlQueryAnalyzer.d.ts","sourceRoot":"","sources":["../src/sparqlQueryAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC5C,OAAO,EAAE,KAAK,EAA0C,MAAM,eAAe,CAAC;AAM9E;;;;;GAKG;AACH,wBAAsB,gBAAgB,CACpC,QAAQ,EAAE,MAAM,EAChB,YAAY,EAAE,YAAY,GACzB,OAAO,CAAC,KAAK,CAAC,CAShB"}
package/dist/sparqlQueryAnalyzer.js CHANGED
@@ -1,61 +1,17 @@
- import { SparqlConstructExecutor, substituteQueryTemplates, collect, readQueryFile, } from '@lde/pipeline';
- import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
+ import { Stage, SparqlConstructExecutor, readQueryFile } from '@lde/pipeline';
  import { resolve, dirname } from 'node:path';
  import { fileURLToPath } from 'node:url';
- import { BaseAnalyzer, Success, Failure, NotSupported, } from '@lde/pipeline/analyzer';
  const __dirname = dirname(fileURLToPath(import.meta.url));
  /**
- * Analyzer that executes a SPARQL CONSTRUCT query against a dataset's SPARQL endpoint.
+ * Create a Stage that executes a SPARQL CONSTRUCT query from the queries directory.
  *
- * Supports legacy template substitution:
- * - `#subjectFilter#` replaced with the dataset's subject filter (if any)
- * - `#namedGraph#` — replaced with `FROM <graph>` clause if the distribution has a named graph
- * - `?dataset` — replaced with the dataset IRI
- *
- * This class wraps the SparqlConstructExecutor from @lde/pipeline.
+ * Pre-processes `#subjectFilter#` before the query is parsed as SPARQL;
+ * `?dataset` and `FROM <graph>` are handled at the AST level by the executor.
  */
- export class SparqlQueryAnalyzer extends BaseAnalyzer {
- name;
- query;
- fetcher;
- constructor(name, query, options) {
- super();
- this.name = name;
- this.query = query;
- this.fetcher =
- options?.fetcher ??
- new SparqlEndpointFetcher({
- timeout: options?.timeout ?? 300_000,
- });
- }
- /**
- * Create an analyzer from a query file in the queries directory.
- *
- * @param filename Query filename (e.g., 'triples.rq')
- * @param options Optional analyzer options
- */
- static async fromFile(filename, options) {
- const query = await readQueryFile(resolve(__dirname, 'queries', filename));
- return new SparqlQueryAnalyzer(filename, query, options);
- }
- async execute(dataset) {
- const sparqlDistribution = dataset.getSparqlDistribution();
- if (sparqlDistribution === null) {
- return new NotSupported('No SPARQL distribution available');
- }
- try {
- const substituted = substituteQueryTemplates(this.query, sparqlDistribution, dataset);
- const executor = new SparqlConstructExecutor({
- query: substituted,
- // eslint-disable-next-line @typescript-eslint/no-explicit-any
- fetcher: this.fetcher,
- });
- const stream = await executor.execute(dataset, sparqlDistribution);
- const store = await collect(stream);
- return new Success(store);
- }
- catch (e) {
- return new Failure(sparqlDistribution.accessUrl ?? new URL('unknown://'), e instanceof Error ? e.message : undefined);
- }
- }
+ export async function createQueryStage(filename, distribution) {
+ const rawQuery = await readQueryFile(resolve(__dirname, 'queries', filename));
+ const subjectFilter = distribution.subjectFilter ?? '';
+ const query = rawQuery.replace('#subjectFilter#', subjectFilter);
+ const executor = new SparqlConstructExecutor({ query });
+ return new Stage({ name: filename, executors: executor });
  }
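The `#subjectFilter#` placeholder is handled with a plain string replacement before the query ever reaches the SPARQL parser (in raw SPARQL the `#` would start a comment and swallow the rest of the line, which is why the substitution happens first). A sketch with an invented CONSTRUCT body, not one of the bundled `.rq` files:

```typescript
// Roughly what a bundled query could look like before substitution (illustrative only).
const rawQuery = `CONSTRUCT { ?dataset <http://rdfs.org/ns/void#triples> ?count }
WHERE {
  { SELECT (COUNT(*) AS ?count) WHERE { #subjectFilter# ?s ?p ?o } }
}`;

// Empty string when the distribution has no subject filter, mirroring createQueryStage above.
const subjectFilter = '?s <http://purl.org/dc/terms/isPartOf> <http://example.com/catalog> .';
const query = rawQuery.replace('#subjectFilter#', subjectFilter);
```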
package/dist/vocabularyAnalyzer.d.ts CHANGED
@@ -1,17 +1,11 @@
- import { Dataset } from '@lde/dataset';
- import { type Analyzer, Success, type Failure, type NotSupported } from '@lde/pipeline/analyzer';
+ import type { Quad } from '@rdfjs/types';
  /**
- * Decorator analyzer that enriches results with `void:vocabulary` triples.
+ * Streaming transformer that passes through all quads and appends
+ * `void:vocabulary` triples for detected vocabulary prefixes.
  *
- * Wraps another analyzer, runs it, then inspects `void:property` triples
- * to detect known vocabulary prefixes and add corresponding `void:vocabulary`
- * triples to the result.
+ * Inspects quads with predicate `void:property` to detect known vocabulary
+ * namespace prefixes, then yields the corresponding `void:vocabulary` quads
+ * after all input quads have been consumed.
  */
- export declare class VocabularyAnalyzer implements Analyzer {
- private readonly inner;
- readonly name: string;
- constructor(inner: Analyzer);
- execute(dataset: Dataset): Promise<Success | Failure | NotSupported>;
- finish(): Promise<void>;
- }
+ export declare function withVocabularies(quads: AsyncIterable<Quad>, datasetIri: string): AsyncIterable<Quad>;
  //# sourceMappingURL=vocabularyAnalyzer.d.ts.map
package/dist/vocabularyAnalyzer.d.ts.map CHANGED
@@ -1 +1 @@
- {"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AAGvC,OAAO,EACL,KAAK,QAAQ,EACb,OAAO,EACP,KAAK,OAAO,EACZ,KAAK,YAAY,EAClB,MAAM,wBAAwB,CAAC;AAiChC;;;;;;GAMG;AACH,qBAAa,kBAAmB,YAAW,QAAQ;IAGrC,OAAO,CAAC,QAAQ,CAAC,KAAK;IAFlC,SAAgB,IAAI,EAAE,MAAM,CAAC;gBAEA,KAAK,EAAE,QAAQ;IAI/B,OAAO,CAClB,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,OAAO,GAAG,OAAO,GAAG,YAAY,CAAC;IAU/B,MAAM,IAAI,OAAO,CAAC,IAAI,CAAC;CAGrC"}
+ {"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAkCzC;;;;;;;GAOG;AACH,wBAAuB,gBAAgB,CACrC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,UAAU,EAAE,MAAM,GACjB,aAAa,CAAC,IAAI,CAAC,CAqBrB"}
package/dist/vocabularyAnalyzer.js CHANGED
@@ -1,5 +1,4 @@
- import { DataFactory, Store } from 'n3';
- import { Success, } from '@lde/pipeline/analyzer';
+ import { DataFactory } from 'n3';
  const { namedNode, quad } = DataFactory;
  const VOID = 'http://rdfs.org/ns/void#';
  const voidProperty = namedNode(`${VOID}property`);
@@ -29,47 +29,29 @@ const vocabularyPrefixes = new Map([
  ['http://xmlns.com/foaf/0.1/', 'http://xmlns.com/foaf/0.1/'],
  ]);
  /**
- * Decorator analyzer that enriches results with `void:vocabulary` triples.
+ * Streaming transformer that passes through all quads and appends
+ * `void:vocabulary` triples for detected vocabulary prefixes.
  *
- * Wraps another analyzer, runs it, then inspects `void:property` triples
- * to detect known vocabulary prefixes and add corresponding `void:vocabulary`
- * triples to the result.
+ * Inspects quads with predicate `void:property` to detect known vocabulary
+ * namespace prefixes, then yields the corresponding `void:vocabulary` quads
+ * after all input quads have been consumed.
  */
- export class VocabularyAnalyzer {
- inner;
- name;
- constructor(inner) {
- this.inner = inner;
- this.name = inner.name;
- }
- async execute(dataset) {
- const result = await this.inner.execute(dataset);
- if (!(result instanceof Success)) {
- return result;
- }
- const enriched = addVocabularyTriples(result.data, dataset.iri.toString());
- return new Success(enriched);
- }
- async finish() {
- await this.inner.finish?.();
- }
- }
- function addVocabularyTriples(data, datasetIri) {
- const store = new Store([...data]);
- const datasetNode = namedNode(datasetIri);
- // Collect unique vocabulary URIs from void:property triples.
+ export async function* withVocabularies(quads, datasetIri) {
  const detectedVocabularies = new Set();
- for (const q of store.match(null, voidProperty, null)) {
- const propertyUri = q.object.value;
- for (const [prefix, vocabUri] of vocabularyPrefixes) {
- if (propertyUri.startsWith(prefix)) {
- detectedVocabularies.add(vocabUri);
- break;
+ for await (const q of quads) {
+ yield q;
+ if (q.predicate.equals(voidProperty)) {
+ const propertyUri = q.object.value;
+ for (const [prefix, vocabUri] of vocabularyPrefixes) {
+ if (propertyUri.startsWith(prefix)) {
+ detectedVocabularies.add(vocabUri);
+ break;
+ }
  }
  }
  }
+ const datasetNode = namedNode(datasetIri);
  for (const vocabUri of detectedVocabularies) {
- store.addQuad(quad(datasetNode, voidVocabulary, namedNode(vocabUri)));
+ yield quad(datasetNode, voidVocabulary, namedNode(vocabUri));
  }
- return store;
  }
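A short sketch of the detection behaviour: a `void:property` quad whose object starts with a known namespace (FOAF, which is in the prefix map above) is passed through, and one `void:vocabulary` quad is yielded once the input is exhausted. The input generator and IRIs are invented for illustration:

```typescript
import { DataFactory } from 'n3';
import type { Quad } from '@rdfjs/types';
import { withVocabularies } from '@lde/pipeline-void';

const { namedNode, quad } = DataFactory;

async function* propertyPartitions(): AsyncIterable<Quad> {
  yield quad(
    namedNode('http://example.com/dataset#propertyPartition-1'),
    namedNode('http://rdfs.org/ns/void#property'),
    namedNode('http://xmlns.com/foaf/0.1/name')
  );
}

for await (const q of withVocabularies(propertyPartitions(), 'http://example.com/dataset')) {
  console.log(`${q.predicate.value} ${q.object.value}`);
}
// http://rdfs.org/ns/void#property http://xmlns.com/foaf/0.1/name
// http://rdfs.org/ns/void#vocabulary http://xmlns.com/foaf/0.1/
```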
package/package.json CHANGED
@@ -1,6 +1,6 @@
  {
  "name": "@lde/pipeline-void",
- "version": "0.2.15",
+ "version": "0.2.17",
  "description": "VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets",
  "repository": {
  "url": "https://github.com/ldengine/lde",
@@ -27,7 +27,6 @@
  "@lde/dataset": "0.6.8",
  "@lde/pipeline": "0.6.15",
  "@rdfjs/types": "^2.0.1",
- "fetch-sparql-endpoint": "^6.0.0",
  "n3": "^1.17.0",
  "tslib": "^2.3.0"
  }