@lde/pipeline-void 0.2.31 → 0.2.32

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -9,10 +9,10 @@ VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets.
9
9
  - `createLanguageStage(distribution)` — Per-class language tags
10
10
  - `createObjectClassStage(distribution)` — Per-class object class partitions
11
11
 
12
- ## Streaming transformers
12
+ ## Executor decorators
13
13
 
14
- - `withVocabularies(quads, datasetIri)` — Detect and append `void:vocabulary` triples
15
- - `withProvenance(quads, iri, startedAt, endedAt)` Append PROV-O provenance metadata
14
+ - `VocabularyExecutor` — Wraps an executor; detects and appends `void:vocabulary` triples
15
+ - `ProvenanceExecutor` — Wraps an executor; appends PROV-O provenance metadata with automatic timing
16
16
 
17
17
  ## SPARQL Queries
18
18
 
@@ -43,20 +43,23 @@ Generic VOiD analysis queries included:
43
43
  import {
44
44
  createQueryStage,
45
45
  createDatatypeStage,
46
- withVocabularies,
47
- withProvenance,
46
+ VocabularyExecutor,
47
+ Stage,
48
48
  } from '@lde/pipeline-void';
49
- import { Distribution } from '@lde/dataset';
50
-
51
- const distribution = Distribution.sparql(new URL('http://example.com/sparql'));
49
+ import { SparqlConstructExecutor } from '@lde/pipeline';
52
50
 
53
51
  // Simple CONSTRUCT query stage
54
- const stage = await createQueryStage('triples.rq', distribution);
52
+ const stage = await createQueryStage('triples.rq');
55
53
  await stage.run(dataset, distribution, writer);
56
54
 
57
- // Per-class stage (streaming)
58
- const datatypeStage = await createDatatypeStage(distribution);
59
- await datatypeStage.run(dataset, distribution, writer);
55
+ // Executor decorator: vocabulary detection wraps entity-properties executor
56
+ const executor = await SparqlConstructExecutor.fromFile(
57
+ 'queries/entity-properties.rq',
58
+ );
59
+ const entityPropertiesStage = new Stage({
60
+ name: 'entity-properties',
61
+ executors: new VocabularyExecutor(executor),
62
+ });
60
63
  ```
61
64
 
62
65
  ## Validation
@@ -1,14 +1,23 @@
1
+ import { Dataset, Distribution } from '@lde/dataset';
2
+ import { NotSupported, type Executor, type ExecuteOptions } from '@lde/pipeline';
1
3
  import type { Quad } from '@rdfjs/types';
2
4
  /**
3
- * Streaming transformer that passes through all quads and appends
4
- * PROV-O provenance metadata.
5
+ * Executor decorator that passes through all quads from the inner executor
6
+ * and appends PROV-O provenance metadata.
7
+ *
8
+ * Timestamps are captured automatically: `startedAt` when `execute()` is
9
+ * called, `endedAt` when the inner quad stream is fully consumed.
5
10
  *
6
11
  * Appended quads:
7
- * - `<iri> a prov:Entity`
8
- * - `<iri> prov:wasGeneratedBy _:activity`
12
+ * - `<dataset> a prov:Entity`
13
+ * - `<dataset> prov:wasGeneratedBy _:activity`
9
14
  * - `_:activity a prov:Activity`
10
15
  * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
11
16
  * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
12
17
  */
13
- export declare function withProvenance(quads: AsyncIterable<Quad>, iri: string, startedAt: Date, endedAt: Date): AsyncIterable<Quad>;
18
+ export declare class ProvenanceExecutor implements Executor {
19
+ private readonly inner;
20
+ constructor(inner: Executor);
21
+ execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
22
+ }
14
23
  //# sourceMappingURL=provenance.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC;;;;;;;;;;GAUG;AACH,wBAAuB,cAAc,CACnC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,GAAG,EAAE,MAAM,EACX,SAAS,EAAE,IAAI,EACf,OAAO,EAAE,IAAI,GACZ,aAAa,CAAC,IAAI,CAAC,CAqBrB"}
1
+ {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,YAAY,EACZ,KAAK,QAAQ,EACb,KAAK,cAAc,EACpB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAiBzC;;;;;;;;;;;;;GAaG;AACH,qBAAa,kBAAmB,YAAW,QAAQ;IACrC,OAAO,CAAC,QAAQ,CAAC,KAAK;gBAAL,KAAK,EAAE,QAAQ;IAEtC,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC;CAQ/C"}
@@ -1,3 +1,4 @@
1
+ import { NotSupported, } from '@lde/pipeline';
1
2
  import { DataFactory } from 'n3';
2
3
  const { namedNode, literal, blankNode, quad } = DataFactory;
3
4
  const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
@@ -8,20 +9,38 @@ const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime'
8
9
  const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
9
10
  const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
10
11
  /**
11
- * Streaming transformer that passes through all quads and appends
12
- * PROV-O provenance metadata.
12
+ * Executor decorator that passes through all quads from the inner executor
13
+ * and appends PROV-O provenance metadata.
14
+ *
15
+ * Timestamps are captured automatically: `startedAt` when `execute()` is
16
+ * called, `endedAt` when the inner quad stream is fully consumed.
13
17
  *
14
18
  * Appended quads:
15
- * - `<iri> a prov:Entity`
16
- * - `<iri> prov:wasGeneratedBy _:activity`
19
+ * - `<dataset> a prov:Entity`
20
+ * - `<dataset> prov:wasGeneratedBy _:activity`
17
21
  * - `_:activity a prov:Activity`
18
22
  * - `_:activity prov:startedAtTime "..."^^xsd:dateTime`
19
23
  * - `_:activity prov:endedAtTime "..."^^xsd:dateTime`
20
24
  */
21
- export async function* withProvenance(quads, iri, startedAt, endedAt) {
25
+ export class ProvenanceExecutor {
26
+ inner;
27
+ constructor(inner) {
28
+ this.inner = inner;
29
+ }
30
+ async execute(dataset, distribution, options) {
31
+ const startedAt = new Date();
32
+ const result = await this.inner.execute(dataset, distribution, options);
33
+ if (result instanceof NotSupported) {
34
+ return result;
35
+ }
36
+ return withProvenance(result, dataset.iri.toString(), startedAt);
37
+ }
38
+ }
39
+ async function* withProvenance(quads, iri, startedAt) {
22
40
  for await (const q of quads) {
23
41
  yield q;
24
42
  }
43
+ const endedAt = new Date();
25
44
  const subject = namedNode(iri);
26
45
  const activity = blankNode();
27
46
  yield quad(subject, RDF_TYPE, PROV_ENTITY);
@@ -1,11 +1,18 @@
1
+ import { Dataset, Distribution } from '@lde/dataset';
2
+ import { NotSupported, type Executor, type ExecuteOptions } from '@lde/pipeline';
1
3
  import type { Quad } from '@rdfjs/types';
2
4
  /**
3
- * Streaming transformer that passes through all quads and appends
4
- * `void:vocabulary` triples for detected vocabulary prefixes.
5
+ * Executor decorator that passes through all quads from the inner executor
6
+ * and appends `void:vocabulary` triples for detected vocabulary prefixes.
5
7
  *
6
8
  * Inspects quads with predicate `void:property` to detect known vocabulary
7
9
  * namespace prefixes, then yields the corresponding `void:vocabulary` quads
8
- * after all input quads have been consumed.
10
+ * after all inner quads have been consumed.
9
11
  */
10
- export declare function withVocabularies(quads: AsyncIterable<Quad>, datasetIri: string, vocabularies?: readonly string[]): AsyncIterable<Quad>;
12
+ export declare class VocabularyExecutor implements Executor {
13
+ private readonly inner;
14
+ private readonly vocabularies;
15
+ constructor(inner: Executor, vocabularies?: readonly string[]);
16
+ execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
17
+ }
11
18
  //# sourceMappingURL=vocabularyAnalyzer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAczC;;;;;;;GAOG;AACH,wBAAuB,gBAAgB,CACrC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,UAAU,EAAE,MAAM,EAClB,YAAY,GAAE,SAAS,MAAM,EAAwB,GACpD,aAAa,CAAC,IAAI,CAAC,CAqBrB"}
1
+ {"version":3,"file":"vocabularyAnalyzer.d.ts","sourceRoot":"","sources":["../src/vocabularyAnalyzer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,EACL,YAAY,EACZ,KAAK,QAAQ,EACb,KAAK,cAAc,EACpB,MAAM,eAAe,CAAC;AACvB,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAczC;;;;;;;GAOG;AACH,qBAAa,kBAAmB,YAAW,QAAQ;IAE/C,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,YAAY;gBADZ,KAAK,EAAE,QAAQ,EACf,YAAY,GAAE,SAAS,MAAM,EAAwB;IAGlE,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC;CAO/C"}
@@ -1,3 +1,4 @@
1
+ import { NotSupported, } from '@lde/pipeline';
1
2
  import prefixes from '@zazuko/prefixes';
2
3
  import { DataFactory } from 'n3';
3
4
  const { namedNode, quad } = DataFactory;
@@ -8,14 +9,29 @@ const defaultVocabularies = [
8
9
  ...new Set(Object.values(prefixes)),
9
10
  ];
10
11
  /**
11
- * Streaming transformer that passes through all quads and appends
12
- * `void:vocabulary` triples for detected vocabulary prefixes.
12
+ * Executor decorator that passes through all quads from the inner executor
13
+ * and appends `void:vocabulary` triples for detected vocabulary prefixes.
13
14
  *
14
15
  * Inspects quads with predicate `void:property` to detect known vocabulary
15
16
  * namespace prefixes, then yields the corresponding `void:vocabulary` quads
16
- * after all input quads have been consumed.
17
+ * after all inner quads have been consumed.
17
18
  */
18
- export async function* withVocabularies(quads, datasetIri, vocabularies = defaultVocabularies) {
19
+ export class VocabularyExecutor {
20
+ inner;
21
+ vocabularies;
22
+ constructor(inner, vocabularies = defaultVocabularies) {
23
+ this.inner = inner;
24
+ this.vocabularies = vocabularies;
25
+ }
26
+ async execute(dataset, distribution, options) {
27
+ const result = await this.inner.execute(dataset, distribution, options);
28
+ if (result instanceof NotSupported) {
29
+ return result;
30
+ }
31
+ return withVocabularies(result, dataset.iri.toString(), this.vocabularies);
32
+ }
33
+ }
34
+ async function* withVocabularies(quads, datasetIri, vocabularies) {
19
35
  const detectedVocabularies = new Set();
20
36
  for await (const q of quads) {
21
37
  yield q;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline-void",
3
- "version": "0.2.31",
3
+ "version": "0.2.32",
4
4
  "description": "VOiD (Vocabulary of Interlinked Datasets) statistical analysis for RDF datasets",
5
5
  "repository": {
6
6
  "url": "https://github.com/ldengine/lde",
@@ -24,6 +24,7 @@
24
24
  "!**/*.tsbuildinfo"
25
25
  ],
26
26
  "dependencies": {
27
+ "@lde/dataset": "0.6.9",
27
28
  "@lde/pipeline": "0.6.28",
28
29
  "@rdfjs/types": "^2.0.1",
29
30
  "@zazuko/prefixes": "^2.6.1",