@lde/pipeline 0.6.32 → 0.7.1

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions exactly as they appear in their respective public registries.
package/README.md CHANGED
@@ -1,6 +1,10 @@
1
1
  # Pipeline
2
2
 
3
- Framework for building RDF data processing pipelines with SPARQL.
3
+ A framework for transforming large RDF datasets using pure [SPARQL](https://www.w3.org/TR/sparql11-query/) queries.
4
+
5
+ - **SPARQL-native.** Data transformations are plain SPARQL query files — portable, transparent, testable and version-controlled.
6
+ - **Composable.** Decorators wrap executors and resolvers to add behaviour (provenance, vocabulary detection, data import) without subclassing.
7
+ - **Extensible.** A plugin system lets packages like [@lde/pipeline-void](../pipeline-void) (or your own plugins) hook into the pipeline lifecycle.
4
8
 
5
9
  ## Components
6
10
 
@@ -82,7 +86,6 @@ import {
82
86
  SparqlItemSelector,
83
87
  SparqlUpdateWriter,
84
88
  ManualDatasetSelection,
85
- SparqlDistributionResolver,
86
89
  } from '@lde/pipeline';
87
90
 
88
91
  const pipeline = new Pipeline({
@@ -106,9 +109,3 @@ const pipeline = new Pipeline({
106
109
 
107
110
  await pipeline.run();
108
111
  ```
109
-
110
- ## Validation
111
-
112
- ```sh
113
- npx nx run-many -t lint test typecheck build --projects=@lde/pipeline
114
- ```
@@ -0,0 +1,22 @@
1
+ import type { Importer } from '@lde/sparql-importer';
2
+ import type { SparqlServer } from '@lde/sparql-server';
3
+ import { type DistributionResolver, NoDistributionAvailable, ResolvedDistribution } from './resolver.js';
4
+ export interface ImportResolverOptions {
5
+ importer: Importer;
6
+ server?: SparqlServer;
7
+ }
8
+ /**
9
+ * A {@link DistributionResolver} decorator that adds import-as-fallback logic.
10
+ *
11
+ * Delegates to an inner resolver first. If the inner resolver returns
12
+ * {@link NoDistributionAvailable}, tries importing the dataset and optionally
13
+ * starts a SPARQL server.
14
+ */
15
+ export declare class ImportResolver implements DistributionResolver {
16
+ private readonly inner;
17
+ private readonly options;
18
+ constructor(inner: DistributionResolver, options: ImportResolverOptions);
19
+ resolve(...args: Parameters<DistributionResolver['resolve']>): Promise<ResolvedDistribution | NoDistributionAvailable>;
20
+ cleanup(): Promise<void>;
21
+ }
22
+ //# sourceMappingURL=importResolver.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAErD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EACzB,uBAAuB,EACvB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAEvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,CAAC,EAAE,YAAY,CAAC;CACvB;AAED;;;;;;GAMG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,OAAO,CACX,GAAG,IAAI,EAAE,UAAU,CAAC,oBAAoB,CAAC,SAAS,CAAC,CAAC,GACnD,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IAoCpD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
@@ -0,0 +1,36 @@
1
+ import { Distribution } from '@lde/dataset';
2
+ import { ImportFailed, ImportSuccessful } from '@lde/sparql-importer';
3
+ import { NoDistributionAvailable, ResolvedDistribution, } from './resolver.js';
4
+ /**
5
+ * A {@link DistributionResolver} decorator that adds import-as-fallback logic.
6
+ *
7
+ * Delegates to an inner resolver first. If the inner resolver returns
8
+ * {@link NoDistributionAvailable}, tries importing the dataset and optionally
9
+ * starts a SPARQL server.
10
+ */
11
+ export class ImportResolver {
12
+ inner;
13
+ options;
14
+ constructor(inner, options) {
15
+ this.inner = inner;
16
+ this.options = options;
17
+ }
18
+ async resolve(...args) {
19
+ const result = await this.inner.resolve(...args);
20
+ if (result instanceof ResolvedDistribution)
21
+ return result;
22
+ const [dataset] = args;
23
+ const importResult = await this.options.importer.import(dataset);
24
+ if (importResult instanceof ImportSuccessful) {
25
+ if (this.options.server) {
26
+ await this.options.server.start();
27
+ return new ResolvedDistribution(Distribution.sparql(this.options.server.queryEndpoint, importResult.identifier), result.probeResults);
28
+ }
29
+ return new ResolvedDistribution(Distribution.sparql(importResult.distribution.accessUrl, importResult.identifier), result.probeResults);
30
+ }
31
+ return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', result.probeResults, importResult instanceof ImportFailed ? importResult : undefined);
32
+ }
33
+ async cleanup() {
34
+ await this.options.server?.stop();
35
+ }
36
+ }
@@ -1,5 +1,6 @@
1
1
  export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeResultType, } from './probe.js';
2
2
  export { probeResultsToQuads } from './report.js';
3
+ export { ImportResolver, type ImportResolverOptions, } from './importResolver.js';
3
4
  export { ResolvedDistribution, NoDistributionAvailable, SparqlDistributionResolver, type DistributionResolver, type SparqlDistributionResolverOptions, } from './resolver.js';
4
5
  export { resolveDistributions, type DistributionStageResult, } from './resolveDistributions.js';
5
6
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/distribution/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAElD,OAAO,EACL,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,KAAK,oBAAoB,EACzB,KAAK,iCAAiC,GACvC,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,oBAAoB,EACpB,KAAK,uBAAuB,GAC7B,MAAM,2BAA2B,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/distribution/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC;AAEpB,OAAO,EAAE,mBAAmB,EAAE,MAAM,aAAa,CAAC;AAElD,OAAO,EACL,cAAc,EACd,KAAK,qBAAqB,GAC3B,MAAM,qBAAqB,CAAC;AAE7B,OAAO,EACL,oBAAoB,EACpB,uBAAuB,EACvB,0BAA0B,EAC1B,KAAK,oBAAoB,EACzB,KAAK,iCAAiC,GACvC,MAAM,eAAe,CAAC;AAEvB,OAAO,EACL,oBAAoB,EACpB,KAAK,uBAAuB,GAC7B,MAAM,2BAA2B,CAAC"}
@@ -1,4 +1,5 @@
1
1
  export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
2
2
  export { probeResultsToQuads } from './report.js';
3
+ export { ImportResolver, } from './importResolver.js';
3
4
  export { ResolvedDistribution, NoDistributionAvailable, SparqlDistributionResolver, } from './resolver.js';
4
5
  export { resolveDistributions, } from './resolveDistributions.js';
@@ -1,7 +1,5 @@
1
1
  import { Dataset, Distribution } from '@lde/dataset';
2
- import type { Importer } from '@lde/sparql-importer';
3
- import { ImportFailed } from '@lde/sparql-importer';
4
- import type { SparqlServer } from '@lde/sparql-server';
2
+ import type { ImportFailed } from '@lde/sparql-importer';
5
3
  import { type ProbeResultType } from './probe.js';
6
4
  export declare class ResolvedDistribution {
7
5
  readonly distribution: Distribution;
@@ -20,8 +18,6 @@ export interface DistributionResolver {
20
18
  cleanup?(): Promise<void>;
21
19
  }
22
20
  export interface SparqlDistributionResolverOptions {
23
- importer?: Importer;
24
- server?: SparqlServer;
25
21
  timeout?: number;
26
22
  }
27
23
  /**
@@ -29,17 +25,13 @@ export interface SparqlDistributionResolverOptions {
29
25
  *
30
26
  * 1. Probes all distributions in parallel.
31
27
  * 2. Returns the first valid SPARQL endpoint as a `ResolvedDistribution`.
32
- * 3. If none: tries the importer (if provided) and returns the imported distribution.
33
- * 4. If nothing works: returns `NoDistributionAvailable`.
28
+ * 3. If none: returns `NoDistributionAvailable`.
34
29
  *
35
30
  * Does not mutate `dataset.distributions`.
36
31
  */
37
32
  export declare class SparqlDistributionResolver implements DistributionResolver {
38
- private readonly importer?;
39
- private readonly server?;
40
33
  private readonly timeout;
41
34
  constructor(options?: SparqlDistributionResolverOptions);
42
35
  resolve(dataset: Dataset): Promise<ResolvedDistribution | NoDistributionAvailable>;
43
- cleanup(): Promise<void>;
44
36
  }
45
37
  //# sourceMappingURL=resolver.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AACrD,OAAO,EAAE,YAAY,EAAoB,MAAM,sBAAsB,CAAC;AACtE,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EAA4B,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAE5E,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;gBAD/B,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE;CAE3C;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,QAAQ,CAAC,EAAE,QAAQ,CAAC;IACpB,MAAM,CAAC,EAAE,YAAY,CAAC;IACtB,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;;GASG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAW;IACrC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAC,CAAe;IACvC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAMjD,OAAO,CACX,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IA0DpD,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1
+ {"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAA4B,KAAK,eAAe,EAAE,MAAM,YAAY,CAAC;AAE5E,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;gBAD/B,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE;CAE3C;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED,MAAM,WAAW,oBAAoB;IACnC,OAAO,CACL,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;GAQG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAIjD,OAAO,CACX,OAAO,EAAE,OAAO,GACf,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CA2B3D"}
@@ -1,5 +1,3 @@
1
- import { Distribution } from '@lde/dataset';
2
- import { ImportFailed, ImportSuccessful } from '@lde/sparql-importer';
3
1
  import { probe, SparqlProbeResult } from './probe.js';
4
2
  export class ResolvedDistribution {
5
3
  distribution;
@@ -26,18 +24,13 @@ export class NoDistributionAvailable {
26
24
  *
27
25
  * 1. Probes all distributions in parallel.
28
26
  * 2. Returns the first valid SPARQL endpoint as a `ResolvedDistribution`.
29
- * 3. If none: tries the importer (if provided) and returns the imported distribution.
30
- * 4. If nothing works: returns `NoDistributionAvailable`.
27
+ * 3. If none: returns `NoDistributionAvailable`.
31
28
  *
32
29
  * Does not mutate `dataset.distributions`.
33
30
  */
34
31
  export class SparqlDistributionResolver {
35
- importer;
36
- server;
37
32
  timeout;
38
33
  constructor(options) {
39
- this.importer = options?.importer;
40
- this.server = options?.server;
41
34
  this.timeout = options?.timeout ?? 5000;
42
35
  }
43
36
  async resolve(dataset) {
@@ -52,26 +45,6 @@ export class SparqlDistributionResolver {
52
45
  return new ResolvedDistribution(distribution, results);
53
46
  }
54
47
  }
55
- // No SPARQL endpoint; try importer if available.
56
- if (this.importer) {
57
- const importResult = await this.importer.import(dataset);
58
- if (importResult instanceof ImportSuccessful) {
59
- // Start server if provided, using its query endpoint.
60
- if (this.server) {
61
- await this.server.start();
62
- const distribution = Distribution.sparql(this.server.queryEndpoint, importResult.identifier);
63
- return new ResolvedDistribution(distribution, results);
64
- }
65
- const distribution = Distribution.sparql(importResult.distribution.accessUrl, importResult.identifier);
66
- return new ResolvedDistribution(distribution, results);
67
- }
68
- if (importResult instanceof ImportFailed) {
69
- return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', results, importResult);
70
- }
71
- }
72
- return new NoDistributionAvailable(dataset, 'No SPARQL endpoint or importable data dump available', results);
73
- }
74
- async cleanup() {
75
- await this.server?.stop();
48
+ return new NoDistributionAvailable(dataset, 'No SPARQL endpoint available', results);
76
49
  }
77
50
  }
package/dist/index.d.ts CHANGED
@@ -8,4 +8,5 @@ export * from './stageOutputResolver.js';
8
8
  export * from './sparql/index.js';
9
9
  export * from './distribution/index.js';
10
10
  export * from './writer/index.js';
11
+ export * from './provenance.js';
11
12
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,cAAc,iBAAiB,CAAC;AAChC,cAAc,YAAY,CAAC;AAC3B,cAAc,eAAe,CAAC;AAC9B,cAAc,uBAAuB,CAAC;AACtC,cAAc,eAAe,CAAC;AAC9B,cAAc,YAAY,CAAC;AAC3B,cAAc,0BAA0B,CAAC;AACzC,cAAc,mBAAmB,CAAC;AAClC,cAAc,yBAAyB,CAAC;AACxC,cAAc,mBAAmB,CAAC;AAClC,cAAc,iBAAiB,CAAC"}
package/dist/index.js CHANGED
@@ -8,3 +8,4 @@ export * from './stageOutputResolver.js';
8
8
  export * from './sparql/index.js';
9
9
  export * from './distribution/index.js';
10
10
  export * from './writer/index.js';
11
+ export * from './provenance.js';
@@ -1,19 +1,26 @@
1
1
  import type { DatasetSelector } from './selector.js';
2
2
  import { Stage } from './stage.js';
3
+ import type { QuadTransform } from './stage.js';
3
4
  import type { Writer } from './writer/writer.js';
4
5
  import { type DistributionResolver } from './distribution/resolver.js';
5
6
  import type { StageOutputResolver } from './stageOutputResolver.js';
6
7
  import type { ProgressReporter } from './progressReporter.js';
8
+ /** Plugin that hooks into pipeline lifecycle events. */
9
+ export interface PipelinePlugin {
10
+ name: string;
11
+ /** Transform the quad stream before writing. */
12
+ beforeStageWrite?: QuadTransform;
13
+ }
7
14
  export interface PipelineOptions {
8
15
  datasetSelector: DatasetSelector;
9
16
  stages: Stage[];
10
17
  writers: Writer | Writer[];
18
+ plugins?: PipelinePlugin[];
11
19
  name?: string;
12
20
  distributionResolver?: DistributionResolver;
13
21
  chaining?: {
14
22
  stageOutputResolver: StageOutputResolver;
15
23
  outputDir: string;
16
- outputFormat?: 'turtle' | 'n-triples' | 'n-quads';
17
24
  };
18
25
  reporter?: ProgressReporter;
19
26
  }
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAGpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;QAClB,YAAY,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;KACnD,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAmBD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAoB9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA4Bd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAGpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,uBAAuB,CAAC;AAE9D,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AA8BD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAaZ,cAAc;YA4Bd,QAAQ;YA8BR,QAAQ;YAmDR,eAAe;YAkCd,SAAS;CAUzB"}
package/dist/pipeline.js CHANGED
@@ -20,6 +20,17 @@ class FanOutWriter {
20
20
  }
21
21
  }
22
22
  }
23
+ class TransformWriter {
24
+ inner;
25
+ transform;
26
+ constructor(inner, transform) {
27
+ this.inner = inner;
28
+ this.transform = transform;
29
+ }
30
+ async write(dataset, quads) {
31
+ await this.inner.write(dataset, this.transform(quads, dataset));
32
+ }
33
+ }
23
34
  export class Pipeline {
24
35
  name;
25
36
  datasetSelector;
@@ -36,9 +47,17 @@ export class Pipeline {
36
47
  this.name = options.name ?? '';
37
48
  this.datasetSelector = options.datasetSelector;
38
49
  this.stages = options.stages;
39
- this.writer = Array.isArray(options.writers)
50
+ let writer = Array.isArray(options.writers)
40
51
  ? new FanOutWriter(options.writers)
41
52
  : options.writers;
53
+ const transforms = options.plugins
54
+ ?.map((p) => p.beforeStageWrite)
55
+ .filter((t) => t !== undefined);
56
+ if (transforms?.length) {
57
+ const composed = (quads, dataset) => transforms.reduce((q, fn) => fn(q, dataset), quads);
58
+ writer = new TransformWriter(writer, composed);
59
+ }
60
+ this.writer = writer;
42
61
  this.distributionResolver =
43
62
  options.distributionResolver ?? new SparqlDistributionResolver();
44
63
  this.chaining = options.chaining;
@@ -103,13 +122,13 @@ export class Pipeline {
103
122
  }
104
123
  }
105
124
  async runChain(dataset, distribution, stage) {
106
- const { stageOutputResolver, outputDir, outputFormat } = this.chaining;
125
+ const { stageOutputResolver, outputDir } = this.chaining;
107
126
  const outputFiles = [];
108
127
  try {
109
128
  // 1. Run parent stage → FileWriter.
110
129
  const parentWriter = new FileWriter({
111
130
  outputDir: `${outputDir}/${stage.name}`,
112
- format: outputFormat,
131
+ format: 'n-triples',
113
132
  });
114
133
  await this.runChainedStage(dataset, distribution, stage, parentWriter);
115
134
  outputFiles.push(parentWriter.getOutputPath(dataset));
@@ -119,7 +138,7 @@ export class Pipeline {
119
138
  const child = stage.stages[i];
120
139
  const childWriter = new FileWriter({
121
140
  outputDir: `${outputDir}/${child.name}`,
122
- format: outputFormat,
141
+ format: 'n-triples',
123
142
  });
124
143
  await this.runChainedStage(dataset, currentDistribution, child, childWriter);
125
144
  outputFiles.push(childWriter.getOutputPath(dataset));
@@ -0,0 +1,7 @@
1
+ import type { QuadTransform } from './stage.js';
2
+ import type { PipelinePlugin } from './pipeline.js';
3
+ /** QuadTransform that appends PROV-O provenance quads. */
4
+ export declare const provenanceTransform: QuadTransform;
5
+ /** Pipeline plugin that appends PROV-O provenance to every stage's output. */
6
+ export declare function provenancePlugin(): PipelinePlugin;
7
+ //# sourceMappingURL=provenance.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"provenance.d.ts","sourceRoot":"","sources":["../src/provenance.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,eAAe,CAAC;AAkBpD,0DAA0D;AAC1D,eAAO,MAAM,mBAAmB,EAAE,aACgC,CAAC;AAEnE,8EAA8E;AAC9E,wBAAgB,gBAAgB,IAAI,cAAc,CAKjD"}
@@ -0,0 +1,31 @@
1
+ import { DataFactory } from 'n3';
2
+ const { namedNode, literal, blankNode, quad } = DataFactory;
3
+ const RDF_TYPE = namedNode('http://www.w3.org/1999/02/22-rdf-syntax-ns#type');
4
+ const PROV_ENTITY = namedNode('http://www.w3.org/ns/prov#Entity');
5
+ const PROV_ACTIVITY = namedNode('http://www.w3.org/ns/prov#Activity');
6
+ const PROV_WAS_GENERATED_BY = namedNode('http://www.w3.org/ns/prov#wasGeneratedBy');
7
+ const PROV_STARTED_AT_TIME = namedNode('http://www.w3.org/ns/prov#startedAtTime');
8
+ const PROV_ENDED_AT_TIME = namedNode('http://www.w3.org/ns/prov#endedAtTime');
9
+ const XSD_DATE_TIME = namedNode('http://www.w3.org/2001/XMLSchema#dateTime');
10
+ /** QuadTransform that appends PROV-O provenance quads. */
11
+ export const provenanceTransform = (quads, dataset) => appendProvenanceQuads(quads, dataset.iri.toString(), new Date());
12
+ /** Pipeline plugin that appends PROV-O provenance to every stage's output. */
13
+ export function provenancePlugin() {
14
+ return {
15
+ name: 'provenance',
16
+ beforeStageWrite: provenanceTransform,
17
+ };
18
+ }
19
+ async function* appendProvenanceQuads(quads, iri, startedAt) {
20
+ for await (const q of quads) {
21
+ yield q;
22
+ }
23
+ const endedAt = new Date();
24
+ const subject = namedNode(iri);
25
+ const activity = blankNode();
26
+ yield quad(subject, RDF_TYPE, PROV_ENTITY);
27
+ yield quad(subject, PROV_WAS_GENERATED_BY, activity);
28
+ yield quad(activity, RDF_TYPE, PROV_ACTIVITY);
29
+ yield quad(activity, PROV_STARTED_AT_TIME, literal(startedAt.toISOString(), XSD_DATE_TIME));
30
+ yield quad(activity, PROV_ENDED_AT_TIME, literal(endedAt.toISOString(), XSD_DATE_TIME));
31
+ }
package/dist/stage.d.ts CHANGED
@@ -1,7 +1,10 @@
1
1
  import { Dataset, Distribution } from '@lde/dataset';
2
+ import type { Quad } from '@rdfjs/types';
2
3
  import type { Executor, VariableBindings } from './sparql/executor.js';
3
4
  import { NotSupported } from './sparql/executor.js';
4
5
  import type { Writer } from './writer/writer.js';
6
+ /** Transforms a quad stream, optionally using dataset metadata. */
7
+ export type QuadTransform = (quads: AsyncIterable<Quad>, dataset: Dataset) => AsyncIterable<Quad>;
5
8
  export interface StageOptions {
6
9
  name: string;
7
10
  executors: Executor | Executor[];
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAErD,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAmBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACrE"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B,gEAAgE;IAChE,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB,iEAAiE;IACjE,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;CAClB;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,iBAAiB,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CAC1E;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;gBAE5B,OAAO,EAAE,YAAY;IAW3B,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAmBjB,eAAe;YA+Gf,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CAAC,YAAY,EAAE,YAAY,GAAG,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACrE"}
@@ -15,6 +15,7 @@ export interface FileWriterOptions {
15
15
  export declare class FileWriter implements Writer {
16
16
  private readonly outputDir;
17
17
  readonly format: 'turtle' | 'n-triples' | 'n-quads';
18
+ private readonly writtenFiles;
18
19
  constructor(options: FileWriterOptions);
19
20
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
20
21
  getOutputPath(dataset: Dataset): string;
@@ -1 +1 @@
1
- {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAaD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;gBAExC,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAyBxE,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,YAAY;CAUrB"}
1
+ {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;CAC7C;AAsBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAqB;gBAEtC,OAAO,EAAE,iBAAiB;IAKhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IA4BxE,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,YAAY;CAUrB"}
@@ -7,6 +7,15 @@ import { Writer as N3Writer } from 'n3';
7
7
  * Streams RDF quads to files on disk using N3 Writer.
8
8
  *
9
9
  * Files are named based on the dataset IRI using filenamify-url.
10
+ *
11
+ * The first {@link write} call for a given dataset creates (or overwrites) the file.
12
+ * Subsequent calls for the same dataset append to it, so that multiple pipeline stages
13
+ * can each contribute quads to a single output file.
14
+ *
15
+ * **Note:** With `format: 'turtle'` (the default) each append will repeat the prefix
16
+ * declarations at the start of each chunk. For multi-stage pipelines, prefer
17
+ * `format: 'n-triples'` or `format: 'n-quads'`, which produce clean line-oriented
18
+ * output without repeated headers.
10
19
  */
11
20
  const formatMap = {
12
21
  turtle: 'Turtle',
@@ -16,6 +25,7 @@ const formatMap = {
16
25
  export class FileWriter {
17
26
  outputDir;
18
27
  format;
28
+ writtenFiles = new Set();
19
29
  constructor(options) {
20
30
  this.outputDir = options.outputDir;
21
31
  this.format = options.format ?? 'turtle';
@@ -28,7 +38,9 @@ export class FileWriter {
28
38
  return;
29
39
  const filePath = join(this.outputDir, this.getFilename(dataset));
30
40
  await mkdir(dirname(filePath), { recursive: true });
31
- const stream = createWriteStream(filePath);
41
+ const flags = this.writtenFiles.has(filePath) ? 'a' : 'w';
42
+ this.writtenFiles.add(filePath);
43
+ const stream = createWriteStream(filePath, { flags });
32
44
  const writer = new N3Writer(stream, { format: formatMap[this.format] });
33
45
  writer.addQuad(first.value);
34
46
  for await (const quad of { [Symbol.asyncIterator]: () => iterator }) {
@@ -26,14 +26,16 @@ export interface SparqlWriterOptions {
26
26
  /**
27
27
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
28
28
  *
29
- * Clears the named graph before writing, then streams quads in batches
30
- * to avoid accumulating the entire dataset in memory.
29
+ * Clears the named graph before the first write per dataset per instance, then
30
+ * streams quads in batches to avoid accumulating the entire dataset in memory.
31
+ * Subsequent calls to {@link write} for the same dataset append rather than replace.
31
32
  */
32
33
  export declare class SparqlUpdateWriter implements Writer {
33
34
  private readonly endpoint;
34
35
  private readonly auth?;
35
36
  private readonly fetch;
36
37
  private readonly batchSize;
38
+ private readonly clearedGraphs;
37
39
  constructor(options: SparqlWriterOptions);
38
40
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
39
41
  private clearGraph;
@@ -1 +1 @@
1
- {"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;GAKG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;gBAEvB,OAAO,EAAE,mBAAmB;IAOlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAS1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAqB5B"}
1
+ {"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;CACpB;AAED;;;;;;GAMG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqB;gBAEvC,OAAO,EAAE,mBAAmB;IAOlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAa1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAqB5B"}
@@ -3,14 +3,16 @@ import { serializeQuads } from './serialize.js';
3
3
  /**
4
4
  * Writes RDF data to a SPARQL endpoint using SPARQL UPDATE INSERT DATA queries.
5
5
  *
6
- * Clears the named graph before writing, then streams quads in batches
7
- * to avoid accumulating the entire dataset in memory.
6
+ * Clears the named graph before the first write per dataset per instance, then
7
+ * streams quads in batches to avoid accumulating the entire dataset in memory.
8
+ * Subsequent calls to {@link write} for the same dataset append rather than replace.
8
9
  */
9
10
  export class SparqlUpdateWriter {
10
11
  endpoint;
11
12
  auth;
12
13
  fetch;
13
14
  batchSize;
15
+ clearedGraphs = new Set();
14
16
  constructor(options) {
15
17
  this.endpoint = options.endpoint;
16
18
  this.auth = options.auth;
@@ -19,7 +21,10 @@ export class SparqlUpdateWriter {
19
21
  }
20
22
  async write(dataset, quads) {
21
23
  const graphUri = dataset.iri.toString();
22
- await this.clearGraph(graphUri);
24
+ if (!this.clearedGraphs.has(graphUri)) {
25
+ await this.clearGraph(graphUri);
26
+ this.clearedGraphs.add(graphUri);
27
+ }
23
28
  for await (const chunk of batch(quads, this.batchSize)) {
24
29
  await this.insertBatch(graphUri, chunk);
25
30
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.6.32",
3
+ "version": "0.7.1",
4
4
  "repository": {
5
5
  "url": "https://github.com/ldengine/lde",
6
6
  "directory": "packages/pipeline"