@lde/pipeline 0.31.0 → 0.31.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -30,6 +30,24 @@ const selector = new RegistrySelector({
30
30
  const selector = new ManualDatasetSelection([dataset]);
31
31
  ```
32
32
 
33
+ ### Distribution Resolver
34
+
35
+ Resolves each dataset to a usable SPARQL endpoint. `SparqlDistributionResolver` probes a dataset’s own endpoint; wrap it in `ImportResolver` to add the ability to import a data dump into a local SPARQL server.
36
+
37
+ `ImportResolver`’s `strategy` option controls how the source is chosen; the strategies listed below the example are ordered from least to most eager to import a data dump:
38
+
39
+ ```typescript
40
+ const resolver = new ImportResolver(new SparqlDistributionResolver(), {
41
+ importer,
42
+ server,
43
+ strategy: 'sparqlWithImportFallback',
44
+ });
45
+ ```
46
+
47
+ - `'sparql'` (default) — use the dataset’s own SPARQL endpoint when one is available; import a data dump only when no endpoint responds.
48
+ - `'sparqlWithImportFallback'` — like `'sparql'`, but also fall back to the data dump when the endpoint passes probing yet a stage fails against it at runtime. The pipeline discards the endpoint-sourced partial output and re-runs all stages against the import. Use this when endpoints are present but unreliable for heavy aggregate queries.
49
+ - `'import'` — always import the data dump, even when a working endpoint is advertised.
50
+
33
51
  ### Stage
34
52
 
35
53
  A stage groups an item selector, one or more executors, and configuration:
@@ -68,6 +86,12 @@ new Stage({
68
86
 
69
87
  `maxConcurrency` (default: 10) limits the total number of concurrent SPARQL queries. Within each batch, all executors run in parallel; the number of concurrent batches is automatically reduced to `⌊maxConcurrency / executorCount⌋` so the total query pressure stays within the limit. For example, with `maxConcurrency: 10` and two executors per stage, up to 5 batches run concurrently (10 SPARQL queries total).
70
88
 
89
+ #### Expecting output
90
+
91
+ `expectsOutput` (default: `false`) marks a stage whose query must yield at least one quad. A supported stage that produces none is then treated as a hard failure rather than a legitimately empty result.
92
+
93
+ Set it for scalar aggregates such as `SELECT (COUNT(*) AS ?n)`, which always return exactly one row — so zero output can only mean the endpoint truncated or aborted the response (e.g. a timeout surfaced as an empty `HTTP 200`). The failure flows through the pipeline like any other hard stage failure, triggering the [reactive dump fallback](#distribution-resolver) when `strategy: 'sparqlWithImportFallback'` is configured. Leave it `false` for stages that may legitimately be empty, such as class or property partitions of a dataset that lacks that structure.
94
+
71
95
  ### Item Selector
72
96
 
73
97
  Selects resources from the distribution and fans out executor calls per batch of results. Implements the `ItemSelector` interface:
@@ -6,19 +6,24 @@ export interface ImportResolverOptions {
6
6
  importer: Importer;
7
7
  server: SparqlServer;
8
8
  /**
9
- * Controls how a dataset's distribution is selected.
9
+ * Controls how a dataset's distribution is selected, ordered by how eagerly a
10
+ * data dump is imported:
10
11
  *
11
12
  * - `'sparql'` (default) — use a dataset's own SPARQL endpoint when one is
12
- * available; fall back to importing a data dump only when no endpoint
13
- * responds.
13
+ * available; import a data dump only when no endpoint responds.
14
+ * - `'sparqlWithImportFallback'` — like `'sparql'`, but additionally fall
15
+ * back to importing the data dump when the endpoint passes probing yet fails
16
+ * to serve an analysis stage at runtime. The pipeline then re-runs all
17
+ * stages locally against the import (see
18
+ * {@link ImportResolver.resolveFallback}).
14
19
  * - `'import'` — always import a data dump into a local SPARQL server,
15
20
  * even when the dataset advertises a working SPARQL endpoint. Useful when
16
21
  * the remote endpoint is too slow or unreliable.
17
22
  *
18
- * In both modes the inner resolver still runs so that probe results are
23
+ * In every mode the inner resolver still runs so that probe results are
19
24
  * collected for reporting and the dataset knowledge graph.
20
25
  */
21
- strategy?: 'sparql' | 'import';
26
+ strategy?: 'sparql' | 'sparqlWithImportFallback' | 'import';
22
27
  }
23
28
  /**
24
29
  * A {@link DistributionResolver} decorator that adds data-dump import logic.
@@ -39,6 +44,7 @@ export declare class ImportResolver implements DistributionResolver {
39
44
  constructor(inner: DistributionResolver, options: ImportResolverOptions);
40
45
  probe(dataset: Dataset, callbacks?: ResolveCallbacks): Promise<ProbedDistributions>;
41
46
  resolve(probed: ProbedDistributions, callbacks?: ResolveCallbacks): Promise<ResolvedDistribution | NoDistributionAvailable>;
47
+ resolveFallback(probed: ProbedDistributions, callbacks?: ResolveCallbacks): Promise<ResolvedDistribution | NoDistributionAvailable>;
42
48
  /**
43
49
  * The preferred importable data dump and its probe result, or `null` if no
44
50
  * downloadable distribution passed probing.
@@ -1 +1 @@
1
- {"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,OAAO,EAAgB,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAMrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EAEzB,KAAK,gBAAgB,EACrB,uBAAuB,EACvB,mBAAmB,EACnB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAGvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;IACrB;;;;;;;;;;;;OAYG;IACH,QAAQ,CAAC,EAAE,QAAQ,GAAG,QAAQ,CAAC;CAChC;AAED;;;;;;;;;;;;GAYG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,KAAK,CACT,OAAO,EAAE,OAAO,EAChB,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,mBAAmB,CAAC;IAgBzB,OAAO,CACX,MAAM,EAAE,mBAAmB,EAC3B,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IAoB1D;;;OAGG;IACH,OAAO,CAAC,qBAAqB;IAY7B;;;OAGG;IACH,OAAO,CAAC,gBAAgB;YAeV,aAAa;IAmGrB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
1
+ {"version":3,"file":"importResolver.d.ts","sourceRoot":"","sources":["../../src/distribution/importResolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,KAAK,OAAO,EAAgB,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,QAAQ,EAAE,MAAM,sBAAsB,CAAC;AAMrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,oBAAoB,CAAC;AACvD,OAAO,EACL,KAAK,oBAAoB,EAEzB,KAAK,gBAAgB,EACrB,uBAAuB,EACvB,mBAAmB,EACnB,oBAAoB,EACrB,MAAM,eAAe,CAAC;AAGvB,MAAM,WAAW,qBAAqB;IACpC,QAAQ,EAAE,QAAQ,CAAC;IACnB,MAAM,EAAE,YAAY,CAAC;IACrB;;;;;;;;;;;;;;;;;OAiBG;IACH,QAAQ,CAAC,EAAE,QAAQ,GAAG,0BAA0B,GAAG,QAAQ,CAAC;CAC7D;AAED;;;;;;;;;;;;GAYG;AACH,qBAAa,cAAe,YAAW,oBAAoB;IAEvD,OAAO,CAAC,QAAQ,CAAC,KAAK;IACtB,OAAO,CAAC,QAAQ,CAAC,OAAO;gBADP,KAAK,EAAE,oBAAoB,EAC3B,OAAO,EAAE,qBAAqB;IAG3C,KAAK,CACT,OAAO,EAAE,OAAO,EAChB,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,mBAAmB,CAAC;IAgBzB,OAAO,CACX,MAAM,EAAE,mBAAmB,EAC3B,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IAoBpD,eAAe,CACnB,MAAM,EAAE,mBAAmB,EAC3B,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;IAc1D;;;OAGG;IACH,OAAO,CAAC,qBAAqB;IAY7B;;;OAGG;IACH,OAAO,CAAC,gBAAgB;YAeV,aAAa;IAmGrB,OAAO,IAAI,OAAO,CAAC,IAAI,CAAC;CAG/B"}
@@ -45,6 +45,14 @@ export class ImportResolver {
45
45
  }
46
46
  return this.importDataset(probed.dataset, probed.probeResults, callbacks);
47
47
  }
48
+ async resolveFallback(probed, callbacks) {
49
+ if (this.options.strategy !== 'sparqlWithImportFallback') {
50
+ return new NoDistributionAvailable(probed.dataset, 'Import fallback is not enabled', probed.probeResults);
51
+ }
52
+ // Import the data dump regardless of the endpoint chosen at probe time:
53
+ // the endpoint is empirically incapable, so the dump is the fallback.
54
+ return this.importDataset(probed.dataset, probed.probeResults, callbacks);
55
+ }
48
56
  /**
49
57
  * The preferred importable data dump and its probe result, or `null` if no
50
58
  * downloadable distribution passed probing.
@@ -58,6 +58,18 @@ export interface ResolveCallbacks {
58
58
  export interface DistributionResolver {
59
59
  probe(dataset: Dataset, callbacks?: ResolveCallbacks): Promise<ProbedDistributions>;
60
60
  resolve(probed: ProbedDistributions, callbacks?: ResolveCallbacks): Promise<ResolvedDistribution | NoDistributionAvailable>;
61
+ /**
62
+ * Re-resolve a dataset to an alternative source after the primary source
63
+ * (a live SPARQL endpoint) failed to serve the analysis stages. Returns an
64
+ * imported data dump as a {@link ResolvedDistribution}, or
65
+ * {@link NoDistributionAvailable} when no fallback exists or reactive
66
+ * fallback is not enabled.
67
+ *
68
+ * Resolvers without a dump to fall back to (e.g.
69
+ * {@link SparqlDistributionResolver}) omit this method; the pipeline then
70
+ * keeps the endpoint-sourced partial results.
71
+ */
72
+ resolveFallback?(probed: ProbedDistributions, callbacks?: ResolveCallbacks): Promise<ResolvedDistribution | NoDistributionAvailable>;
61
73
  cleanup?(): Promise<void>;
62
74
  }
63
75
  export interface SparqlDistributionResolverOptions {
@@ -1 +1 @@
1
- {"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAGL,KAAK,eAAe,EACrB,MAAM,yBAAyB,CAAC;AAEjC,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;IACpC,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM;IAChC,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM;gBAJpB,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA,EAC3B,cAAc,CAAC,EAAE,MAAM,YAAA,EACvB,WAAW,CAAC,EAAE,MAAM,YAAA;CAEhC;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,YAAY,CAAC;IAC3B,WAAW,EAAE,eAAe,CAAC;CAC9B;AAED;;;;;GAKG;AACH,qBAAa,mBAAmB;IAE5B,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;gBAF3B,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,eAAe,EAAE,EAC/B,MAAM,EAAE,YAAY,GAAG,IAAI;CAEvC;AAED,kEAAkE;AAClE,MAAM,WAAW,gBAAgB;IAC/B,4EAA4E;IAC5E,OAAO,CAAC,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,EAAE,eAAe,KAAK,IAAI,CAAC;IACxE,6DAA6D;IAC7D,aAAa,CAAC,EAAE,MAAM,IAAI,CAAC;IAC3B,kEAAkE;IAClE,cAAc,CAAC,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACtE;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC,KAAK,CACH,OAAO,EAAE,OAAO,EAChB,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,mBAAmB,CAAC,CAAC;IAChC,OAAO,CACL,MAAM,EAAE,mBAAmB,EAC3B,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;;;GAUG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAIjD,KAAK,CACT,OAAO,EAAE,OAAO,EAChB,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,mBAAmB,C
AAC;IA4BzB,OAAO,CACX,MAAM,EAAE,mBAAmB,GAC1B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CAc3D"}
1
+ {"version":3,"file":"resolver.d.ts","sourceRoot":"","sources":["../../src/distribution/resolver.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACzD,OAAO,EAGL,KAAK,eAAe,EACrB,MAAM,yBAAyB,CAAC;AAEjC,qBAAa,oBAAoB;IAE7B,QAAQ,CAAC,YAAY,EAAE,YAAY;IACnC,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;IACpC,QAAQ,CAAC,cAAc,CAAC,EAAE,MAAM;IAChC,QAAQ,CAAC,WAAW,CAAC,EAAE,MAAM;gBAJpB,YAAY,EAAE,YAAY,EAC1B,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA,EAC3B,cAAc,CAAC,EAAE,MAAM,YAAA,EACvB,WAAW,CAAC,EAAE,MAAM,YAAA;CAEhC;AAED,qBAAa,uBAAuB;IAEhC,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,OAAO,EAAE,MAAM;IACxB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,YAAY,CAAC,EAAE,YAAY;gBAH3B,OAAO,EAAE,OAAO,EAChB,OAAO,EAAE,MAAM,EACf,YAAY,EAAE,eAAe,EAAE,EAC/B,YAAY,CAAC,EAAE,YAAY,YAAA;CAEvC;AAED;;;;GAIG;AACH,MAAM,WAAW,YAAY;IAC3B,YAAY,EAAE,YAAY,CAAC;IAC3B,WAAW,EAAE,eAAe,CAAC;CAC9B;AAED;;;;;GAKG;AACH,qBAAa,mBAAmB;IAE5B,QAAQ,CAAC,OAAO,EAAE,OAAO;IACzB,QAAQ,CAAC,YAAY,EAAE,eAAe,EAAE;IACxC,QAAQ,CAAC,MAAM,EAAE,YAAY,GAAG,IAAI;gBAF3B,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,eAAe,EAAE,EAC/B,MAAM,EAAE,YAAY,GAAG,IAAI;CAEvC;AAED,kEAAkE;AAClE,MAAM,WAAW,gBAAgB;IAC/B,4EAA4E;IAC5E,OAAO,CAAC,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,MAAM,EAAE,eAAe,KAAK,IAAI,CAAC;IACxE,6DAA6D;IAC7D,aAAa,CAAC,EAAE,MAAM,IAAI,CAAC;IAC3B,kEAAkE;IAClE,cAAc,CAAC,EAAE,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACtE;AAED;;;;;;;;GAQG;AACH,MAAM,WAAW,oBAAoB;IACnC,KAAK,CACH,OAAO,EAAE,OAAO,EAChB,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,mBAAmB,CAAC,CAAC;IAChC,OAAO,CACL,MAAM,EAAE,mBAAmB,EAC3B,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D;;;;;;;;;;OAUG;IACH,eAAe,CAAC,CACd,MAAM,EAAE,mBAAmB,EAC3B,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC,CAAC;IAC3D,OAAO,CAAC,IAAI,OAAO,CAAC,IAAI,CAAC,CAAC;CAC3B;AAED,MAAM,WAAW,iCAAiC;IAChD,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AAED;;;;;;;;;;GAUG;AACH,qBAAa,0BAA2B,YAAW,oBAAoB;IACrE,OAAO,CAAC,QA
AQ,CAAC,OAAO,CAAS;gBAErB,OAAO,CAAC,EAAE,iCAAiC;IAIjD,KAAK,CACT,OAAO,EAAE,OAAO,EAChB,SAAS,CAAC,EAAE,gBAAgB,GAC3B,OAAO,CAAC,mBAAmB,CAAC;IA4BzB,OAAO,CACX,MAAM,EAAE,mBAAmB,GAC1B,OAAO,CAAC,oBAAoB,GAAG,uBAAuB,CAAC;CAc3D"}
@@ -103,6 +103,23 @@ export declare class Pipeline {
103
103
  * per-`(dataset, stage)` IRIs rather than blank nodes.
104
104
  */
105
105
  private stageWriter;
106
+ /**
107
+ * Report a resolved distribution as the dataset's selected source, plus its
108
+ * deep validity verdict when it was imported. Shared by the primary resolve
109
+ * path and the reactive dump fallback so both surface the same reporter
110
+ * events for the source they actually use. A completed data-dump import is a
111
+ * deep validity verdict on the imported distribution (valid, or empty when it
112
+ * yielded no triples); native SPARQL endpoints are not imported and carry no
113
+ * deep verdict.
114
+ */
115
+ private reportSelectedDistribution;
116
+ /**
117
+ * Run every top-level stage against one distribution, catching and reporting
118
+ * per-stage failures so one failing stage does not abort the rest. Returns
119
+ * whether any stage hard-failed – the signal the reactive dump fallback
120
+ * reacts to.
121
+ */
122
+ private runStages;
106
123
  /**
107
124
  * Run a stage with reporting and return whether it was supported.
108
125
  * Returns `true` if the stage produced results, `false` if NotSupported.
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAG1B,MAAM,4BAA4B,CAAC;AAKpC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAY7D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAG/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC;;;;;;;GAOG;AACH,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC,uBAAuB,CAAC,CAAC;CAC3D;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,gBAAgB,GAAG,SAAS,gBAAgB,EAAE,CAAC;IAC1D;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAkFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAyC;IAC3E,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;IACrD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAkB;IACnD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAS;gBAE9B,OAAO,EAAE,eAAe;IA+C9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;IA+K5B,+EAA+E;YACjE,aAAa;YAmBb,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;;;OAKG;IACH,OAAO,CAAC,WAAW;IAMnB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QA
AQ;YA+DP,SAAS;CAczB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AACA,OAAO,EAAE,OAAO,EAAgB,MAAM,cAAc,CAAC;AAGrD,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAI1B,MAAM,4BAA4B,CAAC;AAKpC,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,uBAAuB,CAAC;AAY7D,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAG/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC;;;;;;;GAOG;AACH,MAAM,WAAW,uBAAuB;IACtC,OAAO,EAAE,OAAO,CAAC;IACjB,KAAK,EAAE,MAAM,CAAC;CACf;AAED,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb;;;;;OAKG;IACH,gBAAgB,CAAC,EAAE,aAAa,CAAC,uBAAuB,CAAC,CAAC;CAC3D;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF;;;;;OAKG;IACH,QAAQ,CAAC,EAAE,gBAAgB,GAAG,SAAS,gBAAgB,EAAE,CAAC;IAC1D;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,eAAe,CAAC;IAClC;;;;;;OAMG;IACH,eAAe,CAAC,EAAE,MAAM,CAAC;IACzB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAsFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,gBAAgB,CAAC,CAAyC;IAC3E,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;IACrD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAkB;IACnD,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAC,CAAS;gBAE9B,OAAO,EAAE,eAAe;IA+C9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;IA6M5B,+EAA+E;YACjE,aAAa;YAmBb,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;;;OAKG;IACH,OAAO,CAAC,WAAW;IAMnB;;;;;;;;OAQG;IACH,OAAO,CAAC,0BAA0B;IA4BlC;;;;;OAKG;
YACW,SAAS;IA8BvB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA+DP,SAAS;CAczB"}
package/dist/pipeline.js CHANGED
@@ -1,7 +1,7 @@
1
1
  import { createReadStream } from 'node:fs';
2
2
  import { StreamParser } from 'n3';
3
3
  import { FileWriter } from './writer/fileWriter.js';
4
- import { NoDistributionAvailable, } from './distribution/resolver.js';
4
+ import { NoDistributionAvailable, ResolvedDistribution, } from './distribution/resolver.js';
5
5
  import { SparqlDistributionResolver } from './distribution/index.js';
6
6
  import { sourceFingerprint } from './provenance/sourceFingerprint.js';
7
7
  import { shouldReprocess } from './provenance/reprocessDecision.js';
@@ -62,6 +62,10 @@ class FanOutWriter {
62
62
  for (const w of this.writers)
63
63
  await w.flush?.(dataset);
64
64
  }
65
+ async reset(dataset) {
66
+ for (const w of this.writers)
67
+ await w.reset?.(dataset);
68
+ }
65
69
  }
66
70
  class TransformWriter {
67
71
  inner;
@@ -164,7 +168,9 @@ export class Pipeline {
164
168
  }
165
169
  // Derive the source-change fingerprint from the probed source: null for a
166
170
  // live SPARQL endpoint (always reprocess) or when no source is available.
167
- const fingerprint = probed.source
171
+ // Reassigned to the dump's fingerprint if a reactive fallback later imports
172
+ // one, so change-detection can skip an unchanged dump on the next run.
173
+ let fingerprint = probed.source
168
174
  ? sourceFingerprint(probed.source.distribution, probed.source.probeResult)
169
175
  : null;
170
176
  // Gate: skip an unchanged dataset before paying any import cost.
@@ -218,13 +224,7 @@ export class Pipeline {
218
224
  this.reporter?.datasetSkipped?.(dataset, resolved.message);
219
225
  return;
220
226
  }
221
- this.reporter?.distributionSelected?.(dataset, resolved.distribution, resolved.importedFrom, resolved.importDuration, resolved.tripleCount);
222
- // A completed data-dump import is a deep validity verdict on the imported
223
- // distribution (valid, or empty when it yielded no triples). Native SPARQL
224
- // endpoints are not imported, so they carry no deep verdict.
225
- if (resolved.importedFrom) {
226
- this.reporter?.distributionValidated?.(resolved.importedFrom, importOutcomeToVerdict(new ImportSuccessful(resolved.importedFrom, undefined, resolved.tripleCount), fingerprint));
227
- }
227
+ this.reportSelectedDistribution(dataset, resolved, fingerprint);
228
228
  const timeout = this.timeoutFactory();
229
229
  const unsubscribe = timeout.subscribe?.({
230
230
  onTighten: (event) => this.reporter?.timeoutTightened?.(event),
@@ -232,18 +232,51 @@ export class Pipeline {
232
232
  });
233
233
  let stageFailed = false;
234
234
  try {
235
- for (const stage of this.stages) {
235
+ stageFailed = await this.runStages(dataset, resolved.distribution, timeout);
236
+ // Reactive fallback: an endpoint that passed probing but could not serve
237
+ // the analysis stages is empirically incapable. Switch to the dataset’s
238
+ // data dump and re-run all stages locally, discarding the
239
+ // endpoint-sourced partial results. Only a live endpoint
240
+ // (`importedFrom === undefined`) can fall back – a run already on an
241
+ // imported dump has nowhere further to go.
242
+ if (stageFailed &&
243
+ resolved.importedFrom === undefined &&
244
+ this.distributionResolver.resolveFallback) {
245
+ // A failing fallback import must abort only this dataset, never the
246
+ // whole run – matching the per-dataset isolation of the primary resolve
247
+ // path. The dataset stays recorded as failed (stageFailed is already
248
+ // true) and processing continues with the next dataset.
236
249
  try {
237
- if (stage.stages.length > 0) {
238
- await this.runChain(dataset, resolved.distribution, stage, timeout);
250
+ const fallback = await this.distributionResolver.resolveFallback(probed, {
251
+ onImportStart: () => this.reporter?.importStarted?.(),
252
+ onImportFailed: (distribution, error) => this.reporter?.importFailed?.(distribution, error),
253
+ });
254
+ if (fallback instanceof ResolvedDistribution) {
255
+ // The dump is now the dataset's effective source: report it as
256
+ // selected/validated and adopt its change fingerprint so the next
257
+ // run can skip an unchanged dump (the endpoint's fingerprint is
258
+ // null, which would force a re-import every run).
259
+ if (fallback.importedFrom) {
260
+ const dumpProbeResult = probed.probeResults.find((result) => result.url === fallback.importedFrom.accessUrl.toString());
261
+ if (dumpProbeResult) {
262
+ fingerprint = sourceFingerprint(fallback.importedFrom, dumpProbeResult);
263
+ }
264
+ }
265
+ this.reportSelectedDistribution(dataset, fallback, fingerprint);
266
+ // Discard the endpoint-sourced partial output before the re-run so
267
+ // the dump-sourced stats replace it rather than appending to it.
268
+ await this.writer.reset?.(dataset);
269
+ stageFailed = await this.runStages(dataset, fallback.distribution, timeout);
239
270
  }
240
- else {
241
- await this.runStage(dataset, resolved.distribution, stage, this.stageWriter(stage.name), timeout);
271
+ else if (fallback.importFailed) {
272
+ // A failed dump import is a deep validity verdict on that dump –
273
+ // surface it rather than silently keeping the endpoint's partial
274
+ // output, matching the primary NoDistributionAvailable path.
275
+ this.reporter?.distributionValidated?.(fallback.importFailed.distribution, importOutcomeToVerdict(fallback.importFailed, fingerprint));
242
276
  }
243
277
  }
244
278
  catch (error) {
245
- stageFailed = true;
246
- this.reporter?.stageFailed?.(stage.name, error instanceof Error ? error : new Error(String(error)));
279
+ this.reporter?.stageFailed?.('reactive-dump-fallback', error instanceof Error ? error : new Error(String(error)));
247
280
  }
248
281
  }
249
282
  }
@@ -308,6 +341,45 @@ export class Pipeline {
308
341
  ? new TransformWriter(this.writer, this.beforeStageWrite, stage)
309
342
  : this.writer;
310
343
  }
344
+ /**
345
+ * Report a resolved distribution as the dataset's selected source, plus its
346
+ * deep validity verdict when it was imported. Shared by the primary resolve
347
+ * path and the reactive dump fallback so both surface the same reporter
348
+ * events for the source they actually use. A completed data-dump import is a
349
+ * deep validity verdict on the imported distribution (valid, or empty when it
350
+ * yielded no triples); native SPARQL endpoints are not imported and carry no
351
+ * deep verdict.
352
+ */
353
+ reportSelectedDistribution(dataset, resolved, fingerprint) {
354
+ this.reporter?.distributionSelected?.(dataset, resolved.distribution, resolved.importedFrom, resolved.importDuration, resolved.tripleCount);
355
+ if (resolved.importedFrom) {
356
+ this.reporter?.distributionValidated?.(resolved.importedFrom, importOutcomeToVerdict(new ImportSuccessful(resolved.importedFrom, undefined, resolved.tripleCount), fingerprint));
357
+ }
358
+ }
359
+ /**
360
+ * Run every top-level stage against one distribution, catching and reporting
361
+ * per-stage failures so one failing stage does not abort the rest. Returns
362
+ * whether any stage hard-failed – the signal the reactive dump fallback
363
+ * reacts to.
364
+ */
365
+ async runStages(dataset, distribution, timeout) {
366
+ let stageFailed = false;
367
+ for (const stage of this.stages) {
368
+ try {
369
+ if (stage.stages.length > 0) {
370
+ await this.runChain(dataset, distribution, stage, timeout);
371
+ }
372
+ else {
373
+ await this.runStage(dataset, distribution, stage, this.stageWriter(stage.name), timeout);
374
+ }
375
+ }
376
+ catch (error) {
377
+ stageFailed = true;
378
+ this.reporter?.stageFailed?.(stage.name, error instanceof Error ? error : new Error(String(error)));
379
+ }
380
+ }
381
+ return stageFailed;
382
+ }
311
383
  /**
312
384
  * Run a stage with reporting and return whether it was supported.
313
385
  * Returns `true` if the stage produced results, `false` if NotSupported.
package/dist/stage.d.ts CHANGED
@@ -70,6 +70,23 @@ export interface StageOptions {
70
70
  maxConcurrency?: number;
71
71
  /** Child stages that chain off this stage's output. */
72
72
  stages?: Stage[];
73
+ /**
74
+ * Treat a supported stage that produces no quads as a hard failure (throws),
75
+ * rather than a legitimately empty result.
76
+ *
77
+ * Set this for stages whose query must yield output — typically a scalar
78
+ * aggregate such as `SELECT (COUNT(*) AS ?n)`, which always returns exactly
79
+ * one row, so zero quads can only mean the endpoint truncated or aborted the
80
+ * response (e.g. a timeout surfaced as an empty `HTTP 200`). The resulting
81
+ * failure flows through the pipeline like any other hard stage failure,
82
+ * triggering the reactive dump fallback when one is configured.
83
+ *
84
+ * Leave it `false` (default) for stages that may legitimately be empty, such
85
+ * as class/property partitions of a dataset that lacks that structure.
86
+ *
87
+ * @default false
88
+ */
89
+ expectsOutput?: boolean;
73
90
  /** Optional validation of the combined quads produced by all executors per batch. */
74
91
  validation?: {
75
92
  validator: Validator;
@@ -95,6 +112,8 @@ export interface SelectOptions {
95
112
  export declare class Stage {
96
113
  readonly name: string;
97
114
  readonly stages: readonly Stage[];
115
+ /** Whether an empty result is treated as a hard failure. @see {@link StageOptions.expectsOutput} */
116
+ readonly expectsOutput: boolean;
98
117
  private readonly executors;
99
118
  private readonly itemSelector?;
100
119
  private readonly batchSize;
@@ -104,6 +123,12 @@ export declare class Stage {
104
123
  /** The validator for this stage, if configured. */
105
124
  get validator(): Validator | undefined;
106
125
  run(dataset: Dataset, distribution: Distribution, writer: Writer, options?: RunOptions): Promise<NotSupported | void>;
126
+ /**
127
+ * Throw when {@link StageOptions.expectsOutput} is set but the stage produced
128
+ * no quads — a supported-but-empty result that signals a truncated or aborted
129
+ * endpoint response rather than a legitimately empty one.
130
+ */
131
+ private assertProduced;
107
132
  private runWithSelector;
108
133
  /**
109
134
  * Validate a buffer of quads. Throws on halt, returns the quads to write
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,CAAC,GAAG,IAAI,CAC/B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,GAAG,KACT,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,YAAY,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,aAAa,CAAC,eAAe,CAAC,GAAG,aAAa,CAAC,eAAe,CAAC,EAAE,CAAC;CAC/E;AAED,2EAA2E;AAC3E,MAAM,MAAM,cAAc,GACtB,QAAQ,GACR,gBAAgB,GAChB,CAAC,QAAQ,GAAG,gBAAgB,CAAC,EAAE,CAAC;AAQpC,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,cAAc,CAAC;IAC1B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAUjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,
EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAqDjB,eAAe;IA8J7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;IA6BxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;CAiBxB;AA4BD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD;;;;;;;GAOG;AACH,MAAM,MAAM,aAAa,CAAC,GAAG,IAAI,CAC/B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,GAAG,KACT,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB;;;;;;GAMG;AACH,MAAM,WAAW,eAAe;IAC9B,OAAO,EAAE,OAAO,CAAC;IACjB,YAAY,EAAE,YAAY,CAAC;IAC3B,KAAK,EAAE,MAAM,CAAC;CACf;AAED;;;;;;;;;;;;GAYG;AACH,MAAM,WAAW,gBAAgB;IAC/B,QAAQ,EAAE,QAAQ,CAAC;IACnB,SAAS,CAAC,EAAE,aAAa,CAAC,eAAe,CAAC,GAAG,aAAa,CAAC,eAAe,CAAC,EAAE,CAAC;CAC/E;AAED,2EAA2E;AAC3E,MAAM,MAAM,cAAc,GACtB,QAAQ,GACR,gBAAgB,GAChB,CAAC,QAAQ,GAAG,gBAAgB,CAAC,EAAE,CAAC;AAQpC,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,cAAc,CAAC;IAC1B,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB;;;;;;;;;;;;;;;OAeG;IACH,aAAa,CAAC,EAAE,OAAO,CAAC;IACxB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,oGAAoG;IACpG,QAAQ,CAAC,aAAa,EAAE,OAAO,CAAC;IAChC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAuB;IACjD,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAWjC,mDAAmD;
IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;IAqE/B;;;;OAIG;IACH,OAAO,CAAC,cAAc;YAMR,eAAe;IAgK7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;IA6BxB;;;;;OAKG;IACH,OAAO,CAAC,eAAe;CAiBxB;AAiDD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
package/dist/stage.js CHANGED
@@ -4,6 +4,8 @@ import { AsyncQueue } from './asyncQueue.js';
4
4
  export class Stage {
5
5
  name;
6
6
  stages;
7
+ /** Whether an empty result is treated as a hard failure. @see {@link StageOptions.expectsOutput} */
8
+ expectsOutput;
7
9
  executors;
8
10
  itemSelector;
9
11
  batchSize;
@@ -17,6 +19,7 @@ export class Stage {
17
19
  this.batchSize = options.batchSize ?? 10;
18
20
  this.maxConcurrency = options.maxConcurrency ?? 10;
19
21
  this.validation = options.validation;
22
+ this.expectsOutput = options.expectsOutput ?? false;
20
23
  }
21
24
  /** The validator for this stage, if configured. */
22
25
  get validator() {
@@ -33,6 +36,9 @@ export class Stage {
33
36
  if (streams instanceof NotSupported) {
34
37
  return streams;
35
38
  }
39
+ // Quads the executors produced (before any validation filtering); used to
40
+ // enforce `expectsOutput` below.
41
+ let produced = 0;
36
42
  if (this.validation) {
37
43
  const buffer = [];
38
44
  for (const stream of streams) {
@@ -40,6 +46,7 @@ export class Stage {
40
46
  buffer.push(quad);
41
47
  }
42
48
  }
49
+ produced = buffer.length;
43
50
  const onInvalid = this.validation.onInvalid ?? 'write';
44
51
  if (onInvalid === 'write') {
45
52
  await Promise.all([
@@ -58,9 +65,27 @@ export class Stage {
58
65
  }
59
66
  }
60
67
  }
68
+ else if (this.expectsOutput) {
69
+ // Only thread the per-quad counter through when the count is actually
70
+ // needed; the default path stays a plain streaming write with no overhead.
71
+ await writer.write(dataset, countQuads(mergeStreams(streams), (count) => {
72
+ produced = count;
73
+ }));
74
+ }
61
75
  else {
62
76
  await writer.write(dataset, mergeStreams(streams));
63
77
  }
78
+ this.assertProduced(produced);
79
+ }
80
+ /**
81
+ * Throw when {@link StageOptions.expectsOutput} is set but the stage produced
82
+ * no quads — a supported-but-empty result that signals a truncated or aborted
83
+ * endpoint response rather than a legitimately empty one.
84
+ */
85
+ assertProduced(produced) {
86
+ if (this.expectsOutput && produced === 0) {
87
+ throw new Error(`Stage '${this.name}' expected output but produced none`);
88
+ }
64
89
  }
65
90
  async runWithSelector(selector, dataset, distribution, writer, options) {
66
91
  // Peek the first batch to detect an empty selector before starting the
@@ -185,6 +210,7 @@ export class Stage {
185
210
  if (!hasResults) {
186
211
  return new NotSupported('All executors returned NotSupported');
187
212
  }
213
+ this.assertProduced(quadsGenerated);
188
214
  }
189
215
  /**
190
216
  * Validate a buffer of quads. Throws on halt, returns the quads to write
@@ -260,3 +286,20 @@ async function* mergeStreams(streams) {
260
286
  yield* stream;
261
287
  }
262
288
  }
289
/**
 * Pass a quad stream through unchanged while counting it, reporting the total
 * via `onCount` once iteration ends. Lets a streaming write enforce
 * {@link StageOptions.expectsOutput} without buffering.
 *
 * `onCount` is invoked from a `finally` block, so it fires exactly once per
 * started iteration: when the source is exhausted, when the consumer stops
 * early (its `break`/`return` closes this generator), and when the source
 * throws. On an early stop the reported value is the number of quads handed to
 * the consumer so far, so a writer that received quads but did not drain the
 * stream is no longer mistaken for one that received nothing.
 *
 * If the consumer never starts iterating, the generator body never runs and
 * `onCount` is not called.
 *
 * @param stream Async iterable of quads to forward.
 * @param onCount Callback receiving the number of quads yielded.
 */
async function* countQuads(stream, onCount) {
    let count = 0;
    try {
        for await (const quad of stream) {
            count++;
            yield quad;
        }
    }
    finally {
        // Runs on normal completion, early consumer exit and source errors alike.
        onCount(count);
    }
}
@@ -42,6 +42,7 @@ export declare class FileWriter implements Writer {
42
42
  constructor(options: FileWriterOptions);
43
43
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
44
44
  flush(dataset: Dataset): Promise<void>;
45
+ reset(dataset: Dataset): Promise<void>;
45
46
  getOutputPath(dataset: Dataset): string;
46
47
  getFilename(dataset: Dataset): string;
47
48
  private getFilePath;
@@ -1 +1 @@
1
- {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClC;;;;;;;;OAQG;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,GAAG,CAAC;CACtC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA4B;IACtD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAQhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAiClE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IA8B5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IA4B/B,OAAO,CAAC,YAAY;CAUrB"}
1
+ {"version":3,"file":"fileWriter.d.ts","sourceRoot":"","sources":["../../src/writer/fileWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAMzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAErC,MAAM,WAAW,iBAAiB;IAChC;;OAEG;IACH,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,MAAM,CAAC,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IAC5C;;;OAGG;IACH,oBAAoB,CAAC,EAAE,MAAM,CAAC;IAC9B;;;OAGG;IACH,QAAQ,CAAC,EAAE,MAAM,CAAC,MAAM,EAAE,MAAM,CAAC,CAAC;IAClC;;;;;;;;OAQG;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,GAAG,CAAC;CACtC;AAiBD,qBAAa,UAAW,YAAW,MAAM;IACvC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,QAAQ,CAAC,MAAM,EAAE,QAAQ,GAAG,WAAW,GAAG,SAAS,CAAC;IACpD,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAS;IAC9C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAyB;IACnD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA4B;IACtD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAG1B;gBAEQ,OAAO,EAAE,iBAAiB;IAQhC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAiClE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IA8BtC,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;IAiB5C,aAAa,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAIvC,WAAW,CAAC,OAAO,EAAE,OAAO,GAAG,MAAM;IAQrC,OAAO,CAAC,WAAW;YAIL,iBAAiB;IA4B/B,OAAO,CAAC,YAAY;CAUrB"}
@@ -82,6 +82,22 @@ export class FileWriter {
82
82
  }
83
83
  await rename(entry.tempPath, key);
84
84
  }
85
+ async reset(dataset) {
86
+ const key = this.getFilePath(dataset);
87
+ const entry = this.activeWriters.get(key);
88
+ if (!entry)
89
+ return;
90
+ // Drop the open writer and remove its temp file so the next write starts a
91
+ // fresh file, discarding everything streamed during the previous pass. Await
92
+ // the stream closing before removing: the write stream opens its fd lazily,
93
+ // so a pending open could otherwise recreate the file after rm() ran.
94
+ this.activeWriters.delete(key);
95
+ await new Promise((resolve) => {
96
+ entry.stream.once('close', resolve);
97
+ entry.stream.destroy();
98
+ });
99
+ await rm(entry.tempPath, { force: true, recursive: true });
100
+ }
85
101
  getOutputPath(dataset) {
86
102
  return this.getFilePath(dataset);
87
103
  }
@@ -50,6 +50,7 @@ export declare class SparqlUpdateWriter implements Writer {
50
50
  private readonly clearedGraphs;
51
51
  constructor(options: SparqlWriterOptions);
52
52
  write(dataset: Dataset, quads: AsyncIterable<Quad>): Promise<void>;
53
+ reset(dataset: Dataset): Promise<void>;
53
54
  private clearGraph;
54
55
  private insertBatch;
55
56
  private executeUpdate;
@@ -1 +1 @@
1
- {"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAiB,MAAM,cAAc,CAAC;AACtD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,GAAG,CAAC;CACtC;AAED;;;;;;GAMG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA4B;IACrD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqB;gBAEvC,OAAO,EAAE,mBAAmB;IAQlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;YAc1D,UAAU;YAIV,WAAW;YAOX,aAAa;CAuB5B"}
1
+ {"version":3,"file":"sparqlUpdateWriter.d.ts","sourceRoot":"","sources":["../../src/writer/sparqlUpdateWriter.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAiB,MAAM,cAAc,CAAC;AACtD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC,OAAO,EAAE,MAAM,EAAE,MAAM,aAAa,CAAC;AAGrC,MAAM,WAAW,mBAAmB;IAClC;;OAEG;IACH,QAAQ,EAAE,GAAG,CAAC;IACd;;;OAGG;IACH,IAAI,CAAC,EAAE,MAAM,CAAC;IACd;;;OAGG;IACH,KAAK,CAAC,EAAE,OAAO,UAAU,CAAC,KAAK,CAAC;IAChC;;;;OAIG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;;;OASG;IACH,QAAQ,CAAC,EAAE,CAAC,OAAO,EAAE,OAAO,KAAK,GAAG,CAAC;CACtC;AAED;;;;;;GAMG;AACH,qBAAa,kBAAmB,YAAW,MAAM;IAC/C,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAM;IAC/B,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAC,CAAS;IAC/B,OAAO,CAAC,QAAQ,CAAC,KAAK,CAA0B;IAChD,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAA4B;IACrD,OAAO,CAAC,QAAQ,CAAC,aAAa,CAAqB;gBAEvC,OAAO,EAAE,mBAAmB;IAQlC,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC;IAclE,KAAK,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC;YAM9B,UAAU;YAIV,WAAW;YAOX,aAAa;CAuB5B"}
@@ -33,6 +33,11 @@ export class SparqlUpdateWriter {
33
33
  await this.insertBatch(graphUri, chunk);
34
34
  }
35
35
  }
36
+ async reset(dataset) {
37
+ // Forget the graph’s cleared state so the next write re-issues CLEAR GRAPH,
38
+ // replacing the prior output instead of appending to it.
39
+ this.clearedGraphs.delete(this.graphIri(dataset).toString());
40
+ }
36
41
  async clearGraph(graphUri) {
37
42
  await this.executeUpdate(`CLEAR GRAPH <${graphUri}>`);
38
43
  }
@@ -19,5 +19,17 @@ export interface Writer {
19
19
  * data and release resources.
20
20
  */
21
21
  flush?(dataset: Dataset): Promise<void>;
22
+ /**
23
+ * Discard a dataset’s already-written output so a subsequent run starts from
24
+ * a clean slate. Called by the pipeline before it re-runs all stages against
25
+ * a fallback source (an imported data dump), so endpoint-sourced partial
26
+ * results are not mixed with the dump-sourced re-run.
27
+ *
28
+ * Writers that build a complete replacement per dataset (e.g.
29
+ * {@link SparqlUpdateWriter}, which clears each graph on first write) should
30
+ * implement this to reset that per-dataset state. Writers without
31
+ * replaceable output may omit it; the re-run then appends.
32
+ */
33
+ reset?(dataset: Dataset): Promise<void>;
22
34
  }
23
35
  //# sourceMappingURL=writer.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnE;;;;;;OAMG;IACH,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC"}
1
+ {"version":3,"file":"writer.d.ts","sourceRoot":"","sources":["../../src/writer/writer.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,MAAM,cAAc,CAAC;AACvC,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAEzC;;GAEG;AACH,MAAM,WAAW,MAAM;IACrB;;;;;OAKG;IACH,KAAK,CAAC,OAAO,EAAE,OAAO,EAAE,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAEnE;;;;;;OAMG;IACH,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;IAExC;;;;;;;;;;OAUG;IACH,KAAK,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,OAAO,CAAC,IAAI,CAAC,CAAC;CACzC"}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.31.0",
3
+ "version": "0.31.2",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"