@lde/pipeline 0.28.13 → 0.29.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -74,7 +74,10 @@ Selects resources from the distribution and fans out executor calls per batch of
74
74
 
75
75
  ```typescript
76
76
  interface ItemSelector {
77
- select(distribution: Distribution, batchSize?: number): AsyncIterable<VariableBindings>;
77
+ select(
78
+ distribution: Distribution,
79
+ batchSize?: number,
80
+ ): AsyncIterable<VariableBindings>;
78
81
  }
79
82
  ```
80
83
 
@@ -86,6 +89,25 @@ new SparqlItemSelector({
86
89
  });
87
90
  ```
88
91
 
92
+ #### Capping total results with `maxResults`
93
+
94
+ By default, `SparqlItemSelector` paginates through **all** matching rows: any `LIMIT` clause in the query is interpreted as the page size, and the selector then walks pages with `OFFSET` until the source is exhausted. To cap the total bindings yielded across all pages — for sampling, testing, prototyping, or just safety — set `maxResults`:
95
+
96
+ ```typescript
97
+ new SparqlItemSelector({
98
+ query: 'SELECT DISTINCT ?s WHERE { ?s a <http://example.com/Class> }',
99
+ maxResults: 50,
100
+ });
101
+ ```
102
+
103
+ When `maxResults` is set:
104
+
105
+ - Pagination stops as soon as `maxResults` bindings have been yielded — no wasted page request after the cap is hit.
106
+ - The last (partial) page's `LIMIT` is shrunk to the remaining cap so the endpoint doesn't over-fetch on the remainder (e.g. with `maxResults: 85` and `pageSize: 10`, the 9th page request is `LIMIT 5`, not `LIMIT 10`).
107
+ - The first page uses the configured page size as-is; `maxResults` and page size stay orthogonal. If `maxResults < pageSize`, the first page may still fetch up to `pageSize - maxResults` rows that are never yielded.
108
+ - `maxResults: 0` is a valid no-op; the selector yields nothing without issuing any SPARQL request.
109
+ - `maxResults` is independent of any `LIMIT` clause in the query: the `LIMIT` still sets the page size, while `maxResults` only caps the total number of bindings yielded across pages.
110
+
89
111
  For dynamic queries that depend on the distribution, implement `ItemSelector` directly:
90
112
 
91
113
  ```typescript
@@ -218,6 +240,10 @@ new Stage({
218
240
 
219
241
  `Validator` is an interface, so you can implement your own validation strategy. See [@lde/pipeline-shacl-validator](../pipeline-shacl-validator) for the SHACL implementation.
220
242
 
243
+ #### Per-dataset reporting
244
+
245
+ After all stages for a dataset have run, the pipeline calls `validator.report(dataset)` once for each distinct validator attached to any stage (including nested sub-stages) and emits one `datasetValidated(dataset, report)` event on the reporter per validator. The call happens **regardless of whether any stage actually invoked `validate()`** — for SHACL that means a dataset whose stages produced no input typically reports `quadsValidated: 0` and `conforms: true` (the SHACL vacuous-truth default). Consumers that want to distinguish ‘not tested’ from ‘tested and passed’ can read `quadsValidated`.
246
+
221
247
  ### Writer
222
248
 
223
249
  Writes generated quads to a destination:
@@ -35,6 +35,8 @@ export declare class Pipeline {
35
35
  constructor(options: PipelineOptions);
36
36
  run(): Promise<void>;
37
37
  private processDataset;
38
+ private reportValidators;
39
+ private collectStages;
38
40
  /**
39
41
  * Run a stage with reporting and return whether it was supported.
40
42
  * Returns `true` if the stage produced results, `false` if NotSupported.
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;IAkE5B;;;OAGG;YACW,QAAQ;IA6CtB,2EAA2E;YAC7D,eAAe;YAcf,QAAQ;YAmDP,SAAS;CAczB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAG/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAmEd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IAwCtB,2EAA2E;YAC7D,eAAe;YAcf,QAAQ;YAmDP,SAAS;CAczB"}
package/dist/pipeline.js CHANGED
@@ -163,12 +163,31 @@ export class Pipeline {
163
163
  await this.distributionResolver.cleanup?.();
164
164
  }
165
165
  await this.writer.flush?.(dataset);
166
+ await this.reportValidators(dataset);
166
167
  const datasetMemory = process.memoryUsage();
167
168
  this.reporter?.datasetComplete?.(dataset, {
168
169
  memoryUsageBytes: datasetMemory.rss,
169
170
  heapUsedBytes: datasetMemory.heapUsed,
170
171
  });
171
172
  }
173
+ async reportValidators(dataset) {
174
+ const validators = new Set();
175
+ for (const stage of this.collectStages(this.stages)) {
176
+ if (stage.validator)
177
+ validators.add(stage.validator);
178
+ }
179
+ for (const validator of validators) {
180
+ const report = await validator.report(dataset);
181
+ this.reporter?.datasetValidated?.(dataset, report);
182
+ }
183
+ }
184
+ *collectStages(stages) {
185
+ for (const stage of stages) {
186
+ yield stage;
187
+ if (stage.stages.length > 0)
188
+ yield* this.collectStages(stage.stages);
189
+ }
190
+ }
172
191
  /**
173
192
  * Run a stage with reporting and return whether it was supported.
174
193
  * Returns `true` if the stage produced results, `false` if NotSupported.
@@ -200,10 +219,6 @@ export class Pipeline {
200
219
  quadsGenerated,
201
220
  duration: Date.now() - stageStart,
202
221
  });
203
- if (stage.validator) {
204
- const report = await stage.validator.report(dataset);
205
- this.reporter?.stageValidated?.(stage.name, report);
206
- }
207
222
  return true;
208
223
  }
209
224
  /** Run a stage in chained mode, throwing if the stage is not supported. */
@@ -32,9 +32,17 @@ export interface ProgressReporter {
32
32
  duration: number;
33
33
  }): void;
34
34
  stageFailed?(stage: string, error: Error): void;
35
- /** Called after a stage completes if it has a validator. */
36
- stageValidated?(stage: string, report: ValidationReport): void;
37
35
  stageSkipped?(stage: string, reason: string): void;
36
+ /**
37
+ * Called once per (dataset, validator) pair after all stages for a dataset
38
+ * have run. Fires regardless of whether any stage actually invoked
39
+ * `validate()` — the report reflects the validator’s accumulated state.
40
+ * When no stage produced data, the report typically carries
41
+ * `quadsValidated: 0` and `conforms: true` (the SHACL vacuous-truth
42
+ * default); consumers that want to distinguish ‘not tested’ from ‘tested
43
+ * and passed’ can read `quadsValidated`.
44
+ */
45
+ datasetValidated?(dataset: Dataset, report: ValidationReport): void;
38
46
  datasetComplete?(dataset: Dataset, result: {
39
47
  memoryUsageBytes: number;
40
48
  heapUsedBytes: number;
@@ -1 +1 @@
1
- {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAEvD,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,8DAA8D;IAC9D,kBAAkB,CAAC,CAAC,MAAM,EAAE,0BAA0B,GAAG,IAAI,CAAC;IAC9D,6CAA6C;IAC7C,aAAa,CAAC,IAAI,IAAI,CAAC;IACvB,kDAAkD;IAClD,YAAY,CAAC,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,4DAA4D;IAC5D,cAAc,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IAC/D,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD,eAAe,CAAC,CACd,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,GAC1D,IAAI,CAAC;IACR,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;CACV"}
1
+ {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAEvD,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,8DAA8D;IAC9D,kBAAkB,CAAC,CAAC,MAAM,EAAE,0BAA0B,GAAG,IAAI,CAAC;IAC9D,6CAA6C;IAC7C,aAAa,CAAC,IAAI,IAAI,CAAC;IACvB,kDAAkD;IAClD,YAAY,CAAC,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD;;;;;;;;OAQG;IACH,gBAAgB,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACpE,eAAe,CAAC,CACd,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,GAC1D,IAAI,CAAC;IACR,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;CACV"}
@@ -8,8 +8,19 @@ export interface SparqlItemSelectorOptions {
8
8
  *
9
9
  * A `LIMIT` clause in the query overrides the stage's `batchSize` as the
10
10
  * page size — use this when the SPARQL endpoint enforces a result limit.
11
+ * It does **not** cap the total number of bindings the selector yields;
12
+ * pagination continues with `OFFSET` until the source is exhausted. Use
13
+ * {@link maxResults} to cap the total.
11
14
  */
12
15
  query: string;
16
+ /**
17
+ * Maximum number of bindings the selector yields across all pages.
18
+ * Use this for sampling — “give me at most N items, don’t walk the full
19
+ * source”. Independent of {@link query}’s `LIMIT`, which controls page
20
+ * size. Pagination stops as soon as `maxResults` bindings have been
21
+ * yielded.
22
+ */
23
+ maxResults?: number;
13
24
  /** Custom fetcher instance. */
14
25
  fetcher?: SparqlEndpointFetcher;
15
26
  }
@@ -24,10 +35,17 @@ export interface SparqlItemSelectorOptions {
24
35
  * 1. A `LIMIT` clause in the selector query (for endpoints with hard result limits)
25
36
  * 2. The stage's {@link StageOptions.batchSize} (passed via {@link select})
26
37
  * 3. A default of 10
38
+ *
39
+ * {@link SparqlItemSelectorOptions.maxResults} is independent of page size:
40
+ * it caps the *total* bindings yielded across pages without changing how
41
+ * the first page is requested. The last (partial) page’s `LIMIT` is
42
+ * shrunk to whatever’s left of the cap so the endpoint doesn’t over-fetch
43
+ * on the remainder.
27
44
  */
28
45
  export declare class SparqlItemSelector implements ItemSelector {
29
46
  private readonly parsed;
30
47
  private readonly queryLimit?;
48
+ private readonly maxResults?;
31
49
  private readonly fetcher;
32
50
  constructor(options: SparqlItemSelectorOptions);
33
51
  select(distribution: Distribution, batchSize?: number): AsyncIterableIterator<VariableBindings>;
@@ -1 +1 @@
1
- {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAMtD,MAAM,WAAW,yBAAyB;IACxC;;;;;OAKG;IACH,KAAK,EAAE,MAAM,CAAC;IACd,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;GAWG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,yBAAyB;IAkBvC,MAAM,CACX,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,qBAAqB,CAAC,gBAAgB,CAAC;CAuC3C"}
1
+ {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAMtD,MAAM,WAAW,yBAAyB;IACxC;;;;;;;;OAQG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,yBAAyB;IAmBvC,MAAM,CACX,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,qBAAqB,CAAC,gBAAgB,CAAC;CAyD3C"}
@@ -16,10 +16,17 @@ const F = new AstFactory();
16
16
  * 1. A `LIMIT` clause in the selector query (for endpoints with hard result limits)
17
17
  * 2. The stage's {@link StageOptions.batchSize} (passed via {@link select})
18
18
  * 3. A default of 10
19
+ *
20
+ * {@link SparqlItemSelectorOptions.maxResults} is independent of page size:
21
+ * it caps the *total* bindings yielded across pages without changing how
22
+ * the first page is requested. The last (partial) page’s `LIMIT` is
23
+ * shrunk to whatever’s left of the cap so the endpoint doesn’t over-fetch
24
+ * on the remainder.
19
25
  */
20
26
  export class SparqlItemSelector {
21
27
  parsed;
22
28
  queryLimit;
29
+ maxResults;
23
30
  fetcher;
24
31
  constructor(options) {
25
32
  const parsed = parser.parse(options.query);
@@ -32,13 +39,24 @@ export class SparqlItemSelector {
32
39
  }
33
40
  this.parsed = parsed;
34
41
  this.queryLimit = this.parsed.solutionModifiers.limitOffset?.limit;
42
+ this.maxResults = options.maxResults;
35
43
  this.fetcher = options.fetcher ?? new SparqlEndpointFetcher();
36
44
  }
37
45
  async *select(distribution, batchSize) {
38
- const effectivePageSize = this.queryLimit ?? batchSize ?? 10;
46
+ if (this.maxResults === 0)
47
+ return;
48
+ const basePageSize = this.queryLimit ?? batchSize ?? 10;
39
49
  const endpoint = distribution.accessUrl;
40
50
  let offset = 0;
51
+ let totalYielded = 0;
41
52
  while (true) {
53
+ const remaining = this.maxResults !== undefined
54
+ ? this.maxResults - totalYielded
55
+ : Infinity;
56
+ // The first page uses the configured page size as-is — keeps page-size
57
+ // and total-cap orthogonal. Subsequent pages clamp to `remaining` so
58
+ // the last (partial) page doesn’t over-fetch.
59
+ const effectivePageSize = offset === 0 ? basePageSize : Math.min(basePageSize, remaining);
42
60
  this.parsed.solutionModifiers.limitOffset = F.solutionModifierLimitOffset(effectivePageSize, offset, F.gen());
43
61
  const paginatedQuery = generator.generate(this.parsed);
44
62
  const stream = (await this.fetcher.fetchBindings(endpoint.toString(), paginatedQuery));
@@ -48,6 +66,11 @@ export class SparqlItemSelector {
48
66
  if (Object.keys(row).length > 0) {
49
67
  yield row;
50
68
  count++;
69
+ totalYielded++;
70
+ if (this.maxResults !== undefined &&
71
+ totalYielded >= this.maxResults) {
72
+ return;
73
+ }
51
74
  }
52
75
  }
53
76
  if (count === 0 || count < effectivePageSize) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.28.13",
3
+ "version": "0.29.0",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"
@@ -30,12 +30,12 @@
30
30
  "@lde/sparql-importer": "0.6.1",
31
31
  "@lde/sparql-server": "0.4.11",
32
32
  "@rdfjs/types": "^2.0.1",
33
- "@traqula/generator-sparql-1-1": "^1.0.7",
34
- "@traqula/parser-sparql-1-1": "^1.0.4",
35
- "@traqula/rules-sparql-1-1": "^1.0.4",
33
+ "@traqula/generator-sparql-1-1": "^1.1.1",
34
+ "@traqula/parser-sparql-1-1": "^1.1.1",
35
+ "@traqula/rules-sparql-1-1": "^1.1.0",
36
36
  "fetch-sparql-endpoint": "^7.1.0",
37
37
  "filenamify-url": "^4.0.0",
38
- "is-network-error": "^1.3.1",
38
+ "is-network-error": "^1.3.2",
39
39
  "n3": "^2.0.1",
40
40
  "p-retry": "^8.0.0",
41
41
  "rdf-string": "^2.0.1",