@lde/pipeline 0.28.13 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +27 -1
- package/dist/pipeline.d.ts +2 -0
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +19 -4
- package/dist/progressReporter.d.ts +10 -2
- package/dist/progressReporter.d.ts.map +1 -1
- package/dist/sparql/selector.d.ts +18 -0
- package/dist/sparql/selector.d.ts.map +1 -1
- package/dist/sparql/selector.js +24 -1
- package/package.json +5 -5
package/README.md
CHANGED
|
@@ -74,7 +74,10 @@ Selects resources from the distribution and fans out executor calls per batch of
|
|
|
74
74
|
|
|
75
75
|
```typescript
|
|
76
76
|
interface ItemSelector {
|
|
77
|
-
select(
|
|
77
|
+
select(
|
|
78
|
+
distribution: Distribution,
|
|
79
|
+
batchSize?: number,
|
|
80
|
+
): AsyncIterable<VariableBindings>;
|
|
78
81
|
}
|
|
79
82
|
```
|
|
80
83
|
|
|
@@ -86,6 +89,25 @@ new SparqlItemSelector({
|
|
|
86
89
|
});
|
|
87
90
|
```
|
|
88
91
|
|
|
92
|
+
#### Capping total results with `maxResults`
|
|
93
|
+
|
|
94
|
+
By default, `SparqlItemSelector` paginates through **all** matching rows: any `LIMIT` clause in the query is interpreted as the page size, then it walks pages with `OFFSET` until the source is exhausted. To cap the total bindings yielded across all pages — for sampling, testing, prototyping, or just safety — set `maxResults`:
|
|
95
|
+
|
|
96
|
+
```typescript
|
|
97
|
+
new SparqlItemSelector({
|
|
98
|
+
query: 'SELECT DISTINCT ?s WHERE { ?s a <http://example.com/Class> }',
|
|
99
|
+
maxResults: 50,
|
|
100
|
+
});
|
|
101
|
+
```
|
|
102
|
+
|
|
103
|
+
When `maxResults` is set:
|
|
104
|
+
|
|
105
|
+
- Pagination stops as soon as `maxResults` bindings have been yielded — no wasted page request after the cap is hit.
|
|
106
|
+
- The last (partial) page's `LIMIT` is shrunk to the remaining cap so the endpoint doesn't over-fetch on the remainder (e.g. with `maxResults: 85` and a page size of 10, the 9th page request is `LIMIT 5`, not `LIMIT 10`).
|
|
107
|
+
- The first page uses the configured page size as-is; `maxResults` and page size stay orthogonal. If `maxResults < pageSize`, the first page request may fetch up to `pageSize - maxResults` rows that are never yielded.
|
|
108
|
+
- `maxResults: 0` is a valid no-op; the selector yields nothing without issuing any SPARQL request.
|
|
109
|
+
- `maxResults` is independent of any `LIMIT` clause in the query, which still controls page size when the cap is larger than one page.
|
|
110
|
+
|
|
89
111
|
For dynamic queries that depend on the distribution, implement `ItemSelector` directly:
|
|
90
112
|
|
|
91
113
|
```typescript
|
|
@@ -218,6 +240,10 @@ new Stage({
|
|
|
218
240
|
|
|
219
241
|
`Validator` is an interface, so you can implement your own validation strategy. See [@lde/pipeline-shacl-validator](../pipeline-shacl-validator) for the SHACL implementation.
|
|
220
242
|
|
|
243
|
+
#### Per-dataset reporting
|
|
244
|
+
|
|
245
|
+
After all stages for a dataset have run, the pipeline calls `validator.report(dataset)` once for each distinct validator attached to any stage and emits a `datasetValidated(dataset, report)` event on the reporter. The call happens **regardless of whether any stage actually invoked `validate()`** — for SHACL that means a dataset whose stages produced no input typically reports `quadsValidated: 0` and `conforms: true` (the SHACL vacuous-truth default). Consumers that want to distinguish ‘not tested’ from ‘tested and passed’ can read `quadsValidated`.
|
|
246
|
+
|
|
221
247
|
### Writer
|
|
222
248
|
|
|
223
249
|
Writes generated quads to a destination:
|
package/dist/pipeline.d.ts
CHANGED
|
@@ -35,6 +35,8 @@ export declare class Pipeline {
|
|
|
35
35
|
constructor(options: PipelineOptions);
|
|
36
36
|
run(): Promise<void>;
|
|
37
37
|
private processDataset;
|
|
38
|
+
private reportValidators;
|
|
39
|
+
private collectStages;
|
|
38
40
|
/**
|
|
39
41
|
* Run a stage with reporting and return whether it was supported.
|
|
40
42
|
* Returns `true` if the stage produced results, `false` if NotSupported.
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAG/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAmEd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IAwCtB,2EAA2E;YAC7D,eAAe;YAcf,QAAQ;YAmDP,SAAS;CAczB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -163,12 +163,31 @@ export class Pipeline {
|
|
|
163
163
|
await this.distributionResolver.cleanup?.();
|
|
164
164
|
}
|
|
165
165
|
await this.writer.flush?.(dataset);
|
|
166
|
+
await this.reportValidators(dataset);
|
|
166
167
|
const datasetMemory = process.memoryUsage();
|
|
167
168
|
this.reporter?.datasetComplete?.(dataset, {
|
|
168
169
|
memoryUsageBytes: datasetMemory.rss,
|
|
169
170
|
heapUsedBytes: datasetMemory.heapUsed,
|
|
170
171
|
});
|
|
171
172
|
}
|
|
173
|
+
async reportValidators(dataset) {
|
|
174
|
+
const validators = new Set();
|
|
175
|
+
for (const stage of this.collectStages(this.stages)) {
|
|
176
|
+
if (stage.validator)
|
|
177
|
+
validators.add(stage.validator);
|
|
178
|
+
}
|
|
179
|
+
for (const validator of validators) {
|
|
180
|
+
const report = await validator.report(dataset);
|
|
181
|
+
this.reporter?.datasetValidated?.(dataset, report);
|
|
182
|
+
}
|
|
183
|
+
}
|
|
184
|
+
*collectStages(stages) {
|
|
185
|
+
for (const stage of stages) {
|
|
186
|
+
yield stage;
|
|
187
|
+
if (stage.stages.length > 0)
|
|
188
|
+
yield* this.collectStages(stage.stages);
|
|
189
|
+
}
|
|
190
|
+
}
|
|
172
191
|
/**
|
|
173
192
|
* Run a stage with reporting and return whether it was supported.
|
|
174
193
|
* Returns `true` if the stage produced results, `false` if NotSupported.
|
|
@@ -200,10 +219,6 @@ export class Pipeline {
|
|
|
200
219
|
quadsGenerated,
|
|
201
220
|
duration: Date.now() - stageStart,
|
|
202
221
|
});
|
|
203
|
-
if (stage.validator) {
|
|
204
|
-
const report = await stage.validator.report(dataset);
|
|
205
|
-
this.reporter?.stageValidated?.(stage.name, report);
|
|
206
|
-
}
|
|
207
222
|
return true;
|
|
208
223
|
}
|
|
209
224
|
/** Run a stage in chained mode, throwing if the stage is not supported. */
|
|
@@ -32,9 +32,17 @@ export interface ProgressReporter {
|
|
|
32
32
|
duration: number;
|
|
33
33
|
}): void;
|
|
34
34
|
stageFailed?(stage: string, error: Error): void;
|
|
35
|
-
/** Called after a stage completes if it has a validator. */
|
|
36
|
-
stageValidated?(stage: string, report: ValidationReport): void;
|
|
37
35
|
stageSkipped?(stage: string, reason: string): void;
|
|
36
|
+
/**
|
|
37
|
+
* Called once per (dataset, validator) pair after all stages for a dataset
|
|
38
|
+
* have run. Fires regardless of whether any stage actually invoked
|
|
39
|
+
* `validate()` — the report reflects the validator’s accumulated state.
|
|
40
|
+
* When no stage produced data, the report typically carries
|
|
41
|
+
* `quadsValidated: 0` and `conforms: true` (the SHACL vacuous-truth
|
|
42
|
+
* default); consumers that want to distinguish ‘not tested’ from ‘tested
|
|
43
|
+
* and passed’ can read `quadsValidated`.
|
|
44
|
+
*/
|
|
45
|
+
datasetValidated?(dataset: Dataset, report: ValidationReport): void;
|
|
38
46
|
datasetComplete?(dataset: Dataset, result: {
|
|
39
47
|
memoryUsageBytes: number;
|
|
40
48
|
heapUsedBytes: number;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAEvD,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,8DAA8D;IAC9D,kBAAkB,CAAC,CAAC,MAAM,EAAE,0BAA0B,GAAG,IAAI,CAAC;IAC9D,6CAA6C;IAC7C,aAAa,CAAC,IAAI,IAAI,CAAC;IACvB,kDAAkD;IAClD,YAAY,CAAC,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,
|
|
1
|
+
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAEvD,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,8DAA8D;IAC9D,kBAAkB,CAAC,CAAC,MAAM,EAAE,0BAA0B,GAAG,IAAI,CAAC;IAC9D,6CAA6C;IAC7C,aAAa,CAAC,IAAI,IAAI,CAAC;IACvB,kDAAkD;IAClD,YAAY,CAAC,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD;;;;;;;;OAQG;IACH,gBAAgB,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACpE,eAAe,CAAC,CACd,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,GAC1D,IAAI,CAAC;IACR,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;CACV"}
|
|
@@ -8,8 +8,19 @@ export interface SparqlItemSelectorOptions {
|
|
|
8
8
|
*
|
|
9
9
|
* A `LIMIT` clause in the query overrides the stage's `batchSize` as the
|
|
10
10
|
* page size — use this when the SPARQL endpoint enforces a result limit.
|
|
11
|
+
* It does **not** cap the total number of bindings the selector yields;
|
|
12
|
+
* pagination continues with `OFFSET` until the source is exhausted. Use
|
|
13
|
+
* {@link maxResults} to cap the total.
|
|
11
14
|
*/
|
|
12
15
|
query: string;
|
|
16
|
+
/**
|
|
17
|
+
* Maximum number of bindings the selector yields across all pages.
|
|
18
|
+
* Use this for sampling — “give me at most N items, don’t walk the full
|
|
19
|
+
* source”. Independent of {@link query}’s `LIMIT`, which controls page
|
|
20
|
+
* size. Pagination stops as soon as `maxResults` bindings have been
|
|
21
|
+
* yielded.
|
|
22
|
+
*/
|
|
23
|
+
maxResults?: number;
|
|
13
24
|
/** Custom fetcher instance. */
|
|
14
25
|
fetcher?: SparqlEndpointFetcher;
|
|
15
26
|
}
|
|
@@ -24,10 +35,17 @@ export interface SparqlItemSelectorOptions {
|
|
|
24
35
|
* 1. A `LIMIT` clause in the selector query (for endpoints with hard result limits)
|
|
25
36
|
* 2. The stage's {@link StageOptions.batchSize} (passed via {@link select})
|
|
26
37
|
* 3. A default of 10
|
|
38
|
+
*
|
|
39
|
+
* {@link SparqlItemSelectorOptions.maxResults} is independent of page size:
|
|
40
|
+
* it caps the *total* bindings yielded across pages without changing how
|
|
41
|
+
* the first page is requested. The last (partial) page’s `LIMIT` is
|
|
42
|
+
* shrunk to whatever’s left of the cap so the endpoint doesn’t over-fetch
|
|
43
|
+
* on the remainder.
|
|
27
44
|
*/
|
|
28
45
|
export declare class SparqlItemSelector implements ItemSelector {
|
|
29
46
|
private readonly parsed;
|
|
30
47
|
private readonly queryLimit?;
|
|
48
|
+
private readonly maxResults?;
|
|
31
49
|
private readonly fetcher;
|
|
32
50
|
constructor(options: SparqlItemSelectorOptions);
|
|
33
51
|
select(distribution: Distribution, batchSize?: number): AsyncIterableIterator<VariableBindings>;
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAMtD,MAAM,WAAW,yBAAyB;IACxC
|
|
1
|
+
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAMtD,MAAM,WAAW,yBAAyB;IACxC;;;;;;;;OAQG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,yBAAyB;IAmBvC,MAAM,CACX,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,qBAAqB,CAAC,gBAAgB,CAAC;CAyD3C"}
|
package/dist/sparql/selector.js
CHANGED
|
@@ -16,10 +16,17 @@ const F = new AstFactory();
|
|
|
16
16
|
* 1. A `LIMIT` clause in the selector query (for endpoints with hard result limits)
|
|
17
17
|
* 2. The stage's {@link StageOptions.batchSize} (passed via {@link select})
|
|
18
18
|
* 3. A default of 10
|
|
19
|
+
*
|
|
20
|
+
* {@link SparqlItemSelectorOptions.maxResults} is independent of page size:
|
|
21
|
+
* it caps the *total* bindings yielded across pages without changing how
|
|
22
|
+
* the first page is requested. The last (partial) page’s `LIMIT` is
|
|
23
|
+
* shrunk to whatever’s left of the cap so the endpoint doesn’t over-fetch
|
|
24
|
+
* on the remainder.
|
|
19
25
|
*/
|
|
20
26
|
export class SparqlItemSelector {
|
|
21
27
|
parsed;
|
|
22
28
|
queryLimit;
|
|
29
|
+
maxResults;
|
|
23
30
|
fetcher;
|
|
24
31
|
constructor(options) {
|
|
25
32
|
const parsed = parser.parse(options.query);
|
|
@@ -32,13 +39,24 @@ export class SparqlItemSelector {
|
|
|
32
39
|
}
|
|
33
40
|
this.parsed = parsed;
|
|
34
41
|
this.queryLimit = this.parsed.solutionModifiers.limitOffset?.limit;
|
|
42
|
+
this.maxResults = options.maxResults;
|
|
35
43
|
this.fetcher = options.fetcher ?? new SparqlEndpointFetcher();
|
|
36
44
|
}
|
|
37
45
|
async *select(distribution, batchSize) {
|
|
38
|
-
|
|
46
|
+
if (this.maxResults === 0)
|
|
47
|
+
return;
|
|
48
|
+
const basePageSize = this.queryLimit ?? batchSize ?? 10;
|
|
39
49
|
const endpoint = distribution.accessUrl;
|
|
40
50
|
let offset = 0;
|
|
51
|
+
let totalYielded = 0;
|
|
41
52
|
while (true) {
|
|
53
|
+
const remaining = this.maxResults !== undefined
|
|
54
|
+
? this.maxResults - totalYielded
|
|
55
|
+
: Infinity;
|
|
56
|
+
// The first page uses the configured page size as-is — keeps page-size
|
|
57
|
+
// and total-cap orthogonal. Subsequent pages clamp to `remaining` so
|
|
58
|
+
// the last (partial) page doesn’t over-fetch.
|
|
59
|
+
const effectivePageSize = offset === 0 ? basePageSize : Math.min(basePageSize, remaining);
|
|
42
60
|
this.parsed.solutionModifiers.limitOffset = F.solutionModifierLimitOffset(effectivePageSize, offset, F.gen());
|
|
43
61
|
const paginatedQuery = generator.generate(this.parsed);
|
|
44
62
|
const stream = (await this.fetcher.fetchBindings(endpoint.toString(), paginatedQuery));
|
|
@@ -48,6 +66,11 @@ export class SparqlItemSelector {
|
|
|
48
66
|
if (Object.keys(row).length > 0) {
|
|
49
67
|
yield row;
|
|
50
68
|
count++;
|
|
69
|
+
totalYielded++;
|
|
70
|
+
if (this.maxResults !== undefined &&
|
|
71
|
+
totalYielded >= this.maxResults) {
|
|
72
|
+
return;
|
|
73
|
+
}
|
|
51
74
|
}
|
|
52
75
|
}
|
|
53
76
|
if (count === 0 || count < effectivePageSize) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.
|
|
3
|
+
"version": "0.29.0",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -30,12 +30,12 @@
|
|
|
30
30
|
"@lde/sparql-importer": "0.6.1",
|
|
31
31
|
"@lde/sparql-server": "0.4.11",
|
|
32
32
|
"@rdfjs/types": "^2.0.1",
|
|
33
|
-
"@traqula/generator-sparql-1-1": "^1.
|
|
34
|
-
"@traqula/parser-sparql-1-1": "^1.
|
|
35
|
-
"@traqula/rules-sparql-1-1": "^1.0
|
|
33
|
+
"@traqula/generator-sparql-1-1": "^1.1.1",
|
|
34
|
+
"@traqula/parser-sparql-1-1": "^1.1.1",
|
|
35
|
+
"@traqula/rules-sparql-1-1": "^1.1.0",
|
|
36
36
|
"fetch-sparql-endpoint": "^7.1.0",
|
|
37
37
|
"filenamify-url": "^4.0.0",
|
|
38
|
-
"is-network-error": "^1.3.
|
|
38
|
+
"is-network-error": "^1.3.2",
|
|
39
39
|
"n3": "^2.0.1",
|
|
40
40
|
"p-retry": "^8.0.0",
|
|
41
41
|
"rdf-string": "^2.0.1",
|