@lde/pipeline 0.30.2 → 0.30.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +38 -0
- package/dist/pipeline.d.ts +13 -0
- package/dist/pipeline.d.ts.map +1 -1
- package/dist/pipeline.js +19 -8
- package/dist/progressReporter.d.ts +12 -0
- package/dist/progressReporter.d.ts.map +1 -1
- package/dist/sparql/executor.d.ts +42 -6
- package/dist/sparql/executor.d.ts.map +1 -1
- package/dist/sparql/executor.js +82 -10
- package/dist/sparql/index.d.ts +1 -0
- package/dist/sparql/index.d.ts.map +1 -1
- package/dist/sparql/index.js +1 -0
- package/dist/sparql/selector.d.ts +9 -3
- package/dist/sparql/selector.d.ts.map +1 -1
- package/dist/sparql/selector.js +59 -4
- package/dist/sparql/timeoutPolicy.d.ts +152 -0
- package/dist/sparql/timeoutPolicy.d.ts.map +1 -0
- package/dist/sparql/timeoutPolicy.js +138 -0
- package/dist/stage.d.ts +14 -1
- package/dist/stage.d.ts.map +1 -1
- package/dist/stage.js +11 -5
- package/package.json +2 -2
package/README.md
CHANGED
|
@@ -208,6 +208,44 @@ new Stage({
|
|
|
208
208
|
|
|
209
209
|
This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `VocabularyExecutor` for a real-world example of this pattern.
|
|
210
210
|
|
|
211
|
+
#### Adaptive timeouts
|
|
212
|
+
|
|
213
|
+
By default, every SPARQL request uses the same 5-minute budget. When a pipeline runs against many third-party endpoints, that fixed budget can cost ~80 minutes on a single dataset whose endpoint times out repeatedly on heavy queries — light stages on the same endpoint then sit behind the heavy ones that will never succeed.
|
|
214
|
+
|
|
215
|
+
A `TimeoutPolicy` decides the budget for each SPARQL request and observes the outcome. Two are built in:
|
|
216
|
+
|
|
217
|
+
- **`ConstantTimeoutPolicy(timeoutMs)`** – returns the same budget for every request. The implicit default when `PipelineOptions.timeout` is omitted (`constantTimeoutPolicy(300_000)`).
|
|
218
|
+
- **`AdaptiveTimeoutPolicy({ defaultMs, tightenedMs, tightenAfterTimeouts })`** – per-endpoint state machine. Each endpoint is either _healthy_ (use `defaultMs`) or _tightened_ (use `tightenedMs`). After `tightenAfterTimeouts` consecutive `timeout` outcomes the endpoint flips to _tightened_; a single `ok` flips it back to _healthy_.
|
|
219
|
+
|
|
220
|
+
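`PipelineOptions.timeout` accepts a `() => TimeoutPolicy` factory; the lowercase helpers `constantTimeoutPolicy(…)` and `adaptiveTimeoutPolicy(…)` return such a factory, so their result can be passed directly. The pipeline invokes the factory once per dataset, so policy state resets between datasets and one bad dataset can’t poison the next: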
|
|
221
|
+
|
|
222
|
+
```typescript
|
|
223
|
+
import { adaptiveTimeoutPolicy } from '@lde/pipeline';
|
|
224
|
+
|
|
225
|
+
new Pipeline({
|
|
226
|
+
// …
|
|
227
|
+
timeout: adaptiveTimeoutPolicy({
|
|
228
|
+
defaultMs: 300_000, // 5 min while the endpoint is healthy
|
|
229
|
+
tightenedMs: 10_000, // 10 s once the endpoint is tightened
|
|
230
|
+
tightenAfterTimeouts: 2, // flip to tightened after 2 consecutive timeouts
|
|
231
|
+
}),
|
|
232
|
+
});
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
Outcomes are classified as:
|
|
236
|
+
|
|
237
|
+
| outcome | source |
|
|
238
|
+
| --------- | ------------------------------------------------------------------------ |
|
|
239
|
+
| `ok` | the request resolved |
|
|
240
|
+
| `timeout` | client-side `AbortSignal.timeout()` fired, or upstream returned HTTP 504 |
|
|
241
|
+
| `error` | anything else (other HTTP errors, parse errors, …) – neutral |
|
|
242
|
+
|
|
243
|
+
Transitions are forwarded to the `ProgressReporter` via `timeoutTightened` / `timeoutRelaxed`; `ConsoleReporter` prints them as `↘ Tightened` / `↗ Relaxed` lines so operators can tell a fast-failed stage from an unexpected speedup.
|
|
244
|
+
|
|
245
|
+
Implement `TimeoutPolicy` directly for custom strategies (closing over shared state in the factory if you want it to span datasets).
|
|
246
|
+
|
|
247
|
+
Timeouts live at the pipeline level — neither `SparqlConstructExecutor` nor `SparqlItemSelector` accepts its own `timeout` option. Per-endpoint state belongs in the adaptive policy, and per-stage budgets aren’t supported. Reusable stage facades (`@lde/pipeline-void`, `@lde/pipeline-shacl-sampler`) follow the same convention.
|
|
248
|
+
|
|
211
249
|
### Validation
|
|
212
250
|
|
|
213
251
|
Stages can optionally validate their output quads against a `Validator`. Validation operates on the **combined output of all executors per batch**, not on individual quads or per-executor output. A batch produces a complete result set — a self-contained cluster of linked resources — that can be meaningfully matched against SHACL shapes. Even with a single executor, each batch is a complete unit; with multiple executors, shapes that reference triples from different executors are validated correctly.
|
package/dist/pipeline.d.ts
CHANGED
|
@@ -5,6 +5,7 @@ import type { Writer } from './writer/writer.js';
|
|
|
5
5
|
import { type DistributionResolver } from './distribution/resolver.js';
|
|
6
6
|
import type { StageOutputResolver } from './stageOutputResolver.js';
|
|
7
7
|
import type { ProgressReporter } from './progressReporter.js';
|
|
8
|
+
import { type TimeoutPolicy } from './sparql/timeoutPolicy.js';
|
|
8
9
|
/** Plugin that hooks into pipeline lifecycle events. */
|
|
9
10
|
export interface PipelinePlugin {
|
|
10
11
|
name: string;
|
|
@@ -23,6 +24,17 @@ export interface PipelineOptions {
|
|
|
23
24
|
outputDir: string;
|
|
24
25
|
};
|
|
25
26
|
reporter?: ProgressReporter;
|
|
27
|
+
/**
|
|
28
|
+
* Factory producing a fresh {@link TimeoutPolicy} per dataset. Defaults
|
|
29
|
+
* to {@link constantTimeoutPolicy}`(300_000)` so existing call sites
|
|
30
|
+
* keep today’s 5-minute fixed budget.
|
|
31
|
+
*
|
|
32
|
+
* Use {@link adaptiveTimeoutPolicy} to fast-fail stages on endpoints
|
|
33
|
+
* that have shown a run of consecutive timeouts. State is per
|
|
34
|
+
* {@link TimeoutPolicy} instance, and the Pipeline invokes the factory
|
|
35
|
+
* once per dataset so state resets between datasets.
|
|
36
|
+
*/
|
|
37
|
+
timeout?: () => TimeoutPolicy;
|
|
26
38
|
}
|
|
27
39
|
export declare class Pipeline {
|
|
28
40
|
private readonly name;
|
|
@@ -32,6 +44,7 @@ export declare class Pipeline {
|
|
|
32
44
|
private readonly distributionResolver;
|
|
33
45
|
private readonly chaining?;
|
|
34
46
|
private readonly reporter?;
|
|
47
|
+
private readonly timeoutFactory;
|
|
35
48
|
constructor(options: PipelineOptions);
|
|
36
49
|
run(): Promise<void>;
|
|
37
50
|
private processDataset;
|
package/dist/pipeline.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;
|
|
1
|
+
{"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;gBAEzC,OAAO,EAAE,eAAe;IAkC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAgFd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA2DP,SAAS;CAczB"}
|
package/dist/pipeline.js
CHANGED
|
@@ -5,6 +5,7 @@ import { NoDistributionAvailable, } from './distribution/resolver.js';
|
|
|
5
5
|
import { SparqlDistributionResolver } from './distribution/index.js';
|
|
6
6
|
import { NetworkError, SparqlProbeResult, } from '@lde/distribution-probe';
|
|
7
7
|
import { NotSupported } from './sparql/executor.js';
|
|
8
|
+
import { ConstantTimeoutPolicy, } from './sparql/timeoutPolicy.js';
|
|
8
9
|
/**
|
|
9
10
|
* Split an async iterable into `count` branches that can be consumed
|
|
10
11
|
* independently. Backpressure is enforced by the slowest consumer –
|
|
@@ -79,6 +80,7 @@ export class Pipeline {
|
|
|
79
80
|
distributionResolver;
|
|
80
81
|
chaining;
|
|
81
82
|
reporter;
|
|
83
|
+
timeoutFactory;
|
|
82
84
|
constructor(options) {
|
|
83
85
|
const hasSubStages = options.stages.some((stage) => stage.stages.length > 0);
|
|
84
86
|
if (hasSubStages && !options.chaining) {
|
|
@@ -102,6 +104,8 @@ export class Pipeline {
|
|
|
102
104
|
options.distributionResolver ?? new SparqlDistributionResolver();
|
|
103
105
|
this.chaining = options.chaining;
|
|
104
106
|
this.reporter = options.reporter;
|
|
107
|
+
this.timeoutFactory =
|
|
108
|
+
options.timeout ?? (() => new ConstantTimeoutPolicy(300_000));
|
|
105
109
|
}
|
|
106
110
|
async run() {
|
|
107
111
|
const start = Date.now();
|
|
@@ -121,6 +125,11 @@ export class Pipeline {
|
|
|
121
125
|
}
|
|
122
126
|
async processDataset(dataset) {
|
|
123
127
|
this.reporter?.datasetStart?.(dataset);
|
|
128
|
+
const timeout = this.timeoutFactory();
|
|
129
|
+
const unsubscribe = timeout.subscribe?.({
|
|
130
|
+
onTighten: (event) => this.reporter?.timeoutTightened?.(event),
|
|
131
|
+
onRelax: (event) => this.reporter?.timeoutRelaxed?.(event),
|
|
132
|
+
});
|
|
124
133
|
let resolved;
|
|
125
134
|
try {
|
|
126
135
|
resolved = await this.distributionResolver.resolve(dataset, {
|
|
@@ -148,10 +157,10 @@ export class Pipeline {
|
|
|
148
157
|
for (const stage of this.stages) {
|
|
149
158
|
try {
|
|
150
159
|
if (stage.stages.length > 0) {
|
|
151
|
-
await this.runChain(dataset, resolved.distribution, stage);
|
|
160
|
+
await this.runChain(dataset, resolved.distribution, stage, timeout);
|
|
152
161
|
}
|
|
153
162
|
else {
|
|
154
|
-
await this.runStage(dataset, resolved.distribution, stage);
|
|
163
|
+
await this.runStage(dataset, resolved.distribution, stage, this.writer, timeout);
|
|
155
164
|
}
|
|
156
165
|
}
|
|
157
166
|
catch (error) {
|
|
@@ -161,6 +170,7 @@ export class Pipeline {
|
|
|
161
170
|
}
|
|
162
171
|
finally {
|
|
163
172
|
await this.distributionResolver.cleanup?.();
|
|
173
|
+
unsubscribe?.();
|
|
164
174
|
}
|
|
165
175
|
await this.writer.flush?.(dataset);
|
|
166
176
|
await this.reportValidators(dataset);
|
|
@@ -192,7 +202,7 @@ export class Pipeline {
|
|
|
192
202
|
* Run a stage with reporting and return whether it was supported.
|
|
193
203
|
* Returns `true` if the stage produced results, `false` if NotSupported.
|
|
194
204
|
*/
|
|
195
|
-
async runStage(dataset, distribution, stage, writer = this.writer) {
|
|
205
|
+
async runStage(dataset, distribution, stage, writer = this.writer, timeout) {
|
|
196
206
|
this.reporter?.stageStart?.(stage.name);
|
|
197
207
|
const stageStart = Date.now();
|
|
198
208
|
let itemsProcessed = 0;
|
|
@@ -209,6 +219,7 @@ export class Pipeline {
|
|
|
209
219
|
heapUsedBytes: stageMemory.heapUsed,
|
|
210
220
|
});
|
|
211
221
|
},
|
|
222
|
+
timeout,
|
|
212
223
|
});
|
|
213
224
|
if (result instanceof NotSupported) {
|
|
214
225
|
this.reporter?.stageSkipped?.(stage.name, result.message);
|
|
@@ -222,13 +233,13 @@ export class Pipeline {
|
|
|
222
233
|
return true;
|
|
223
234
|
}
|
|
224
235
|
/** Run a stage in chained mode, throwing if the stage is not supported. */
|
|
225
|
-
async runChainedStage(dataset, distribution, stage, writer) {
|
|
226
|
-
const supported = await this.runStage(dataset, distribution, stage, writer);
|
|
236
|
+
async runChainedStage(dataset, distribution, stage, writer, timeout) {
|
|
237
|
+
const supported = await this.runStage(dataset, distribution, stage, writer, timeout);
|
|
227
238
|
if (!supported) {
|
|
228
239
|
throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
|
|
229
240
|
}
|
|
230
241
|
}
|
|
231
|
-
async runChain(dataset, distribution, stage) {
|
|
242
|
+
async runChain(dataset, distribution, stage, timeout) {
|
|
232
243
|
const { stageOutputResolver, outputDir } = this.chaining;
|
|
233
244
|
const outputFiles = [];
|
|
234
245
|
try {
|
|
@@ -237,7 +248,7 @@ export class Pipeline {
|
|
|
237
248
|
outputDir: `${outputDir}/${stage.name}`,
|
|
238
249
|
format: 'n-triples',
|
|
239
250
|
});
|
|
240
|
-
await this.runChainedStage(dataset, distribution, stage, parentWriter);
|
|
251
|
+
await this.runChainedStage(dataset, distribution, stage, parentWriter, timeout);
|
|
241
252
|
outputFiles.push(parentWriter.getOutputPath(dataset));
|
|
242
253
|
// 2. Chain through children.
|
|
243
254
|
let currentDistribution = await stageOutputResolver.resolve(parentWriter.getOutputPath(dataset));
|
|
@@ -247,7 +258,7 @@ export class Pipeline {
|
|
|
247
258
|
outputDir: `${outputDir}/${child.name}`,
|
|
248
259
|
format: 'n-triples',
|
|
249
260
|
});
|
|
250
|
-
await this.runChainedStage(dataset, currentDistribution, child, childWriter);
|
|
261
|
+
await this.runChainedStage(dataset, currentDistribution, child, childWriter, timeout);
|
|
251
262
|
outputFiles.push(childWriter.getOutputPath(dataset));
|
|
252
263
|
if (i < stage.stages.length - 1) {
|
|
253
264
|
currentDistribution = await stageOutputResolver.resolve(childWriter.getOutputPath(dataset));
|
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import type { Dataset, Distribution } from '@lde/dataset';
|
|
2
2
|
import type { ValidationReport } from './validator.js';
|
|
3
|
+
import type { TimeoutTransitionEvent } from './sparql/timeoutPolicy.js';
|
|
3
4
|
export interface DistributionAnalysisResult {
|
|
4
5
|
distribution: Distribution;
|
|
5
6
|
type: 'sparql' | 'data-dump' | 'network-error';
|
|
@@ -53,5 +54,16 @@ export interface ProgressReporter {
|
|
|
53
54
|
memoryUsageBytes: number;
|
|
54
55
|
heapUsedBytes: number;
|
|
55
56
|
}): void;
|
|
57
|
+
/**
|
|
58
|
+
* Called when a {@link TimeoutPolicy} tightens the budget for an
|
|
59
|
+
* endpoint after a run of consecutive timeouts. Lets operators
|
|
60
|
+
* distinguish a fast-failed stage from an unexpected speedup.
|
|
61
|
+
*/
|
|
62
|
+
timeoutTightened?(event: TimeoutTransitionEvent): void;
|
|
63
|
+
/**
|
|
64
|
+
* Called when a {@link TimeoutPolicy} relaxes the budget back to the
|
|
65
|
+
* default after a successful request on a previously-tightened endpoint.
|
|
66
|
+
*/
|
|
67
|
+
timeoutRelaxed?(event: TimeoutTransitionEvent): void;
|
|
56
68
|
}
|
|
57
69
|
//# sourceMappingURL=progressReporter.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;
|
|
1
|
+
{"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AAExE,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,8DAA8D;IAC9D,kBAAkB,CAAC,CAAC,MAAM,EAAE,0BAA0B,GAAG,IAAI,CAAC;IAC9D,6CAA6C;IAC7C,aAAa,CAAC,IAAI,IAAI,CAAC;IACvB,kDAAkD;IAClD,YAAY,CAAC,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD;;;;;;;;OAQG;IACH,gBAAgB,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACpE,eAAe,CAAC,CACd,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,GAC1D,IAAI,CAAC;IACR,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT;;;;OAIG;IACH,gBAAgB,CAAC,CAAC,KA
AK,EAAE,sBAAsB,GAAG,IAAI,CAAC;IACvD;;;OAGG;IACH,cAAc,CAAC,CAAC,KAAK,EAAE,sBAAsB,GAAG,IAAI,CAAC;CACtD"}
|
|
@@ -2,6 +2,7 @@ import { Dataset, Distribution } from '@lde/dataset';
|
|
|
2
2
|
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
3
3
|
import type { NamedNode, Quad } from '@rdfjs/types';
|
|
4
4
|
import { Transform } from 'node:stream';
|
|
5
|
+
import { type TimeoutPolicy } from './timeoutPolicy.js';
|
|
5
6
|
/**
|
|
6
7
|
* An executor could not run because the dataset lacks a supported distribution.
|
|
7
8
|
*/
|
|
@@ -17,6 +18,16 @@ export interface ExecuteOptions {
|
|
|
17
18
|
* When non-empty, a VALUES block is prepended to the WHERE clause.
|
|
18
19
|
*/
|
|
19
20
|
bindings?: VariableBindings[];
|
|
21
|
+
/**
|
|
22
|
+
* Per-call {@link TimeoutPolicy}. When supplied, the executor calls
|
|
23
|
+
* {@link TimeoutPolicy.beforeRequest} once per attempt (including
|
|
24
|
+
* retries), installs an {@link AbortSignal} with the returned budget,
|
|
25
|
+
* and reports the outcome via {@link TimeoutPolicy.afterRequest}.
|
|
26
|
+
*
|
|
27
|
+
* When omitted, the executor falls back to a constant 5-minute policy.
|
|
28
|
+
* Pipeline runners use this to thread the per-dataset policy through.
|
|
29
|
+
*/
|
|
30
|
+
timeout?: TimeoutPolicy;
|
|
20
31
|
}
|
|
21
32
|
export interface Executor {
|
|
22
33
|
execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
|
|
@@ -29,11 +40,6 @@ export interface SparqlConstructExecutorOptions {
|
|
|
29
40
|
* SPARQL CONSTRUCT query to execute.
|
|
30
41
|
*/
|
|
31
42
|
query: string;
|
|
32
|
-
/**
|
|
33
|
-
* Optional timeout for SPARQL queries in milliseconds.
|
|
34
|
-
* @default 300000 (5 minutes)
|
|
35
|
-
*/
|
|
36
|
-
timeout?: number;
|
|
37
43
|
/**
|
|
38
44
|
* Number of retries for transient errors (network failures and HTTP 502/503/504).
|
|
39
45
|
* @default 3
|
|
@@ -41,6 +47,20 @@ export interface SparqlConstructExecutorOptions {
|
|
|
41
47
|
retries?: number;
|
|
42
48
|
/**
|
|
43
49
|
* Optional custom SparqlEndpointFetcher instance.
|
|
50
|
+
*
|
|
51
|
+
* When supplied, the executor uses this fetcher as-is for every attempt
|
|
52
|
+
* — the per-attempt timeout from the {@link TimeoutPolicy} is **not**
|
|
53
|
+
* enforced (the supplied fetcher’s own `timeout` governs). Policy
|
|
54
|
+
* `beforeRequest`/`afterRequest` hooks still fire so outcome
|
|
55
|
+
* classification works, but adaptive tightening cannot apply.
|
|
56
|
+
*
|
|
57
|
+
* When omitted, the executor builds a fresh
|
|
58
|
+
* {@link SparqlEndpointFetcher} per attempt with the per-attempt timeout
|
|
59
|
+
* baked in.
|
|
60
|
+
*
|
|
61
|
+
* This option is intended for tests (mocking `fetchTriples`) and
|
|
62
|
+
* advanced cases that need full control of the fetcher. Most callers
|
|
63
|
+
* should leave it unset.
|
|
44
64
|
*/
|
|
45
65
|
fetcher?: SparqlEndpointFetcher;
|
|
46
66
|
/**
|
|
@@ -101,7 +121,7 @@ export interface SparqlConstructExecutorOptions {
|
|
|
101
121
|
export declare class SparqlConstructExecutor implements Executor {
|
|
102
122
|
private readonly rawQuery;
|
|
103
123
|
private readonly preParsed?;
|
|
104
|
-
private readonly
|
|
124
|
+
private readonly userFetcher?;
|
|
105
125
|
private readonly retries;
|
|
106
126
|
private readonly lineBuffer;
|
|
107
127
|
private readonly deduplicate;
|
|
@@ -116,6 +136,22 @@ export declare class SparqlConstructExecutor implements Executor {
|
|
|
116
136
|
* @returns AsyncIterable<Quad> stream of results.
|
|
117
137
|
*/
|
|
118
138
|
execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad>>;
|
|
139
|
+
/**
|
|
140
|
+
* Run a single attempt against the endpoint with a per-call abort
|
|
141
|
+
* signal derived from {@link TimeoutPolicy.beforeRequest}. Reports the
|
|
142
|
+
* outcome via {@link TimeoutPolicy.afterRequest} regardless of whether
|
|
143
|
+
* the attempt resolved or threw.
|
|
144
|
+
*/
|
|
145
|
+
private fetchQuadsWithPolicy;
|
|
146
|
+
/**
|
|
147
|
+
* Pick the fetcher to use for a single attempt. A user-supplied fetcher
|
|
148
|
+
* is used as-is and its own timeout governs the request; the per-attempt
|
|
149
|
+
* policy budget is bypassed in that case (see the JSDoc on
|
|
150
|
+
* {@link SparqlConstructExecutorOptions.fetcher}). Otherwise a fresh
|
|
151
|
+
* {@link SparqlEndpointFetcher} is constructed per attempt with the
|
|
152
|
+
* policy-supplied timeout baked in.
|
|
153
|
+
*/
|
|
154
|
+
private fetcherForAttempt;
|
|
119
155
|
/**
|
|
120
156
|
* Fetch quads from the endpoint, optionally line-buffering the response
|
|
121
157
|
* stream before it reaches the N3 parser to work around
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAiB,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAiB,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAGL,KAAK,aAAa,EACnB,MAAM,oBAAoB,CAAC;AAS5B;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAE9B;;;;;;;;OAQG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;;;;;;;;;;;;OAgBG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;IAEhC;;;;;;;;;OASG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IAErB;;;;;;;;;;;;;;OAcG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAwB;IACrD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAU;IACrC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IACtC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAiBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IA4C/B;;;;;OAKG;YACW,oBAAoB;IA+BlC;;;;;;;OAOG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;YACW,UAAU;IAmBxB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,mBAAoB,SAAQ,SAAS;IAChD,OAAO,CAAC,SAAS,CAAM;IAEd,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,MA
AM,EAAE,QAAQ,EAAE,MAAM,IAAI;IAUjE,MAAM,CAAC,QAAQ,EAAE,MAAM,IAAI;CAMrC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAuB,gBAAgB,CACrC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GACzB,aAAa,CAAC,IAAI,CAAC,CASrB"}
|
package/dist/sparql/executor.js
CHANGED
|
@@ -11,6 +11,13 @@ import pRetry from 'p-retry';
|
|
|
11
11
|
import { quadToStringQuad } from 'rdf-string';
|
|
12
12
|
import { withDefaultGraph } from './graph.js';
|
|
13
13
|
import { injectValues } from './values.js';
|
|
14
|
+
import { ConstantTimeoutPolicy, } from './timeoutPolicy.js';
|
|
15
|
+
/**
|
|
16
|
+
* Fallback policy when no per-call `TimeoutPolicy` is supplied via
|
|
17
|
+
* {@link ExecuteOptions.timeout}. Pipeline always supplies one, so this only
|
|
18
|
+
* matters when the executor is driven directly (without a Pipeline).
|
|
19
|
+
*/
|
|
20
|
+
const defaultTimeoutPolicy = new ConstantTimeoutPolicy(300_000);
|
|
14
21
|
/**
|
|
15
22
|
* An executor could not run because the dataset lacks a supported distribution.
|
|
16
23
|
*/
|
|
@@ -50,7 +57,7 @@ export class NotSupported {
|
|
|
50
57
|
export class SparqlConstructExecutor {
|
|
51
58
|
rawQuery;
|
|
52
59
|
preParsed;
|
|
53
|
-
|
|
60
|
+
userFetcher;
|
|
54
61
|
retries;
|
|
55
62
|
lineBuffer;
|
|
56
63
|
deduplicate;
|
|
@@ -67,11 +74,7 @@ export class SparqlConstructExecutor {
|
|
|
67
74
|
}
|
|
68
75
|
this.preParsed = parsed;
|
|
69
76
|
}
|
|
70
|
-
this.
|
|
71
|
-
options.fetcher ??
|
|
72
|
-
new SparqlEndpointFetcher({
|
|
73
|
-
timeout: options.timeout ?? 300_000,
|
|
74
|
-
});
|
|
77
|
+
this.userFetcher = options.fetcher;
|
|
75
78
|
}
|
|
76
79
|
/**
|
|
77
80
|
* Execute the SPARQL CONSTRUCT query against the distribution's endpoint.
|
|
@@ -105,22 +108,65 @@ export class SparqlConstructExecutor {
|
|
|
105
108
|
let query = this.generator.generate(ast);
|
|
106
109
|
assertSafeIri(dataset.iri.toString());
|
|
107
110
|
query = query.replaceAll('?dataset', `<${dataset.iri}>`);
|
|
108
|
-
const
|
|
111
|
+
const policy = options?.timeout ?? defaultTimeoutPolicy;
|
|
112
|
+
const quads = await pRetry(() => this.fetchQuadsWithPolicy(endpoint, query, policy), {
|
|
109
113
|
retries: this.retries,
|
|
110
114
|
shouldRetry: ({ error }) => isTransientError(error),
|
|
111
115
|
});
|
|
112
116
|
return this.deduplicate ? deduplicateQuads(quads) : quads;
|
|
113
117
|
}
|
|
118
|
+
/**
|
|
119
|
+
* Run a single attempt against the endpoint with a per-call abort
|
|
120
|
+
* signal derived from {@link TimeoutPolicy.beforeRequest}. Reports the
|
|
121
|
+
* outcome via {@link TimeoutPolicy.afterRequest} regardless of whether
|
|
122
|
+
* the attempt resolved or threw.
|
|
123
|
+
*/
|
|
124
|
+
async fetchQuadsWithPolicy(endpointUrl, query, policy) {
|
|
125
|
+
const timeoutMs = policy.beforeRequest({ endpoint: endpointUrl });
|
|
126
|
+
const fetcher = this.fetcherForAttempt(timeoutMs);
|
|
127
|
+
const start = Date.now();
|
|
128
|
+
try {
|
|
129
|
+
const quads = await this.fetchQuads(fetcher, endpointUrl.toString(), query);
|
|
130
|
+
policy.afterRequest({
|
|
131
|
+
endpoint: endpointUrl,
|
|
132
|
+
outcome: 'ok',
|
|
133
|
+
durationMs: Date.now() - start,
|
|
134
|
+
});
|
|
135
|
+
return quads;
|
|
136
|
+
}
|
|
137
|
+
catch (error) {
|
|
138
|
+
policy.afterRequest({
|
|
139
|
+
endpoint: endpointUrl,
|
|
140
|
+
outcome: classifyOutcome(error),
|
|
141
|
+
durationMs: Date.now() - start,
|
|
142
|
+
error,
|
|
143
|
+
});
|
|
144
|
+
throw error;
|
|
145
|
+
}
|
|
146
|
+
}
|
|
147
|
+
/**
|
|
148
|
+
* Pick the fetcher to use for a single attempt. A user-supplied fetcher
|
|
149
|
+
* is used as-is and its own timeout governs the request; the per-attempt
|
|
150
|
+
* policy budget is bypassed in that case (see the JSDoc on
|
|
151
|
+
* {@link SparqlConstructExecutorOptions.fetcher}). Otherwise a fresh
|
|
152
|
+
* {@link SparqlEndpointFetcher} is constructed per attempt with the
|
|
153
|
+
* policy-supplied timeout baked in.
|
|
154
|
+
*/
|
|
155
|
+
fetcherForAttempt(timeoutMs) {
|
|
156
|
+
if (this.userFetcher)
|
|
157
|
+
return this.userFetcher;
|
|
158
|
+
return new SparqlEndpointFetcher({ timeout: timeoutMs });
|
|
159
|
+
}
|
|
114
160
|
/**
|
|
115
161
|
* Fetch quads from the endpoint, optionally line-buffering the response
|
|
116
162
|
* stream before it reaches the N3 parser to work around
|
|
117
163
|
* {@link https://github.com/rdfjs/N3.js/issues/578 | N3.js#578}.
|
|
118
164
|
*/
|
|
119
|
-
async fetchQuads(endpoint, query) {
|
|
165
|
+
async fetchQuads(fetcher, endpoint, query) {
|
|
120
166
|
if (!this.lineBuffer) {
|
|
121
|
-
return
|
|
167
|
+
return fetcher.fetchTriples(endpoint, query);
|
|
122
168
|
}
|
|
123
|
-
const [contentType, , responseStream] = await
|
|
169
|
+
const [contentType, , responseStream] = await fetcher.fetchRawStream(endpoint, query, SparqlEndpointFetcher.CONTENTTYPE_TURTLE);
|
|
124
170
|
return responseStream
|
|
125
171
|
.pipe(new LineBufferTransform())
|
|
126
172
|
.pipe(new StreamParser({ format: contentType }));
|
|
@@ -219,3 +265,29 @@ function isTransientError(error) {
|
|
|
219
265
|
const status = Number(match[1]);
|
|
220
266
|
return status === 502 || status === 503 || status === 504;
|
|
221
267
|
}
|
|
268
|
+
/**
 * Classify a fetch error for {@link TimeoutPolicy} reporting.
 *
 * - `AbortError` / `TimeoutError` on the error itself or anywhere in its
 *   `cause` chain → `'timeout'`: our own `AbortSignal.timeout()` fired,
 *   possibly wrapped one or more times before reaching us.
 * - HTTP 504 (status captured from the error message by
 *   `transientStatusPattern`) → `'timeout'`: the upstream reported it ran
 *   out of time. This is the exact failure mode adaptive timeouts exist to
 *   react to.
 * - Anything else, including non-`Error` values → `'error'`: neutral with
 *   respect to tightening.
 *
 * @param error - The value thrown by the failed attempt.
 * @returns `'timeout'` or `'error'`.
 */
function classifyOutcome(error) {
    if (!(error instanceof Error)) {
        return 'error';
    }
    // Walk the whole cause chain rather than a single level; `seen` guards
    // against a cyclic chain looping forever.
    const seen = new Set();
    for (let current = error; current instanceof Error && !seen.has(current); current = current.cause) {
        if (current.name === 'AbortError' || current.name === 'TimeoutError') {
            return 'timeout';
        }
        seen.add(current);
    }
    const match = error.message.match(transientStatusPattern);
    if (match && Number(match[1]) === 504) {
        return 'timeout';
    }
    return 'error';
}
|
package/dist/sparql/index.d.ts
CHANGED
|
@@ -2,4 +2,5 @@ export { deduplicateQuads, SparqlConstructExecutor, LineBufferTransform, NotSupp
|
|
|
2
2
|
export { SparqlItemSelector, type SparqlItemSelectorOptions, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
|
5
|
+
export { AdaptiveTimeoutPolicy, ConstantTimeoutPolicy, adaptiveTimeoutPolicy, constantTimeoutPolicy, type AdaptiveTimeoutPolicyOptions, type AfterRequestContext, type BeforeRequestContext, type TimeoutOutcome, type TimeoutPolicy, type TimeoutPolicyObserver, type TimeoutTransitionEvent, } from './timeoutPolicy.js';
|
|
5
6
|
//# sourceMappingURL=index.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,uBAAuB,EACvB,mBAAmB,EACnB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
|
|
1
|
+
{"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,uBAAuB,EACvB,mBAAmB,EACnB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,OAAO,EACL,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,KAAK,4BAA4B,EACjC,KAAK,mBAAmB,EACxB,KAAK,oBAAoB,EACzB,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,qBAAqB,EAC1B,KAAK,sBAAsB,GAC5B,MAAM,oBAAoB,CAAC"}
|
package/dist/sparql/index.js
CHANGED
|
@@ -2,3 +2,4 @@ export { deduplicateQuads, SparqlConstructExecutor, LineBufferTransform, NotSupp
|
|
|
2
2
|
export { SparqlItemSelector, } from './selector.js';
|
|
3
3
|
export { injectValues } from './values.js';
|
|
4
4
|
export { withDefaultGraph } from './graph.js';
|
|
5
|
+
export { AdaptiveTimeoutPolicy, ConstantTimeoutPolicy, adaptiveTimeoutPolicy, constantTimeoutPolicy, } from './timeoutPolicy.js';
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
import type { Distribution } from '@lde/dataset';
|
|
2
2
|
import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
3
|
-
import type { ItemSelector } from '../stage.js';
|
|
3
|
+
import type { ItemSelector, SelectOptions } from '../stage.js';
|
|
4
4
|
import type { VariableBindings } from './executor.js';
|
|
5
5
|
export interface SparqlItemSelectorOptions {
|
|
6
6
|
/**
|
|
@@ -46,8 +46,14 @@ export declare class SparqlItemSelector implements ItemSelector {
|
|
|
46
46
|
private readonly parsed;
|
|
47
47
|
private readonly queryLimit?;
|
|
48
48
|
private readonly maxResults?;
|
|
49
|
-
private readonly
|
|
49
|
+
private readonly userFetcher?;
|
|
50
50
|
constructor(options: SparqlItemSelectorOptions);
|
|
51
|
-
select(distribution: Distribution, batchSize?: number): AsyncIterableIterator<VariableBindings>;
|
|
51
|
+
select(distribution: Distribution, batchSize?: number, options?: SelectOptions): AsyncIterableIterator<VariableBindings>;
|
|
52
|
+
/**
|
|
53
|
+
* Run a single SPARQL request against the endpoint, threading the
|
|
54
|
+
* per-call timeout from {@link TimeoutPolicy.beforeRequest} and
|
|
55
|
+
* reporting the outcome to {@link TimeoutPolicy.afterRequest}.
|
|
56
|
+
*/
|
|
57
|
+
private fetchBindingsWithPolicy;
|
|
52
58
|
}
|
|
53
59
|
//# sourceMappingURL=selector.d.ts.map
|
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;
|
|
1
|
+
{"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAoBtD,MAAM,WAAW,yBAAyB;IACxC;;;;;;;;OAQG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAwB;gBAEzC,OAAO,EAAE,yBAAyB;IAmBvC,MAAM,CACX,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,qBAAqB,CAAC,gBAAgB,CAAC;IA4D1C;;;;OAIG;YACW,uBAAuB;CA8BtC"}
|
package/dist/sparql/selector.js
CHANGED
|
@@ -2,6 +2,14 @@ import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
|
|
|
2
2
|
import { Parser } from '@traqula/parser-sparql-1-1';
|
|
3
3
|
import { Generator } from '@traqula/generator-sparql-1-1';
|
|
4
4
|
import { AstFactory, } from '@traqula/rules-sparql-1-1';
|
|
5
|
+
import { ConstantTimeoutPolicy, } from './timeoutPolicy.js';
|
|
6
|
+
const transientStatusPattern = /HTTP status (\d+)/;
|
|
7
|
+
/**
|
|
8
|
+
* Fallback policy when no per-call `TimeoutPolicy` is supplied via
|
|
9
|
+
* {@link SelectOptions.timeout}. Pipeline always supplies one, so this only
|
|
10
|
+
* matters when the selector is driven directly (without a Pipeline).
|
|
11
|
+
*/
|
|
12
|
+
const defaultTimeoutPolicy = new ConstantTimeoutPolicy(300_000);
|
|
5
13
|
const parser = new Parser();
|
|
6
14
|
const generator = new Generator();
|
|
7
15
|
const F = new AstFactory();
|
|
@@ -27,7 +35,7 @@ export class SparqlItemSelector {
|
|
|
27
35
|
parsed;
|
|
28
36
|
queryLimit;
|
|
29
37
|
maxResults;
|
|
30
|
-
|
|
38
|
+
userFetcher;
|
|
31
39
|
constructor(options) {
|
|
32
40
|
const parsed = parser.parse(options.query);
|
|
33
41
|
if (parsed.type !== 'query' || parsed.subType !== 'select') {
|
|
@@ -40,13 +48,14 @@ export class SparqlItemSelector {
|
|
|
40
48
|
this.parsed = parsed;
|
|
41
49
|
this.queryLimit = this.parsed.solutionModifiers.limitOffset?.limit;
|
|
42
50
|
this.maxResults = options.maxResults;
|
|
43
|
-
this.
|
|
51
|
+
this.userFetcher = options.fetcher;
|
|
44
52
|
}
|
|
45
|
-
async *select(distribution, batchSize) {
|
|
53
|
+
async *select(distribution, batchSize, options) {
|
|
46
54
|
if (this.maxResults === 0)
|
|
47
55
|
return;
|
|
48
56
|
const basePageSize = this.queryLimit ?? batchSize ?? 10;
|
|
49
57
|
const endpoint = distribution.accessUrl;
|
|
58
|
+
const policy = options?.timeout ?? defaultTimeoutPolicy;
|
|
50
59
|
let offset = 0;
|
|
51
60
|
let totalYielded = 0;
|
|
52
61
|
while (true) {
|
|
@@ -59,7 +68,7 @@ export class SparqlItemSelector {
|
|
|
59
68
|
const effectivePageSize = offset === 0 ? basePageSize : Math.min(basePageSize, remaining);
|
|
60
69
|
this.parsed.solutionModifiers.limitOffset = F.solutionModifierLimitOffset(effectivePageSize, offset, F.gen());
|
|
61
70
|
const paginatedQuery = generator.generate(this.parsed);
|
|
62
|
-
const stream =
|
|
71
|
+
const stream = await this.fetchBindingsWithPolicy(endpoint, paginatedQuery, policy);
|
|
63
72
|
let count = 0;
|
|
64
73
|
for await (const record of stream) {
|
|
65
74
|
const row = Object.fromEntries(Object.entries(record).filter(([, term]) => term.termType === 'NamedNode'));
|
|
@@ -79,6 +88,52 @@ export class SparqlItemSelector {
|
|
|
79
88
|
offset += count;
|
|
80
89
|
}
|
|
81
90
|
}
|
|
91
|
+
/**
|
|
92
|
+
* Run a single SPARQL request against the endpoint, threading the
|
|
93
|
+
* per-call timeout from {@link TimeoutPolicy.beforeRequest} and
|
|
94
|
+
* reporting the outcome to {@link TimeoutPolicy.afterRequest}.
|
|
95
|
+
*/
|
|
96
|
+
async fetchBindingsWithPolicy(endpoint, paginatedQuery, policy) {
|
|
97
|
+
const timeoutMs = policy.beforeRequest({ endpoint });
|
|
98
|
+
const fetcher = this.userFetcher ?? new SparqlEndpointFetcher({ timeout: timeoutMs });
|
|
99
|
+
const start = Date.now();
|
|
100
|
+
try {
|
|
101
|
+
const stream = (await fetcher.fetchBindings(endpoint.toString(), paginatedQuery));
|
|
102
|
+
policy.afterRequest({
|
|
103
|
+
endpoint,
|
|
104
|
+
outcome: 'ok',
|
|
105
|
+
durationMs: Date.now() - start,
|
|
106
|
+
});
|
|
107
|
+
return stream;
|
|
108
|
+
}
|
|
109
|
+
catch (error) {
|
|
110
|
+
policy.afterRequest({
|
|
111
|
+
endpoint,
|
|
112
|
+
outcome: classifyOutcome(error),
|
|
113
|
+
durationMs: Date.now() - start,
|
|
114
|
+
error,
|
|
115
|
+
});
|
|
116
|
+
throw error;
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
}
|
|
120
|
+
function classifyOutcome(error) {
|
|
121
|
+
if (error instanceof Error) {
|
|
122
|
+
if (error.name === 'AbortError' || error.name === 'TimeoutError') {
|
|
123
|
+
return 'timeout';
|
|
124
|
+
}
|
|
125
|
+
if (error.cause instanceof Error) {
|
|
126
|
+
if (error.cause.name === 'AbortError' ||
|
|
127
|
+
error.cause.name === 'TimeoutError') {
|
|
128
|
+
return 'timeout';
|
|
129
|
+
}
|
|
130
|
+
}
|
|
131
|
+
const match = error.message.match(transientStatusPattern);
|
|
132
|
+
if (match && Number(match[1]) === 504) {
|
|
133
|
+
return 'timeout';
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
return 'error';
|
|
82
137
|
}
|
|
83
138
|
function isVariableTerm(v) {
|
|
84
139
|
return ('type' in v &&
|
|
@@ -0,0 +1,152 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Outcome of a single SPARQL request attempt, as reported back to a
|
|
3
|
+
* {@link TimeoutPolicy} so it can adapt the budget for subsequent requests.
|
|
4
|
+
*
|
|
5
|
+
* - `ok` — the request resolved successfully (the HTTP response was accepted
|
|
6
|
+
* and the body started streaming).
|
|
7
|
+
* - `timeout` — the per-call {@link AbortSignal} fired, or the endpoint
|
|
8
|
+
* returned an HTTP 504 (upstream-reported timeout). Both are semantically
|
|
9
|
+
* ‘the endpoint did not deliver in time’.
|
|
10
|
+
* - `error` — any other failure (HTTP errors other than 504, parser errors, etc.).
|
|
11
|
+
* Neutral with respect to adaptive tightening.
|
|
12
|
+
*/
|
|
13
|
+
export type TimeoutOutcome = 'ok' | 'error' | 'timeout';
|
|
14
|
+
/** Context passed to {@link TimeoutPolicy.beforeRequest}. */
|
|
15
|
+
export interface BeforeRequestContext {
|
|
16
|
+
/** Endpoint URL the upcoming request will be sent to. */
|
|
17
|
+
endpoint: URL;
|
|
18
|
+
}
|
|
19
|
+
/** Context passed to {@link TimeoutPolicy.afterRequest}. */
|
|
20
|
+
export interface AfterRequestContext {
|
|
21
|
+
/** Endpoint URL the request was sent to. */
|
|
22
|
+
endpoint: URL;
|
|
23
|
+
/** Classified outcome of the request. */
|
|
24
|
+
outcome: TimeoutOutcome;
|
|
25
|
+
/** Wall-clock duration of the request attempt, in milliseconds. */
|
|
26
|
+
durationMs: number;
|
|
27
|
+
/** The raw error, when {@link outcome} is `'error'` or `'timeout'`. */
|
|
28
|
+
error?: unknown;
|
|
29
|
+
}
|
|
30
|
+
/**
|
|
31
|
+
* Decides the timeout budget for each SPARQL request and observes the
|
|
32
|
+
* outcome. Implementations are free to adapt the budget based on recent
|
|
33
|
+
* behaviour — see {@link AdaptiveTimeoutPolicy} for the built-in adaptive
|
|
34
|
+
* implementation, and {@link ConstantTimeoutPolicy} for fixed-budget
|
|
35
|
+
* behaviour.
|
|
36
|
+
*
|
|
37
|
+
* Hooks are synchronous because they sit on the request hot path; async
|
|
38
|
+
* work is not supported.
|
|
39
|
+
*/
|
|
40
|
+
export interface TimeoutPolicy {
|
|
41
|
+
/**
|
|
42
|
+
* Returns the timeout (in milliseconds) to apply to the upcoming request.
|
|
43
|
+
* Called once per attempt — including retried attempts inside
|
|
44
|
+
* `p-retry`, so a retry can already use a tightened budget.
|
|
45
|
+
*/
|
|
46
|
+
beforeRequest(context: BeforeRequestContext): number;
|
|
47
|
+
/**
|
|
48
|
+
* Reports the outcome of the request that {@link beforeRequest} budgeted.
|
|
49
|
+
* Called once per attempt, regardless of outcome.
|
|
50
|
+
*/
|
|
51
|
+
afterRequest(context: AfterRequestContext): void;
|
|
52
|
+
/**
|
|
53
|
+
* Optional observer subscription for state transitions. Returns an
|
|
54
|
+
* `unsubscribe` function. Policies that don’t transition (e.g. constant)
|
|
55
|
+
* may omit this hook.
|
|
56
|
+
*/
|
|
57
|
+
subscribe?(observer: TimeoutPolicyObserver): () => void;
|
|
58
|
+
}
|
|
59
|
+
/** A single tighten/relax transition for one endpoint. */
|
|
60
|
+
export interface TimeoutTransitionEvent {
|
|
61
|
+
/** Endpoint whose timeout budget changed. */
|
|
62
|
+
endpoint: URL;
|
|
63
|
+
/** Budget in effect before the transition. */
|
|
64
|
+
fromTimeoutMs: number;
|
|
65
|
+
/** Budget in effect after the transition. */
|
|
66
|
+
toTimeoutMs: number;
|
|
67
|
+
/**
|
|
68
|
+
* Number of consecutive timeouts observed at the moment of the
|
|
69
|
+
* transition. For a `relax` event, this is the run that ended in the
|
|
70
|
+
* `ok` that triggered relaxation.
|
|
71
|
+
*/
|
|
72
|
+
consecutiveTimeouts: number;
|
|
73
|
+
}
|
|
74
|
+
/**
|
|
75
|
+
* Observer that receives notifications when a policy tightens or relaxes
|
|
76
|
+
* its budget for an endpoint. Both hooks are optional.
|
|
77
|
+
*/
|
|
78
|
+
export interface TimeoutPolicyObserver {
|
|
79
|
+
/** Called when the policy flips an endpoint to the tightened budget. */
|
|
80
|
+
onTighten?(event: TimeoutTransitionEvent): void;
|
|
81
|
+
/** Called when the policy relaxes an endpoint back to the default budget. */
|
|
82
|
+
onRelax?(event: TimeoutTransitionEvent): void;
|
|
83
|
+
}
|
|
84
|
+
/**
|
|
85
|
+
* Returns the same timeout for every request. Use this as the
|
|
86
|
+
* backwards-compatible default for callers that don’t want adaptive
|
|
87
|
+
* behaviour.
|
|
88
|
+
*/
|
|
89
|
+
export declare class ConstantTimeoutPolicy implements TimeoutPolicy {
|
|
90
|
+
private readonly timeoutMs;
|
|
91
|
+
constructor(timeoutMs: number);
|
|
92
|
+
beforeRequest(_context: BeforeRequestContext): number;
|
|
93
|
+
afterRequest(_context: AfterRequestContext): void;
|
|
94
|
+
}
|
|
95
|
+
/** Options for {@link AdaptiveTimeoutPolicy}. */
|
|
96
|
+
export interface AdaptiveTimeoutPolicyOptions {
|
|
97
|
+
/** Budget applied while the endpoint is healthy. Must be positive. */
|
|
98
|
+
defaultMs: number;
|
|
99
|
+
/**
|
|
100
|
+
* Budget applied after {@link tightenAfterTimeouts} consecutive timeouts.
|
|
101
|
+
* Must satisfy `tightenedMs < defaultMs`.
|
|
102
|
+
*/
|
|
103
|
+
tightenedMs: number;
|
|
104
|
+
/**
|
|
105
|
+
* Number of consecutive timeouts that flips an endpoint to the
|
|
106
|
+
* {@link tightenedMs} budget. Must be an integer ≥ 1.
|
|
107
|
+
*/
|
|
108
|
+
tightenAfterTimeouts: number;
|
|
109
|
+
}
|
|
110
|
+
/**
|
|
111
|
+
* Adaptive per-endpoint policy: after {@link AdaptiveTimeoutPolicyOptions.tightenAfterTimeouts}
|
|
112
|
+
* consecutive timeouts on the same endpoint, subsequent requests use the
|
|
113
|
+
* tightened budget so the pipeline fast-fails instead of waiting out the
|
|
114
|
+
* full default budget. A single successful request relaxes the endpoint
|
|
115
|
+
* back to the default budget.
|
|
116
|
+
*
|
|
117
|
+
* State is in-memory and tied to the policy instance — Pipeline creates a
|
|
118
|
+
* fresh instance per dataset so one offending dataset doesn’t poison the
|
|
119
|
+
* next.
|
|
120
|
+
*
|
|
121
|
+
* @example
|
|
122
|
+
* ```ts
|
|
123
|
+
* const factory = adaptiveTimeoutPolicy({
|
|
124
|
+
* defaultMs: 300_000,
|
|
125
|
+
* tightenedMs: 10_000,
|
|
126
|
+
* tightenAfterTimeouts: 2,
|
|
127
|
+
* });
|
|
128
|
+
* ```
|
|
129
|
+
*/
|
|
130
|
+
export declare class AdaptiveTimeoutPolicy implements TimeoutPolicy {
|
|
131
|
+
private readonly options;
|
|
132
|
+
private readonly states;
|
|
133
|
+
private readonly observers;
|
|
134
|
+
constructor(options: AdaptiveTimeoutPolicyOptions);
|
|
135
|
+
beforeRequest(context: BeforeRequestContext): number;
|
|
136
|
+
afterRequest(context: AfterRequestContext): void;
|
|
137
|
+
subscribe(observer: TimeoutPolicyObserver): () => void;
|
|
138
|
+
private stateFor;
|
|
139
|
+
private notify;
|
|
140
|
+
}
|
|
141
|
+
/**
|
|
142
|
+
* Factory returning a fresh {@link ConstantTimeoutPolicy} on every call.
|
|
143
|
+
* Pass this to {@link PipelineOptions.timeout}.
|
|
144
|
+
*/
|
|
145
|
+
export declare function constantTimeoutPolicy(timeoutMs: number): () => ConstantTimeoutPolicy;
|
|
146
|
+
/**
|
|
147
|
+
* Factory returning a fresh {@link AdaptiveTimeoutPolicy} on every call.
|
|
148
|
+
* Pass this to {@link PipelineOptions.timeout}; the Pipeline invokes
|
|
149
|
+
* the factory once per dataset so state resets between datasets.
|
|
150
|
+
*/
|
|
151
|
+
export declare function adaptiveTimeoutPolicy(options: AdaptiveTimeoutPolicyOptions): () => AdaptiveTimeoutPolicy;
|
|
152
|
+
//# sourceMappingURL=timeoutPolicy.d.ts.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"file":"timeoutPolicy.d.ts","sourceRoot":"","sources":["../../src/sparql/timeoutPolicy.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,MAAM,MAAM,cAAc,GAAG,IAAI,GAAG,OAAO,GAAG,SAAS,CAAC;AAExD,6DAA6D;AAC7D,MAAM,WAAW,oBAAoB;IACnC,yDAAyD;IACzD,QAAQ,EAAE,GAAG,CAAC;CACf;AAED,4DAA4D;AAC5D,MAAM,WAAW,mBAAmB;IAClC,4CAA4C;IAC5C,QAAQ,EAAE,GAAG,CAAC;IACd,yCAAyC;IACzC,OAAO,EAAE,cAAc,CAAC;IACxB,mEAAmE;IACnE,UAAU,EAAE,MAAM,CAAC;IACnB,uEAAuE;IACvE,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,aAAa;IAC5B;;;;OAIG;IACH,aAAa,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAAC;IACrD;;;OAGG;IACH,YAAY,CAAC,OAAO,EAAE,mBAAmB,GAAG,IAAI,CAAC;IACjD;;;;OAIG;IACH,SAAS,CAAC,CAAC,QAAQ,EAAE,qBAAqB,GAAG,MAAM,IAAI,CAAC;CACzD;AAED,0DAA0D;AAC1D,MAAM,WAAW,sBAAsB;IACrC,6CAA6C;IAC7C,QAAQ,EAAE,GAAG,CAAC;IACd,8CAA8C;IAC9C,aAAa,EAAE,MAAM,CAAC;IACtB,6CAA6C;IAC7C,WAAW,EAAE,MAAM,CAAC;IACpB;;;;OAIG;IACH,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,wEAAwE;IACxE,SAAS,CAAC,CAAC,KAAK,EAAE,sBAAsB,GAAG,IAAI,CAAC;IAChD,6EAA6E;IAC7E,OAAO,CAAC,CAAC,KAAK,EAAE,sBAAsB,GAAG,IAAI,CAAC;CAC/C;AAED;;;;GAIG;AACH,qBAAa,qBAAsB,YAAW,aAAa;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IAQ9C,aAAa,CAAC,QAAQ,EAAE,oBAAoB,GAAG,MAAM;IAIrD,YAAY,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI;CAGlD;AAED,iDAAiD;AACjD,MAAM,WAAW,4BAA4B;IAC3C,sEAAsE;IACtE,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;;OAGG;IACH,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAQD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,qBAAsB,YAAW,aAAa;IAI7C,OAAO,CAAC,QAAQ,CAAC,OAAO;IAHpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAoC;IAC3D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoC;gBAEjC,OAAO,EAAE,4BAA4B;IA0BlE,aAAa,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM;IAKpD,YAAY,CAAC,OAAO,EAAE,mBAAmB,GAAG,IAAI;IAmChD,SAAS,CAAC,QAAQ,EAAE,qBAAqB,GAAG,MAAM,IAAI;IAOtD,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,MAAM;CAUf;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CACnC,SAAS,EAAE,MAAM,GAChB,MAAM,qBAAqB,CAM7B;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,4BAA4B,GACpC,MAAM,qBAAqB,CAK7B"}
|
|
@@ -0,0 +1,138 @@
|
|
|
1
|
+
/**
|
|
2
|
+
* Returns the same timeout for every request. Use this as the
|
|
3
|
+
* backwards-compatible default for callers that don’t want adaptive
|
|
4
|
+
* behaviour.
|
|
5
|
+
*/
|
|
6
|
+
export class ConstantTimeoutPolicy {
|
|
7
|
+
timeoutMs;
|
|
8
|
+
constructor(timeoutMs) {
|
|
9
|
+
this.timeoutMs = timeoutMs;
|
|
10
|
+
if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
|
|
11
|
+
throw new Error(`ConstantTimeoutPolicy: timeoutMs must be a positive finite number, received ${timeoutMs}`);
|
|
12
|
+
}
|
|
13
|
+
}
|
|
14
|
+
beforeRequest(_context) {
|
|
15
|
+
return this.timeoutMs;
|
|
16
|
+
}
|
|
17
|
+
afterRequest(_context) {
|
|
18
|
+
// Constant policy is stateless — outcomes never affect future budgets.
|
|
19
|
+
}
|
|
20
|
+
}
|
|
21
|
+
/**
|
|
22
|
+
* Adaptive per-endpoint policy: after {@link AdaptiveTimeoutPolicyOptions.tightenAfterTimeouts}
|
|
23
|
+
* consecutive timeouts on the same endpoint, subsequent requests use the
|
|
24
|
+
* tightened budget so the pipeline fast-fails instead of waiting out the
|
|
25
|
+
* full default budget. A single successful request relaxes the endpoint
|
|
26
|
+
* back to the default budget.
|
|
27
|
+
*
|
|
28
|
+
* State is in-memory and tied to the policy instance — Pipeline creates a
|
|
29
|
+
* fresh instance per dataset so one offending dataset doesn’t poison the
|
|
30
|
+
* next.
|
|
31
|
+
*
|
|
32
|
+
* @example
|
|
33
|
+
* ```ts
|
|
34
|
+
* const factory = adaptiveTimeoutPolicy({
|
|
35
|
+
* defaultMs: 300_000,
|
|
36
|
+
* tightenedMs: 10_000,
|
|
37
|
+
* tightenAfterTimeouts: 2,
|
|
38
|
+
* });
|
|
39
|
+
* ```
|
|
40
|
+
*/
|
|
41
|
+
export class AdaptiveTimeoutPolicy {
|
|
42
|
+
options;
|
|
43
|
+
states = new Map();
|
|
44
|
+
observers = new Set();
|
|
45
|
+
constructor(options) {
|
|
46
|
+
this.options = options;
|
|
47
|
+
if (!Number.isFinite(options.defaultMs) || options.defaultMs <= 0) {
|
|
48
|
+
throw new Error(`AdaptiveTimeoutPolicy: \`defaultMs\` must be a positive finite number, received ${options.defaultMs}`);
|
|
49
|
+
}
|
|
50
|
+
if (!Number.isFinite(options.tightenedMs) || options.tightenedMs <= 0) {
|
|
51
|
+
throw new Error(`AdaptiveTimeoutPolicy: \`tightenedMs\` must be a positive finite number, received ${options.tightenedMs}`);
|
|
52
|
+
}
|
|
53
|
+
if (!(options.tightenedMs < options.defaultMs)) {
|
|
54
|
+
throw new Error(`AdaptiveTimeoutPolicy: \`tightenedMs\` (${options.tightenedMs}) must be less than \`defaultMs\` (${options.defaultMs})`);
|
|
55
|
+
}
|
|
56
|
+
if (!Number.isInteger(options.tightenAfterTimeouts) ||
|
|
57
|
+
options.tightenAfterTimeouts < 1) {
|
|
58
|
+
throw new Error(`AdaptiveTimeoutPolicy: \`tightenAfterTimeouts\` must be an integer ≥ 1, received ${options.tightenAfterTimeouts}`);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
beforeRequest(context) {
|
|
62
|
+
const state = this.stateFor(context.endpoint);
|
|
63
|
+
return state.tightened ? this.options.tightenedMs : this.options.defaultMs;
|
|
64
|
+
}
|
|
65
|
+
afterRequest(context) {
|
|
66
|
+
const state = this.stateFor(context.endpoint);
|
|
67
|
+
if (context.outcome === 'ok') {
|
|
68
|
+
const wasTightened = state.tightened;
|
|
69
|
+
const priorCount = state.consecutiveTimeouts;
|
|
70
|
+
state.consecutiveTimeouts = 0;
|
|
71
|
+
state.tightened = false;
|
|
72
|
+
if (wasTightened) {
|
|
73
|
+
this.notify('relax', {
|
|
74
|
+
endpoint: context.endpoint,
|
|
75
|
+
fromTimeoutMs: this.options.tightenedMs,
|
|
76
|
+
toTimeoutMs: this.options.defaultMs,
|
|
77
|
+
consecutiveTimeouts: priorCount,
|
|
78
|
+
});
|
|
79
|
+
}
|
|
80
|
+
return;
|
|
81
|
+
}
|
|
82
|
+
if (context.outcome === 'timeout') {
|
|
83
|
+
state.consecutiveTimeouts += 1;
|
|
84
|
+
if (!state.tightened &&
|
|
85
|
+
state.consecutiveTimeouts >= this.options.tightenAfterTimeouts) {
|
|
86
|
+
state.tightened = true;
|
|
87
|
+
this.notify('tighten', {
|
|
88
|
+
endpoint: context.endpoint,
|
|
89
|
+
fromTimeoutMs: this.options.defaultMs,
|
|
90
|
+
toTimeoutMs: this.options.tightenedMs,
|
|
91
|
+
consecutiveTimeouts: state.consecutiveTimeouts,
|
|
92
|
+
});
|
|
93
|
+
}
|
|
94
|
+
}
|
|
95
|
+
// 'error' is neutral.
|
|
96
|
+
}
|
|
97
|
+
subscribe(observer) {
|
|
98
|
+
this.observers.add(observer);
|
|
99
|
+
return () => {
|
|
100
|
+
this.observers.delete(observer);
|
|
101
|
+
};
|
|
102
|
+
}
|
|
103
|
+
stateFor(endpoint) {
|
|
104
|
+
const key = endpoint.toString();
|
|
105
|
+
let state = this.states.get(key);
|
|
106
|
+
if (!state) {
|
|
107
|
+
state = { tightened: false, consecutiveTimeouts: 0 };
|
|
108
|
+
this.states.set(key, state);
|
|
109
|
+
}
|
|
110
|
+
return state;
|
|
111
|
+
}
|
|
112
|
+
notify(kind, event) {
|
|
113
|
+
for (const observer of this.observers) {
|
|
114
|
+
const handler = kind === 'tighten' ? observer.onTighten : observer.onRelax;
|
|
115
|
+
handler?.(event);
|
|
116
|
+
}
|
|
117
|
+
}
|
|
118
|
+
}
|
|
119
|
+
/**
|
|
120
|
+
* Factory returning a fresh {@link ConstantTimeoutPolicy} on every call.
|
|
121
|
+
* Pass this to {@link PipelineOptions.timeout}.
|
|
122
|
+
*/
|
|
123
|
+
export function constantTimeoutPolicy(timeoutMs) {
|
|
124
|
+
// Validate eagerly so misconfiguration is caught at factory creation,
|
|
125
|
+
// not deferred until the first dataset boundary.
|
|
126
|
+
new ConstantTimeoutPolicy(timeoutMs);
|
|
127
|
+
return () => new ConstantTimeoutPolicy(timeoutMs);
|
|
128
|
+
}
|
|
129
|
+
/**
|
|
130
|
+
* Factory returning a fresh {@link AdaptiveTimeoutPolicy} on every call.
|
|
131
|
+
* Pass this to {@link PipelineOptions.timeout}; the Pipeline invokes
|
|
132
|
+
* the factory once per dataset so state resets between datasets.
|
|
133
|
+
*/
|
|
134
|
+
export function adaptiveTimeoutPolicy(options) {
|
|
135
|
+
// Validate eagerly (see {@link constantTimeoutPolicy}).
|
|
136
|
+
new AdaptiveTimeoutPolicy(options);
|
|
137
|
+
return () => new AdaptiveTimeoutPolicy(options);
|
|
138
|
+
}
|
package/dist/stage.d.ts
CHANGED
|
@@ -2,6 +2,7 @@ import { Dataset, Distribution } from '@lde/dataset';
|
|
|
2
2
|
import type { Quad } from '@rdfjs/types';
|
|
3
3
|
import type { Executor, VariableBindings } from './sparql/executor.js';
|
|
4
4
|
import { NotSupported } from './sparql/executor.js';
|
|
5
|
+
import type { TimeoutPolicy } from './sparql/timeoutPolicy.js';
|
|
5
6
|
import type { Validator } from './validator.js';
|
|
6
7
|
import type { Writer } from './writer/writer.js';
|
|
7
8
|
/** Transforms a quad stream, optionally using dataset metadata. */
|
|
@@ -40,6 +41,18 @@ export interface StageOptions {
|
|
|
40
41
|
}
|
|
41
42
|
export interface RunOptions {
|
|
42
43
|
onProgress?: (itemsProcessed: number, quadsGenerated: number) => void;
|
|
44
|
+
/**
|
|
45
|
+
* Per-dataset {@link TimeoutPolicy} threaded through to executors and
|
|
46
|
+
* item selectors. The Pipeline owns lifecycle (factory invocation per
|
|
47
|
+
* dataset), so a single policy instance covers all stages and child
|
|
48
|
+
* stages within one dataset.
|
|
49
|
+
*/
|
|
50
|
+
timeout?: TimeoutPolicy;
|
|
51
|
+
}
|
|
52
|
+
/** Options accepted by {@link ItemSelector.select}. */
|
|
53
|
+
export interface SelectOptions {
|
|
54
|
+
/** Per-call timeout policy. */
|
|
55
|
+
timeout?: TimeoutPolicy;
|
|
43
56
|
}
|
|
44
57
|
export declare class Stage {
|
|
45
58
|
readonly name: string;
|
|
@@ -63,6 +76,6 @@ export declare class Stage {
|
|
|
63
76
|
}
|
|
64
77
|
/** Selects items (as variable bindings) for executors to process. Pagination is an implementation detail. */
|
|
65
78
|
export interface ItemSelector {
|
|
66
|
-
select(distribution: Distribution, batchSize?: number): AsyncIterable<VariableBindings>;
|
|
79
|
+
select(distribution: Distribution, batchSize?: number, options?: SelectOptions): AsyncIterable<VariableBindings>;
|
|
67
80
|
}
|
|
68
81
|
//# sourceMappingURL=stage.d.ts.map
|
package/dist/stage.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;
|
|
1
|
+
{"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAqDjB,eAAe;IAwJ7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAwBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
|
package/dist/stage.js
CHANGED
|
@@ -25,10 +25,13 @@ export class Stage {
|
|
|
25
25
|
return this.validation?.validator;
|
|
26
26
|
}
|
|
27
27
|
async run(dataset, distribution, writer, options) {
|
|
28
|
+
const timeout = options?.timeout;
|
|
28
29
|
if (this.itemSelector) {
|
|
29
|
-
return this.runWithSelector(this.itemSelector.select(distribution, this.batchSize
|
|
30
|
+
return this.runWithSelector(this.itemSelector.select(distribution, this.batchSize, {
|
|
31
|
+
timeout,
|
|
32
|
+
}), dataset, distribution, writer, options);
|
|
30
33
|
}
|
|
31
|
-
const streams = await this.executeAll(dataset, distribution);
|
|
34
|
+
const streams = await this.executeAll(dataset, distribution, timeout);
|
|
32
35
|
if (streams instanceof NotSupported) {
|
|
33
36
|
return streams;
|
|
34
37
|
}
|
|
@@ -116,7 +119,10 @@ export class Stage {
|
|
|
116
119
|
track((async () => {
|
|
117
120
|
// Run all executors for this batch in parallel.
|
|
118
121
|
const executorOutputs = await Promise.all(this.executors.map(async (executor) => {
|
|
119
|
-
const result = await executor.execute(dataset, distribution, {
|
|
122
|
+
const result = await executor.execute(dataset, distribution, {
|
|
123
|
+
bindings,
|
|
124
|
+
timeout: options?.timeout,
|
|
125
|
+
});
|
|
120
126
|
if (result instanceof NotSupported)
|
|
121
127
|
return [];
|
|
122
128
|
hasResults = true;
|
|
@@ -197,8 +203,8 @@ export class Stage {
|
|
|
197
203
|
// 'skip': discard
|
|
198
204
|
return [];
|
|
199
205
|
}
|
|
200
|
-
async executeAll(dataset, distribution) {
|
|
201
|
-
const results = await Promise.all(this.executors.map((executor) => executor.execute(dataset, distribution)));
|
|
206
|
+
async executeAll(dataset, distribution, timeout) {
|
|
207
|
+
const results = await Promise.all(this.executors.map((executor) => executor.execute(dataset, distribution, { timeout })));
|
|
202
208
|
const streams = [];
|
|
203
209
|
for (const result of results) {
|
|
204
210
|
if (!(result instanceof NotSupported)) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/pipeline",
|
|
3
|
-
"version": "0.30.2",
|
|
3
|
+
"version": "0.30.4",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/pipeline"
|
|
@@ -26,7 +26,7 @@
|
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.4",
|
|
28
28
|
"@lde/dataset-registry-client": "0.8.0",
|
|
29
|
-
"@lde/distribution-probe": "0.1.
|
|
29
|
+
"@lde/distribution-probe": "0.1.5",
|
|
30
30
|
"@lde/sparql-importer": "0.6.2",
|
|
31
31
|
"@lde/sparql-server": "0.4.11",
|
|
32
32
|
"@rdfjs/types": "^2.0.1",
|