@lde/pipeline 0.30.1 → 0.30.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -208,6 +208,44 @@ new Stage({
208
208
 
209
209
  This keeps SPARQL doing the heavy lifting while TypeScript handles the edge cases. See [@lde/pipeline-void](../pipeline-void)'s `VocabularyExecutor` for a real-world example of this pattern.
210
210
 
211
+ #### Adaptive timeouts
212
+
213
+ By default, every SPARQL request uses the same 5-minute budget. When a pipeline runs against many third-party endpoints, that fixed budget can cost ~80 minutes on a single dataset whose endpoint times out repeatedly on heavy queries — light stages on the same endpoint then sit behind the heavy ones that will never succeed.
214
+
215
+ A `TimeoutPolicy` decides the budget for each SPARQL request and observes the outcome. Two are built in:
216
+
217
+ - **`ConstantTimeoutPolicy(timeoutMs)`** – returns the same budget for every request. This is the implicit default when `PipelineOptions.timeout` is omitted (equivalent to passing `constantTimeoutPolicy(300_000)`, i.e. 5 minutes).
218
+ - **`AdaptiveTimeoutPolicy({ defaultMs, tightenedMs, tightenAfterTimeouts })`** – per-endpoint state machine. Each endpoint is either _healthy_ (use `defaultMs`) or _tightened_ (use `tightenedMs`). After `tightenAfterTimeouts` consecutive `timeout` outcomes the endpoint flips to _tightened_; a single `ok` flips it back to _healthy_.
219
+
220
+ `PipelineOptions.timeout` accepts a `() => TimeoutPolicy` factory. The pipeline invokes it once per dataset, so policy state resets between datasets and one bad dataset can’t poison the next:
221
+
222
+ ```typescript
223
+ import { adaptiveTimeoutPolicy } from '@lde/pipeline';
224
+
225
+ new Pipeline({
226
+ // …
227
+ timeout: adaptiveTimeoutPolicy({
228
+ defaultMs: 300_000, // 5 min while the endpoint is healthy
229
+ tightenedMs: 10_000, // 10 s once the endpoint is tightened
230
+ tightenAfterTimeouts: 2, // flip to tightened after 2 consecutive timeouts
231
+ }),
232
+ });
233
+ ```
234
+
235
+ Outcomes are classified as:
236
+
237
+ | outcome | source |
238
+ | --------- | ------------------------------------------------------------------------ |
239
+ | `ok` | the request resolved |
240
+ | `timeout` | client-side `AbortSignal.timeout()` fired, or upstream returned HTTP 504 |
241
+ | `error` | anything else (other HTTP errors, parse errors, …) – neutral |
242
+
243
+ Transitions are forwarded to the `ProgressReporter` via `timeoutTightened` / `timeoutRelaxed`; `ConsoleReporter` prints them as `↘ Tightened` / `↗ Relaxed` lines so operators can tell a fast-failed stage from an unexpected speedup.
244
+
245
+ Implement `TimeoutPolicy` directly for custom strategies (closing over shared state in the factory if you want it to span datasets).
246
+
247
+ Timeouts live at the pipeline level — neither `SparqlConstructExecutor` nor `SparqlItemSelector` accepts its own `timeout` option. Per-endpoint state belongs in the adaptive policy, and per-stage budgets aren’t supported. Reusable stage facades (`@lde/pipeline-void`, `@lde/pipeline-shacl-sampler`) follow the same convention.
248
+
211
249
  ### Validation
212
250
 
213
251
  Stages can optionally validate their output quads against a `Validator`. Validation operates on the **combined output of all executors per batch**, not on individual quads or per-executor output. A batch produces a complete result set — a self-contained cluster of linked resources — that can be meaningfully matched against SHACL shapes. Even with a single executor, each batch is a complete unit; with multiple executors, shapes that reference triples from different executors are validated correctly.
@@ -5,6 +5,7 @@ import type { Writer } from './writer/writer.js';
5
5
  import { type DistributionResolver } from './distribution/resolver.js';
6
6
  import type { StageOutputResolver } from './stageOutputResolver.js';
7
7
  import type { ProgressReporter } from './progressReporter.js';
8
+ import { type TimeoutPolicy } from './sparql/timeoutPolicy.js';
8
9
  /** Plugin that hooks into pipeline lifecycle events. */
9
10
  export interface PipelinePlugin {
10
11
  name: string;
@@ -23,6 +24,17 @@ export interface PipelineOptions {
23
24
  outputDir: string;
24
25
  };
25
26
  reporter?: ProgressReporter;
27
+ /**
28
+ * Factory producing a fresh {@link TimeoutPolicy} per dataset. Defaults
29
+ * to {@link constantTimeoutPolicy}`(300_000)` so existing call sites
30
+ * keep today’s 5-minute fixed budget.
31
+ *
32
+ * Use {@link adaptiveTimeoutPolicy} to fast-fail stages on endpoints
33
+ * that have shown a run of consecutive timeouts. State is per
34
+ * {@link TimeoutPolicy} instance, and the Pipeline invokes the factory
35
+ * once per dataset so state resets between datasets.
36
+ */
37
+ timeout?: () => TimeoutPolicy;
26
38
  }
27
39
  export declare class Pipeline {
28
40
  private readonly name;
@@ -32,6 +44,7 @@ export declare class Pipeline {
32
44
  private readonly distributionResolver;
33
45
  private readonly chaining?;
34
46
  private readonly reporter?;
47
+ private readonly timeoutFactory;
35
48
  constructor(options: PipelineOptions);
36
49
  run(): Promise<void>;
37
50
  private processDataset;
@@ -1 +1 @@
1
- {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAG/B,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;CAC7B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;gBAEjC,OAAO,EAAE,eAAe;IAgC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAmEd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IAwCtB,2EAA2E;YAC7D,eAAe;YAcf,QAAQ;YAmDP,SAAS;CAczB"}
1
+ {"version":3,"file":"pipeline.d.ts","sourceRoot":"","sources":["../src/pipeline.ts"],"names":[],"mappings":"AAIA,OAAO,KAAK,EAAE,eAAe,EAAE,MAAM,eAAe,CAAC;AACrD,OAAO,EAAE,KAAK,EAAE,MAAM,YAAY,CAAC;AACnC,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,YAAY,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAEjD,OAAO,EACL,KAAK,oBAAoB,EAE1B,MAAM,4BAA4B,CAAC;AAQpC,OAAO,KAAK,EAAE,mBAAmB,EAAE,MAAM,0BAA0B,CAAC;AACpE,OAAO,KAAK,EAEV,gBAAgB,EACjB,MAAM,uBAAuB,CAAC;AAE/B,OAAO,EAEL,KAAK,aAAa,EACnB,MAAM,2BAA2B,CAAC;AAEnC,wDAAwD;AACxD,MAAM,WAAW,cAAc;IAC7B,IAAI,EAAE,MAAM,CAAC;IACb,gDAAgD;IAChD,gBAAgB,CAAC,EAAE,aAAa,CAAC;CAClC;AAED,MAAM,WAAW,eAAe;IAC9B,eAAe,EAAE,eAAe,CAAC;IACjC,MAAM,EAAE,KAAK,EAAE,CAAC;IAChB,OAAO,EAAE,MAAM,GAAG,MAAM,EAAE,CAAC;IAC3B,OAAO,CAAC,EAAE,cAAc,EAAE,CAAC;IAC3B,IAAI,CAAC,EAAE,MAAM,CAAC;IACd,oBAAoB,CAAC,EAAE,oBAAoB,CAAC;IAC5C,QAAQ,CAAC,EAAE;QACT,mBAAmB,EAAE,mBAAmB,CAAC;QACzC,SAAS,EAAE,MAAM,CAAC;KACnB,CAAC;IACF,QAAQ,CAAC,EAAE,gBAAgB,CAAC;IAC5B;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,aAAa,CAAC;CAC/B;AAgFD,qBAAa,QAAQ;IACnB,OAAO,CAAC,QAAQ,CAAC,IAAI,CAAS;IAC9B,OAAO,CAAC,QAAQ,CAAC,eAAe,CAAkB;IAClD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAU;IACjC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAS;IAChC,OAAO,CAAC,QAAQ,CAAC,oBAAoB,CAAuB;IAC5D,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAA8B;IACxD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAC,CAAmB;IAC7C,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAsB;gBAEzC,OAAO,EAAE,eAAe;IAkC9B,GAAG,IAAI,OAAO,CAAC,IAAI,CAAC;YAoBZ,cAAc;YAgFd,gBAAgB;IAW9B,OAAO,CAAE,aAAa;IAOtB;;;OAGG;YACW,QAAQ;IA0CtB,2EAA2E;YAC7D,eAAe;YAqBf,QAAQ;YA2DP,SAAS;CAczB"}
package/dist/pipeline.js CHANGED
@@ -5,6 +5,7 @@ import { NoDistributionAvailable, } from './distribution/resolver.js';
5
5
  import { SparqlDistributionResolver } from './distribution/index.js';
6
6
  import { NetworkError, SparqlProbeResult, } from '@lde/distribution-probe';
7
7
  import { NotSupported } from './sparql/executor.js';
8
+ import { ConstantTimeoutPolicy, } from './sparql/timeoutPolicy.js';
8
9
  /**
9
10
  * Split an async iterable into `count` branches that can be consumed
10
11
  * independently. Backpressure is enforced by the slowest consumer –
@@ -79,6 +80,7 @@ export class Pipeline {
79
80
  distributionResolver;
80
81
  chaining;
81
82
  reporter;
83
+ timeoutFactory;
82
84
  constructor(options) {
83
85
  const hasSubStages = options.stages.some((stage) => stage.stages.length > 0);
84
86
  if (hasSubStages && !options.chaining) {
@@ -102,6 +104,8 @@ export class Pipeline {
102
104
  options.distributionResolver ?? new SparqlDistributionResolver();
103
105
  this.chaining = options.chaining;
104
106
  this.reporter = options.reporter;
107
+ this.timeoutFactory =
108
+ options.timeout ?? (() => new ConstantTimeoutPolicy(300_000));
105
109
  }
106
110
  async run() {
107
111
  const start = Date.now();
@@ -121,6 +125,11 @@ export class Pipeline {
121
125
  }
122
126
  async processDataset(dataset) {
123
127
  this.reporter?.datasetStart?.(dataset);
128
+ const timeout = this.timeoutFactory();
129
+ const unsubscribe = timeout.subscribe?.({
130
+ onTighten: (event) => this.reporter?.timeoutTightened?.(event),
131
+ onRelax: (event) => this.reporter?.timeoutRelaxed?.(event),
132
+ });
124
133
  let resolved;
125
134
  try {
126
135
  resolved = await this.distributionResolver.resolve(dataset, {
@@ -148,10 +157,10 @@ export class Pipeline {
148
157
  for (const stage of this.stages) {
149
158
  try {
150
159
  if (stage.stages.length > 0) {
151
- await this.runChain(dataset, resolved.distribution, stage);
160
+ await this.runChain(dataset, resolved.distribution, stage, timeout);
152
161
  }
153
162
  else {
154
- await this.runStage(dataset, resolved.distribution, stage);
163
+ await this.runStage(dataset, resolved.distribution, stage, this.writer, timeout);
155
164
  }
156
165
  }
157
166
  catch (error) {
@@ -161,6 +170,7 @@ export class Pipeline {
161
170
  }
162
171
  finally {
163
172
  await this.distributionResolver.cleanup?.();
173
+ unsubscribe?.();
164
174
  }
165
175
  await this.writer.flush?.(dataset);
166
176
  await this.reportValidators(dataset);
@@ -192,7 +202,7 @@ export class Pipeline {
192
202
  * Run a stage with reporting and return whether it was supported.
193
203
  * Returns `true` if the stage produced results, `false` if NotSupported.
194
204
  */
195
- async runStage(dataset, distribution, stage, writer = this.writer) {
205
+ async runStage(dataset, distribution, stage, writer = this.writer, timeout) {
196
206
  this.reporter?.stageStart?.(stage.name);
197
207
  const stageStart = Date.now();
198
208
  let itemsProcessed = 0;
@@ -209,6 +219,7 @@ export class Pipeline {
209
219
  heapUsedBytes: stageMemory.heapUsed,
210
220
  });
211
221
  },
222
+ timeout,
212
223
  });
213
224
  if (result instanceof NotSupported) {
214
225
  this.reporter?.stageSkipped?.(stage.name, result.message);
@@ -222,13 +233,13 @@ export class Pipeline {
222
233
  return true;
223
234
  }
224
235
  /** Run a stage in chained mode, throwing if the stage is not supported. */
225
- async runChainedStage(dataset, distribution, stage, writer) {
226
- const supported = await this.runStage(dataset, distribution, stage, writer);
236
+ async runChainedStage(dataset, distribution, stage, writer, timeout) {
237
+ const supported = await this.runStage(dataset, distribution, stage, writer, timeout);
227
238
  if (!supported) {
228
239
  throw new Error(`Stage '${stage.name}' returned NotSupported in chained mode`);
229
240
  }
230
241
  }
231
- async runChain(dataset, distribution, stage) {
242
+ async runChain(dataset, distribution, stage, timeout) {
232
243
  const { stageOutputResolver, outputDir } = this.chaining;
233
244
  const outputFiles = [];
234
245
  try {
@@ -237,7 +248,7 @@ export class Pipeline {
237
248
  outputDir: `${outputDir}/${stage.name}`,
238
249
  format: 'n-triples',
239
250
  });
240
- await this.runChainedStage(dataset, distribution, stage, parentWriter);
251
+ await this.runChainedStage(dataset, distribution, stage, parentWriter, timeout);
241
252
  outputFiles.push(parentWriter.getOutputPath(dataset));
242
253
  // 2. Chain through children.
243
254
  let currentDistribution = await stageOutputResolver.resolve(parentWriter.getOutputPath(dataset));
@@ -247,7 +258,7 @@ export class Pipeline {
247
258
  outputDir: `${outputDir}/${child.name}`,
248
259
  format: 'n-triples',
249
260
  });
250
- await this.runChainedStage(dataset, currentDistribution, child, childWriter);
261
+ await this.runChainedStage(dataset, currentDistribution, child, childWriter, timeout);
251
262
  outputFiles.push(childWriter.getOutputPath(dataset));
252
263
  if (i < stage.stages.length - 1) {
253
264
  currentDistribution = await stageOutputResolver.resolve(childWriter.getOutputPath(dataset));
@@ -1,5 +1,6 @@
1
1
  import type { Dataset, Distribution } from '@lde/dataset';
2
2
  import type { ValidationReport } from './validator.js';
3
+ import type { TimeoutTransitionEvent } from './sparql/timeoutPolicy.js';
3
4
  export interface DistributionAnalysisResult {
4
5
  distribution: Distribution;
5
6
  type: 'sparql' | 'data-dump' | 'network-error';
@@ -53,5 +54,16 @@ export interface ProgressReporter {
53
54
  memoryUsageBytes: number;
54
55
  heapUsedBytes: number;
55
56
  }): void;
57
+ /**
58
+ * Called when a {@link TimeoutPolicy} tightens the budget for an
59
+ * endpoint after a run of consecutive timeouts. Lets operators
60
+ * distinguish a fast-failed stage from an unexpected speedup.
61
+ */
62
+ timeoutTightened?(event: TimeoutTransitionEvent): void;
63
+ /**
64
+ * Called when a {@link TimeoutPolicy} relaxes the budget back to the
65
+ * default after a successful request on a previously-tightened endpoint.
66
+ */
67
+ timeoutRelaxed?(event: TimeoutTransitionEvent): void;
56
68
  }
57
69
  //# sourceMappingURL=progressReporter.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AAEvD,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,8DAA8D;IAC9D,kBAAkB,CAAC,CAAC,MAAM,EAAE,0BAA0B,GAAG,IAAI,CAAC;IAC9D,6CAA6C;IAC7C,aAAa,CAAC,IAAI,IAAI,CAAC;IACvB,kDAAkD;IAClD,YAAY,CAAC,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD;;;;;;;;OAQG;IACH,gBAAgB,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACpE,eAAe,CAAC,CACd,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,GAC1D,IAAI,CAAC;IACR,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;CACV"}
1
+ {"version":3,"file":"progressReporter.d.ts","sourceRoot":"","sources":["../src/progressReporter.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAC1D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,gBAAgB,CAAC;AACvD,OAAO,KAAK,EAAE,sBAAsB,EAAE,MAAM,2BAA2B,CAAC;AAExE,MAAM,WAAW,0BAA0B;IACzC,YAAY,EAAE,YAAY,CAAC;IAC3B,IAAI,EAAE,QAAQ,GAAG,WAAW,GAAG,eAAe,CAAC;IAC/C,SAAS,EAAE,OAAO,CAAC;IACnB,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,KAAK,CAAC,EAAE,MAAM,CAAC;IACf,QAAQ,EAAE,MAAM,EAAE,CAAC;CACpB;AAED,MAAM,WAAW,gBAAgB;IAC/B,aAAa,CAAC,CAAC,IAAI,EAAE,MAAM,GAAG,IAAI,CAAC;IACnC,gBAAgB,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,QAAQ,EAAE,MAAM,GAAG,IAAI,CAAC;IACzD,YAAY,CAAC,CAAC,OAAO,EAAE,OAAO,GAAG,IAAI,CAAC;IACtC,8DAA8D;IAC9D,kBAAkB,CAAC,CAAC,MAAM,EAAE,0BAA0B,GAAG,IAAI,CAAC;IAC9D,6CAA6C;IAC7C,aAAa,CAAC,IAAI,IAAI,CAAC;IACvB,kDAAkD;IAClD,YAAY,CAAC,CAAC,YAAY,EAAE,YAAY,EAAE,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IAC/D,oBAAoB,CAAC,CACnB,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,YAAY,CAAC,EAAE,YAAY,EAC3B,cAAc,CAAC,EAAE,MAAM,EACvB,WAAW,CAAC,EAAE,MAAM,GACnB,IAAI,CAAC;IACR,UAAU,CAAC,CAAC,KAAK,EAAE,MAAM,GAAG,IAAI,CAAC;IACjC,aAAa,CAAC,CAAC,MAAM,EAAE;QACrB,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT,aAAa,CAAC,CACZ,KAAK,EAAE,MAAM,EACb,MAAM,EAAE;QACN,cAAc,EAAE,MAAM,CAAC;QACvB,cAAc,EAAE,MAAM,CAAC;QACvB,QAAQ,EAAE,MAAM,CAAC;KAClB,GACA,IAAI,CAAC;IACR,WAAW,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,KAAK,EAAE,KAAK,GAAG,IAAI,CAAC;IAChD,YAAY,CAAC,CAAC,KAAK,EAAE,MAAM,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACnD;;;;;;;;OAQG;IACH,gBAAgB,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,gBAAgB,GAAG,IAAI,CAAC;IACpE,eAAe,CAAC,CACd,OAAO,EAAE,OAAO,EAChB,MAAM,EAAE;QAAE,gBAAgB,EAAE,MAAM,CAAC;QAAC,aAAa,EAAE,MAAM,CAAA;KAAE,GAC1D,IAAI,CAAC;IACR,cAAc,CAAC,CAAC,OAAO,EAAE,OAAO,EAAE,MAAM,EAAE,MAAM,GAAG,IAAI,CAAC;IACxD,gBAAgB,CAAC,CAAC,MAAM,EAAE;QACxB,QAAQ,EAAE,MAAM,CAAC;QACjB,gBAAgB,EAAE,MAAM,CAAC;QACzB,aAAa,EAAE,MAAM,CAAC;KACvB,GAAG,IAAI,CAAC;IACT;;;;OAIG;IACH,gBAAgB,CAAC,CAAC,
KAAK,EAAE,sBAAsB,GAAG,IAAI,CAAC;IACvD;;;OAGG;IACH,cAAc,CAAC,CAAC,KAAK,EAAE,sBAAsB,GAAG,IAAI,CAAC;CACtD"}
@@ -2,6 +2,7 @@ import { Dataset, Distribution } from '@lde/dataset';
2
2
  import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
3
3
  import type { NamedNode, Quad } from '@rdfjs/types';
4
4
  import { Transform } from 'node:stream';
5
+ import { type TimeoutPolicy } from './timeoutPolicy.js';
5
6
  /**
6
7
  * An executor could not run because the dataset lacks a supported distribution.
7
8
  */
@@ -17,6 +18,16 @@ export interface ExecuteOptions {
17
18
  * When non-empty, a VALUES block is prepended to the WHERE clause.
18
19
  */
19
20
  bindings?: VariableBindings[];
21
+ /**
22
+ * Per-call {@link TimeoutPolicy}. When supplied, the executor calls
23
+ * {@link TimeoutPolicy.beforeRequest} once per attempt (including
24
+ * retries), installs an {@link AbortSignal} with the returned budget,
25
+ * and reports the outcome via {@link TimeoutPolicy.afterRequest}.
26
+ *
27
+ * When omitted, the executor falls back to a constant 5-minute policy.
28
+ * Pipeline runners use this to thread the per-dataset policy through.
29
+ */
30
+ timeout?: TimeoutPolicy;
20
31
  }
21
32
  export interface Executor {
22
33
  execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad> | NotSupported>;
@@ -29,11 +40,6 @@ export interface SparqlConstructExecutorOptions {
29
40
  * SPARQL CONSTRUCT query to execute.
30
41
  */
31
42
  query: string;
32
- /**
33
- * Optional timeout for SPARQL queries in milliseconds.
34
- * @default 300000 (5 minutes)
35
- */
36
- timeout?: number;
37
43
  /**
38
44
  * Number of retries for transient errors (network failures and HTTP 502/503/504).
39
45
  * @default 3
@@ -41,6 +47,20 @@ export interface SparqlConstructExecutorOptions {
41
47
  retries?: number;
42
48
  /**
43
49
  * Optional custom SparqlEndpointFetcher instance.
50
+ *
51
+ * When supplied, the executor uses this fetcher as-is for every attempt
52
+ * — the per-attempt timeout from the {@link TimeoutPolicy} is **not**
53
+ * enforced (the supplied fetcher’s own `timeout` governs). Policy
54
+ * `beforeRequest`/`afterRequest` hooks still fire so outcome
55
+ * classification works, but adaptive tightening cannot apply.
56
+ *
57
+ * When omitted, the executor builds a fresh
58
+ * {@link SparqlEndpointFetcher} per attempt with the per-attempt timeout
59
+ * baked in.
60
+ *
61
+ * This option is intended for tests (mocking `fetchTriples`) and
62
+ * advanced cases that need full control of the fetcher. Most callers
63
+ * should leave it unset.
44
64
  */
45
65
  fetcher?: SparqlEndpointFetcher;
46
66
  /**
@@ -101,7 +121,7 @@ export interface SparqlConstructExecutorOptions {
101
121
  export declare class SparqlConstructExecutor implements Executor {
102
122
  private readonly rawQuery;
103
123
  private readonly preParsed?;
104
- private readonly fetcher;
124
+ private readonly userFetcher?;
105
125
  private readonly retries;
106
126
  private readonly lineBuffer;
107
127
  private readonly deduplicate;
@@ -116,6 +136,22 @@ export declare class SparqlConstructExecutor implements Executor {
116
136
  * @returns AsyncIterable<Quad> stream of results.
117
137
  */
118
138
  execute(dataset: Dataset, distribution: Distribution, options?: ExecuteOptions): Promise<AsyncIterable<Quad>>;
139
+ /**
140
+ * Run a single attempt against the endpoint with a per-call abort
141
+ * signal derived from {@link TimeoutPolicy.beforeRequest}. Reports the
142
+ * outcome via {@link TimeoutPolicy.afterRequest} regardless of whether
143
+ * the attempt resolved or threw.
144
+ */
145
+ private fetchQuadsWithPolicy;
146
+ /**
147
+ * Pick the fetcher to use for a single attempt. A user-supplied fetcher
148
+ * is used as-is and its own timeout governs the request; the per-attempt
149
+ * policy budget is bypassed in that case (see the JSDoc on
150
+ * {@link SparqlConstructExecutorOptions.fetcher}). Otherwise a fresh
151
+ * {@link SparqlEndpointFetcher} is constructed per attempt with the
152
+ * policy-supplied timeout baked in.
153
+ */
154
+ private fetcherForAttempt;
119
155
  /**
120
156
  * Fetch quads from the endpoint, optionally line-buffering the response
121
157
  * stream before it reaches the N3 parser to work around
@@ -1 +1 @@
1
- {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAiB,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAWxC;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;CAC/B;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;OAEG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;IAEhC;;;;;;;;;OASG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IAErB;;;;;;;;;;;;;;OAcG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;IAChD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAU;IACrC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IACtC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAqBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IA0C/B;;;;OAIG;YACW,UAAU;IAmBxB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,mBAAoB,SAAQ,SAAS;IAChD,OAAO,CAAC,SAAS,CAAM;IAEd,UAAU,CACjB,KAAK,EAAE,MAAM,EACb,SAAS,EAAE,MAAM,EACjB,QAAQ,EAAE,MAAM,IAAI;IAWb,MAAM,CAAC,QAAQ,EAAE,MAAM,IAAI;CAMrC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAuB,gBAAgB,CACrC,KAAK,EAAE,aAAa,CA
AC,IAAI,CAAC,GACzB,aAAa,CAAC,IAAI,CAAC,CASrB"}
1
+ {"version":3,"file":"executor.d.ts","sourceRoot":"","sources":["../../src/sparql/executor.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAiB,MAAM,cAAc,CAAC;AACpE,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAC9D,OAAO,KAAK,EAAE,SAAS,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AAGpD,OAAO,EAAE,SAAS,EAAE,MAAM,aAAa,CAAC;AAUxC,OAAO,EAGL,KAAK,aAAa,EACnB,MAAM,oBAAoB,CAAC;AAS5B;;GAEG;AACH,qBAAa,YAAY;aACK,OAAO,EAAE,MAAM;gBAAf,OAAO,EAAE,MAAM;CAC5C;AAED,qEAAqE;AACrE,MAAM,MAAM,gBAAgB,GAAG,MAAM,CAAC,MAAM,EAAE,SAAS,CAAC,CAAC;AAEzD,MAAM,WAAW,cAAc;IAC7B;;;OAGG;IACH,QAAQ,CAAC,EAAE,gBAAgB,EAAE,CAAC;IAE9B;;;;;;;;OAQG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,MAAM,WAAW,QAAQ;IACvB,OAAO,CACL,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,GAAG,YAAY,CAAC,CAAC;CAChD;AAED;;GAEG;AACH,MAAM,WAAW,8BAA8B;IAC7C;;OAEG;IACH,KAAK,EAAE,MAAM,CAAC;IAEd;;;OAGG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IAEjB;;;;;;;;;;;;;;;;OAgBG;IACH,OAAO,CAAC,EAAE,qBAAqB,CAAC;IAEhC;;;;;;;;;OASG;IACH,UAAU,CAAC,EAAE,OAAO,CAAC;IAErB;;;;;;;;;;;;;;OAcG;IACH,WAAW,CAAC,EAAE,OAAO,CAAC;CACvB;AAED;;;;;;;;;;;;;;;;;;;;;;;;;;GA0BG;AACH,qBAAa,uBAAwB,YAAW,QAAQ;IACtD,OAAO,CAAC,QAAQ,CAAC,QAAQ,CAAS;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAC,CAAiB;IAC5C,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAwB;IACrD,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAS;IACjC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAU;IACrC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAU;IACtC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAmB;gBAEjC,OAAO,EAAE,8BAA8B;IAiBnD;;;;;;;OAOG;IACG,OAAO,CACX,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,cAAc,GACvB,OAAO,CAAC,aAAa,CAAC,IAAI,CAAC,CAAC;IA4C/B;;;;;OAKG;YACW,oBAAoB;IA+BlC;;;;;;;OAOG;IACH,OAAO,CAAC,iBAAiB;IAKzB;;;;OAIG;YACW,UAAU;IAmBxB;;;;;OAKG;WACiB,QAAQ,CAC1B,QAAQ,EAAE,MAAM,EAChB,OAAO,CAAC,EAAE,IAAI,CAAC,8BAA8B,EAAE,OAAO,CAAC,GACtD,OAAO,CAAC,uBAAuB,CAAC;CAIpC;AAED;;GAEG;AACH,wBAAsB,aAAa,CAAC,QAAQ,EAAE,MAAM,GAAG,OAAO,CAAC,MAAM,CAAC,CAErE;AAED;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,mBAAoB,SAAQ,SAAS;IAChD,OAAO,CAAC,SAAS,CAAM;IAEd,UAAU,CAAC,KAAK,EAAE,MAAM,EAAE,SAAS,EAAE,
MAAM,EAAE,QAAQ,EAAE,MAAM,IAAI;IAUjE,MAAM,CAAC,QAAQ,EAAE,MAAM,IAAI;CAMrC;AAED;;;;;;;;;;;;;;;;GAgBG;AACH,wBAAuB,gBAAgB,CACrC,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,GACzB,aAAa,CAAC,IAAI,CAAC,CASrB"}
@@ -11,6 +11,13 @@ import pRetry from 'p-retry';
11
11
  import { quadToStringQuad } from 'rdf-string';
12
12
  import { withDefaultGraph } from './graph.js';
13
13
  import { injectValues } from './values.js';
14
+ import { ConstantTimeoutPolicy, } from './timeoutPolicy.js';
15
+ /**
16
+ * Fallback policy when no per-call `TimeoutPolicy` is supplied via
17
+ * {@link ExecuteOptions.timeout}. Pipeline always supplies one, so this only
18
+ * matters when the executor is driven directly (without a Pipeline).
19
+ */
20
+ const defaultTimeoutPolicy = new ConstantTimeoutPolicy(300_000);
14
21
  /**
15
22
  * An executor could not run because the dataset lacks a supported distribution.
16
23
  */
@@ -50,7 +57,7 @@ export class NotSupported {
50
57
  export class SparqlConstructExecutor {
51
58
  rawQuery;
52
59
  preParsed;
53
- fetcher;
60
+ userFetcher;
54
61
  retries;
55
62
  lineBuffer;
56
63
  deduplicate;
@@ -67,11 +74,7 @@ export class SparqlConstructExecutor {
67
74
  }
68
75
  this.preParsed = parsed;
69
76
  }
70
- this.fetcher =
71
- options.fetcher ??
72
- new SparqlEndpointFetcher({
73
- timeout: options.timeout ?? 300_000,
74
- });
77
+ this.userFetcher = options.fetcher;
75
78
  }
76
79
  /**
77
80
  * Execute the SPARQL CONSTRUCT query against the distribution's endpoint.
@@ -105,22 +108,65 @@ export class SparqlConstructExecutor {
105
108
  let query = this.generator.generate(ast);
106
109
  assertSafeIri(dataset.iri.toString());
107
110
  query = query.replaceAll('?dataset', `<${dataset.iri}>`);
108
- const quads = await pRetry(() => this.fetchQuads(endpoint.toString(), query), {
111
+ const policy = options?.timeout ?? defaultTimeoutPolicy;
112
+ const quads = await pRetry(() => this.fetchQuadsWithPolicy(endpoint, query, policy), {
109
113
  retries: this.retries,
110
114
  shouldRetry: ({ error }) => isTransientError(error),
111
115
  });
112
116
  return this.deduplicate ? deduplicateQuads(quads) : quads;
113
117
  }
118
+ /**
119
+ * Run a single attempt against the endpoint with a per-call abort
120
+ * signal derived from {@link TimeoutPolicy.beforeRequest}. Reports the
121
+ * outcome via {@link TimeoutPolicy.afterRequest} regardless of whether
122
+ * the attempt resolved or threw.
123
+ */
124
+ async fetchQuadsWithPolicy(endpointUrl, query, policy) {
125
+ const timeoutMs = policy.beforeRequest({ endpoint: endpointUrl });
126
+ const fetcher = this.fetcherForAttempt(timeoutMs);
127
+ const start = Date.now();
128
+ try {
129
+ const quads = await this.fetchQuads(fetcher, endpointUrl.toString(), query);
130
+ policy.afterRequest({
131
+ endpoint: endpointUrl,
132
+ outcome: 'ok',
133
+ durationMs: Date.now() - start,
134
+ });
135
+ return quads;
136
+ }
137
+ catch (error) {
138
+ policy.afterRequest({
139
+ endpoint: endpointUrl,
140
+ outcome: classifyOutcome(error),
141
+ durationMs: Date.now() - start,
142
+ error,
143
+ });
144
+ throw error;
145
+ }
146
+ }
147
+ /**
148
+ * Pick the fetcher to use for a single attempt. A user-supplied fetcher
149
+ * is used as-is and its own timeout governs the request; the per-attempt
150
+ * policy budget is bypassed in that case (see the JSDoc on
151
+ * {@link SparqlConstructExecutorOptions.fetcher}). Otherwise a fresh
152
+ * {@link SparqlEndpointFetcher} is constructed per attempt with the
153
+ * policy-supplied timeout baked in.
154
+ */
155
+ fetcherForAttempt(timeoutMs) {
156
+ if (this.userFetcher)
157
+ return this.userFetcher;
158
+ return new SparqlEndpointFetcher({ timeout: timeoutMs });
159
+ }
114
160
  /**
115
161
  * Fetch quads from the endpoint, optionally line-buffering the response
116
162
  * stream before it reaches the N3 parser to work around
117
163
  * {@link https://github.com/rdfjs/N3.js/issues/578 | N3.js#578}.
118
164
  */
119
- async fetchQuads(endpoint, query) {
165
+ async fetchQuads(fetcher, endpoint, query) {
120
166
  if (!this.lineBuffer) {
121
- return this.fetcher.fetchTriples(endpoint, query);
167
+ return fetcher.fetchTriples(endpoint, query);
122
168
  }
123
- const [contentType, , responseStream] = await this.fetcher.fetchRawStream(endpoint, query, SparqlEndpointFetcher.CONTENTTYPE_TURTLE);
169
+ const [contentType, , responseStream] = await fetcher.fetchRawStream(endpoint, query, SparqlEndpointFetcher.CONTENTTYPE_TURTLE);
124
170
  return responseStream
125
171
  .pipe(new LineBufferTransform())
126
172
  .pipe(new StreamParser({ format: contentType }));
@@ -219,3 +265,29 @@ function isTransientError(error) {
219
265
  const status = Number(match[1]);
220
266
  return status === 502 || status === 503 || status === 504;
221
267
  }
268
+ /**
269
+ * Classify a fetch error for {@link TimeoutPolicy} reporting.
270
+ *
271
+ * - HTTP 504 → `'timeout'`: the upstream reported it ran out of time. This
272
+ * is the exact failure mode adaptive timeouts exist to react to.
273
+ * - `AbortError` / `TimeoutError` (on the error or its `cause`) → `'timeout'`: our own `AbortSignal.timeout()` fired.
274
+ * - Anything else → `'error'`: neutral with respect to tightening.
275
+ */
276
+ function classifyOutcome(error) {
277
+ if (error instanceof Error) {
278
+ if (error.name === 'AbortError' || error.name === 'TimeoutError') {
279
+ return 'timeout';
280
+ }
281
+ if (error.cause instanceof Error) {
282
+ if (error.cause.name === 'AbortError' ||
283
+ error.cause.name === 'TimeoutError') {
284
+ return 'timeout';
285
+ }
286
+ }
287
+ const match = error.message.match(transientStatusPattern);
288
+ if (match && Number(match[1]) === 504) {
289
+ return 'timeout';
290
+ }
291
+ }
292
+ return 'error';
293
+ }
@@ -2,4 +2,5 @@ export { deduplicateQuads, SparqlConstructExecutor, LineBufferTransform, NotSupp
2
2
  export { SparqlItemSelector, type SparqlItemSelectorOptions, } from './selector.js';
3
3
  export { injectValues } from './values.js';
4
4
  export { withDefaultGraph } from './graph.js';
5
+ export { AdaptiveTimeoutPolicy, ConstantTimeoutPolicy, adaptiveTimeoutPolicy, constantTimeoutPolicy, type AdaptiveTimeoutPolicyOptions, type AfterRequestContext, type BeforeRequestContext, type TimeoutOutcome, type TimeoutPolicy, type TimeoutPolicyObserver, type TimeoutTransitionEvent, } from './timeoutPolicy.js';
5
6
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,uBAAuB,EACvB,mBAAmB,EACnB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../../src/sparql/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,gBAAgB,EAChB,uBAAuB,EACvB,mBAAmB,EACnB,YAAY,EACZ,aAAa,EACb,KAAK,cAAc,EACnB,KAAK,QAAQ,EACb,KAAK,8BAA8B,EACnC,KAAK,gBAAgB,GACtB,MAAM,eAAe,CAAC;AACvB,OAAO,EACL,kBAAkB,EAClB,KAAK,yBAAyB,GAC/B,MAAM,eAAe,CAAC;AAEvB,OAAO,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAE3C,OAAO,EAAE,gBAAgB,EAAE,MAAM,YAAY,CAAC;AAE9C,OAAO,EACL,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,qBAAqB,EACrB,KAAK,4BAA4B,EACjC,KAAK,mBAAmB,EACxB,KAAK,oBAAoB,EACzB,KAAK,cAAc,EACnB,KAAK,aAAa,EAClB,KAAK,qBAAqB,EAC1B,KAAK,sBAAsB,GAC5B,MAAM,oBAAoB,CAAC"}
@@ -2,3 +2,4 @@ export { deduplicateQuads, SparqlConstructExecutor, LineBufferTransform, NotSupp
2
2
  export { SparqlItemSelector, } from './selector.js';
3
3
  export { injectValues } from './values.js';
4
4
  export { withDefaultGraph } from './graph.js';
5
+ export { AdaptiveTimeoutPolicy, ConstantTimeoutPolicy, adaptiveTimeoutPolicy, constantTimeoutPolicy, } from './timeoutPolicy.js';
@@ -1,6 +1,6 @@
1
1
  import type { Distribution } from '@lde/dataset';
2
2
  import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
3
- import type { ItemSelector } from '../stage.js';
3
+ import type { ItemSelector, SelectOptions } from '../stage.js';
4
4
  import type { VariableBindings } from './executor.js';
5
5
  export interface SparqlItemSelectorOptions {
6
6
  /**
@@ -46,8 +46,14 @@ export declare class SparqlItemSelector implements ItemSelector {
46
46
  private readonly parsed;
47
47
  private readonly queryLimit?;
48
48
  private readonly maxResults?;
49
- private readonly fetcher;
49
+ private readonly userFetcher?;
50
50
  constructor(options: SparqlItemSelectorOptions);
51
- select(distribution: Distribution, batchSize?: number): AsyncIterableIterator<VariableBindings>;
51
+ select(distribution: Distribution, batchSize?: number, options?: SelectOptions): AsyncIterableIterator<VariableBindings>;
52
+ /**
53
+ * Run a single SPARQL request against the endpoint, threading the
54
+ * per-call timeout from {@link TimeoutPolicy.beforeRequest} and
55
+ * reporting the outcome to {@link TimeoutPolicy.afterRequest}.
56
+ */
57
+ private fetchBindingsWithPolicy;
52
58
  }
53
59
  //# sourceMappingURL=selector.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,aAAa,CAAC;AAChD,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAMtD,MAAM,WAAW,yBAAyB;IACxC;;;;;;;;OAQG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,OAAO,CAAwB;gBAEpC,OAAO,EAAE,yBAAyB;IAmBvC,MAAM,CACX,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,qBAAqB,CAAC,gBAAgB,CAAC;CAyD3C"}
1
+ {"version":3,"file":"selector.d.ts","sourceRoot":"","sources":["../../src/sparql/selector.ts"],"names":[],"mappings":"AAAA,OAAO,KAAK,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AAEjD,OAAO,EAAE,qBAAqB,EAAE,MAAM,uBAAuB,CAAC;AAQ9D,OAAO,KAAK,EAAE,YAAY,EAAE,aAAa,EAAE,MAAM,aAAa,CAAC;AAC/D,OAAO,KAAK,EAAE,gBAAgB,EAAE,MAAM,eAAe,CAAC;AAoBtD,MAAM,WAAW,yBAAyB;IACxC;;;;;;;;OAQG;IACH,KAAK,EAAE,MAAM,CAAC;IACd;;;;;;OAMG;IACH,UAAU,CAAC,EAAE,MAAM,CAAC;IACpB,+BAA+B;IAC/B,OAAO,CAAC,EAAE,qBAAqB,CAAC;CACjC;AAED;;;;;;;;;;;;;;;;;GAiBG;AACH,qBAAa,kBAAmB,YAAW,YAAY;IACrD,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAc;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAAS;IACrC,OAAO,CAAC,QAAQ,CAAC,WAAW,CAAC,CAAwB;gBAEzC,OAAO,EAAE,yBAAyB;IAmBvC,MAAM,CACX,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,qBAAqB,CAAC,gBAAgB,CAAC;IA4D1C;;;;OAIG;YACW,uBAAuB;CA8BtC"}
@@ -2,6 +2,14 @@ import { SparqlEndpointFetcher } from 'fetch-sparql-endpoint';
2
2
  import { Parser } from '@traqula/parser-sparql-1-1';
3
3
  import { Generator } from '@traqula/generator-sparql-1-1';
4
4
  import { AstFactory, } from '@traqula/rules-sparql-1-1';
5
+ import { ConstantTimeoutPolicy, } from './timeoutPolicy.js';
6
+ const transientStatusPattern = /HTTP status (\d+)/;
7
+ /**
8
+ * Fallback policy when no per-call `TimeoutPolicy` is supplied via
9
+ * {@link SelectOptions.timeout}. Pipeline always supplies one, so this only
10
+ * matters when the selector is driven directly (without a Pipeline).
11
+ */
12
+ const defaultTimeoutPolicy = new ConstantTimeoutPolicy(300_000);
5
13
  const parser = new Parser();
6
14
  const generator = new Generator();
7
15
  const F = new AstFactory();
@@ -27,7 +35,7 @@ export class SparqlItemSelector {
27
35
  parsed;
28
36
  queryLimit;
29
37
  maxResults;
30
- fetcher;
38
+ userFetcher;
31
39
  constructor(options) {
32
40
  const parsed = parser.parse(options.query);
33
41
  if (parsed.type !== 'query' || parsed.subType !== 'select') {
@@ -40,13 +48,14 @@ export class SparqlItemSelector {
40
48
  this.parsed = parsed;
41
49
  this.queryLimit = this.parsed.solutionModifiers.limitOffset?.limit;
42
50
  this.maxResults = options.maxResults;
43
- this.fetcher = options.fetcher ?? new SparqlEndpointFetcher();
51
+ this.userFetcher = options.fetcher;
44
52
  }
45
- async *select(distribution, batchSize) {
53
+ async *select(distribution, batchSize, options) {
46
54
  if (this.maxResults === 0)
47
55
  return;
48
56
  const basePageSize = this.queryLimit ?? batchSize ?? 10;
49
57
  const endpoint = distribution.accessUrl;
58
+ const policy = options?.timeout ?? defaultTimeoutPolicy;
50
59
  let offset = 0;
51
60
  let totalYielded = 0;
52
61
  while (true) {
@@ -59,7 +68,7 @@ export class SparqlItemSelector {
59
68
  const effectivePageSize = offset === 0 ? basePageSize : Math.min(basePageSize, remaining);
60
69
  this.parsed.solutionModifiers.limitOffset = F.solutionModifierLimitOffset(effectivePageSize, offset, F.gen());
61
70
  const paginatedQuery = generator.generate(this.parsed);
62
- const stream = (await this.fetcher.fetchBindings(endpoint.toString(), paginatedQuery));
71
+ const stream = await this.fetchBindingsWithPolicy(endpoint, paginatedQuery, policy);
63
72
  let count = 0;
64
73
  for await (const record of stream) {
65
74
  const row = Object.fromEntries(Object.entries(record).filter(([, term]) => term.termType === 'NamedNode'));
@@ -79,6 +88,52 @@ export class SparqlItemSelector {
79
88
  offset += count;
80
89
  }
81
90
  }
91
+ /**
92
+ * Run a single SPARQL request against the endpoint, threading the
93
+ * per-call timeout from {@link TimeoutPolicy.beforeRequest} and
94
+ * reporting the outcome to {@link TimeoutPolicy.afterRequest}.
95
+ */
96
+ async fetchBindingsWithPolicy(endpoint, paginatedQuery, policy) {
97
+ const timeoutMs = policy.beforeRequest({ endpoint });
98
+ const fetcher = this.userFetcher ?? new SparqlEndpointFetcher({ timeout: timeoutMs });
99
+ const start = Date.now();
100
+ try {
101
+ const stream = (await fetcher.fetchBindings(endpoint.toString(), paginatedQuery));
102
+ policy.afterRequest({
103
+ endpoint,
104
+ outcome: 'ok',
105
+ durationMs: Date.now() - start,
106
+ });
107
+ return stream;
108
+ }
109
+ catch (error) {
110
+ policy.afterRequest({
111
+ endpoint,
112
+ outcome: classifyOutcome(error),
113
+ durationMs: Date.now() - start,
114
+ error,
115
+ });
116
+ throw error;
117
+ }
118
+ }
119
+ }
120
+ function classifyOutcome(error) {
121
+ if (error instanceof Error) {
122
+ if (error.name === 'AbortError' || error.name === 'TimeoutError') {
123
+ return 'timeout';
124
+ }
125
+ if (error.cause instanceof Error) {
126
+ if (error.cause.name === 'AbortError' ||
127
+ error.cause.name === 'TimeoutError') {
128
+ return 'timeout';
129
+ }
130
+ }
131
+ const match = error.message.match(transientStatusPattern);
132
+ if (match && Number(match[1]) === 504) {
133
+ return 'timeout';
134
+ }
135
+ }
136
+ return 'error';
82
137
  }
83
138
  function isVariableTerm(v) {
84
139
  return ('type' in v &&
@@ -0,0 +1,152 @@
1
+ /**
2
+ * Outcome of a single SPARQL request attempt, as reported back to a
3
+ * {@link TimeoutPolicy} so it can adapt the budget for subsequent requests.
4
+ *
5
+ * - `ok` — the request resolved successfully (the HTTP response was accepted
6
+ * and the body started streaming).
7
+ * - `timeout` — the per-call {@link AbortSignal} fired, or the endpoint
8
+ * returned an HTTP 504 (upstream-reported timeout). Both are semantically
9
+ * ‘the endpoint did not deliver in time’.
10
+ * - `error` — any other failure (HTTP error statuses other than 504, parser errors, etc.).
11
+ * Neutral with respect to adaptive tightening.
12
+ */
13
+ export type TimeoutOutcome = 'ok' | 'error' | 'timeout';
14
+ /** Context passed to {@link TimeoutPolicy.beforeRequest}. */
15
+ export interface BeforeRequestContext {
16
+ /** Endpoint URL the upcoming request will be sent to. */
17
+ endpoint: URL;
18
+ }
19
+ /** Context passed to {@link TimeoutPolicy.afterRequest}. */
20
+ export interface AfterRequestContext {
21
+ /** Endpoint URL the request was sent to. */
22
+ endpoint: URL;
23
+ /** Classified outcome of the request. */
24
+ outcome: TimeoutOutcome;
25
+ /** Wall-clock duration of the request attempt, in milliseconds. */
26
+ durationMs: number;
27
+ /** The raw error, when {@link outcome} is `'error'` or `'timeout'`. */
28
+ error?: unknown;
29
+ }
30
+ /**
31
+ * Decides the timeout budget for each SPARQL request and observes the
32
+ * outcome. Implementations are free to adapt the budget based on recent
33
+ * behaviour — see {@link AdaptiveTimeoutPolicy} for the built-in adaptive
34
+ * implementation, and {@link ConstantTimeoutPolicy} for fixed-budget
35
+ * behaviour.
36
+ *
37
+ * Hooks are synchronous because they sit on the request hot path; async
38
+ * work is not supported.
39
+ */
40
+ export interface TimeoutPolicy {
41
+ /**
42
+ * Returns the timeout (in milliseconds) to apply to the upcoming request.
43
+ * Called once per attempt — including retried attempts inside
44
+ * `p-retry`, so a retry can already use a tightened budget.
45
+ */
46
+ beforeRequest(context: BeforeRequestContext): number;
47
+ /**
48
+ * Reports the outcome of the request that {@link beforeRequest} budgeted.
49
+ * Called once per attempt, regardless of outcome.
50
+ */
51
+ afterRequest(context: AfterRequestContext): void;
52
+ /**
53
+ * Optional observer subscription for state transitions. Returns an
54
+ * `unsubscribe` function. Policies that don’t transition (e.g. constant)
55
+ * may omit this hook.
56
+ */
57
+ subscribe?(observer: TimeoutPolicyObserver): () => void;
58
+ }
59
+ /** A single tighten/relax transition for one endpoint. */
60
+ export interface TimeoutTransitionEvent {
61
+ /** Endpoint whose timeout budget changed. */
62
+ endpoint: URL;
63
+ /** Budget in effect before the transition. */
64
+ fromTimeoutMs: number;
65
+ /** Budget in effect after the transition. */
66
+ toTimeoutMs: number;
67
+ /**
68
+ * Number of consecutive timeouts observed at the moment of the
69
+ * transition. For a `relax` event, this is the length of the timeout run ended by the
70
+ * `ok` that triggered relaxation.
71
+ */
72
+ consecutiveTimeouts: number;
73
+ }
74
+ /**
75
+ * Observer that receives notifications when a policy tightens or relaxes
76
+ * its budget for an endpoint. Both hooks are optional.
77
+ */
78
+ export interface TimeoutPolicyObserver {
79
+ /** Called when the policy flips an endpoint to the tightened budget. */
80
+ onTighten?(event: TimeoutTransitionEvent): void;
81
+ /** Called when the policy relaxes an endpoint back to the default budget. */
82
+ onRelax?(event: TimeoutTransitionEvent): void;
83
+ }
84
+ /**
85
+ * Returns the same timeout for every request. Use this as the
86
+ * backwards-compatible default for callers that don’t want adaptive
87
+ * behaviour.
88
+ */
89
+ export declare class ConstantTimeoutPolicy implements TimeoutPolicy {
90
+ private readonly timeoutMs;
91
+ constructor(timeoutMs: number);
92
+ beforeRequest(_context: BeforeRequestContext): number;
93
+ afterRequest(_context: AfterRequestContext): void;
94
+ }
95
+ /** Options for {@link AdaptiveTimeoutPolicy}. */
96
+ export interface AdaptiveTimeoutPolicyOptions {
97
+ /** Budget applied while the endpoint is healthy. Must be positive. */
98
+ defaultMs: number;
99
+ /**
100
+ * Budget applied after {@link tightenAfterTimeouts} consecutive timeouts.
101
+ * Must satisfy `tightenedMs < defaultMs`.
102
+ */
103
+ tightenedMs: number;
104
+ /**
105
+ * Number of consecutive timeouts that flips an endpoint to the
106
+ * {@link tightenedMs} budget. Must be an integer ≥ 1.
107
+ */
108
+ tightenAfterTimeouts: number;
109
+ }
110
+ /**
111
+ * Adaptive per-endpoint policy: after {@link AdaptiveTimeoutPolicyOptions.tightenAfterTimeouts}
112
+ * consecutive timeouts on the same endpoint, subsequent requests use the
113
+ * tightened budget so the pipeline fast-fails instead of waiting out the
114
+ * full default budget. A single successful request relaxes the endpoint
115
+ * back to the default budget.
116
+ *
117
+ * State is in-memory and tied to the policy instance — Pipeline creates a
118
+ * fresh instance per dataset so one offending dataset doesn’t poison the
119
+ * next.
120
+ *
121
+ * @example
122
+ * ```ts
123
+ * const factory = adaptiveTimeoutPolicy({
124
+ * defaultMs: 300_000,
125
+ * tightenedMs: 10_000,
126
+ * tightenAfterTimeouts: 2,
127
+ * });
128
+ * ```
129
+ */
130
+ export declare class AdaptiveTimeoutPolicy implements TimeoutPolicy {
131
+ private readonly options;
132
+ private readonly states;
133
+ private readonly observers;
134
+ constructor(options: AdaptiveTimeoutPolicyOptions);
135
+ beforeRequest(context: BeforeRequestContext): number;
136
+ afterRequest(context: AfterRequestContext): void;
137
+ subscribe(observer: TimeoutPolicyObserver): () => void;
138
+ private stateFor;
139
+ private notify;
140
+ }
141
+ /**
142
+ * Factory returning a fresh {@link ConstantTimeoutPolicy} on every call.
143
+ * Pass this to {@link PipelineOptions.timeout}.
144
+ */
145
+ export declare function constantTimeoutPolicy(timeoutMs: number): () => ConstantTimeoutPolicy;
146
+ /**
147
+ * Factory returning a fresh {@link AdaptiveTimeoutPolicy} on every call.
148
+ * Pass this to {@link PipelineOptions.timeout}; the Pipeline invokes
149
+ * the factory once per dataset so state resets between datasets.
150
+ */
151
+ export declare function adaptiveTimeoutPolicy(options: AdaptiveTimeoutPolicyOptions): () => AdaptiveTimeoutPolicy;
152
+ //# sourceMappingURL=timeoutPolicy.d.ts.map
@@ -0,0 +1 @@
1
+ {"version":3,"file":"timeoutPolicy.d.ts","sourceRoot":"","sources":["../../src/sparql/timeoutPolicy.ts"],"names":[],"mappings":"AAAA;;;;;;;;;;;GAWG;AACH,MAAM,MAAM,cAAc,GAAG,IAAI,GAAG,OAAO,GAAG,SAAS,CAAC;AAExD,6DAA6D;AAC7D,MAAM,WAAW,oBAAoB;IACnC,yDAAyD;IACzD,QAAQ,EAAE,GAAG,CAAC;CACf;AAED,4DAA4D;AAC5D,MAAM,WAAW,mBAAmB;IAClC,4CAA4C;IAC5C,QAAQ,EAAE,GAAG,CAAC;IACd,yCAAyC;IACzC,OAAO,EAAE,cAAc,CAAC;IACxB,mEAAmE;IACnE,UAAU,EAAE,MAAM,CAAC;IACnB,uEAAuE;IACvE,KAAK,CAAC,EAAE,OAAO,CAAC;CACjB;AAED;;;;;;;;;GASG;AACH,MAAM,WAAW,aAAa;IAC5B;;;;OAIG;IACH,aAAa,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM,CAAC;IACrD;;;OAGG;IACH,YAAY,CAAC,OAAO,EAAE,mBAAmB,GAAG,IAAI,CAAC;IACjD;;;;OAIG;IACH,SAAS,CAAC,CAAC,QAAQ,EAAE,qBAAqB,GAAG,MAAM,IAAI,CAAC;CACzD;AAED,0DAA0D;AAC1D,MAAM,WAAW,sBAAsB;IACrC,6CAA6C;IAC7C,QAAQ,EAAE,GAAG,CAAC;IACd,8CAA8C;IAC9C,aAAa,EAAE,MAAM,CAAC;IACtB,6CAA6C;IAC7C,WAAW,EAAE,MAAM,CAAC;IACpB;;;;OAIG;IACH,mBAAmB,EAAE,MAAM,CAAC;CAC7B;AAED;;;GAGG;AACH,MAAM,WAAW,qBAAqB;IACpC,wEAAwE;IACxE,SAAS,CAAC,CAAC,KAAK,EAAE,sBAAsB,GAAG,IAAI,CAAC;IAChD,6EAA6E;IAC7E,OAAO,CAAC,CAAC,KAAK,EAAE,sBAAsB,GAAG,IAAI,CAAC;CAC/C;AAED;;;;GAIG;AACH,qBAAa,qBAAsB,YAAW,aAAa;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS;gBAAT,SAAS,EAAE,MAAM;IAQ9C,aAAa,CAAC,QAAQ,EAAE,oBAAoB,GAAG,MAAM;IAIrD,YAAY,CAAC,QAAQ,EAAE,mBAAmB,GAAG,IAAI;CAGlD;AAED,iDAAiD;AACjD,MAAM,WAAW,4BAA4B;IAC3C,sEAAsE;IACtE,SAAS,EAAE,MAAM,CAAC;IAClB;;;OAGG;IACH,WAAW,EAAE,MAAM,CAAC;IACpB;;;OAGG;IACH,oBAAoB,EAAE,MAAM,CAAC;CAC9B;AAQD;;;;;;;;;;;;;;;;;;;GAmBG;AACH,qBAAa,qBAAsB,YAAW,aAAa;IAI7C,OAAO,CAAC,QAAQ,CAAC,OAAO;IAHpC,OAAO,CAAC,QAAQ,CAAC,MAAM,CAAoC;IAC3D,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAoC;gBAEjC,OAAO,EAAE,4BAA4B;IA0BlE,aAAa,CAAC,OAAO,EAAE,oBAAoB,GAAG,MAAM;IAKpD,YAAY,CAAC,OAAO,EAAE,mBAAmB,GAAG,IAAI;IAmChD,SAAS,CAAC,QAAQ,EAAE,qBAAqB,GAAG,MAAM,IAAI;IAOtD,OAAO,CAAC,QAAQ;IAUhB,OAAO,CAAC,MAAM;CAUf;AAED;;;GAGG;AACH,wBAAgB,qBAAqB,CACnC,SAAS,EAAE,MAAM,GAChB,MAAM,qBAAqB,CAM7B;AAED;;;;GAIG;AACH,wBAAgB,qBAAqB,CACnC,OAAO,EAAE,4BAA4B,GACpC,MAAM,qBAAqB,CAK7B"}
@@ -0,0 +1,138 @@
1
+ /**
2
+ * Returns the same timeout for every request. Use this as the
3
+ * backwards-compatible default for callers that don’t want adaptive
4
+ * behaviour.
5
+ */
6
+ export class ConstantTimeoutPolicy {
7
+ timeoutMs;
8
+ constructor(timeoutMs) {
9
+ this.timeoutMs = timeoutMs;
10
+ if (!Number.isFinite(timeoutMs) || timeoutMs <= 0) {
11
+ throw new Error(`ConstantTimeoutPolicy: timeoutMs must be a positive finite number, received ${timeoutMs}`);
12
+ }
13
+ }
14
+ beforeRequest(_context) {
15
+ return this.timeoutMs;
16
+ }
17
+ afterRequest(_context) {
18
+ // Constant policy is stateless — outcomes never affect future budgets.
19
+ }
20
+ }
21
+ /**
22
+ * Adaptive per-endpoint policy: after {@link AdaptiveTimeoutPolicyOptions.tightenAfterTimeouts}
23
+ * consecutive timeouts on the same endpoint, subsequent requests use the
24
+ * tightened budget so the pipeline fast-fails instead of waiting out the
25
+ * full default budget. A single successful request relaxes the endpoint
26
+ * back to the default budget.
27
+ *
28
+ * State is in-memory and tied to the policy instance — Pipeline creates a
29
+ * fresh instance per dataset so one offending dataset doesn’t poison the
30
+ * next.
31
+ *
32
+ * @example
33
+ * ```ts
34
+ * const factory = adaptiveTimeoutPolicy({
35
+ * defaultMs: 300_000,
36
+ * tightenedMs: 10_000,
37
+ * tightenAfterTimeouts: 2,
38
+ * });
39
+ * ```
40
+ */
41
+ export class AdaptiveTimeoutPolicy {
42
+ options;
43
+ states = new Map();
44
+ observers = new Set();
45
+ constructor(options) {
46
+ this.options = options;
47
+ if (!Number.isFinite(options.defaultMs) || options.defaultMs <= 0) {
48
+ throw new Error(`AdaptiveTimeoutPolicy: \`defaultMs\` must be a positive finite number, received ${options.defaultMs}`);
49
+ }
50
+ if (!Number.isFinite(options.tightenedMs) || options.tightenedMs <= 0) {
51
+ throw new Error(`AdaptiveTimeoutPolicy: \`tightenedMs\` must be a positive finite number, received ${options.tightenedMs}`);
52
+ }
53
+ if (!(options.tightenedMs < options.defaultMs)) {
54
+ throw new Error(`AdaptiveTimeoutPolicy: \`tightenedMs\` (${options.tightenedMs}) must be less than \`defaultMs\` (${options.defaultMs})`);
55
+ }
56
+ if (!Number.isInteger(options.tightenAfterTimeouts) ||
57
+ options.tightenAfterTimeouts < 1) {
58
+ throw new Error(`AdaptiveTimeoutPolicy: \`tightenAfterTimeouts\` must be an integer ≥ 1, received ${options.tightenAfterTimeouts}`);
59
+ }
60
+ }
61
+ beforeRequest(context) {
62
+ const state = this.stateFor(context.endpoint);
63
+ return state.tightened ? this.options.tightenedMs : this.options.defaultMs;
64
+ }
65
+ afterRequest(context) {
66
+ const state = this.stateFor(context.endpoint);
67
+ if (context.outcome === 'ok') {
68
+ const wasTightened = state.tightened;
69
+ const priorCount = state.consecutiveTimeouts;
70
+ state.consecutiveTimeouts = 0;
71
+ state.tightened = false;
72
+ if (wasTightened) {
73
+ this.notify('relax', {
74
+ endpoint: context.endpoint,
75
+ fromTimeoutMs: this.options.tightenedMs,
76
+ toTimeoutMs: this.options.defaultMs,
77
+ consecutiveTimeouts: priorCount,
78
+ });
79
+ }
80
+ return;
81
+ }
82
+ if (context.outcome === 'timeout') {
83
+ state.consecutiveTimeouts += 1;
84
+ if (!state.tightened &&
85
+ state.consecutiveTimeouts >= this.options.tightenAfterTimeouts) {
86
+ state.tightened = true;
87
+ this.notify('tighten', {
88
+ endpoint: context.endpoint,
89
+ fromTimeoutMs: this.options.defaultMs,
90
+ toTimeoutMs: this.options.tightenedMs,
91
+ consecutiveTimeouts: state.consecutiveTimeouts,
92
+ });
93
+ }
94
+ }
95
+ // 'error' is neutral.
96
+ }
97
+ subscribe(observer) {
98
+ this.observers.add(observer);
99
+ return () => {
100
+ this.observers.delete(observer);
101
+ };
102
+ }
103
+ stateFor(endpoint) {
104
+ const key = endpoint.toString();
105
+ let state = this.states.get(key);
106
+ if (!state) {
107
+ state = { tightened: false, consecutiveTimeouts: 0 };
108
+ this.states.set(key, state);
109
+ }
110
+ return state;
111
+ }
112
+ notify(kind, event) {
113
+ for (const observer of this.observers) {
114
+ const handler = kind === 'tighten' ? observer.onTighten : observer.onRelax;
115
+ handler?.(event);
116
+ }
117
+ }
118
+ }
119
+ /**
120
+ * Factory returning a fresh {@link ConstantTimeoutPolicy} on every call.
121
+ * Pass this to {@link PipelineOptions.timeout}.
122
+ */
123
+ export function constantTimeoutPolicy(timeoutMs) {
124
+ // Validate eagerly so misconfiguration is caught at factory creation,
125
+ // not deferred until the first dataset boundary.
126
+ new ConstantTimeoutPolicy(timeoutMs);
127
+ return () => new ConstantTimeoutPolicy(timeoutMs);
128
+ }
129
+ /**
130
+ * Factory returning a fresh {@link AdaptiveTimeoutPolicy} on every call.
131
+ * Pass this to {@link PipelineOptions.timeout}; the Pipeline invokes
132
+ * the factory once per dataset so state resets between datasets.
133
+ */
134
+ export function adaptiveTimeoutPolicy(options) {
135
+ // Validate eagerly (see {@link constantTimeoutPolicy}).
136
+ new AdaptiveTimeoutPolicy(options);
137
+ return () => new AdaptiveTimeoutPolicy(options);
138
+ }
package/dist/stage.d.ts CHANGED
@@ -2,6 +2,7 @@ import { Dataset, Distribution } from '@lde/dataset';
2
2
  import type { Quad } from '@rdfjs/types';
3
3
  import type { Executor, VariableBindings } from './sparql/executor.js';
4
4
  import { NotSupported } from './sparql/executor.js';
5
+ import type { TimeoutPolicy } from './sparql/timeoutPolicy.js';
5
6
  import type { Validator } from './validator.js';
6
7
  import type { Writer } from './writer/writer.js';
7
8
  /** Transforms a quad stream, optionally using dataset metadata. */
@@ -40,6 +41,18 @@ export interface StageOptions {
40
41
  }
41
42
  export interface RunOptions {
42
43
  onProgress?: (itemsProcessed: number, quadsGenerated: number) => void;
44
+ /**
45
+ * Per-dataset {@link TimeoutPolicy} threaded through to executors and
46
+ * item selectors. The Pipeline owns lifecycle (factory invocation per
47
+ * dataset), so a single policy instance covers all stages and child
48
+ * stages within one dataset.
49
+ */
50
+ timeout?: TimeoutPolicy;
51
+ }
52
+ /** Options accepted by {@link ItemSelector.select}. */
53
+ export interface SelectOptions {
54
+ /** Per-call timeout policy. */
55
+ timeout?: TimeoutPolicy;
43
56
  }
44
57
  export declare class Stage {
45
58
  readonly name: string;
@@ -63,6 +76,6 @@ export declare class Stage {
63
76
  }
64
77
  /** Selects items (as variable bindings) for executors to process. Pagination is an implementation detail. */
65
78
  export interface ItemSelector {
66
- select(distribution: Distribution, batchSize?: number): AsyncIterable<VariableBindings>;
79
+ select(distribution: Distribution, batchSize?: number, options?: SelectOptions): AsyncIterable<VariableBindings>;
67
80
  }
68
81
  //# sourceMappingURL=stage.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AAEpD,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;CACvE;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAkDjB,eAAe;IAyJ7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAqBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,GACjB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
1
+ {"version":3,"file":"stage.d.ts","sourceRoot":"","sources":["../src/stage.ts"],"names":[],"mappings":"AAAA,OAAO,EAAE,OAAO,EAAE,YAAY,EAAE,MAAM,cAAc,CAAC;AACrD,OAAO,KAAK,EAAE,IAAI,EAAE,MAAM,cAAc,CAAC;AACzC,OAAO,KAAK,EAAE,QAAQ,EAAE,gBAAgB,EAAE,MAAM,sBAAsB,CAAC;AACvE,OAAO,EAAE,YAAY,EAAE,MAAM,sBAAsB,CAAC;AACpD,OAAO,KAAK,EAAE,aAAa,EAAE,MAAM,2BAA2B,CAAC;AAE/D,OAAO,KAAK,EAAE,SAAS,EAAE,MAAM,gBAAgB,CAAC;AAChD,OAAO,KAAK,EAAE,MAAM,EAAE,MAAM,oBAAoB,CAAC;AAGjD,mEAAmE;AACnE,MAAM,MAAM,aAAa,GAAG,CAC1B,KAAK,EAAE,aAAa,CAAC,IAAI,CAAC,EAC1B,OAAO,EAAE,OAAO,KACb,aAAa,CAAC,IAAI,CAAC,CAAC;AAEzB,MAAM,WAAW,YAAY;IAC3B,IAAI,EAAE,MAAM,CAAC;IACb,SAAS,EAAE,QAAQ,GAAG,QAAQ,EAAE,CAAC;IACjC,YAAY,CAAC,EAAE,YAAY,CAAC;IAC5B;;;;;;;;OAQG;IACH,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;;;;;OAOG;IACH,cAAc,CAAC,EAAE,MAAM,CAAC;IACxB,uDAAuD;IACvD,MAAM,CAAC,EAAE,KAAK,EAAE,CAAC;IACjB,qFAAqF;IACrF,UAAU,CAAC,EAAE;QACX,SAAS,EAAE,SAAS,CAAC;QACrB,iEAAiE;QACjE,SAAS,CAAC,EAAE,OAAO,GAAG,MAAM,GAAG,MAAM,CAAC;KACvC,CAAC;CACH;AAED,MAAM,WAAW,UAAU;IACzB,UAAU,CAAC,EAAE,CAAC,cAAc,EAAE,MAAM,EAAE,cAAc,EAAE,MAAM,KAAK,IAAI,CAAC;IACtE;;;;;OAKG;IACH,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,uDAAuD;AACvD,MAAM,WAAW,aAAa;IAC5B,+BAA+B;IAC/B,OAAO,CAAC,EAAE,aAAa,CAAC;CACzB;AAED,qBAAa,KAAK;IAChB,QAAQ,CAAC,IAAI,EAAE,MAAM,CAAC;IACtB,QAAQ,CAAC,MAAM,EAAE,SAAS,KAAK,EAAE,CAAC;IAClC,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAa;IACvC,OAAO,CAAC,QAAQ,CAAC,YAAY,CAAC,CAAe;IAC7C,OAAO,CAAC,QAAQ,CAAC,SAAS,CAAS;IACnC,OAAO,CAAC,QAAQ,CAAC,cAAc,CAAS;IACxC,OAAO,CAAC,QAAQ,CAAC,UAAU,CAAC,CAA6B;gBAE7C,OAAO,EAAE,YAAY;IAYjC,mDAAmD;IACnD,IAAI,SAAS,IAAI,SAAS,GAAG,SAAS,CAErC;IAEK,GAAG,CACP,OAAO,EAAE,OAAO,EAChB,YAAY,EAAE,YAAY,EAC1B,MAAM,EAAE,MAAM,EACd,OAAO,CAAC,EAAE,UAAU,GACnB,OAAO,CAAC,YAAY,GAAG,IAAI,CAAC;YAqDjB,eAAe;IAwJ7B;;;OAGG;YACW,cAAc;YAqBd,UAAU;CAwBzB;AAUD,6GAA6G;AAC7G,MAAM,WAAW,YAAY;IAC3B,MAAM,CACJ,YAAY,EAAE,YAAY,EAC1B,SAAS,CAAC,EAAE,MAAM,EAClB,OAAO,CAAC,EAAE,aAAa,GACtB,aAAa,CAAC,gBAAgB,CAAC,CAAC;CACpC"}
package/dist/stage.js CHANGED
@@ -25,10 +25,13 @@ export class Stage {
25
25
  return this.validation?.validator;
26
26
  }
27
27
  async run(dataset, distribution, writer, options) {
28
+ const timeout = options?.timeout;
28
29
  if (this.itemSelector) {
29
- return this.runWithSelector(this.itemSelector.select(distribution, this.batchSize), dataset, distribution, writer, options);
30
+ return this.runWithSelector(this.itemSelector.select(distribution, this.batchSize, {
31
+ timeout,
32
+ }), dataset, distribution, writer, options);
30
33
  }
31
- const streams = await this.executeAll(dataset, distribution);
34
+ const streams = await this.executeAll(dataset, distribution, timeout);
32
35
  if (streams instanceof NotSupported) {
33
36
  return streams;
34
37
  }
@@ -116,7 +119,10 @@ export class Stage {
116
119
  track((async () => {
117
120
  // Run all executors for this batch in parallel.
118
121
  const executorOutputs = await Promise.all(this.executors.map(async (executor) => {
119
- const result = await executor.execute(dataset, distribution, { bindings });
122
+ const result = await executor.execute(dataset, distribution, {
123
+ bindings,
124
+ timeout: options?.timeout,
125
+ });
120
126
  if (result instanceof NotSupported)
121
127
  return [];
122
128
  hasResults = true;
@@ -197,8 +203,8 @@ export class Stage {
197
203
  // 'skip': discard
198
204
  return [];
199
205
  }
200
- async executeAll(dataset, distribution) {
201
- const results = await Promise.all(this.executors.map((executor) => executor.execute(dataset, distribution)));
206
+ async executeAll(dataset, distribution, timeout) {
207
+ const results = await Promise.all(this.executors.map((executor) => executor.execute(dataset, distribution, { timeout })));
202
208
  const streams = [];
203
209
  for (const result of results) {
204
210
  if (!(result instanceof NotSupported)) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/pipeline",
3
- "version": "0.30.1",
3
+ "version": "0.30.3",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/pipeline"
@@ -26,7 +26,7 @@
26
26
  "dependencies": {
27
27
  "@lde/dataset": "0.7.4",
28
28
  "@lde/dataset-registry-client": "0.8.0",
29
- "@lde/distribution-probe": "0.1.3",
29
+ "@lde/distribution-probe": "0.1.4",
30
30
  "@lde/sparql-importer": "0.6.2",
31
31
  "@lde/sparql-server": "0.4.11",
32
32
  "@rdfjs/types": "^2.0.1",