@lde/distribution-probe 0.2.0 → 0.2.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -48,3 +48,24 @@ When on, the probe `GET`s the dump — **regardless of size** — and reads only
48
48
  A thrown exception from `fetch` (DNS failure, connection refused, socket reset, TLS error, timeout after the configured `timeoutMs` – default 5 000 ms) is a connection-level failure. The probe retries these up to `retries` times (default 2) with a short backoff before giving up and returning a `NetworkError`. This turns a transient transport blip into a reliable single measurement without looking backward across checks. A genuine outage still resolves to a `NetworkError` on the current check – every attempt fails – but note each attempt gets its own `timeoutMs`, so an endpoint that fails only by timing out takes up to `(retries + 1) × timeoutMs` (plus backoff) to be reported down. HTTP error responses (4xx/5xx) and content-validation failures are real ‘down’ states and are **never** retried.
49
49
 
50
50
  `NetworkError.message` includes the underlying `error.cause` (e.g. `ECONNRESET`, `UND_ERR_SOCKET “other side closed”`) when Node wraps one, so observations record what actually failed rather than a bare ‘fetch failed’.
51
+
52
+ ## Probing many distributions
53
+
54
+ `probeMany` probes an array of distributions concurrently and returns one result per input, in input order. Each distribution is probed once with `probe`, so every behaviour above applies per distribution; like `probe`, `probeMany` never throws – a probe that fails is reported as a `NetworkError` in its slot.
55
+
56
+ ```ts
57
+ import { probeMany } from '@lde/distribution-probe';
58
+
59
+ const results = await probeMany(distributions, {
60
+ concurrency: 20, // max probes in flight across all hosts (default 20)
61
+ perHostConcurrency: 4, // max probes in flight against one host (default 4)
62
+ validateRdfContent: true, // any ProbeOptions are forwarded to each probe
63
+ });
64
+ ```
65
+
66
+ Two caps bound the batch:
67
+
68
+ - **`concurrency`** bounds the total fan-out, so a large catalogue does not exhaust sockets or buffer too many response bodies at once.
69
+ - **`perHostConcurrency`** bounds the burst any one server sees, keeping the batch a polite client: a catalogue that declares many distributions on a single host (e.g. a download endpoint per named graph) will not trip that server’s rate limiter (HTTP 429). Distributions sharing a host (by `accessUrl`) contend for the same budget; a probe whose host is saturated waits while probes for other hosts proceed, so one busy host never idles the global pool.
70
+
71
+ All other `ProbeOptions` (`timeoutMs`, `retries`, `validateRdfContent`, and the rest) are forwarded unchanged to every probe.
package/dist/index.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeOptions, type ProbeResultType, } from './probe.js';
1
+ export { probe, probeMany, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeOptions, type ProbeManyOptions, type ProbeResultType, } from './probe.js';
2
2
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,YAAY,EACjB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,SAAS,EACT,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC"}
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
1
+ export { probe, probeMany, NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
package/dist/probe.d.ts CHANGED
@@ -52,6 +52,35 @@ export interface ProbeOptions {
52
52
  */
53
53
  rdfValidationBudgetMs?: number;
54
54
  }
55
+ /**
56
+ * Options for {@link probeMany}: the per-probe {@link ProbeOptions} plus the
57
+ * concurrency budgets that bound the batch and an optional progress callback.
58
+ */
59
+ export interface ProbeManyOptions extends ProbeOptions {
60
+ /**
61
+ * Maximum number of probes to run at once across all hosts. Bounds the batch’s
62
+ * total fan-out so a large catalogue does not exhaust sockets or buffer too many
63
+ * response bodies at once. Default 20; a value that is not a positive integer also falls back to 20.
64
+ */
65
+ concurrency?: number;
66
+ /**
67
+ * Maximum number of probes to run at once against a single host. Bounds the
68
+ * burst any one server sees, so a catalogue that declares many distributions on
69
+ * one host (e.g. a download endpoint per named graph) does not trip its rate
70
+ * limiter (HTTP 429). A probe whose host is at this cap waits while probes for
71
+ * other hosts proceed, so this never idles the global pool. Default 4; a value that is not a positive integer also falls back to 4.
72
+ */
73
+ perHostConcurrency?: number;
74
+ /**
75
+ * Called once after each probe settles, with the number of probes completed so
76
+ * far and the total to run (`distributions.length`). Lets a caller drive a
77
+ * determinate progress indicator while a large batch runs. Fires `total` times,
78
+ * ending at `(total, total)`; the order reflects completion, not input order.
79
+ * Never called for an empty batch. The callback must not throw: `probeMany` does
80
+ * not catch its exceptions, so keep it cheap and side-effect-only.
81
+ */
82
+ onProgress?: (completed: number, total: number) => void;
83
+ }
55
84
  /**
56
85
  * Result of a network error during probing.
57
86
  */
@@ -109,5 +138,19 @@ export type ProbeResultType = SparqlProbeResult | DataDumpProbeResult | NetworkE
109
138
  * Returns a pure result object; never throws.
110
139
  */
111
140
  export declare function probe(distribution: Distribution, options?: ProbeOptions): Promise<ProbeResultType>;
141
+ /**
142
+ * Probe many distributions concurrently, bounded by a global cap and a per-host
143
+ * cap, returning one result per input in input order. Like {@link probe}, a
144
+ * failing probe does not throw: it is reported as a {@link NetworkError} in its
145
+ * slot. An exception thrown by `options.onProgress` is not caught, however.
146
+ *
147
+ * The per-host cap keeps the batch a polite client. Distributions sharing a host
148
+ * (by {@link Distribution.accessUrl}) contend for the same budget, so no single
149
+ * server is hit by the full global pool at once — the burst that trips a rate
150
+ * limiter (HTTP 429). When the next queued probe’s host is saturated it is
151
+ * skipped in favour of a later probe on a different host, so one busy host never
152
+ * idles the global pool (no head-of-line blocking).
153
+ */
154
+ export declare function probeMany(distributions: readonly Distribution[], options?: ProbeManyOptions): Promise<ProbeResultType[]>;
112
155
  export {};
113
156
  //# sourceMappingURL=probe.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAKnE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;;;;OAUG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B;;;;;;;;;OASG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAChC;AAgCD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAqBD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;;GASG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
1
+ {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAKnE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;;;;OAUG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B;;;;;;;;;OASG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAChC;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;OAMG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;IAC5B;;;;;;;OAOG;IACH,UAAU,CAAC,EAAE,CAAC,SAAS,EAAE,MAAM,EAAE,KAAK,EAAE,MAAM,KAAK,IAAI,CAAC;CACzD;AAkCD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAqBD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;;GASG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAqD1B;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,SAAS,CAC7B,aAAa,EAAE,SAAS,YAAY,EAAE,EACtC,OAAO,CAAC,EAAE,gBAA
gB,GACzB,OAAO,CAAC,eAAe,EAAE,CAAC,CAuC5B"}
package/dist/probe.js CHANGED
@@ -5,6 +5,8 @@ import { createGunzip } from 'node:zlib';
5
5
  const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
6
6
  const DEFAULT_TIMEOUT_MS = 5000;
7
7
  const DEFAULT_RETRIES = 2;
8
+ const DEFAULT_PROBE_CONCURRENCY = 20; // probeMany: global cap used when options.concurrency is missing or not a positive integer
9
+ const DEFAULT_PROBE_PER_HOST_CONCURRENCY = 4; // probeMany: per-host cap used when options.perHostConcurrency is missing or not a positive integer
8
10
  /**
9
11
  * Default soft deadline for finding the first triple when content validation is
10
12
  * on (capped at `timeoutMs`). Two seconds comfortably covers a static file
@@ -169,6 +171,107 @@ export async function probe(distribution, options) {
169
171
  // real cost of a down endpoint.
170
172
  return new NetworkError(url, describeNetworkError(lastError), Math.round(performance.now() - overallStart));
171
173
  }
174
+ /**
175
+ * Probe many distributions concurrently, bounded by a global cap and a per-host
176
+ * cap, returning one result per input in input order. Like {@link probe}, a
177
+ * failing probe does not throw: it is reported as a {@link NetworkError} in its
178
+ * slot. An exception thrown by `options.onProgress` is not caught, however.
179
+ *
180
+ * The per-host cap keeps the batch a polite client. Distributions sharing a host
181
+ * (by {@link Distribution.accessUrl}) contend for the same budget, so no single
182
+ * server is hit by the full global pool at once — the burst that trips a rate
183
+ * limiter (HTTP 429). When the next queued probe’s host is saturated it is
184
+ * skipped in favour of a later probe on a different host, so one busy host never
185
+ * idles the global pool (no head-of-line blocking).
186
+ */
187
+ export async function probeMany(distributions, options) {
188
+ // Coerce each budget to a positive integer (this is a fallback, not a clamp):
189
+ // a zero, negative, fractional, or NaN limit would otherwise stall the
190
+ // scheduler (no task ever starts, so the promise never resolves) or overrun
191
+ // the cap, so an invalid value is replaced by the module default rather than
192
+ // trusted.
193
+ const globalLimit = positiveIntOrDefault(options?.concurrency, DEFAULT_PROBE_CONCURRENCY);
194
+ const perHostLimit = positiveIntOrDefault(options?.perHostConcurrency, DEFAULT_PROBE_PER_HOST_CONCURRENCY);
195
+ // Probes contend per host. An authority-less URL (e.g. urn:, file:) has an
196
+ // empty host, so it falls back to its full href and never shares a budget with
197
+ // an unrelated one.
198
+ const hostKeys = distributions.map((distribution) => distribution.accessUrl.host || distribution.accessUrl.href);
199
+ // Report progress as each probe settles. mapHostLimited resolves results in
200
+ // input order, but tasks complete out of order, so count completions here
201
+ // rather than rely on result position. The total is the batch size. A throw from onProgress is not caught here: it rejects the task below.
202
+ const onProgress = options?.onProgress;
203
+ const total = distributions.length;
204
+ let completed = 0;
205
+ return mapHostLimited(distributions, hostKeys, globalLimit, perHostLimit, async (distribution) => {
206
+ const result = await probe(distribution, options);
207
+ completed += 1;
208
+ onProgress?.(completed, total);
209
+ return result;
210
+ });
211
+ }
212
+ /**
213
+ * Coerce an optional concurrency budget to a usable value. Returns `value` unchanged
214
+ * when it is an integer >= 1; returns `fallback` when it is undefined, zero, negative,
215
+ * fractional, NaN or infinite. NOTE(review): said to match probe()’s handling of an invalid retries value — confirm.
216
+ */
217
+ function positiveIntOrDefault(value, fallback) {
218
+ return value !== undefined && Number.isInteger(value) && value >= 1
219
+ ? value
220
+ : fallback;
221
+ }
222
+ /**
223
+ * Run `task` over `items` with two concurrency caps — a global cap and a per-host
224
+ * cap keyed by `hostKeys[index]` — resolving to results in input order. When the
225
+ * next queued item’s host is at the per-host cap it is skipped for a later item on
226
+ * a different host, so a saturated host never idles the global pool (no head-of-line
227
+ * blocking); the skipped host always has a task in flight, whose completion re-runs
228
+ * the scheduler, so the queue always drains. `task` must not reject — callers wrap
229
+ * failures into a result value — as a rejection would leave the promise pending.
230
+ */
231
+ function mapHostLimited(items, hostKeys, globalLimit, perHostLimit, task) {
232
+ const results = new Array(items.length);
233
+ const perHostInFlight = new Map();
234
+ const pending = items.map((_unused, index) => index);
235
+ let globalInFlight = 0;
236
+ let settledCount = 0;
237
+ const adjustHost = (host, delta) => {
238
+ perHostInFlight.set(host, (perHostInFlight.get(host) ?? 0) + delta);
239
+ };
240
+ return new Promise((resolve) => {
241
+ const schedule = () => {
242
+ let cursor = 0;
243
+ while (cursor < pending.length && globalInFlight < globalLimit) {
244
+ const index = pending[cursor];
245
+ const host = hostKeys[index];
246
+ if ((perHostInFlight.get(host) ?? 0) >= perHostLimit) {
247
+ cursor++; // Host saturated; leave it queued and try a later, different host.
248
+ continue;
249
+ }
250
+ pending.splice(cursor, 1);
251
+ globalInFlight++;
252
+ adjustHost(host, 1);
253
+ void task(items[index]).then((result) => {
254
+ results[index] = result;
255
+ globalInFlight--;
256
+ adjustHost(host, -1);
257
+ settledCount++;
258
+ if (settledCount === items.length) {
259
+ resolve(results);
260
+ }
261
+ else {
262
+ schedule();
263
+ }
264
+ });
265
+ // pending[cursor] now holds the next queued item; do not advance cursor.
266
+ }
267
+ };
268
+ schedule();
269
+ // Resolve immediately when there is nothing to settle (empty input); a
270
+ // non-empty run resolves via the task completion above.
271
+ if (settledCount === items.length)
272
+ resolve(results);
273
+ });
274
+ }
172
275
  function delay(milliseconds) {
173
276
  return new Promise((resolve) => setTimeout(resolve, milliseconds));
174
277
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/distribution-probe",
3
- "version": "0.2.0",
3
+ "version": "0.2.2",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/distribution-probe"