@lde/distribution-probe 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -48,3 +48,24 @@ When on, the probe `GET`s the dump — **regardless of size** — and reads only
48
48
  A thrown exception from `fetch` (DNS failure, connection refused, socket reset, TLS error, timeout after the configured `timeoutMs` – default 5 000 ms) is a connection-level failure. The probe retries these up to `retries` times (default 2) with a short backoff before giving up and returning a `NetworkError`. This turns a transient transport blip into a reliable single measurement without looking backward across checks. A genuine outage still resolves to a `NetworkError` on the current check – every attempt fails – but note each attempt gets its own `timeoutMs`, so an endpoint that fails only by timing out takes up to `(retries + 1) × timeoutMs` (plus backoff) to be reported down. HTTP error responses (4xx/5xx) and content-validation failures are real ‘down’ states and are **never** retried.
49
49
 
50
50
  `NetworkError.message` includes the underlying `error.cause` (e.g. `ECONNRESET`, `UND_ERR_SOCKET “other side closed”`) when Node wraps one, so observations record what actually failed rather than a bare ‘fetch failed’.
51
+
52
+ ## Probing many distributions
53
+
54
+ `probeMany` probes an array of distributions concurrently and returns one result per input, in input order. Each distribution is probed once with `probe`, so every behaviour above applies per distribution; like `probe`, `probeMany` never throws – a probe that fails is reported as a `NetworkError` in its slot.
55
+
56
+ ```ts
57
+ import { probeMany } from '@lde/distribution-probe';
58
+
59
+ const results = await probeMany(distributions, {
60
+ concurrency: 20, // max probes in flight across all hosts (default 20)
61
+ perHostConcurrency: 4, // max probes in flight against one host (default 4)
62
+ validateRdfContent: true, // any ProbeOptions are forwarded to each probe
63
+ });
64
+ ```
65
+
66
+ Two caps bound the batch:
67
+
68
+ - **`concurrency`** bounds the total fan-out, so a large catalogue does not exhaust sockets or buffer too many response bodies at once.
69
+ - **`perHostConcurrency`** bounds the burst any one server sees, keeping the batch a polite client: a catalogue that declares many distributions on a single host (e.g. a download endpoint per named graph) will not trip that server’s rate limiter (HTTP 429). Distributions with the same `accessUrl.host` (hostname plus any non-default port) contend for the same budget; an `accessUrl` without a host (e.g. `urn:` or `file:`) is keyed by its full URL instead. A probe whose host is saturated waits while probes for other hosts proceed, so one busy host never idles the global pool.
70
+
71
+ All other `ProbeOptions` (`timeoutMs`, `retries`, `validateRdfContent`, and the rest) are forwarded unchanged to every probe.
package/dist/index.d.ts CHANGED
@@ -1,2 +1,2 @@
1
- export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeOptions, type ProbeResultType, } from './probe.js';
1
+ export { probe, probeMany, NetworkError, SparqlProbeResult, DataDumpProbeResult, type ProbeOptions, type ProbeManyOptions, type ProbeResultType, } from './probe.js';
2
2
  //# sourceMappingURL=index.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,YAAY,EACjB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC"}
1
+ {"version":3,"file":"index.d.ts","sourceRoot":"","sources":["../src/index.ts"],"names":[],"mappings":"AAAA,OAAO,EACL,KAAK,EACL,SAAS,EACT,YAAY,EACZ,iBAAiB,EACjB,mBAAmB,EACnB,KAAK,YAAY,EACjB,KAAK,gBAAgB,EACrB,KAAK,eAAe,GACrB,MAAM,YAAY,CAAC"}
package/dist/index.js CHANGED
@@ -1 +1 @@
1
- export { probe, NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
1
+ export { probe, probeMany, NetworkError, SparqlProbeResult, DataDumpProbeResult, } from './probe.js';
package/dist/probe.d.ts CHANGED
@@ -52,6 +52,26 @@ export interface ProbeOptions {
52
52
  */
53
53
  rdfValidationBudgetMs?: number;
54
54
  }
55
+ /**
56
+ * Options for {@link probeMany}: the per-probe {@link ProbeOptions} plus the
57
+ * concurrency budgets that bound the batch.
58
+ */
59
+ export interface ProbeManyOptions extends ProbeOptions {
60
+ /**
61
+ * Maximum number of probes to run at once across all hosts. Bounds the batch’s
62
+ * total fan-out so a large catalogue does not exhaust sockets or buffer too many
63
+ * response bodies at once. Default 20.
64
+ */
65
+ concurrency?: number;
66
+ /**
67
+ * Maximum number of probes to run at once against a single host. Bounds the
68
+ * burst any one server sees, so a catalogue that declares many distributions on
69
+ * one host (e.g. a download endpoint per named graph) does not trip its rate
70
+ * limiter (HTTP 429). A probe whose host is at this cap waits while probes for
71
+ * other hosts proceed, so this never idles the global pool. Default 4.
72
+ */
73
+ perHostConcurrency?: number;
74
+ }
55
75
  /**
56
76
  * Result of a network error during probing.
57
77
  */
@@ -109,5 +129,19 @@ export type ProbeResultType = SparqlProbeResult | DataDumpProbeResult | NetworkE
109
129
  * Returns a pure result object; never throws.
110
130
  */
111
131
  export declare function probe(distribution: Distribution, options?: ProbeOptions): Promise<ProbeResultType>;
132
+ /**
133
+ * Probe many distributions concurrently, bounded by a global cap and a per-host
134
+ * cap, returning one result per input in input order. Like {@link probe}, this
135
+ * never throws: a probe that somehow fails is reported as a {@link NetworkError}
136
+ * in its slot.
137
+ *
138
+ * The per-host cap keeps the batch a polite client. Distributions sharing a host
139
+ * (by {@link Distribution.accessUrl}) contend for the same budget, so no single
140
+ * server is hit by the full global pool at once — the burst that trips a rate
141
+ * limiter (HTTP 429). When the next queued probe’s host is saturated it is
142
+ * skipped in favour of a later probe on a different host, so one busy host never
143
+ * idles the global pool (no head-of-line blocking).
144
+ */
145
+ export declare function probeMany(distributions: readonly Distribution[], options?: ProbeManyOptions): Promise<ProbeResultType[]>;
112
146
  export {};
113
147
  //# sourceMappingURL=probe.d.ts.map
@@ -1 +1 @@
1
- {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAKnE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;;;;OAUG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B;;;;;;;;;OASG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAChC;AAgCD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAqBD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;;GASG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
1
+ {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAKnE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;IACjB;;;;;;;;;;OAUG;IACH,kBAAkB,CAAC,EAAE,OAAO,CAAC;IAC7B;;;;;;;;;OASG;IACH,qBAAqB,CAAC,EAAE,MAAM,CAAC;CAChC;AAED;;;GAGG;AACH,MAAM,WAAW,gBAAiB,SAAQ,YAAY;IACpD;;;;OAIG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;OAMG;IACH,kBAAkB,CAAC,EAAE,MAAM,CAAC;CAC7B;AAkCD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAqBD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;;GASG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAqD1B;AAED;;;;;;;;;;;;GAYG;AACH,wBAAsB,SAAS,CAC7B,aAAa,EAAE,SAAS,YAAY,EAAE,EACtC,OAAO,CAAC,EAAE,gBAAgB,GACzB,OAAO,CAAC,eAAe,EAAE,CAAC,CA4B5B"}
package/dist/probe.js CHANGED
@@ -5,6 +5,8 @@ import { createGunzip } from 'node:zlib';
5
5
  const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
6
6
  const DEFAULT_TIMEOUT_MS = 5000;
7
7
  const DEFAULT_RETRIES = 2;
8
+ const DEFAULT_PROBE_CONCURRENCY = 20;
9
+ const DEFAULT_PROBE_PER_HOST_CONCURRENCY = 4;
8
10
  /**
9
11
  * Default soft deadline for finding the first triple when content validation is
10
12
  * on (capped at `timeoutMs`). Two seconds comfortably covers a static file
@@ -169,6 +171,96 @@ export async function probe(distribution, options) {
169
171
  // real cost of a down endpoint.
170
172
  return new NetworkError(url, describeNetworkError(lastError), Math.round(performance.now() - overallStart));
171
173
  }
174
+ /**
175
+ * Probe many distributions concurrently, bounded by a global cap and a per-host
176
+ * cap, returning one result per input in input order. Like {@link probe}, this
177
+ * never throws: a probe that somehow fails is reported as a {@link NetworkError}
178
+ * in its slot.
179
+ *
180
+ * The per-host cap keeps the batch a polite client. Distributions sharing a host
181
+ * (by {@link Distribution.accessUrl}) contend for the same budget, so no single
182
+ * server is hit by the full global pool at once — the burst that trips a rate
183
+ * limiter (HTTP 429). When the next queued probe’s host is saturated it is
184
+ * skipped in favour of a later probe on a different host, so one busy host never
185
+ * idles the global pool (no head-of-line blocking).
186
+ */
187
+ export async function probeMany(distributions, options) {
188
+ // Validate the budgets as positive integers, mirroring how probe() treats an
189
+ // invalid retries value: a zero, negative, fractional, or NaN limit would
190
+ // otherwise stall the scheduler (no task ever starts, so the promise never
191
+ // resolves) or overrun the cap, so fall back to the default rather than trust
192
+ // the caller.
193
+ const globalLimit = positiveIntOrDefault(options?.concurrency, DEFAULT_PROBE_CONCURRENCY);
194
+ const perHostLimit = positiveIntOrDefault(options?.perHostConcurrency, DEFAULT_PROBE_PER_HOST_CONCURRENCY);
195
+ // Probes contend per host. An authority-less URL (e.g. urn:, file:) has an
196
+ // empty host, so it falls back to its full href and never shares a budget with
197
+ // an unrelated one.
198
+ const hostKeys = distributions.map((distribution) => distribution.accessUrl.host || distribution.accessUrl.href);
199
+ return mapHostLimited(distributions, hostKeys, globalLimit, perHostLimit, (distribution) => probe(distribution, options));
200
+ }
201
+ /**
202
+ * Coerce an optional concurrency budget to a usable value: a positive integer is
203
+ * taken as-is; undefined, zero, negative, fractional, or NaN falls back to the
204
+ * default. Matches probe()’s treatment of an invalid retries value.
205
+ */
206
+ function positiveIntOrDefault(value, fallback) {
207
+ return value !== undefined && Number.isInteger(value) && value >= 1
208
+ ? value
209
+ : fallback;
210
+ }
211
+ /**
212
+ * Run `task` over `items` with two concurrency caps — a global cap and a per-host
213
+ * cap keyed by `hostKeys[index]` — resolving to results in input order. When the
214
+ * next queued item’s host is at the per-host cap it is skipped for a later item on
215
+ * a different host, so a saturated host never idles the global pool (no head-of-line
216
+ * blocking); the skipped host always has a task in flight, whose completion re-runs
217
+ * the scheduler, so the queue always drains. `task` must not reject — callers wrap
218
+ * failures into a result value — as a rejection would leave the promise pending.
219
+ */
220
+ function mapHostLimited(items, hostKeys, globalLimit, perHostLimit, task) {
221
+ const results = new Array(items.length);
222
+ const perHostInFlight = new Map();
223
+ const pending = items.map((_unused, index) => index);
224
+ let globalInFlight = 0;
225
+ let settledCount = 0;
226
+ const adjustHost = (host, delta) => {
227
+ perHostInFlight.set(host, (perHostInFlight.get(host) ?? 0) + delta);
228
+ };
229
+ return new Promise((resolve) => {
230
+ const schedule = () => {
231
+ let cursor = 0;
232
+ while (cursor < pending.length && globalInFlight < globalLimit) {
233
+ const index = pending[cursor];
234
+ const host = hostKeys[index];
235
+ if ((perHostInFlight.get(host) ?? 0) >= perHostLimit) {
236
+ cursor++; // Host saturated; leave it queued and try a later, different host.
237
+ continue;
238
+ }
239
+ pending.splice(cursor, 1);
240
+ globalInFlight++;
241
+ adjustHost(host, 1);
242
+ void task(items[index]).then((result) => {
243
+ results[index] = result;
244
+ globalInFlight--;
245
+ adjustHost(host, -1);
246
+ settledCount++;
247
+ if (settledCount === items.length) {
248
+ resolve(results);
249
+ }
250
+ else {
251
+ schedule();
252
+ }
253
+ });
254
+ // pending[cursor] now holds the next queued item; do not advance cursor.
255
+ }
256
+ };
257
+ schedule();
258
+ // Resolve immediately when there is nothing to settle (empty input); a
259
+ // non-empty run resolves via the task completion above.
260
+ if (settledCount === items.length)
261
+ resolve(results);
262
+ });
263
+ }
172
264
  function delay(milliseconds) {
173
265
  return new Promise((resolve) => setTimeout(resolve, milliseconds));
174
266
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/distribution-probe",
3
- "version": "0.2.0",
3
+ "version": "0.2.1",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/distribution-probe"