npm - @lde/distribution-probe - Versions diffs - 0.1.10 → 0.1.12 - Mend

@lde/distribution-probe 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

package/README.md CHANGED Viewed

@@ -17,10 +17,10 @@ const result = await probe(distribution);
 ### SPARQL endpoints
-Sends `POST` with `SELECT * { ?s ?p ?o } LIMIT 1` and `Accept: application/sparql-results+json`, then:
+Sends `POST` with the configured query (default `SELECT * { ?s ?p ?o } LIMIT 1`). The query type is detected (`ASK` / `SELECT` / `CONSTRUCT` / `DESCRIBE`) and drives both the `Accept` header and how the response is validated:
-- **Content-Type is enforced.** The response Content-Type must start with `application/sparql-results+json`; anything else fails the probe (`isSuccess() === false`). This rules out HTML error pages served with `200 OK`.
-- The JSON body must parse and contain a `results` object. Empty bodies, invalid JSON, and missing `results` all fail the probe with a `failureReason`.
+- **`ASK` / `SELECT`** request `application/sparql-results+json`, with `application/sparql-results+xml` as a lower-priority fallback. The response Content-Type must be one of those — anything else fails the probe (`isSuccess() === false`), which rules out HTML error pages served with `200 OK`. The body must parse and contain a results document (a `results` object for `SELECT`, a `boolean` for `ASK`); empty bodies, invalid JSON/XML, and missing results all fail with a `failureReason`.
+- **`CONSTRUCT` / `DESCRIBE`** request the common RDF serializations (`text/turtle`, `application/n-triples`, `application/rdf+xml`, `application/ld+json`, `application/n-quads`, `application/trig`) and accept any of them. A `2xx` RDF response confirms availability, and **an empty graph is a valid answer** — so an empty body does not fail the probe (unlike a data dump, which must be non-empty). The body is not parse-validated.
 ### Data dumps
@@ -32,4 +32,6 @@ Sends `HEAD` with `Accept: <distribution.mimeType>` and `Accept-Encoding: identi
 ### Network errors
-Any thrown exception from `fetch` (DNS, connection refused, TLS, timeout after the configured `timeout` – default 5 000 ms) is caught and returned as a `NetworkError` with the original message.
+A thrown exception from `fetch` (DNS failure, connection refused, socket reset, TLS error, timeout after the configured `timeoutMs` – default 5 000 ms) is a connection-level failure. The probe retries these up to `retries` times (default 2) with a short backoff before giving up and returning a `NetworkError`. This turns a transient transport blip into a reliable single measurement without looking backward across checks. A genuine outage still resolves to a `NetworkError` on the current check – every attempt fails – but note each attempt gets its own `timeoutMs`, so an endpoint that fails only by timing out takes up to `(retries + 1) × timeoutMs` (plus backoff) to be reported down. HTTP error responses (4xx/5xx) and content-validation failures are real ‘down’ states and are **never** retried.
+`NetworkError.message` includes the underlying `error.cause` (e.g. `ECONNRESET`, `UND_ERR_SOCKET “other side closed”`) when Node wraps one, so observations record what actually failed rather than a bare ‘fetch failed’.

package/dist/probe.d.ts CHANGED Viewed

@@ -17,6 +17,17 @@ export interface ProbeOptions {
      * distributions. Defaults to `SELECT * { ?s ?p ?o } LIMIT 1`.
      */
     sparqlQuery?: string;
+    /**
+     * How many times to retry a connection-level failure (DNS, connection
+     * refused, socket reset, TLS error, timeout) before returning a
+     * {@link NetworkError}. Only transport errors are retried within the same
+     * check, so a transient blip does not flip an otherwise healthy distribution
+     * to ‘unavailable’; HTTP error responses and content-validation failures are
+     * genuine ‘down’ states and are never retried. Set to `0` to disable.
+     * Defaults to `2`. A non-integer or otherwise invalid value falls back to
+     * the default; negative values are clamped to `0`.
+     */
+    retries?: number;
 }
 /**
  * Result of a network error during probing.

package/dist/probe.d.ts.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;~~CACtB~~;~~AAKD~~;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;~~AAMD~~;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,~~CAkC1B~~"}
1	+ {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AASD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAqBD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAqD1B"}

package/dist/probe.js CHANGED Viewed

@@ -3,6 +3,9 @@ import { rdfParser } from 'rdf-parse';
 import { Readable } from 'node:stream';
 const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
 const DEFAULT_TIMEOUT_MS = 5000;
+const DEFAULT_RETRIES = 2;
+/** Base backoff between retries; the nth retry waits `n × base`. */
+const RETRY_BACKOFF_MS = 250;
 /**
  * Result of a network error during probing.
  */
@@ -48,7 +51,21 @@ class ProbeResult {
 }
 const SPARQL_RESULTS_JSON = 'application/sparql-results+json';
 const SPARQL_RESULTS_XML = 'application/sparql-results+xml';
-const SPARQL_RDF_RESULTS = 'application/n-triples';
+/**
+ * RDF serializations a CONSTRUCT or DESCRIBE query may be answered with, in
+ * preference order. The endpoint chooses the serialization, so availability must
+ * not hinge on a single one: accepting only n-triples flagged healthy endpoints
+ * that answer in Turtle (a common default) as unavailable, and made endpoints
+ * that cannot emit n-triples reject the probe with HTTP 406.
+ */
+const SPARQL_RDF_RESULTS = [
+    'text/turtle',
+    'application/n-triples',
+    'application/rdf+xml',
+    'application/ld+json',
+    'application/n-quads',
+    'application/trig',
+];
 /**
  * Result of probing a SPARQL endpoint.
  */
@@ -100,23 +117,75 @@ export async function probe(distribution, options) {
     const [authUrl, authHeaders] = distribution.accessUrl !== undefined
         ? extractUrlCredentials(distribution.accessUrl, resolved.headers)
         : [new URL(url), new Headers(resolved.headers)];
-    const start = performance.now();
-    try {
-        if (distribution.isSparql()) {
-            return await probeSparqlEndpoint(authUrl.toString(), distribution, resolved, authHeaders, start);
+    // Retry only connection-level failures (a thrown `fetch`): HTTP error
+    // responses and content-validation failures are returned as result objects,
+    // never thrown, so they exit the loop on the first attempt and are not
+    // retried. A genuine outage still resolves to a NetworkError – every attempt
+    // fails – but note each attempt gets its own `timeoutMs`, so an endpoint that
+    // fails only by timing out takes up to (retries + 1) × timeoutMs (plus
+    // backoff) to be reported down.
+    const overallStart = performance.now();
+    let lastError;
+    for (let attempt = 0; attempt <= resolved.retries; attempt++) {
+        if (attempt > 0) {
+            await delay(RETRY_BACKOFF_MS * attempt);
+        }
+        const start = performance.now();
+        try {
+            if (distribution.isSparql()) {
+                return await probeSparqlEndpoint(authUrl.toString(), distribution, resolved, authHeaders, start);
+            }
+            return await probeDataDump(authUrl.toString(), distribution, resolved, authHeaders, start);
+        }
+        catch (error) {
+            lastError = error;
         }
-        return await probeDataDump(authUrl.toString(), distribution, resolved, authHeaders, start);
     }
-    catch (e) {
-        const responseTimeMs = Math.round(performance.now() - start);
-        return new NetworkError(url, e instanceof Error ? e.message : String(e), responseTimeMs);
+    // A successful probe reports its own attempt's latency (computed inside the
+    // probe functions); a NetworkError reports the total time spent failing,
+    // across every attempt and backoff, so observations do not understate the
+    // real cost of a down endpoint.
+    return new NetworkError(url, describeNetworkError(lastError), Math.round(performance.now() - overallStart));
+}
+function delay(milliseconds) {
+    return new Promise((resolve) => setTimeout(resolve, milliseconds));
+}
+/**
+ * Describe a thrown fetch error for a {@link NetworkError} message. undici wraps
+ * the real reason (`ECONNRESET`, `UND_ERR_SOCKET “other side closed”`, TLS
+ * errors, …) in `error.cause`, while `error.message` is usually a bare
+ * ‘fetch failed’. Including the cause’s code and message preserves the
+ * diagnostic detail that would otherwise be discarded.
+ */
+function describeNetworkError(error) {
+    if (!(error instanceof Error)) {
+        return String(error);
+    }
+    const { cause } = error;
+    if (cause === undefined || cause === null) {
+        return error.message;
     }
+    const detail = cause instanceof Error
+        ? [cause.code, cause.message]
+            .filter(Boolean)
+            .join(': ')
+        : String(cause);
+    return detail && detail !== error.message
+        ? `${error.message} (${detail})`
+        : error.message;
 }
 function resolveOptions(options) {
+    const retries = options?.retries;
     return {
         timeoutMs: options?.timeoutMs ?? DEFAULT_TIMEOUT_MS,
         headers: options?.headers ?? new Headers(),
         sparqlQuery: options?.sparqlQuery ?? DEFAULT_SPARQL_QUERY,
+        // Guard the loop bound: a non-integer (NaN, Infinity, fractional) would
+        // otherwise either skip the loop entirely or never terminate. Negatives
+        // clamp to 0 (retries disabled).
+        retries: retries === undefined || !Number.isInteger(retries)
+            ? DEFAULT_RETRIES
+            : Math.max(0, retries),
     };
 }
 /**
@@ -157,7 +226,7 @@ function acceptableContentTypes(queryType) {
     if (queryType === 'ASK' || queryType === 'SELECT') {
         return [SPARQL_RESULTS_JSON, SPARQL_RESULTS_XML];
     }
-    return [SPARQL_RDF_RESULTS];
+    return [...SPARQL_RDF_RESULTS];
 }
 /**
  * Build an `Accept` header that prefers the first content type but still accepts
@@ -199,15 +268,19 @@ async function probeSparqlEndpoint(url, _distribution, options, authHeaders, sta
     return new SparqlProbeResult(url, response, responseTimeMs, acceptedContentTypes, failureReason);
 }
 async function validateSparqlResponse(response, queryType, contentType) {
+    if (queryType === 'CONSTRUCT' || queryType === 'DESCRIBE') {
+        // A CONSTRUCT/DESCRIBE answer is RDF, and an empty graph is a valid answer –
+        // e.g. an availability probe whose query happens to match nothing – so the
+        // 200 response alone confirms the endpoint is up. Deep parse validation is
+        // the data-dump path’s job. Only data dumps must be non-empty (see
+        // validateBody); a SPARQL result may be empty.
+        await response.body?.cancel();
+        return null;
+    }
     const body = await response.text();
     if (body.length === 0) {
         return 'SPARQL endpoint returned an empty response';
     }
-    if (queryType === 'CONSTRUCT' || queryType === 'DESCRIBE') {
-        // Body should be RDF; a non-empty response is sufficient to confirm the
-        // endpoint answered. Deep parse validation is the data-dump path’s job.
-        return null;
-    }
     return contentType.startsWith(SPARQL_RESULTS_XML)
         ? validateSparqlXmlResults(body, queryType)
         : validateSparqlJsonResults(body, queryType);

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@lde/distribution-probe",
-  "version": "0.1.10",
+  "version": "0.1.12",
   "repository": {
     "url": "git+https://github.com/ldelements/lde.git",
     "directory": "packages/distribution-probe"