@lde/distribution-probe 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -17,10 +17,10 @@ const result = await probe(distribution);
17
17
 
18
18
  ### SPARQL endpoints
19
19
 
20
- Sends `POST` with `SELECT * { ?s ?p ?o } LIMIT 1` and `Accept: application/sparql-results+json`, then:
20
+ Sends `POST` with the configured query (default `SELECT * { ?s ?p ?o } LIMIT 1`). The query type is detected (`ASK` / `SELECT` / `CONSTRUCT` / `DESCRIBE`) and drives both the `Accept` header and how the response is validated:
21
21
 
22
- - **Content-Type is enforced.** The response Content-Type must start with `application/sparql-results+json`; anything else fails the probe (`isSuccess() === false`). This rules out HTML error pages served with `200 OK`.
23
- - The JSON body must parse and contain a `results` object. Empty bodies, invalid JSON, and missing `results` all fail the probe with a `failureReason`.
22
+ - **`ASK` / `SELECT`** request `application/sparql-results+json`, with `application/sparql-results+xml` as a lower-priority fallback. The response Content-Type must be one of those — anything else fails the probe (`isSuccess() === false`), which rules out HTML error pages served with `200 OK`. The body must parse and contain a results document (a `results` object for `SELECT`, a `boolean` for `ASK`); empty bodies, invalid JSON/XML, and missing results all fail with a `failureReason`.
23
+ - **`CONSTRUCT` / `DESCRIBE`** request the common RDF serializations (`text/turtle`, `application/n-triples`, `application/rdf+xml`, `application/ld+json`, `application/n-quads`, `application/trig`) and accept any of them. A `2xx` RDF response confirms availability, and **an empty graph is a valid answer** — so an empty body does not fail the probe (unlike a data dump, which must be non-empty). The body is not parse-validated.
24
24
 
25
25
  ### Data dumps
26
26
 
@@ -32,4 +32,6 @@ Sends `HEAD` with `Accept: <distribution.mimeType>` and `Accept-Encoding: identi
32
32
 
33
33
  ### Network errors
34
34
 
35
- Any thrown exception from `fetch` (DNS, connection refused, TLS, timeout after the configured `timeout` – default 5 000 ms) is caught and returned as a `NetworkError` with the original message.
35
+ A thrown exception from `fetch` (DNS failure, connection refused, socket reset, TLS error, timeout after the configured `timeoutMs` – default 5 000 ms) is a connection-level failure. The probe retries these up to `retries` times (default 2), waiting a short, linearly growing backoff before each retry (250 ms, then 500 ms, …), before giving up and returning a `NetworkError`. This lets a single check absorb a transient transport blip by itself, rather than relying on the history of earlier checks. A genuine outage still resolves to a `NetworkError` on the current check – every attempt fails – but note that each attempt gets its own `timeoutMs`, so an endpoint that fails only by timing out takes up to `(retries + 1) × timeoutMs` (plus backoff) to be reported down: with the defaults, 3 × 5 000 ms + 750 ms ≈ 15.75 s. HTTP error responses (4xx/5xx) and content-validation failures are real ‘down’ states and are **never** retried.
36
+
37
+ `NetworkError.message` includes the underlying `error.cause` (e.g. `ECONNRESET`, `UND_ERR_SOCKET “other side closed”`) when Node’s `fetch` (undici) wraps one, formatted as `<message> (<cause code>: <cause message>)`, so observations record what actually failed rather than a bare ‘fetch failed’.
package/dist/probe.d.ts CHANGED
@@ -17,6 +17,17 @@ export interface ProbeOptions {
17
17
  * distributions. Defaults to `SELECT * { ?s ?p ?o } LIMIT 1`.
18
18
  */
19
19
  sparqlQuery?: string;
20
+ /**
21
+ * How many times to retry a connection-level failure (DNS, connection
22
+ * refused, socket reset, TLS error, timeout) before returning a
23
+ * {@link NetworkError}. Only transport errors are retried within the same
24
+ * check, so a transient blip does not flip an otherwise healthy distribution
25
+ * to ‘unavailable’; HTTP error responses and content-validation failures are
26
+ * genuine ‘down’ states and are never retried. Set to `0` to disable.
27
+ * Defaults to `2`. A non-integer value (including `NaN` and `Infinity`)
28
+ * falls back to the default; negative integers are clamped to `0`.
29
+ */
30
+ retries?: number;
20
31
  }
21
32
  /**
22
33
  * Result of a network error during probing.
@@ -1 +1 @@
1
- {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAMD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAkC1B"}
1
+ {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AASD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAqBD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
package/dist/probe.js CHANGED
@@ -3,6 +3,9 @@ import { rdfParser } from 'rdf-parse';
3
3
  import { Readable } from 'node:stream';
4
4
  const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
5
5
  const DEFAULT_TIMEOUT_MS = 5000;
6
+ const DEFAULT_RETRIES = 2;
7
+ /** Base backoff between retries; the nth retry waits `n × base`. */
8
+ const RETRY_BACKOFF_MS = 250;
6
9
  /**
7
10
  * Result of a network error during probing.
8
11
  */
@@ -48,7 +51,21 @@ class ProbeResult {
48
51
  }
49
52
  const SPARQL_RESULTS_JSON = 'application/sparql-results+json';
50
53
  const SPARQL_RESULTS_XML = 'application/sparql-results+xml';
51
- const SPARQL_RDF_RESULTS = 'application/n-triples';
54
+ /**
55
+ * RDF serializations a CONSTRUCT or DESCRIBE query may be answered with, in
56
+ * preference order. The endpoint chooses the serialization, so availability must
57
+ * not hinge on a single one: accepting only n-triples flagged healthy endpoints
58
+ * that answer in Turtle (a common default) as unavailable, and made endpoints
59
+ * that cannot emit n-triples reject the probe with HTTP 406.
60
+ */
61
+ const SPARQL_RDF_RESULTS = [
62
+ 'text/turtle',
63
+ 'application/n-triples',
64
+ 'application/rdf+xml',
65
+ 'application/ld+json',
66
+ 'application/n-quads',
67
+ 'application/trig',
68
+ ];
52
69
  /**
53
70
  * Result of probing a SPARQL endpoint.
54
71
  */
@@ -100,23 +117,75 @@ export async function probe(distribution, options) {
100
117
  const [authUrl, authHeaders] = distribution.accessUrl !== undefined
101
118
  ? extractUrlCredentials(distribution.accessUrl, resolved.headers)
102
119
  : [new URL(url), new Headers(resolved.headers)];
103
- const start = performance.now();
104
- try {
105
- if (distribution.isSparql()) {
106
- return await probeSparqlEndpoint(authUrl.toString(), distribution, resolved, authHeaders, start);
120
+ // Retry only connection-level failures (a thrown `fetch`): HTTP error
121
+ // responses and content-validation failures are returned as result objects,
122
+ // never thrown, so they exit the loop on the first attempt and are not
123
+ // retried. A genuine outage still resolves to a NetworkError – every attempt
124
+ // fails – but note each attempt gets its own `timeoutMs`, so an endpoint that
125
+ // fails only by timing out takes up to (retries + 1) × timeoutMs (plus
126
+ // backoff) to be reported down.
127
+ const overallStart = performance.now();
128
+ let lastError;
129
+ for (let attempt = 0; attempt <= resolved.retries; attempt++) {
130
+ if (attempt > 0) {
131
+ await delay(RETRY_BACKOFF_MS * attempt);
132
+ }
133
+ const start = performance.now();
134
+ try {
135
+ if (distribution.isSparql()) {
136
+ return await probeSparqlEndpoint(authUrl.toString(), distribution, resolved, authHeaders, start);
137
+ }
138
+ return await probeDataDump(authUrl.toString(), distribution, resolved, authHeaders, start);
139
+ }
140
+ catch (error) {
141
+ lastError = error;
107
142
  }
108
- return await probeDataDump(authUrl.toString(), distribution, resolved, authHeaders, start);
109
143
  }
110
- catch (e) {
111
- const responseTimeMs = Math.round(performance.now() - start);
112
- return new NetworkError(url, e instanceof Error ? e.message : String(e), responseTimeMs);
144
+ // A successful probe reports its own attempt's latency (computed inside the
145
+ // probe functions); a NetworkError reports the total time spent failing,
146
+ // across every attempt and backoff, so observations do not understate the
147
+ // real cost of a down endpoint.
148
+ return new NetworkError(url, describeNetworkError(lastError), Math.round(performance.now() - overallStart));
149
+ }
150
+ function delay(milliseconds) {
151
+ return new Promise((resolve) => setTimeout(resolve, milliseconds));
152
+ }
153
+ /**
154
+ * Describe a thrown fetch error for a {@link NetworkError} message. undici wraps
155
+ * * the real reason (`ECONNRESET`, `UND_ERR_SOCKET “other side closed”`, TLS
156
+ * errors, …) in `error.cause`, while `error.message` is usually a bare
157
+ * ‘fetch failed’. Including the cause’s code and message preserves the
158
+ * diagnostic detail that would otherwise be discarded.
159
+ */
160
+ function describeNetworkError(error) {
161
+ if (!(error instanceof Error)) {
162
+ return String(error);
163
+ }
164
+ const { cause } = error;
165
+ if (cause === undefined || cause === null) {
166
+ return error.message;
113
167
  }
168
+ const detail = cause instanceof Error
169
+ ? [cause.code, cause.message]
170
+ .filter(Boolean)
171
+ .join(': ')
172
+ : String(cause);
173
+ return detail && detail !== error.message
174
+ ? `${error.message} (${detail})`
175
+ : error.message;
114
176
  }
115
177
  function resolveOptions(options) {
178
+ const retries = options?.retries;
116
179
  return {
117
180
  timeoutMs: options?.timeoutMs ?? DEFAULT_TIMEOUT_MS,
118
181
  headers: options?.headers ?? new Headers(),
119
182
  sparqlQuery: options?.sparqlQuery ?? DEFAULT_SPARQL_QUERY,
183
+ // Guard the loop bound: NaN would skip the loop entirely and Infinity would
184
+ // never terminate against a failing endpoint, so any non-integer (including
185
+ // fractional values) falls back to the default. Negative integers clamp to 0 (retries disabled).
186
+ retries: retries === undefined || !Number.isInteger(retries)
187
+ ? DEFAULT_RETRIES
188
+ : Math.max(0, retries),
120
189
  };
121
190
  }
122
191
  /**
@@ -157,7 +226,7 @@ function acceptableContentTypes(queryType) {
157
226
  if (queryType === 'ASK' || queryType === 'SELECT') {
158
227
  return [SPARQL_RESULTS_JSON, SPARQL_RESULTS_XML];
159
228
  }
160
- return [SPARQL_RDF_RESULTS];
229
+ return [...SPARQL_RDF_RESULTS];
161
230
  }
162
231
  /**
163
232
  * Build an `Accept` header that prefers the first content type but still accepts
@@ -199,15 +268,19 @@ async function probeSparqlEndpoint(url, _distribution, options, authHeaders, sta
199
268
  return new SparqlProbeResult(url, response, responseTimeMs, acceptedContentTypes, failureReason);
200
269
  }
201
270
  async function validateSparqlResponse(response, queryType, contentType) {
271
+ if (queryType === 'CONSTRUCT' || queryType === 'DESCRIBE') {
272
+ // A CONSTRUCT/DESCRIBE answer is RDF, and an empty graph is a valid answer –
273
+ // e.g. an availability probe whose query happens to match nothing – so the
274
+ // 200 response alone confirms the endpoint is up. Deep parse validation is
275
+ // the data-dump path’s job. Only data dumps must be non-empty (see
276
+ // validateBody); a SPARQL result may be empty.
277
+ await response.body?.cancel();
278
+ return null;
279
+ }
202
280
  const body = await response.text();
203
281
  if (body.length === 0) {
204
282
  return 'SPARQL endpoint returned an empty response';
205
283
  }
206
- if (queryType === 'CONSTRUCT' || queryType === 'DESCRIBE') {
207
- // Body should be RDF; a non-empty response is sufficient to confirm the
208
- // endpoint answered. Deep parse validation is the data-dump path’s job.
209
- return null;
210
- }
211
284
  return contentType.startsWith(SPARQL_RESULTS_XML)
212
285
  ? validateSparqlXmlResults(body, queryType)
213
286
  : validateSparqlJsonResults(body, queryType);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/distribution-probe",
3
- "version": "0.1.10",
3
+ "version": "0.1.12",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/distribution-probe"