@lde/distribution-probe 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/probe.d.ts +11 -0
- package/dist/probe.d.ts.map +1 -1
- package/dist/probe.js +88 -15
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -17,10 +17,10 @@ const result = await probe(distribution);
|
|
|
17
17
|
|
|
18
18
|
### SPARQL endpoints
|
|
19
19
|
|
|
20
|
-
Sends `POST` with `SELECT * { ?s ?p ?o } LIMIT 1` and `Accept` …
|
|
20
|
+
Sends `POST` with the configured query (default `SELECT * { ?s ?p ?o } LIMIT 1`). The query type is detected (`ASK` / `SELECT` / `CONSTRUCT` / `DESCRIBE`) and drives both the `Accept` header and how the response is validated:
|
|
21
21
|
|
|
22
|
-
-
|
|
23
|
-
-
|
|
22
|
+
- **`ASK` / `SELECT`** request `application/sparql-results+json`, with `application/sparql-results+xml` as a lower-priority fallback. The response Content-Type must be one of those — anything else fails the probe (`isSuccess() === false`), which rules out HTML error pages served with `200 OK`. The body must parse and contain a results document (a `results` object for `SELECT`, a `boolean` for `ASK`); empty bodies, invalid JSON/XML, and missing results all fail with a `failureReason`.
|
|
23
|
+
- **`CONSTRUCT` / `DESCRIBE`** request the common RDF serializations (`text/turtle`, `application/n-triples`, `application/rdf+xml`, `application/ld+json`, `application/n-quads`, `application/trig`) and accept any of them. A `2xx` RDF response confirms availability, and **an empty graph is a valid answer** — so an empty body does not fail the probe (unlike a data dump, which must be non-empty). The body is not parse-validated.
|
|
24
24
|
|
|
25
25
|
### Data dumps
|
|
26
26
|
|
|
@@ -32,4 +32,6 @@ Sends `HEAD` with `Accept: <distribution.mimeType>` and `Accept-Encoding: identi
|
|
|
32
32
|
|
|
33
33
|
### Network errors
|
|
34
34
|
|
|
35
|
-
|
|
35
|
+
A thrown exception from `fetch` (DNS failure, connection refused, socket reset, TLS error, timeout after the configured `timeoutMs` – default 5 000 ms) is a connection-level failure. The probe retries these up to `retries` times (default 2) with a short backoff before giving up and returning a `NetworkError`. This turns a transient transport blip into a reliable single measurement without looking backward across checks. A genuine outage still resolves to a `NetworkError` on the current check – every attempt fails – but note each attempt gets its own `timeoutMs`, so an endpoint that fails only by timing out takes up to `(retries + 1) × timeoutMs` (plus backoff) to be reported down. HTTP error responses (4xx/5xx) and content-validation failures are real ‘down’ states and are **never** retried.
|
|
36
|
+
|
|
37
|
+
`NetworkError.message` includes the underlying `error.cause` (e.g. `ECONNRESET`, `UND_ERR_SOCKET “other side closed”`) when Node wraps one, so observations record what actually failed rather than a bare ‘fetch failed’.
|
package/dist/probe.d.ts
CHANGED
|
@@ -17,6 +17,17 @@ export interface ProbeOptions {
|
|
|
17
17
|
* distributions. Defaults to `SELECT * { ?s ?p ?o } LIMIT 1`.
|
|
18
18
|
*/
|
|
19
19
|
sparqlQuery?: string;
|
|
20
|
+
/**
|
|
21
|
+
* How many times to retry a connection-level failure (DNS, connection
|
|
22
|
+
* refused, socket reset, TLS error, timeout) before returning a
|
|
23
|
+
* {@link NetworkError}. Only transport errors are retried within the same
|
|
24
|
+
* check, so a transient blip does not flip an otherwise healthy distribution
|
|
25
|
+
* to ‘unavailable’; HTTP error responses and content-validation failures are
|
|
26
|
+
* genuine ‘down’ states and are never retried. Set to `0` to disable.
|
|
27
|
+
* Defaults to `2`. A non-integer or otherwise invalid value falls back to
|
|
28
|
+
* the default; negative values are clamped to `0`.
|
|
29
|
+
*/
|
|
30
|
+
retries?: number;
|
|
20
31
|
}
|
|
21
32
|
/**
|
|
22
33
|
* Result of a network error during probing.
|
package/dist/probe.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;
|
|
1
|
+
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;IACrB;;;;;;;;;OASG;IACH,OAAO,CAAC,EAAE,MAAM,CAAC;CAClB;AASD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAqBD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAqD1B"}
|
package/dist/probe.js
CHANGED
|
@@ -3,6 +3,9 @@ import { rdfParser } from 'rdf-parse';
|
|
|
3
3
|
import { Readable } from 'node:stream';
|
|
4
4
|
const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
|
|
5
5
|
const DEFAULT_TIMEOUT_MS = 5000;
|
|
6
|
+
const DEFAULT_RETRIES = 2;
|
|
7
|
+
/** Base backoff between retries; the nth retry waits `n × base`. */
|
|
8
|
+
const RETRY_BACKOFF_MS = 250;
|
|
6
9
|
/**
|
|
7
10
|
* Result of a network error during probing.
|
|
8
11
|
*/
|
|
@@ -48,7 +51,21 @@ class ProbeResult {
|
|
|
48
51
|
}
|
|
49
52
|
const SPARQL_RESULTS_JSON = 'application/sparql-results+json';
|
|
50
53
|
const SPARQL_RESULTS_XML = 'application/sparql-results+xml';
|
|
51
|
-
|
|
54
|
+
/**
|
|
55
|
+
* RDF serializations a CONSTRUCT or DESCRIBE query may be answered with, in
|
|
56
|
+
* preference order. The endpoint chooses the serialization, so availability must
|
|
57
|
+
* not hinge on a single one: accepting only n-triples flagged healthy endpoints
|
|
58
|
+
* that answer in Turtle (a common default) as unavailable, and made endpoints
|
|
59
|
+
* that cannot emit n-triples reject the probe with HTTP 406.
|
|
60
|
+
*/
|
|
61
|
+
const SPARQL_RDF_RESULTS = [
|
|
62
|
+
'text/turtle',
|
|
63
|
+
'application/n-triples',
|
|
64
|
+
'application/rdf+xml',
|
|
65
|
+
'application/ld+json',
|
|
66
|
+
'application/n-quads',
|
|
67
|
+
'application/trig',
|
|
68
|
+
];
|
|
52
69
|
/**
|
|
53
70
|
* Result of probing a SPARQL endpoint.
|
|
54
71
|
*/
|
|
@@ -100,23 +117,75 @@ export async function probe(distribution, options) {
|
|
|
100
117
|
const [authUrl, authHeaders] = distribution.accessUrl !== undefined
|
|
101
118
|
? extractUrlCredentials(distribution.accessUrl, resolved.headers)
|
|
102
119
|
: [new URL(url), new Headers(resolved.headers)];
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
120
|
+
// Retry only connection-level failures (a thrown `fetch`): HTTP error
|
|
121
|
+
// responses and content-validation failures are returned as result objects,
|
|
122
|
+
// never thrown, so they exit the loop on the first attempt and are not
|
|
123
|
+
// retried. A genuine outage still resolves to a NetworkError – every attempt
|
|
124
|
+
// fails – but note each attempt gets its own `timeoutMs`, so an endpoint that
|
|
125
|
+
// fails only by timing out takes up to (retries + 1) × timeoutMs (plus
|
|
126
|
+
// backoff) to be reported down.
|
|
127
|
+
const overallStart = performance.now();
|
|
128
|
+
let lastError;
|
|
129
|
+
for (let attempt = 0; attempt <= resolved.retries; attempt++) {
|
|
130
|
+
if (attempt > 0) {
|
|
131
|
+
await delay(RETRY_BACKOFF_MS * attempt);
|
|
132
|
+
}
|
|
133
|
+
const start = performance.now();
|
|
134
|
+
try {
|
|
135
|
+
if (distribution.isSparql()) {
|
|
136
|
+
return await probeSparqlEndpoint(authUrl.toString(), distribution, resolved, authHeaders, start);
|
|
137
|
+
}
|
|
138
|
+
return await probeDataDump(authUrl.toString(), distribution, resolved, authHeaders, start);
|
|
139
|
+
}
|
|
140
|
+
catch (error) {
|
|
141
|
+
lastError = error;
|
|
107
142
|
}
|
|
108
|
-
return await probeDataDump(authUrl.toString(), distribution, resolved, authHeaders, start);
|
|
109
143
|
}
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
144
|
+
// A successful probe reports its own attempt's latency (computed inside the
|
|
145
|
+
// probe functions); a NetworkError reports the total time spent failing,
|
|
146
|
+
// across every attempt and backoff, so observations do not understate the
|
|
147
|
+
// real cost of a down endpoint.
|
|
148
|
+
return new NetworkError(url, describeNetworkError(lastError), Math.round(performance.now() - overallStart));
|
|
149
|
+
}
|
|
150
|
+
function delay(milliseconds) {
|
|
151
|
+
return new Promise((resolve) => setTimeout(resolve, milliseconds));
|
|
152
|
+
}
|
|
153
|
+
/**
|
|
154
|
+
* Describe a thrown fetch error for a {@link NetworkError} message. undici wraps
|
|
155
|
+
* * the real reason (`ECONNRESET`, `UND_ERR_SOCKET “other side closed”`, TLS
|
|
156
|
+
* errors, …) in `error.cause`, while `error.message` is usually a bare
|
|
157
|
+
* ‘fetch failed’. Including the cause’s code and message preserves the
|
|
158
|
+
* diagnostic detail that would otherwise be discarded.
|
|
159
|
+
*/
|
|
160
|
+
function describeNetworkError(error) {
|
|
161
|
+
if (!(error instanceof Error)) {
|
|
162
|
+
return String(error);
|
|
163
|
+
}
|
|
164
|
+
const { cause } = error;
|
|
165
|
+
if (cause === undefined || cause === null) {
|
|
166
|
+
return error.message;
|
|
113
167
|
}
|
|
168
|
+
const detail = cause instanceof Error
|
|
169
|
+
? [cause.code, cause.message]
|
|
170
|
+
.filter(Boolean)
|
|
171
|
+
.join(': ')
|
|
172
|
+
: String(cause);
|
|
173
|
+
return detail && detail !== error.message
|
|
174
|
+
? `${error.message} (${detail})`
|
|
175
|
+
: error.message;
|
|
114
176
|
}
|
|
115
177
|
function resolveOptions(options) {
|
|
178
|
+
const retries = options?.retries;
|
|
116
179
|
return {
|
|
117
180
|
timeoutMs: options?.timeoutMs ?? DEFAULT_TIMEOUT_MS,
|
|
118
181
|
headers: options?.headers ?? new Headers(),
|
|
119
182
|
sparqlQuery: options?.sparqlQuery ?? DEFAULT_SPARQL_QUERY,
|
|
183
|
+
// Guard the loop bound: a non-integer (NaN, Infinity, fractional) would
|
|
184
|
+
// otherwise either skip the loop entirely or never terminate. Negatives
|
|
185
|
+
// clamp to 0 (retries disabled).
|
|
186
|
+
retries: retries === undefined || !Number.isInteger(retries)
|
|
187
|
+
? DEFAULT_RETRIES
|
|
188
|
+
: Math.max(0, retries),
|
|
120
189
|
};
|
|
121
190
|
}
|
|
122
191
|
/**
|
|
@@ -157,7 +226,7 @@ function acceptableContentTypes(queryType) {
|
|
|
157
226
|
if (queryType === 'ASK' || queryType === 'SELECT') {
|
|
158
227
|
return [SPARQL_RESULTS_JSON, SPARQL_RESULTS_XML];
|
|
159
228
|
}
|
|
160
|
-
return [SPARQL_RDF_RESULTS];
|
|
229
|
+
return [...SPARQL_RDF_RESULTS];
|
|
161
230
|
}
|
|
162
231
|
/**
|
|
163
232
|
* Build an `Accept` header that prefers the first content type but still accepts
|
|
@@ -199,15 +268,19 @@ async function probeSparqlEndpoint(url, _distribution, options, authHeaders, sta
|
|
|
199
268
|
return new SparqlProbeResult(url, response, responseTimeMs, acceptedContentTypes, failureReason);
|
|
200
269
|
}
|
|
201
270
|
async function validateSparqlResponse(response, queryType, contentType) {
|
|
271
|
+
if (queryType === 'CONSTRUCT' || queryType === 'DESCRIBE') {
|
|
272
|
+
// A CONSTRUCT/DESCRIBE answer is RDF, and an empty graph is a valid answer –
|
|
273
|
+
// e.g. an availability probe whose query happens to match nothing – so the
|
|
274
|
+
// 200 response alone confirms the endpoint is up. Deep parse validation is
|
|
275
|
+
// the data-dump path’s job. Only data dumps must be non-empty (see
|
|
276
|
+
// validateBody); a SPARQL result may be empty.
|
|
277
|
+
await response.body?.cancel();
|
|
278
|
+
return null;
|
|
279
|
+
}
|
|
202
280
|
const body = await response.text();
|
|
203
281
|
if (body.length === 0) {
|
|
204
282
|
return 'SPARQL endpoint returned an empty response';
|
|
205
283
|
}
|
|
206
|
-
if (queryType === 'CONSTRUCT' || queryType === 'DESCRIBE') {
|
|
207
|
-
// Body should be RDF; a non-empty response is sufficient to confirm the
|
|
208
|
-
// endpoint answered. Deep parse validation is the data-dump path’s job.
|
|
209
|
-
return null;
|
|
210
|
-
}
|
|
211
284
|
return contentType.startsWith(SPARQL_RESULTS_XML)
|
|
212
285
|
? validateSparqlXmlResults(body, queryType)
|
|
213
286
|
: validateSparqlJsonResults(body, queryType);
|