@lde/distribution-probe 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/probe.d.ts +8 -2
- package/dist/probe.d.ts.map +1 -1
- package/dist/probe.js +136 -29
- package/package.json +2 -2
package/dist/probe.d.ts
CHANGED
|
@@ -46,8 +46,14 @@ declare abstract class ProbeResult {
|
|
|
46
46
|
* Result of probing a SPARQL endpoint.
|
|
47
47
|
*/
|
|
48
48
|
export declare class SparqlProbeResult extends ProbeResult {
|
|
49
|
-
|
|
50
|
-
|
|
49
|
+
/**
|
|
50
|
+
* Content types the probe was prepared to accept as a valid answer. A SELECT or
|
|
51
|
+
* ASK query may be answered with SPARQL results in JSON or XML; the endpoint
|
|
52
|
+
* chooses, so success is not tied to a single serialization. A single string is
|
|
53
|
+
* accepted and normalized to a one-element list for backwards compatibility.
|
|
54
|
+
*/
|
|
55
|
+
readonly acceptedContentTypes: readonly string[];
|
|
56
|
+
constructor(url: string, response: Response, responseTimeMs: number, acceptedContentTypes: string | readonly string[], failureReason?: string | null);
|
|
51
57
|
isSuccess(): boolean;
|
|
52
58
|
}
|
|
53
59
|
/**
|
package/dist/probe.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAMD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAkC1B"}
|
package/dist/probe.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { compressionMediaTypes } from '@lde/dataset';
|
|
2
|
-
import {
|
|
2
|
+
import { rdfParser } from 'rdf-parse';
|
|
3
|
+
import { Readable } from 'node:stream';
|
|
3
4
|
const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
|
|
4
5
|
const DEFAULT_TIMEOUT_MS = 5000;
|
|
5
6
|
/**
|
|
@@ -46,19 +47,29 @@ class ProbeResult {
|
|
|
46
47
|
}
|
|
47
48
|
}
|
|
48
49
|
const SPARQL_RESULTS_JSON = 'application/sparql-results+json';
|
|
50
|
+
const SPARQL_RESULTS_XML = 'application/sparql-results+xml';
|
|
49
51
|
const SPARQL_RDF_RESULTS = 'application/n-triples';
|
|
50
52
|
/**
|
|
51
53
|
* Result of probing a SPARQL endpoint.
|
|
52
54
|
*/
|
|
53
55
|
export class SparqlProbeResult extends ProbeResult {
|
|
54
|
-
|
|
55
|
-
|
|
56
|
+
/**
|
|
57
|
+
* Content types the probe was prepared to accept as a valid answer. A SELECT or
|
|
58
|
+
* ASK query may be answered with SPARQL results in JSON or XML; the endpoint
|
|
59
|
+
* chooses, so success is not tied to a single serialization. A single string is
|
|
60
|
+
* accepted and normalized to a one-element list for backwards compatibility.
|
|
61
|
+
*/
|
|
62
|
+
acceptedContentTypes;
|
|
63
|
+
constructor(url, response, responseTimeMs, acceptedContentTypes, failureReason = null) {
|
|
56
64
|
super(url, response, responseTimeMs, failureReason);
|
|
57
|
-
this.
|
|
65
|
+
this.acceptedContentTypes =
|
|
66
|
+
typeof acceptedContentTypes === 'string'
|
|
67
|
+
? [acceptedContentTypes]
|
|
68
|
+
: acceptedContentTypes;
|
|
58
69
|
}
|
|
59
70
|
isSuccess() {
|
|
60
71
|
return (super.isSuccess() &&
|
|
61
|
-
(this.contentType?.startsWith(
|
|
72
|
+
this.acceptedContentTypes.some((type) => this.contentType?.startsWith(type) ?? false));
|
|
62
73
|
}
|
|
63
74
|
}
|
|
64
75
|
/**
|
|
@@ -137,18 +148,33 @@ function detectSparqlQueryType(query) {
|
|
|
137
148
|
const match = /\b(ASK|SELECT|CONSTRUCT|DESCRIBE)\b/i.exec(withoutComments);
|
|
138
149
|
return (match?.[1].toUpperCase() ?? 'SELECT');
|
|
139
150
|
}
|
|
140
|
-
|
|
151
|
+
/**
|
|
152
|
+
* Content types a SPARQL endpoint may legitimately answer with, in preference
|
|
153
|
+
* order, for the given query type. SELECT and ASK return a results document
|
|
154
|
+
* (JSON or XML – the endpoint chooses); CONSTRUCT and DESCRIBE return RDF.
|
|
155
|
+
*/
|
|
156
|
+
function acceptableContentTypes(queryType) {
|
|
141
157
|
if (queryType === 'ASK' || queryType === 'SELECT') {
|
|
142
|
-
return SPARQL_RESULTS_JSON;
|
|
158
|
+
return [SPARQL_RESULTS_JSON, SPARQL_RESULTS_XML];
|
|
143
159
|
}
|
|
144
|
-
return SPARQL_RDF_RESULTS;
|
|
160
|
+
return [SPARQL_RDF_RESULTS];
|
|
161
|
+
}
|
|
162
|
+
/**
|
|
163
|
+
* Build an `Accept` header that prefers the first content type but still accepts
|
|
164
|
+
* the rest at a lower q-value, so an endpoint that only serves a later type is
|
|
165
|
+
* not rejected with a 406.
|
|
166
|
+
*/
|
|
167
|
+
function acceptHeader(contentTypes) {
|
|
168
|
+
return contentTypes
|
|
169
|
+
.map((type, index) => (index === 0 ? type : `${type};q=0.9`))
|
|
170
|
+
.join(', ');
|
|
145
171
|
}
|
|
146
172
|
async function probeSparqlEndpoint(url, _distribution, options, authHeaders, start) {
|
|
147
173
|
const queryType = detectSparqlQueryType(options.sparqlQuery);
|
|
148
|
-
const
|
|
174
|
+
const acceptedContentTypes = acceptableContentTypes(queryType);
|
|
149
175
|
const headers = new Headers({
|
|
150
176
|
'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
|
|
151
|
-
Accept:
|
|
177
|
+
Accept: acceptHeader(acceptedContentTypes),
|
|
152
178
|
});
|
|
153
179
|
for (const [key, value] of authHeaders) {
|
|
154
180
|
headers.set(key, value);
|
|
@@ -160,19 +186,19 @@ async function probeSparqlEndpoint(url, _distribution, options, authHeaders, sta
|
|
|
160
186
|
body: `query=${encodeURIComponent(options.sparqlQuery)}`,
|
|
161
187
|
});
|
|
162
188
|
const actualContentType = response.headers.get('Content-Type');
|
|
163
|
-
const
|
|
189
|
+
const matchedContentType = acceptedContentTypes.find((type) => actualContentType?.startsWith(type) ?? false);
|
|
164
190
|
let failureReason = null;
|
|
165
|
-
if (response.ok &&
|
|
166
|
-
failureReason = await validateSparqlResponse(response, queryType);
|
|
191
|
+
if (response.ok && matchedContentType !== undefined) {
|
|
192
|
+
failureReason = await validateSparqlResponse(response, queryType, matchedContentType);
|
|
167
193
|
}
|
|
168
194
|
else {
|
|
169
195
|
// Drain unconsumed body to release the underlying connection.
|
|
170
196
|
await response.body?.cancel();
|
|
171
197
|
}
|
|
172
198
|
const responseTimeMs = Math.round(performance.now() - start);
|
|
173
|
-
return new SparqlProbeResult(url, response, responseTimeMs,
|
|
199
|
+
return new SparqlProbeResult(url, response, responseTimeMs, acceptedContentTypes, failureReason);
|
|
174
200
|
}
|
|
175
|
-
async function validateSparqlResponse(response, queryType) {
|
|
201
|
+
async function validateSparqlResponse(response, queryType, contentType) {
|
|
176
202
|
const body = await response.text();
|
|
177
203
|
if (body.length === 0) {
|
|
178
204
|
return 'SPARQL endpoint returned an empty response';
|
|
@@ -182,6 +208,11 @@ async function validateSparqlResponse(response, queryType) {
|
|
|
182
208
|
// endpoint answered. Deep parse validation is the data-dump path’s job.
|
|
183
209
|
return null;
|
|
184
210
|
}
|
|
211
|
+
return contentType.startsWith(SPARQL_RESULTS_XML)
|
|
212
|
+
? validateSparqlXmlResults(body, queryType)
|
|
213
|
+
: validateSparqlJsonResults(body, queryType);
|
|
214
|
+
}
|
|
215
|
+
function validateSparqlJsonResults(body, queryType) {
|
|
185
216
|
let json;
|
|
186
217
|
try {
|
|
187
218
|
json = JSON.parse(body);
|
|
@@ -201,6 +232,27 @@ async function validateSparqlResponse(response, queryType) {
|
|
|
201
232
|
}
|
|
202
233
|
return null;
|
|
203
234
|
}
|
|
235
|
+
/**
|
|
236
|
+
* Lightweight structural check on a SPARQL Query Results XML document. Mirrors
|
|
237
|
+
* the JSON path’s intent – confirm the endpoint answered with the expected shape
|
|
238
|
+
* – without pulling in a full XML parser.
|
|
239
|
+
*/
|
|
240
|
+
function validateSparqlXmlResults(body, queryType) {
|
|
241
|
+
if (!/<sparql[\s>]/i.test(body)) {
|
|
242
|
+
return 'SPARQL endpoint returned invalid XML';
|
|
243
|
+
}
|
|
244
|
+
if (queryType === 'ASK') {
|
|
245
|
+
if (!/<boolean>\s*(true|false)\s*<\/boolean>/i.test(body)) {
|
|
246
|
+
return 'SPARQL endpoint did not return a valid ASK result';
|
|
247
|
+
}
|
|
248
|
+
return null;
|
|
249
|
+
}
|
|
250
|
+
// SELECT
|
|
251
|
+
if (!/<results[\s/>]/i.test(body)) {
|
|
252
|
+
return 'SPARQL endpoint did not return a valid results object';
|
|
253
|
+
}
|
|
254
|
+
return null;
|
|
255
|
+
}
|
|
204
256
|
async function probeDataDump(url, distribution, options, authHeaders, start) {
|
|
205
257
|
// Express a preference for the declared media type, but accept anything as a
|
|
206
258
|
// fallback. Servers that implement RFC 9110 §12.5.1 content negotiation will
|
|
@@ -237,7 +289,7 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
|
|
|
237
289
|
const body = await getResponse.text();
|
|
238
290
|
const isHttpSuccess = getResponse.status >= 200 && getResponse.status < 400;
|
|
239
291
|
const failureReason = isHttpSuccess
|
|
240
|
-
? validateBody(body, getResponse.headers.get('Content-Type'))
|
|
292
|
+
? await validateBody(body, getResponse.headers.get('Content-Type'), url, options.timeoutMs)
|
|
241
293
|
: null;
|
|
242
294
|
const responseTimeMs = Math.round(performance.now() - start);
|
|
243
295
|
const result = new DataDumpProbeResult(url, getResponse, responseTimeMs, failureReason);
|
|
@@ -249,28 +301,83 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
|
|
|
249
301
|
checkContentTypeMismatch(result, distribution.mimeType);
|
|
250
302
|
return result;
|
|
251
303
|
}
|
|
304
|
+
// The RDF serializations whose bodies we parse to confirm they carry triples. A
|
|
305
|
+
// non-empty body in one of these formats that yields zero triples — an empty
|
|
306
|
+
// graph such as a JSON-LD `{}`, an `<rdf:RDF/>`, or prefix-only Turtle — is a
|
|
307
|
+
// faulty distribution, not a usable one, so it must be caught here. Other
|
|
308
|
+
// content types (CSV, HTML, …) are left untouched: the probe is not the place
|
|
309
|
+
// to assert what a non-RDF body should contain.
|
|
252
310
|
const rdfContentTypes = [
|
|
253
311
|
'text/turtle',
|
|
254
312
|
'application/n-triples',
|
|
255
313
|
'application/n-quads',
|
|
314
|
+
'application/trig',
|
|
315
|
+
'text/n3',
|
|
316
|
+
'application/ld+json',
|
|
317
|
+
'application/rdf+xml',
|
|
256
318
|
];
|
|
257
|
-
function validateBody(body, contentType) {
|
|
319
|
+
async function validateBody(body, contentType, baseIRI, timeoutMs) {
|
|
258
320
|
if (body.length === 0) {
|
|
259
321
|
return 'Distribution is empty';
|
|
260
322
|
}
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
const quads = parser.parse(body);
|
|
265
|
-
if (quads.length === 0) {
|
|
266
|
-
return 'Distribution contains no RDF triples';
|
|
267
|
-
}
|
|
268
|
-
}
|
|
269
|
-
catch (e) {
|
|
270
|
-
return e instanceof Error ? e.message : String(e);
|
|
271
|
-
}
|
|
323
|
+
const serialization = contentType?.split(';')[0].trim();
|
|
324
|
+
if (!serialization || !rdfContentTypes.includes(serialization)) {
|
|
325
|
+
return null;
|
|
272
326
|
}
|
|
273
|
-
|
|
327
|
+
const outcome = await classifyRdfBody(body, serialization, baseIRI, timeoutMs);
|
|
328
|
+
switch (outcome.type) {
|
|
329
|
+
case 'empty':
|
|
330
|
+
return 'Distribution contains no RDF triples';
|
|
331
|
+
case 'parseError':
|
|
332
|
+
return outcome.message;
|
|
333
|
+
// 'hasTriples' proves content. 'inconclusive' means the parse timed out or a
|
|
334
|
+
// remote JSON-LD @context could not be loaded — a third-party hiccup, not
|
|
335
|
+
// evidence the distribution is faulty — so neither is reported as a failure.
|
|
336
|
+
default:
|
|
337
|
+
return null;
|
|
338
|
+
}
|
|
339
|
+
}
|
|
340
|
+
/**
|
|
341
|
+
* Parse an RDF body just far enough to tell whether it carries any triples:
|
|
342
|
+
* resolve on the first triple (presence is all we need, not a full count), on a
|
|
343
|
+
* clean end with none ('empty'), or on a parse error. The parse is bounded by
|
|
344
|
+
* `timeoutMs` because a JSON-LD `@context` is fetched from its origin, and a
|
|
345
|
+
* slow or hanging context host would otherwise stall the probe past its budget;
|
|
346
|
+
* on expiry — and likewise when a remote `@context` is unreachable — the outcome
|
|
347
|
+
* is 'inconclusive', so a valid distribution is never flagged faulty for a
|
|
348
|
+
* context host's failure. `baseIRI` resolves any relative IRIs in the document.
|
|
349
|
+
*/
|
|
350
|
+
function classifyRdfBody(body, contentType, baseIRI, timeoutMs) {
|
|
351
|
+
return new Promise((resolve) => {
|
|
352
|
+
const quads = rdfParser.parse(Readable.from([body]), {
|
|
353
|
+
contentType,
|
|
354
|
+
baseIRI,
|
|
355
|
+
});
|
|
356
|
+
const timer = setTimeout(() => settle({ type: 'inconclusive' }), timeoutMs);
|
|
357
|
+
let settled = false;
|
|
358
|
+
function settle(outcome) {
|
|
359
|
+
if (settled)
|
|
360
|
+
return;
|
|
361
|
+
settled = true;
|
|
362
|
+
clearTimeout(timer);
|
|
363
|
+
quads.destroy();
|
|
364
|
+
resolve(outcome);
|
|
365
|
+
}
|
|
366
|
+
quads
|
|
367
|
+
.on('data', () => settle({ type: 'hasTriples' }))
|
|
368
|
+
.on('error', (error) => settle(isRemoteContextError(error)
|
|
369
|
+
? { type: 'inconclusive' }
|
|
370
|
+
: { type: 'parseError', message: error.message }))
|
|
371
|
+
.on('end', () => settle({ type: 'empty' }));
|
|
372
|
+
});
|
|
373
|
+
}
|
|
374
|
+
/**
|
|
375
|
+
* Whether a parse error is the RDF parser failing to load a remote JSON-LD
|
|
376
|
+
* `@context` (an unreachable or broken third-party context host) rather than a
|
|
377
|
+
* defect in the distribution body itself.
|
|
378
|
+
*/
|
|
379
|
+
function isRemoteContextError(error) {
|
|
380
|
+
return /remote context/i.test(error.message);
|
|
274
381
|
}
|
|
275
382
|
/**
|
|
276
383
|
* Compare the declared MIME type from the dataset registry against the
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/distribution-probe",
|
|
3
|
-
"version": "0.1.4",
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/distribution-probe"
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.4",
|
|
28
|
-
"
|
|
28
|
+
"rdf-parse": "^5.0.0",
|
|
29
29
|
"tslib": "^2.3.0"
|
|
30
30
|
}
|
|
31
31
|
}
|