@lde/distribution-probe 0.1.5 → 0.1.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/probe.d.ts.map +1 -1
- package/dist/probe.js +74 -15
- package/package.json +2 -2
package/dist/probe.d.ts.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;
|
|
1
|
+
{"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAMD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAkC1B"}
|
package/dist/probe.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
import { compressionMediaTypes } from '@lde/dataset';
|
|
2
|
-
import {
|
|
2
|
+
import { rdfParser } from 'rdf-parse';
|
|
3
|
+
import { Readable } from 'node:stream';
|
|
3
4
|
const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
|
|
4
5
|
const DEFAULT_TIMEOUT_MS = 5000;
|
|
5
6
|
/**
|
|
@@ -288,7 +289,7 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
|
|
|
288
289
|
const body = await getResponse.text();
|
|
289
290
|
const isHttpSuccess = getResponse.status >= 200 && getResponse.status < 400;
|
|
290
291
|
const failureReason = isHttpSuccess
|
|
291
|
-
? validateBody(body, getResponse.headers.get('Content-Type'))
|
|
292
|
+
? await validateBody(body, getResponse.headers.get('Content-Type'), url, options.timeoutMs)
|
|
292
293
|
: null;
|
|
293
294
|
const responseTimeMs = Math.round(performance.now() - start);
|
|
294
295
|
const result = new DataDumpProbeResult(url, getResponse, responseTimeMs, failureReason);
|
|
@@ -300,28 +301,86 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
|
|
|
300
301
|
checkContentTypeMismatch(result, distribution.mimeType);
|
|
301
302
|
return result;
|
|
302
303
|
}
|
|
304
|
+
// The RDF serializations whose bodies we parse to confirm they carry triples. A
// non-empty body in one of these formats that yields zero triples — an empty
// graph such as a JSON-LD `{}`, an `<rdf:RDF/>`, or prefix-only Turtle — is a
// faulty distribution, not a usable one, so it must be caught here. Other
// content types (CSV, HTML, …) are left untouched: the probe is not the place
// to assert what a non-RDF body should contain.
//
// Entries are lower-case media types without parameters; callers normalise the
// Content-Type header before matching. The list is frozen so this shared
// module-level allow-list cannot be altered by accident at runtime.
const rdfContentTypes = Object.freeze([
    'text/turtle',
    'application/n-triples',
    'application/n-quads',
    'application/trig',
    'text/n3',
    'application/ld+json',
    'application/rdf+xml',
]);
|
|
308
|
-
function validateBody(body, contentType) {
|
|
319
|
+
/**
 * Decide whether a successfully fetched body should be reported as faulty.
 *
 * @param {string} body - The response body text.
 * @param {string | null | undefined} contentType - Raw Content-Type header value.
 * @param {string} baseIRI - Passed through to the RDF classification step.
 * @param {number} timeoutMs - Passed through to the RDF classification step.
 * @returns {Promise<string | null>} A failure reason, or `null` when no
 *   failure is reported.
 */
async function validateBody(body, contentType, baseIRI, timeoutMs) {
    const isBodyEmpty = body.length === 0;
    if (isBodyEmpty) {
        return 'Distribution is empty';
    }
    // Media types are case-insensitive (RFC 9110 §8.3.1): drop any parameters
    // and lower-case the type before comparing against the lower-case
    // allow-list, so `Application/LD+JSON; charset=utf-8` is still validated.
    const mediaType = contentType?.split(';')[0].trim().toLowerCase();
    const isRdfSerialization = Boolean(mediaType) && rdfContentTypes.includes(mediaType);
    if (!isRdfSerialization) {
        // Non-RDF (or undeclared) content is not inspected any further.
        return null;
    }
    const verdict = await classifyRdfBody(body, mediaType, baseIRI, timeoutMs);
    if (verdict.type === 'empty') {
        return 'Distribution contains no RDF triples';
    }
    if (verdict.type === 'parseError') {
        return verdict.message;
    }
    // Every other outcome ('hasTriples', 'inconclusive', …) is not reported as
    // a failure: the former proves content, the latter is no evidence that the
    // distribution itself is faulty.
    return null;
}
|
|
343
|
+
/**
 * Parse an RDF body just far enough to tell whether it carries any triples.
 *
 * The promise resolves with the first of these outcomes to occur:
 * - `{ type: 'hasTriples' }` — the parser emitted a first quad (presence is
 *   all we need, not a full count);
 * - `{ type: 'empty' }` — the parser ended cleanly without emitting a quad;
 * - `{ type: 'parseError', message }` — the parser emitted an error;
 * - `{ type: 'inconclusive' }` — the time budget expired, or the error was
 *   classified by `isRemoteContextError` as a remote JSON-LD `@context`
 *   failure, so a valid distribution is never flagged faulty for a context
 *   host's failure.
 *
 * @param {string} body - The document text to parse.
 * @param {string} contentType - Media type handed to the RDF parser.
 * @param {string} baseIRI - Base IRI handed to the RDF parser for resolving
 *   relative IRIs.
 * @param {number} [timeoutMs] - Upper bound for the parse in milliseconds;
 *   falls back to `DEFAULT_TIMEOUT_MS` when missing or not a finite number.
 * @returns {Promise<{type: string, message?: string}>} The classification.
 */
function classifyRdfBody(body, contentType, baseIRI, timeoutMs) {
    // setTimeout() coerces a missing or NaN delay to 1 ms, which could cut the
    // parse off almost immediately and hide an empty or broken body behind
    // 'inconclusive'. Use the module default instead.
    const budgetMs = Number.isFinite(timeoutMs) ? timeoutMs : DEFAULT_TIMEOUT_MS;
    return new Promise((resolve) => {
        const quads = rdfParser.parse(Readable.from([body]), {
            contentType,
            baseIRI,
        });
        let settled = false;
        const timer = setTimeout(() => settle({ type: 'inconclusive' }), budgetMs);
        // Resolve exactly once, then release the timer and the parser stream.
        // The listeners below stay attached, so any event that still arrives
        // afterwards lands here and is ignored.
        function settle(outcome) {
            if (settled) {
                return;
            }
            settled = true;
            clearTimeout(timer);
            quads.destroy();
            resolve(outcome);
        }
        quads
            .on('data', () => settle({ type: 'hasTriples' }))
            .on('error', (error) => {
                // Guard against non-Error values so a malformed error event can
                // never throw from inside this handler.
                if (error != null && isRemoteContextError(error)) {
                    settle({ type: 'inconclusive' });
                    return;
                }
                const message = typeof error?.message === 'string'
                    ? error.message
                    : String(error);
                settle({ type: 'parseError', message });
            })
            .on('end', () => settle({ type: 'empty' }));
    });
}
|
|
377
|
+
/**
 * Whether a parse error is the RDF parser failing to load a remote JSON-LD
 * `@context` (an unreachable or broken third-party context host) rather than a
 * defect in the distribution body itself.
 *
 * The check is a case-insensitive search for "remote context" in the error's
 * message. Values without a string message (including `null` and `undefined`)
 * are never treated as remote-context errors; a bare string is matched
 * directly.
 *
 * @param {unknown} error - The value emitted by the parser's 'error' event.
 * @returns {boolean} `true` when the message mentions a remote context.
 */
function isRemoteContextError(error) {
    const message = typeof error === 'string' ? error : error?.message;
    return typeof message === 'string' && /remote context/i.test(message);
}
|
|
326
385
|
/**
|
|
327
386
|
* Compare the declared MIME type from the dataset registry against the
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@lde/distribution-probe",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.7",
|
|
4
4
|
"repository": {
|
|
5
5
|
"url": "git+https://github.com/ldelements/lde.git",
|
|
6
6
|
"directory": "packages/distribution-probe"
|
|
@@ -25,7 +25,7 @@
|
|
|
25
25
|
],
|
|
26
26
|
"dependencies": {
|
|
27
27
|
"@lde/dataset": "0.7.4",
|
|
28
|
-
"
|
|
28
|
+
"rdf-parse": "^5.0.0",
|
|
29
29
|
"tslib": "^2.3.0"
|
|
30
30
|
}
|
|
31
31
|
}
|