@lde/distribution-probe 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAGnE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAMD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAkC1B"}
1
+ {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAMD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAkC1B"}
package/dist/probe.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { compressionMediaTypes } from '@lde/dataset';
2
- import { Parser } from 'n3';
2
+ import { rdfParser } from 'rdf-parse';
3
+ import { Readable } from 'node:stream';
3
4
  const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
4
5
  const DEFAULT_TIMEOUT_MS = 5000;
5
6
  /**
@@ -288,7 +289,7 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
288
289
  const body = await getResponse.text();
289
290
  const isHttpSuccess = getResponse.status >= 200 && getResponse.status < 400;
290
291
  const failureReason = isHttpSuccess
291
- ? validateBody(body, getResponse.headers.get('Content-Type'))
292
+ ? await validateBody(body, getResponse.headers.get('Content-Type'), url, options.timeoutMs)
292
293
  : null;
293
294
  const responseTimeMs = Math.round(performance.now() - start);
294
295
  const result = new DataDumpProbeResult(url, getResponse, responseTimeMs, failureReason);
@@ -300,28 +301,83 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
300
301
  checkContentTypeMismatch(result, distribution.mimeType);
301
302
  return result;
302
303
  }
304
+ // The RDF serializations whose bodies we parse to confirm they carry triples. A
305
+ // non-empty body in one of these formats that yields zero triples — an empty
306
+ // graph such as a JSON-LD `{}`, an `<rdf:RDF/>`, or prefix-only Turtle — is a
307
+ // faulty distribution, not a usable one, so it must be caught here. Other
308
+ // content types (CSV, HTML, …) are left untouched: the probe is not the place
309
+ // to assert what a non-RDF body should contain.
303
310
  const rdfContentTypes = [
304
311
  'text/turtle',
305
312
  'application/n-triples',
306
313
  'application/n-quads',
314
+ 'application/trig',
315
+ 'text/n3',
316
+ 'application/ld+json',
317
+ 'application/rdf+xml',
307
318
  ];
308
- function validateBody(body, contentType) {
319
+ async function validateBody(body, contentType, baseIRI, timeoutMs) {
309
320
  if (body.length === 0) {
310
321
  return 'Distribution is empty';
311
322
  }
312
- if (contentType && rdfContentTypes.some((t) => contentType.startsWith(t))) {
313
- try {
314
- const parser = new Parser();
315
- const quads = parser.parse(body);
316
- if (quads.length === 0) {
317
- return 'Distribution contains no RDF triples';
318
- }
319
- }
320
- catch (e) {
321
- return e instanceof Error ? e.message : String(e);
322
- }
323
+ const serialization = contentType?.split(';')[0].trim();
324
+ if (!serialization || !rdfContentTypes.includes(serialization)) {
325
+ return null;
323
326
  }
324
- return null;
327
+ const outcome = await classifyRdfBody(body, serialization, baseIRI, timeoutMs);
328
+ switch (outcome.type) {
329
+ case 'empty':
330
+ return 'Distribution contains no RDF triples';
331
+ case 'parseError':
332
+ return outcome.message;
333
+ // 'hasTriples' proves content. 'inconclusive' means the parse timed out or a
334
+ // remote JSON-LD @context could not be loaded — a third-party hiccup, not
335
+ // evidence the distribution is faulty — so neither is reported as a failure.
336
+ default:
337
+ return null;
338
+ }
339
+ }
340
+ /**
341
+ * Parse an RDF body just far enough to tell whether it carries any triples:
342
+ * resolve on the first triple (presence is all we need, not a full count), on a
343
+ * clean end with none ('empty'), or on a parse error. The parse is bounded by
344
+ * `timeoutMs` because a JSON-LD `@context` is fetched from its origin, and a
345
+ * slow or hanging context host would otherwise stall the probe past its budget;
346
+ * on expiry — and likewise when a remote `@context` is unreachable — the outcome
347
+ * is 'inconclusive', so a valid distribution is never flagged faulty for a
348
+ * context host's failure. `baseIRI` resolves any relative IRIs in the document.
349
+ */
350
+ function classifyRdfBody(body, contentType, baseIRI, timeoutMs) {
351
+ return new Promise((resolve) => {
352
+ const quads = rdfParser.parse(Readable.from([body]), {
353
+ contentType,
354
+ baseIRI,
355
+ });
356
+ const timer = setTimeout(() => settle({ type: 'inconclusive' }), timeoutMs);
357
+ let settled = false;
358
+ function settle(outcome) {
359
+ if (settled)
360
+ return;
361
+ settled = true;
362
+ clearTimeout(timer);
363
+ quads.destroy();
364
+ resolve(outcome);
365
+ }
366
+ quads
367
+ .on('data', () => settle({ type: 'hasTriples' }))
368
+ .on('error', (error) => settle(isRemoteContextError(error)
369
+ ? { type: 'inconclusive' }
370
+ : { type: 'parseError', message: error.message }))
371
+ .on('end', () => settle({ type: 'empty' }));
372
+ });
373
+ }
374
+ /**
375
+ * Whether a parse error is the RDF parser failing to load a remote JSON-LD
376
+ * `@context` (an unreachable or broken third-party context host) rather than a
377
+ * defect in the distribution body itself.
378
+ */
379
+ function isRemoteContextError(error) {
380
+ return /remote context/i.test(error.message);
325
381
  }
326
382
  /**
327
383
  * Compare the declared MIME type from the dataset registry against the
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/distribution-probe",
3
- "version": "0.1.5",
3
+ "version": "0.1.6",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/distribution-probe"
@@ -25,7 +25,7 @@
25
25
  ],
26
26
  "dependencies": {
27
27
  "@lde/dataset": "0.7.4",
28
- "n3": "^2.0.1",
28
+ "rdf-parse": "^5.0.0",
29
29
  "tslib": "^2.3.0"
30
30
  }
31
31
  }