@lde/distribution-probe 0.1.4 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/probe.d.ts CHANGED
@@ -46,8 +46,14 @@ declare abstract class ProbeResult {
46
46
  * Result of probing a SPARQL endpoint.
47
47
  */
48
48
  export declare class SparqlProbeResult extends ProbeResult {
49
- readonly acceptedContentType: string;
50
- constructor(url: string, response: Response, responseTimeMs: number, acceptedContentType: string, failureReason?: string | null);
49
+ /**
50
+ * Content types the probe was prepared to accept as a valid answer. A SELECT or
51
+ * ASK query may be answered with SPARQL results in JSON or XML; the endpoint
52
+ * chooses, so success is not tied to a single serialization. A single string is
53
+ * accepted and normalized to a one-element list for backwards compatibility.
54
+ */
55
+ readonly acceptedContentTypes: readonly string[];
56
+ constructor(url: string, response: Response, responseTimeMs: number, acceptedContentTypes: string | readonly string[], failureReason?: string | null);
51
57
  isSuccess(): boolean;
52
58
  }
53
59
  /**
@@ -1 +1 @@
1
- {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAGnE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAKD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD,SAAgB,mBAAmB,EAAE,MAAM,CAAC;gBAG1C,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,mBAAmB,EAAE,MAAM,EAC3B,aAAa,GAAE,MAAM,GAAG,IAAW;IAM5B,SAAS,IAAI,OAAO;CAM9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAkC1B"}
1
+ {"version":3,"file":"probe.d.ts","sourceRoot":"","sources":["../src/probe.ts"],"names":[],"mappings":"AAAA,OAAO,EAAyB,YAAY,EAAE,MAAM,cAAc,CAAC;AAInE;;GAEG;AACH,MAAM,WAAW,YAAY;IAC3B,0DAA0D;IAC1D,SAAS,CAAC,EAAE,MAAM,CAAC;IACnB;;;OAGG;IACH,OAAO,CAAC,EAAE,OAAO,CAAC;IAClB;;;;;OAKG;IACH,WAAW,CAAC,EAAE,MAAM,CAAC;CACtB;AAKD;;GAEG;AACH,qBAAa,YAAY;aAEL,GAAG,EAAE,MAAM;aACX,OAAO,EAAE,MAAM;aACf,cAAc,EAAE,MAAM;gBAFtB,GAAG,EAAE,MAAM,EACX,OAAO,EAAE,MAAM,EACf,cAAc,EAAE,MAAM;CAEzC;AAED;;GAEG;AACH,uBAAe,WAAW;aAUN,GAAG,EAAE,MAAM;IAT7B,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,UAAU,EAAE,MAAM,CAAC;IACnC,SAAgB,YAAY,EAAE,IAAI,GAAG,IAAI,CAAQ;IACjD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAC;IAC3C,SAAgB,aAAa,EAAE,MAAM,GAAG,IAAI,CAAC;IAC7C,SAAgB,QAAQ,EAAE,MAAM,EAAE,CAAM;IACxC,SAAgB,cAAc,EAAE,MAAM,CAAC;gBAGrB,GAAG,EAAE,MAAM,EAC3B,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;IAa9B,SAAS,IAAI,OAAO;CAO5B;AAMD;;GAEG;AACH,qBAAa,iBAAkB,SAAQ,WAAW;IAChD;;;;;OAKG;IACH,SAAgB,oBAAoB,EAAE,SAAS,MAAM,EAAE,CAAC;gBAGtD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,oBAAoB,EAAE,MAAM,GAAG,SAAS,MAAM,EAAE,EAChD,aAAa,GAAE,MAAM,GAAG,IAAW;IAS5B,SAAS,IAAI,OAAO;CAQ9B;AAED;;GAEG;AACH,qBAAa,mBAAoB,SAAQ,WAAW;IAClD,SAAgB,WAAW,EAAE,MAAM,GAAG,IAAI,CAAQ;gBAGhD,GAAG,EAAE,MAAM,EACX,QAAQ,EAAE,QAAQ,EAClB,cAAc,EAAE,MAAM,EACtB,aAAa,GAAE,MAAM,GAAG,IAAW;CAQtC;AAED,MAAM,MAAM,eAAe,GACvB,iBAAiB,GACjB,mBAAmB,GACnB,YAAY,CAAC;AAIjB;;;;;;;;GAQG;AACH,wBAAsB,KAAK,CACzB,YAAY,EAAE,YAAY,EAC1B,OAAO,CAAC,EAAE,YAAY,GACrB,OAAO,CAAC,eAAe,CAAC,CAkC1B"}
package/dist/probe.js CHANGED
@@ -1,5 +1,6 @@
1
1
  import { compressionMediaTypes } from '@lde/dataset';
2
- import { Parser } from 'n3';
2
+ import { rdfParser } from 'rdf-parse';
3
+ import { Readable } from 'node:stream';
3
4
  const DEFAULT_SPARQL_QUERY = 'SELECT * { ?s ?p ?o } LIMIT 1';
4
5
  const DEFAULT_TIMEOUT_MS = 5000;
5
6
  /**
@@ -46,19 +47,29 @@ class ProbeResult {
46
47
  }
47
48
  }
48
49
  const SPARQL_RESULTS_JSON = 'application/sparql-results+json';
50
+ const SPARQL_RESULTS_XML = 'application/sparql-results+xml';
49
51
  const SPARQL_RDF_RESULTS = 'application/n-triples';
/**
 * Outcome of probing a SPARQL endpoint.
 */
export class SparqlProbeResult extends ProbeResult {
    /**
     * The content types the probe would accept as a valid answer. A SELECT or ASK
     * query may come back as SPARQL results in JSON or XML (the endpoint picks),
     * so success is not bound to one serialization. Always stored as a list: a
     * plain string passed to the constructor becomes a one-element list, which
     * keeps older single-type callers working.
     */
    acceptedContentTypes;
    /**
     * @param url Forwarded unchanged to the base class.
     * @param response Forwarded unchanged to the base class.
     * @param responseTimeMs Forwarded unchanged to the base class.
     * @param acceptedContentTypes One content type, or a list of them.
     * @param failureReason Forwarded unchanged to the base class; defaults to `null`.
     */
    constructor(url, response, responseTimeMs, acceptedContentTypes, failureReason = null) {
        super(url, response, responseTimeMs, failureReason);
        if (typeof acceptedContentTypes === 'string') {
            this.acceptedContentTypes = [acceptedContentTypes];
        }
        else {
            this.acceptedContentTypes = acceptedContentTypes;
        }
    }
    /**
     * Successful when the base-class check passes and the response content type
     * begins with at least one of the accepted content types.
     */
    isSuccess() {
        if (!super.isSuccess()) {
            return false;
        }
        for (const accepted of this.acceptedContentTypes) {
            // A missing content type can never match.
            if (this.contentType?.startsWith(accepted) ?? false) {
                return true;
            }
        }
        return false;
    }
}
64
75
  /**
@@ -137,18 +148,33 @@ function detectSparqlQueryType(query) {
137
148
  const match = /\b(ASK|SELECT|CONSTRUCT|DESCRIBE)\b/i.exec(withoutComments);
138
149
  return (match?.[1].toUpperCase() ?? 'SELECT');
139
150
  }
/**
 * Lists, most preferred first, the content types a SPARQL endpoint may answer
 * with for a given query type. ASK and SELECT produce a results document, which
 * the endpoint may serialize as JSON or XML; every other query type (CONSTRUCT,
 * DESCRIBE) produces RDF.
 *
 * @param queryType Upper-case SPARQL query form, e.g. 'ASK' or 'SELECT'.
 * @returns A new array of acceptable content types.
 */
function acceptableContentTypes(queryType) {
    switch (queryType) {
        case 'ASK':
        case 'SELECT':
            return [SPARQL_RESULTS_JSON, SPARQL_RESULTS_XML];
        default:
            return [SPARQL_RDF_RESULTS];
    }
}
/**
 * Build an `Accept` header from content types given in preference order.
 *
 * The first type is sent without a q-value (implicitly q=1). Each later type
 * gets a strictly lower q-value (0.9, 0.8, …) so the stated preference order
 * survives content negotiation even with three or more types, while an endpoint
 * that only serves a later type is still not rejected with a 406. The q-value
 * is floored at 0.1, because q=0 would mark a type as not acceptable.
 *
 * @param contentTypes Content types, most preferred first.
 * @returns The header value; an empty string for an empty list.
 */
function acceptHeader(contentTypes) {
    return contentTypes
        .map((type, index) => {
            if (index === 0) {
                return type;
            }
            // Work in whole tenths to avoid floating-point artefacts such as 0.7999….
            const tenths = Math.max(1, 10 - index);
            return `${type};q=0.${tenths}`;
        })
        .join(', ');
}
146
172
  async function probeSparqlEndpoint(url, _distribution, options, authHeaders, start) {
147
173
  const queryType = detectSparqlQueryType(options.sparqlQuery);
148
- const accept = acceptHeaderForQueryType(queryType);
174
+ const acceptedContentTypes = acceptableContentTypes(queryType);
149
175
  const headers = new Headers({
150
176
  'Content-Type': 'application/x-www-form-urlencoded;charset=UTF-8',
151
- Accept: accept,
177
+ Accept: acceptHeader(acceptedContentTypes),
152
178
  });
153
179
  for (const [key, value] of authHeaders) {
154
180
  headers.set(key, value);
@@ -160,19 +186,19 @@ async function probeSparqlEndpoint(url, _distribution, options, authHeaders, sta
160
186
  body: `query=${encodeURIComponent(options.sparqlQuery)}`,
161
187
  });
162
188
  const actualContentType = response.headers.get('Content-Type');
163
- const contentTypeMatches = actualContentType?.startsWith(accept) ?? false;
189
+ const matchedContentType = acceptedContentTypes.find((type) => actualContentType?.startsWith(type) ?? false);
164
190
  let failureReason = null;
165
- if (response.ok && contentTypeMatches) {
166
- failureReason = await validateSparqlResponse(response, queryType);
191
+ if (response.ok && matchedContentType !== undefined) {
192
+ failureReason = await validateSparqlResponse(response, queryType, matchedContentType);
167
193
  }
168
194
  else {
169
195
  // Drain unconsumed body to release the underlying connection.
170
196
  await response.body?.cancel();
171
197
  }
172
198
  const responseTimeMs = Math.round(performance.now() - start);
173
- return new SparqlProbeResult(url, response, responseTimeMs, accept, failureReason);
199
+ return new SparqlProbeResult(url, response, responseTimeMs, acceptedContentTypes, failureReason);
174
200
  }
175
- async function validateSparqlResponse(response, queryType) {
201
+ async function validateSparqlResponse(response, queryType, contentType) {
176
202
  const body = await response.text();
177
203
  if (body.length === 0) {
178
204
  return 'SPARQL endpoint returned an empty response';
@@ -182,6 +208,11 @@ async function validateSparqlResponse(response, queryType) {
182
208
  // endpoint answered. Deep parse validation is the data-dump path’s job.
183
209
  return null;
184
210
  }
211
+ return contentType.startsWith(SPARQL_RESULTS_XML)
212
+ ? validateSparqlXmlResults(body, queryType)
213
+ : validateSparqlJsonResults(body, queryType);
214
+ }
215
+ function validateSparqlJsonResults(body, queryType) {
185
216
  let json;
186
217
  try {
187
218
  json = JSON.parse(body);
@@ -201,6 +232,27 @@ async function validateSparqlResponse(response, queryType) {
201
232
  }
202
233
  return null;
203
234
  }
/**
 * Lightweight structural check on a SPARQL Query Results XML document. It only
 * confirms that the endpoint answered with the expected shape and deliberately
 * avoids pulling in a full XML parser.
 *
 * Element names may carry a namespace prefix (e.g. `<sr:sparql>`), which is
 * valid XML for the same vocabulary, so an optional prefix is tolerated on the
 * `sparql`, `boolean` and `results` elements. Matching stays case-insensitive.
 *
 * @param body Response body as text.
 * @param queryType 'ASK' selects the boolean check; any other value is treated
 * as SELECT and checked for a `results` element.
 * @returns A failure reason, or `null` when the document has the expected shape.
 */
function validateSparqlXmlResults(body, queryType) {
    if (!/<(?:[\w.-]+:)?sparql[\s>]/i.test(body)) {
        return 'SPARQL endpoint returned invalid XML';
    }
    if (queryType === 'ASK') {
        // The back-reference makes the closing tag use the same (optionally
        // prefixed) name as the opening tag.
        if (!/<((?:[\w.-]+:)?boolean)>\s*(true|false)\s*<\/\1>/i.test(body)) {
            return 'SPARQL endpoint did not return a valid ASK result';
        }
        return null;
    }
    // SELECT
    if (!/<(?:[\w.-]+:)?results[\s/>]/i.test(body)) {
        return 'SPARQL endpoint did not return a valid results object';
    }
    return null;
}
204
256
  async function probeDataDump(url, distribution, options, authHeaders, start) {
205
257
  // Express a preference for the declared media type, but accept anything as a
206
258
  // fallback. Servers that implement RFC 9110 §12.5.1 content negotiation will
@@ -237,7 +289,7 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
237
289
  const body = await getResponse.text();
238
290
  const isHttpSuccess = getResponse.status >= 200 && getResponse.status < 400;
239
291
  const failureReason = isHttpSuccess
240
- ? validateBody(body, getResponse.headers.get('Content-Type'))
292
+ ? await validateBody(body, getResponse.headers.get('Content-Type'), url, options.timeoutMs)
241
293
  : null;
242
294
  const responseTimeMs = Math.round(performance.now() - start);
243
295
  const result = new DataDumpProbeResult(url, getResponse, responseTimeMs, failureReason);
@@ -249,28 +301,83 @@ async function probeDataDump(url, distribution, options, authHeaders, start) {
249
301
  checkContentTypeMismatch(result, distribution.mimeType);
250
302
  return result;
251
303
  }
304
+ // The RDF serializations whose bodies we parse to confirm they carry triples. A
305
+ // non-empty body in one of these formats that yields zero triples — an empty
306
+ // graph such as a JSON-LD `{}`, an `<rdf:RDF/>`, or prefix-only Turtle — is a
307
+ // faulty distribution, not a usable one, so it must be caught here. Other
308
+ // content types (CSV, HTML, …) are left untouched: the probe is not the place
309
+ // to assert what a non-RDF body should contain.
252
310
  const rdfContentTypes = [
253
311
  'text/turtle',
254
312
  'application/n-triples',
255
313
  'application/n-quads',
314
+ 'application/trig',
315
+ 'text/n3',
316
+ 'application/ld+json',
317
+ 'application/rdf+xml',
256
318
  ];
257
- function validateBody(body, contentType) {
319
+ async function validateBody(body, contentType, baseIRI, timeoutMs) {
258
320
  if (body.length === 0) {
259
321
  return 'Distribution is empty';
260
322
  }
261
- if (contentType && rdfContentTypes.some((t) => contentType.startsWith(t))) {
262
- try {
263
- const parser = new Parser();
264
- const quads = parser.parse(body);
265
- if (quads.length === 0) {
266
- return 'Distribution contains no RDF triples';
267
- }
268
- }
269
- catch (e) {
270
- return e instanceof Error ? e.message : String(e);
271
- }
323
+ const serialization = contentType?.split(';')[0].trim();
324
+ if (!serialization || !rdfContentTypes.includes(serialization)) {
325
+ return null;
272
326
  }
273
- return null;
327
+ const outcome = await classifyRdfBody(body, serialization, baseIRI, timeoutMs);
328
+ switch (outcome.type) {
329
+ case 'empty':
330
+ return 'Distribution contains no RDF triples';
331
+ case 'parseError':
332
+ return outcome.message;
333
+ // 'hasTriples' proves content. 'inconclusive' means the parse timed out or a
334
+ // remote JSON-LD @context could not be loaded — a third-party hiccup, not
335
+ // evidence the distribution is faulty — so neither is reported as a failure.
336
+ default:
337
+ return null;
338
+ }
339
+ }
/**
 * Parse an RDF body just far enough to tell whether it carries any triples.
 *
 * The returned promise never rejects from stream events; it resolves with one of:
 * - `{ type: 'hasTriples' }` on the first parsed triple (presence is enough, no count);
 * - `{ type: 'empty' }` when the parse ends cleanly without any triple;
 * - `{ type: 'parseError', message }` on a parse error;
 * - `{ type: 'inconclusive' }` when `timeoutMs` elapses first, or when the error
 *   is a remote JSON-LD `@context` that could not be loaded.
 *
 * The parse is time-bounded because a JSON-LD `@context` is fetched from its
 * origin, and a slow or hanging context host would otherwise stall the probe
 * past its budget. A valid distribution is therefore never flagged faulty for a
 * context host's failure.
 *
 * @param body Document text to parse.
 * @param contentType Media type handed to the RDF parser.
 * @param baseIRI Resolves any relative IRIs in the document.
 * @param timeoutMs Parse budget in milliseconds. Defaults to `DEFAULT_TIMEOUT_MS`
 * so that an omitted value does not make the timer fire almost immediately.
 * NOTE(review): callers presumably always pass a resolved timeout — confirm.
 */
function classifyRdfBody(body, contentType, baseIRI, timeoutMs = DEFAULT_TIMEOUT_MS) {
    return new Promise((resolve) => {
        let settled = false;
        const quads = rdfParser.parse(Readable.from([body]), {
            contentType,
            baseIRI,
        });
        const timer = setTimeout(() => settle({ type: 'inconclusive' }), timeoutMs);
        // Resolve at most once, then stop the timer and tear the parser down so
        // no further work is done on a body whose verdict is already known.
        function settle(outcome) {
            if (settled)
                return;
            settled = true;
            clearTimeout(timer);
            quads.destroy();
            resolve(outcome);
        }
        quads
            .on('data', () => settle({ type: 'hasTriples' }))
            .on('error', (error) => {
                if (error != null && isRemoteContextError(error)) {
                    settle({ type: 'inconclusive' });
                    return;
                }
                // Streams can emit non-Error values. Always report a non-empty
                // string so the failure is not lost as `undefined`.
                const message = typeof error?.message === 'string' && error.message !== ''
                    ? error.message
                    : String(error);
                settle({ type: 'parseError', message });
            })
            .on('end', () => settle({ type: 'empty' }));
    });
}
/**
 * Whether a parse error is the RDF parser failing to load a remote JSON-LD
 * `@context` (an unreachable or broken third-party context host) rather than a
 * defect in the distribution body itself.
 *
 * The decision is made on the error text alone: it must contain "remote
 * context" (case-insensitive). Values that are not Error-like are tolerated —
 * a bare string is matched directly, and `null`/`undefined` or a non-string
 * `message` yield `false` instead of throwing inside a stream event handler.
 *
 * @param error Value emitted with the parser's `'error'` event.
 * @returns `true` when the text identifies a remote-context failure.
 */
function isRemoteContextError(error) {
    const message = typeof error === 'string' ? error : error?.message;
    return typeof message === 'string' && /remote context/i.test(message);
}
275
382
  /**
276
383
  * Compare the declared MIME type from the dataset registry against the
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@lde/distribution-probe",
3
- "version": "0.1.4",
3
+ "version": "0.1.6",
4
4
  "repository": {
5
5
  "url": "git+https://github.com/ldelements/lde.git",
6
6
  "directory": "packages/distribution-probe"
@@ -25,7 +25,7 @@
25
25
  ],
26
26
  "dependencies": {
27
27
  "@lde/dataset": "0.7.4",
28
- "n3": "^2.0.1",
28
+ "rdf-parse": "^5.0.0",
29
29
  "tslib": "^2.3.0"
30
30
  }
31
31
  }