hdoc-tools 0.47.3 → 0.47.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-module.js CHANGED
@@ -48,6 +48,8 @@
48
48
  }
49
49
  };
50
50
 
51
+ exports.fetchWithRetry = fetchWithRetry;
52
+
51
53
  exports.content_type_for_ext = (ext) => {
52
54
  switch (ext) {
53
55
  case ".z":
package/hdoc-validate.js CHANGED
@@ -465,14 +465,35 @@ const { error } = require("node:console");
465
465
  return returnPaths;
466
466
  }
467
467
 
468
- const _fetch_headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36' };
468
+ // Headers that mimic a real Chrome browser request — sites doing bot detection
469
+ // check far more than just User-Agent (Accept, Sec-Fetch-*, client hints, etc.).
470
+ const _fetch_headers = {
471
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
472
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
473
+ 'Accept-Language': 'en-US,en;q=0.9',
474
+ 'Accept-Encoding': 'gzip, deflate, br',
475
+ 'Cache-Control': 'no-cache',
476
+ 'Pragma': 'no-cache',
477
+ 'Sec-Fetch-Dest': 'document',
478
+ 'Sec-Fetch-Mode': 'navigate',
479
+ 'Sec-Fetch-Site': 'none',
480
+ 'Sec-Fetch-User': '?1',
481
+ 'Sec-Ch-Ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
482
+ 'Sec-Ch-Ua-Mobile': '?0',
483
+ 'Sec-Ch-Ua-Platform': '"Windows"',
484
+ 'Upgrade-Insecure-Requests': '1',
485
+ };
469
486
 
470
- // Checks a single external URL by sending a HEAD request (falling back to GET
471
- // if the server returns 405 Method Not Allowed). Returns the HTTP status code.
487
+ // Checks a single external URL by sending a HEAD request, falling back to GET
488
+ // if the server returns 405 (Method Not Allowed) or 404 (some servers, e.g.
489
+ // marketplace.visualstudio.com, return 404 for HEAD even when the page exists).
490
+ // Retries up to 5 times on transient errors (5xx, 429, network failures).
491
+ // Returns the HTTP status code.
472
492
  const fetchExternalLinkStatus = async (url) => {
473
- const resp = await fetch(url, { method: 'HEAD', headers: _fetch_headers, signal: AbortSignal.timeout(10000), redirect: 'follow' });
474
- if (resp.status === 405) {
475
- const getResp = await fetch(url, { method: 'GET', headers: _fetch_headers, signal: AbortSignal.timeout(10000), redirect: 'follow' });
493
+ const opts = { method: 'HEAD', headers: _fetch_headers, timeoutMs: 10000, redirect: 'follow' };
494
+ const resp = await hdoc.fetchWithRetry(url, opts);
495
+ if (resp.status === 404 || resp.status === 405) {
496
+ const getResp = await hdoc.fetchWithRetry(url, { ...opts, method: 'GET' });
476
497
  return getResp.status;
477
498
  }
478
499
  return resp.status;
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hdoc-tools",
3
- "version": "0.47.3",
3
+ "version": "0.47.4",
4
4
  "description": "Hornbill HDocBook Development Support Tool",
5
5
  "main": "hdoc.js",
6
6
  "bin": {