hdoc-tools 0.47.3 → 0.47.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hdoc-module.js +2 -0
- package/hdoc-validate.js +27 -6
- package/package.json +1 -1
package/hdoc-module.js
CHANGED
package/hdoc-validate.js
CHANGED
|
@@ -465,14 +465,35 @@ const { error } = require("node:console");
|
|
|
465
465
|
return returnPaths;
|
|
466
466
|
}
|
|
467
467
|
|
|
468
|
-
|
|
468
|
+
// Headers that mimic a real Chrome browser request — sites doing bot detection
|
|
469
|
+
// check far more than just User-Agent (Accept, Sec-Fetch-*, client hints, etc.).
|
|
470
|
+
const _fetch_headers = {
|
|
471
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
|
|
472
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
|
|
473
|
+
'Accept-Language': 'en-US,en;q=0.9',
|
|
474
|
+
'Accept-Encoding': 'gzip, deflate, br',
|
|
475
|
+
'Cache-Control': 'no-cache',
|
|
476
|
+
'Pragma': 'no-cache',
|
|
477
|
+
'Sec-Fetch-Dest': 'document',
|
|
478
|
+
'Sec-Fetch-Mode': 'navigate',
|
|
479
|
+
'Sec-Fetch-Site': 'none',
|
|
480
|
+
'Sec-Fetch-User': '?1',
|
|
481
|
+
'Sec-Ch-Ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
|
|
482
|
+
'Sec-Ch-Ua-Mobile': '?0',
|
|
483
|
+
'Sec-Ch-Ua-Platform': '"Windows"',
|
|
484
|
+
'Upgrade-Insecure-Requests': '1',
|
|
485
|
+
};
|
|
469
486
|
|
|
470
|
-
// Checks a single external URL by sending a HEAD request (falling back to GET
|
|
471
|
-
// if the server returns 405 Method Not Allowed)
|
|
487
|
+
// Checks a single external URL by sending a HEAD request, falling back to GET
|
|
488
|
+
// if the server returns 405 (Method Not Allowed) or 404 (some servers, e.g.
|
|
489
|
+
// marketplace.visualstudio.com, return 404 for HEAD even when the page exists).
|
|
490
|
+
// Retries up to 5 times on transient errors (5xx, 429, network failures).
|
|
491
|
+
// Returns the HTTP status code.
|
|
472
492
|
const fetchExternalLinkStatus = async (url) => {
|
|
473
|
-
const
|
|
474
|
-
|
|
475
|
-
|
|
493
|
+
const opts = { method: 'HEAD', headers: _fetch_headers, timeoutMs: 10000, redirect: 'follow' };
|
|
494
|
+
const resp = await hdoc.fetchWithRetry(url, opts);
|
|
495
|
+
if (resp.status === 404 || resp.status === 405) {
|
|
496
|
+
const getResp = await hdoc.fetchWithRetry(url, { ...opts, method: 'GET' });
|
|
476
497
|
return getResp.status;
|
|
477
498
|
}
|
|
478
499
|
return resp.status;
|