hdoc-tools 0.47.3 → 0.47.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/hdoc-module.js +2 -0
- package/hdoc-validate.js +32 -11
- package/package.json +1 -1
package/hdoc-module.js
CHANGED
package/hdoc-validate.js
CHANGED
|
@@ -465,14 +465,35 @@ const { error } = require("node:console");
|
|
|
465
465
|
return returnPaths;
|
|
466
466
|
}
|
|
467
467
|
|
|
468
|
-
|
|
468
|
+
// Headers that mimic a real Chrome browser request — sites doing bot detection
|
|
469
|
+
// check far more than just User-Agent (Accept, Sec-Fetch-*, client hints, etc.).
|
|
470
|
+
const _fetch_headers = { // shared request headers; fetchExternalLinkStatus passes this object for its HEAD probe and reuses it for the GET fallback
|
|
471
|
+
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36', // Chrome 131 on 64-bit Windows; keep the major version in step with Sec-Ch-Ua below
|
|
472
|
+
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8', // HTML-first content negotiation with a low-priority wildcard fallback
|
|
473
|
+
'Accept-Language': 'en-US,en;q=0.9', // US English preferred, any English accepted
|
|
474
|
+
'Accept-Encoding': 'gzip, deflate, br', // NOTE(review): advertises compressed responses; confirm the client behind hdoc.fetchWithRetry decodes all three
|
|
475
|
+
'Cache-Control': 'no-cache', // ask intermediaries to revalidate rather than serve a stale copy
|
|
476
|
+
'Pragma': 'no-cache', // HTTP/1.0 counterpart of the Cache-Control directive above
|
|
477
|
+
'Sec-Fetch-Dest': 'document', // fetch metadata: the request targets a top-level document
|
|
478
|
+
'Sec-Fetch-Mode': 'navigate', // fetch metadata: the request is a page navigation
|
|
479
|
+
'Sec-Fetch-Site': 'none', // fetch metadata: not initiated by another site (as with a typed URL or bookmark)
|
|
480
|
+
'Sec-Fetch-User': '?1', // fetch metadata: structured-header boolean true, navigation was user-activated
|
|
481
|
+
'Sec-Ch-Ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"', // client-hint brand list; version 131 must agree with the User-Agent string
|
|
482
|
+
'Sec-Ch-Ua-Mobile': '?0', // client hint: structured-header boolean false, not a mobile device
|
|
483
|
+
'Sec-Ch-Ua-Platform': '"Windows"', // client hint: platform matches the Windows NT token in User-Agent
|
|
484
|
+
'Upgrade-Insecure-Requests': '1', // signals that the client prefers an HTTPS version of the page
|
|
485
|
+
};
|
|
469
486
|
|
|
470
|
-
// Checks a single external URL by sending a HEAD request (falling back to GET
|
|
471
|
-
// if the server returns 405 Method Not Allowed)
|
|
487
|
+
// Checks a single external URL by sending a HEAD request, falling back to GET
|
|
488
|
+
// if the server returns 405 (Method Not Allowed) or 404 (some servers, e.g.
|
|
489
|
+
// marketplace.visualstudio.com, return 404 for HEAD even when the page exists).
|
|
490
|
+
// Retries up to 5 times on transient errors (5xx, 429, network failures).
|
|
491
|
+
// Returns the HTTP status code.
|
|
472
492
|
const fetchExternalLinkStatus = async (url) => {
|
|
473
|
-
const
|
|
474
|
-
|
|
475
|
-
|
|
493
|
+
const opts = { method: 'HEAD', headers: _fetch_headers, timeoutMs: 10000, redirect: 'follow' };
|
|
494
|
+
const resp = await hdoc.fetchWithRetry(url, opts);
|
|
495
|
+
if (resp.status === 404 || resp.status === 405) {
|
|
496
|
+
const getResp = await hdoc.fetchWithRetry(url, { ...opts, method: 'GET' });
|
|
476
497
|
return getResp.status;
|
|
477
498
|
}
|
|
478
499
|
return resp.status;
|
|
@@ -508,7 +529,10 @@ const { error } = require("node:console");
|
|
|
508
529
|
const valid_url = hdoc.valid_url(links[i]);
|
|
509
530
|
if (!valid_url) {
|
|
510
531
|
// Could be a relative path, check
|
|
511
|
-
if (links[i].startsWith("
|
|
532
|
+
if (links[i].startsWith("#") || links[i].startsWith("/#")) {
|
|
533
|
+
//Flat Anchor - validate we have a same-file hit
|
|
534
|
+
isHashAnchor(htmlFile, links[i]);
|
|
535
|
+
} else if (links[i].startsWith("/") && !links[i].startsWith("/#")) {
|
|
512
536
|
let link_segments = links[i].split("/");
|
|
513
537
|
if (link_segments[0] === "") link_segments.shift();
|
|
514
538
|
const link_root = link_segments[0] === "_books" ? link_segments[1] : link_segments[0];
|
|
@@ -521,14 +545,11 @@ const { error } = require("node:console");
|
|
|
521
545
|
}
|
|
522
546
|
|
|
523
547
|
// Checking for internal links in other books - can't easily validate those here, returning
|
|
524
|
-
if (link_segments.length > 1 && link_root !== hdocbook_config.docId) {
|
|
548
|
+
if ((link_segments.length > 1 && link_root !== hdocbook_config.docId) || (link_segments.length === 1 && link_root !== hdocbook_config.docId && link_root !== "index")) {
|
|
525
549
|
fs.appendFileSync(skip_link_file, `${links[i]}\n`);
|
|
526
550
|
continue;
|
|
527
551
|
}
|
|
528
552
|
isRelativePath(source_path, htmlFile, links[i]);
|
|
529
|
-
} else if (links[i].startsWith("#") || links[i].startsWith("/#")) {
|
|
530
|
-
//Flat Anchor - validate we have a same-file hit
|
|
531
|
-
isHashAnchor(htmlFile, links[i]);
|
|
532
553
|
} else {
|
|
533
554
|
const error_message = processErrorMessage(`Root relative links should start with a forward-slash: ${links[i]}`, markdown_paths.relativePath, markdown_content, links[i]);
|
|
534
555
|
errors[htmlFile.relativePath].push(error_message);
|