hdoc-tools 0.47.3 → 0.47.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-module.js CHANGED
@@ -48,6 +48,8 @@
48
48
  }
49
49
  };
50
50
 
51
+ exports.fetchWithRetry = fetchWithRetry;
52
+
51
53
  exports.content_type_for_ext = (ext) => {
52
54
  switch (ext) {
53
55
  case ".z":
package/hdoc-validate.js CHANGED
@@ -465,14 +465,35 @@ const { error } = require("node:console");
465
465
  return returnPaths;
466
466
  }
467
467
 
468
- const _fetch_headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36' };
468
+ // Headers that mimic a real Chrome browser request — sites doing bot detection
469
+ // check far more than just User-Agent (Accept, Sec-Fetch-*, client hints, etc.).
470
+ const _fetch_headers = {
471
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
472
+ 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8',
473
+ 'Accept-Language': 'en-US,en;q=0.9',
474
+ 'Accept-Encoding': 'gzip, deflate, br',
475
+ 'Cache-Control': 'no-cache',
476
+ 'Pragma': 'no-cache',
477
+ 'Sec-Fetch-Dest': 'document',
478
+ 'Sec-Fetch-Mode': 'navigate',
479
+ 'Sec-Fetch-Site': 'none',
480
+ 'Sec-Fetch-User': '?1',
481
+ 'Sec-Ch-Ua': '"Google Chrome";v="131", "Chromium";v="131", "Not_A Brand";v="24"',
482
+ 'Sec-Ch-Ua-Mobile': '?0',
483
+ 'Sec-Ch-Ua-Platform': '"Windows"',
484
+ 'Upgrade-Insecure-Requests': '1',
485
+ };
469
486
 
470
- // Checks a single external URL by sending a HEAD request (falling back to GET
471
- // if the server returns 405 Method Not Allowed). Returns the HTTP status code.
487
+ // Checks a single external URL by sending a HEAD request, falling back to GET
488
+ // if the server returns 405 (Method Not Allowed) or 404 (some servers, e.g.
489
+ // marketplace.visualstudio.com, return 404 for HEAD even when the page exists).
490
+ // Retries up to 5 times on transient errors (5xx, 429, network failures).
491
+ // Returns the HTTP status code.
472
492
  const fetchExternalLinkStatus = async (url) => {
473
- const resp = await fetch(url, { method: 'HEAD', headers: _fetch_headers, signal: AbortSignal.timeout(10000), redirect: 'follow' });
474
- if (resp.status === 405) {
475
- const getResp = await fetch(url, { method: 'GET', headers: _fetch_headers, signal: AbortSignal.timeout(10000), redirect: 'follow' });
493
+ const opts = { method: 'HEAD', headers: _fetch_headers, timeoutMs: 10000, redirect: 'follow' };
494
+ const resp = await hdoc.fetchWithRetry(url, opts);
495
+ if (resp.status === 404 || resp.status === 405) {
496
+ const getResp = await hdoc.fetchWithRetry(url, { ...opts, method: 'GET' });
476
497
  return getResp.status;
477
498
  }
478
499
  return resp.status;
@@ -508,7 +529,10 @@ const { error } = require("node:console");
508
529
  const valid_url = hdoc.valid_url(links[i]);
509
530
  if (!valid_url) {
510
531
  // Could be a relative path, check
511
- if (links[i].startsWith("/") && !links[i].startsWith("/#")) {
532
+ if (links[i].startsWith("#") || links[i].startsWith("/#")) {
533
+ //Flat Anchor - validate we have a same-file hit
534
+ isHashAnchor(htmlFile, links[i]);
535
+ } else if (links[i].startsWith("/") && !links[i].startsWith("/#")) {
512
536
  let link_segments = links[i].split("/");
513
537
  if (link_segments[0] === "") link_segments.shift();
514
538
  const link_root = link_segments[0] === "_books" ? link_segments[1] : link_segments[0];
@@ -521,14 +545,11 @@ const { error } = require("node:console");
521
545
  }
522
546
 
523
547
  // Checking for internal links in other books - can't easily validate those here, returning
524
- if (link_segments.length > 1 && link_root !== hdocbook_config.docId) {
548
+ if ((link_segments.length > 1 && link_root !== hdocbook_config.docId) || (link_segments.length === 1 && link_root !== hdocbook_config.docId && link_root !== "index")) {
525
549
  fs.appendFileSync(skip_link_file, `${links[i]}\n`);
526
550
  continue;
527
551
  }
528
552
  isRelativePath(source_path, htmlFile, links[i]);
529
- } else if (links[i].startsWith("#") || links[i].startsWith("/#")) {
530
- //Flat Anchor - validate we have a same-file hit
531
- isHashAnchor(htmlFile, links[i]);
532
553
  } else {
533
554
  const error_message = processErrorMessage(`Root relative links should start with a forward-slash: ${links[i]}`, markdown_paths.relativePath, markdown_content, links[i]);
534
555
  errors[htmlFile.relativePath].push(error_message);
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hdoc-tools",
3
- "version": "0.47.3",
3
+ "version": "0.47.5",
4
4
  "description": "Hornbill HDocBook Development Support Tool",
5
5
  "main": "hdoc.js",
6
6
  "bin": {