hdoc-tools 0.34.2 → 0.35.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-build.js CHANGED
@@ -72,7 +72,7 @@
72
72
  let prods_supported = [];
73
73
  let doc_id = "";
74
74
  let git_token =
75
- "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"; // Github fine-grained personal access token that has minimum read-only access to Hornbill Docs metadata
75
+ "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"; // Github fine-grained personal access token that has minimum read-only access to Hornbill Docs metadata
76
76
  let hdocbook_config = {};
77
77
  let hdocbook_project;
78
78
  let includes_found = 0;
@@ -1201,7 +1201,7 @@
1201
1201
 
1202
1202
  // Get github repo details
1203
1203
  github_repo_details = await hdoc.get_github_repo_details( api_path, git_token );
1204
- if (github_repo_details.success) {
1204
+ if (!github_repo_details.success) {
1205
1205
  console.warn(`Unable to retrieve GitHub Repository details: ${github_repo_details.error}`);
1206
1206
  }
1207
1207
  }
@@ -1345,10 +1345,8 @@
1345
1345
  // Get a list of MD files in work_path
1346
1346
  dree.scan(work_path, dreeOptions, build_file_callback);
1347
1347
 
1348
- if (pdf_enable) {
1349
- // Create a Chromium browser instance generate PDFs with
1350
- browser = await puppeteer.launch({ headless: "shell" });
1351
- }
1348
+ // Create a Chromium browser instance to generate PDFs and validate links with
1349
+ browser = await puppeteer.launch({ headless: "shell" });
1352
1350
 
1353
1351
  // Work through MD files and convert to HTML
1354
1352
  const mdPromiseArray = [];
@@ -1380,11 +1378,6 @@
1380
1378
  );
1381
1379
  }
1382
1380
 
1383
- if (pdf_enable) {
1384
- // Close the Chromium browser instance
1385
- await browser.close();
1386
- }
1387
-
1388
1381
  // Output to console
1389
1382
  console.log(`\n MD files found: ${conversion_attempted}`);
1390
1383
  console.log(`Successfully converted to HTML: ${conversion_success}`);
@@ -1417,7 +1410,12 @@
1417
1410
  redirects,
1418
1411
  draft_links,
1419
1412
  github_repo_details && github_repo_details.data && github_repo_details.data.private ? github_repo_details.data.private : false,
1413
+ browser
1420
1414
  );
1415
+
1416
+ // Close the Chromium browser instance
1417
+ await browser.close();
1418
+
1421
1419
  if (!validation_success) {
1422
1420
  const end_time = Date.now();
1423
1421
  console.log(`\nTime Taken: ${get_duration(start_time, end_time)}\n`);
package/hdoc-module.js CHANGED
@@ -483,7 +483,7 @@
483
483
  response.private = github_response.data.private;
484
484
  } else {
485
485
  // Is it a 404 or 403?
486
- response.error = `${github_response.status} : ${data.message}`;
486
+ response.error = `${github_response.status} : ${github_response.data.message}`;
487
487
  }
488
488
  return response;
489
489
  };
package/hdoc-validate.js CHANGED
@@ -7,7 +7,6 @@ const e = require("express");
7
7
  const dns = require("node:dns");
8
8
  const fs = require("node:fs");
9
9
  const path = require("node:path");
10
- const https = require("node:https");
11
10
  const hdoc = require(path.join(__dirname, "hdoc-module.js"));
12
11
  const translator = require("american-british-english-translator");
13
12
  const { trueCasePathSync } = require("true-case-path");
@@ -17,9 +16,6 @@ const e = require("express");
17
16
  spelling: true,
18
17
  };
19
18
  const regex_nav_paths = /[a-z0-9-\/]+[a-z0-9]+#{0,1}[a-z0-9-\/]+/;
20
- const agent = new https.Agent({
21
- rejectUnauthorized: false,
22
- });
23
19
 
24
20
  const errors = {};
25
21
  const messages = {};
@@ -449,7 +445,7 @@ const e = require("express");
449
445
  return returnPaths;
450
446
  }
451
447
 
452
- const checkLinks = async (source_path, htmlFile, links, hdocbook_config, hdocbook_project) => {
448
+ const checkLinks = async (source_path, htmlFile, links, hdocbook_config, hdocbook_project, browser) => {
453
449
  const markdown_paths = getMDPathFromHtmlPath(htmlFile);
454
450
  const markdown_content = fs.readFileSync(markdown_paths.markdownPath, 'utf8');
455
451
 
@@ -557,19 +553,62 @@ const e = require("express");
557
553
  continue;
558
554
  }
559
555
 
556
+
560
557
  try {
561
- await axios({
562
- url: links[i],
563
- method: 'get',
564
- timeout: 30000,
565
- maxRedirects: 5,
566
- validateStatus: (status) =>
567
- status >= 200 && status < 400,
568
- })
569
558
 
570
- messages[htmlFile.relativePath].push(
571
- `Link is a valid external URL: ${links[i]}`,
572
- );
559
+ // Use Puppeteer to validate link address works
560
+ const page = await browser.newPage();
561
+
562
+ // Set a user-agent to mimic a real browser
563
+ await page.setUserAgent('Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36');
564
+
565
+ try {
566
+ let response = null;
567
+ let lastRedirectStatus = null;
568
+ let redirectChain = [];
569
+
570
+ // Capture redirects and final response
571
+ page.on('request', (request) => {
572
+ if (request.isNavigationRequest() && request.redirectChain().length) {
573
+ redirectChain = request.redirectChain().map((req) => req.url());
574
+ }
575
+ });
576
+
577
+ // Capture the response
578
+ page.on('response', (res) => {
579
+ const chain = res.request().redirectChain();
580
+ if (chain.length > 0) {
581
+ redirectChain = chain.map((req) => req.url());
582
+ lastRedirectStatus = res.status(); // Status of the last redirect
583
+ }
584
+ });
585
+
586
+ // Try loading the URL
587
+ response = await page.goto(links[i], { waitUntil: 'networkidle2' }).catch(() => {
588
+ // Ignore rendering errors (likely binary files like PDFs)
589
+ });
590
+
591
+ if (response) {
592
+ let status = response.status();
593
+ const contentType = response.headers()['content-type'];
594
+
595
+ // If the content type is application/* (e.g. a PDF), switch to a direct HEAD fetch to get the status
596
+ if (contentType && contentType.includes('application/')) {
597
+ status = await page.evaluate(async (url) => {
598
+ const res = await fetch(url, { method: 'HEAD' });
599
+ return res.status;
600
+ }, links[i]);
601
+ }
602
+ if ((status < 200 || status > 299) && status !== 304)
603
+ throw `Unexpected Status Returned: ${status}`;
604
+ }
605
+ } catch (error) {
606
+ throw error;
607
+ }
608
+
609
+ // Close the headless browser tab
610
+ page.close();
611
+
573
612
  } catch (e) {
574
613
  let error_message;
575
614
  if (e instanceof AggregateError) {
@@ -832,6 +871,7 @@ const e = require("express");
832
871
  gen_redirects,
833
872
  draft_links,
834
873
  is_private,
874
+ browser,
835
875
  ) => {
836
876
  console.log("Performing Validation and Building SEO Link List...");
837
877
  redirects = gen_redirects;
@@ -1002,7 +1042,7 @@ const e = require("express");
1002
1042
  if (links.href.length === 0) {
1003
1043
  messages[file.relativePath].push("No links found in file");
1004
1044
  } else {
1005
- await checkLinks(source_path, file, links.href, hdocbook_config, hdocbook_project);
1045
+ await checkLinks(source_path, file, links.href, hdocbook_config, hdocbook_project, browser);
1006
1046
  }
1007
1047
  if (links.img.length === 0) {
1008
1048
  messages[file.relativePath].push("No images found in file");
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "hdoc-tools",
3
- "version": "0.34.2",
3
+ "version": "0.35.0",
4
4
  "description": "Hornbill HDocBook Development Support Tool",
5
5
  "main": "hdoc.js",
6
6
  "bin": {