npm - hdoc-tools - Versions diffs - 0.51.0 → 0.52.1 - Mend

hdoc-tools 0.51.0 → 0.52.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (13) hide show

package/.puppeteerrc.cjs +12 -0
package/hdoc-build.js +66 -41
package/hdoc-edit.js +1449 -0
package/hdoc-help.js +4 -1
package/hdoc-install-browser.js +145 -0
package/hdoc-mermaid.js +204 -0
package/hdoc-module.js +61 -4
package/hdoc-serve.js +16 -296
package/hdoc-validate.js +5 -2
package/hdoc.js +4 -0
package/npm-shrinkwrap.json +1095 -4345
package/package.json +13 -8
package/templates/mermaid-puppeteer-config.json +0 -6

package/hdoc-help.js CHANGED Viewed

@@ -15,7 +15,10 @@ Commands
      - Use the '--no-links' argument to skip link output to CLI during validation.
  - createDocs
-    Creates folder structure and markdown documents as defined in the HDocBook navigation item links
+    Creates folder structure and markdown documents as defined in the HDocBook navigation item links
+ - edit
+    Starts the local structure/content editor on port 3000 (bound to 127.0.0.1), serving the editor UI alongside a live preview of the content. Supports a -port N to use a different port.
  - help
     Outputs available arguments and switches

package/hdoc-install-browser.js ADDED Viewed

@@ -0,0 +1,145 @@
+// Resilient browser provisioning for hdoc-tools.
+//
+// Replaces the old `puppeteer browsers install ...` one-liner postinstall.
+// Puppeteer's own bundled download (install.mjs) is disabled via
+// .puppeteerrc.cjs (skipDownload), so this script is the single, controlled
+// place Chrome + chrome-headless-shell are fetched.
+//
+// Why: on some Windows Server 2019 build agents the Chrome archive extracts
+// only partially (Defender quarantining binaries mid-extract, or a truncated
+// download behind a proxy). The first failure leaves the version folder on
+// disk, after which Puppeteer refuses to re-extract and every later install
+// reports "folder exists but executable missing" — a permanent dead end.
+//
+// This script makes provisioning idempotent and self-healing:
+//   * skip when a valid executable already exists,
+//   * delete any stale/partial version folder before (re)installing,
+//   * retry the download a few times,
+//   * verify the executable exists afterwards and fail loudly with concrete
+//     remediation if it still does not.
+(async () => {
+	const fs = require("node:fs");
+	const path = require("node:path");
+	const os = require("node:os");
+	const {
+		install,
+		computeExecutablePath,
+		detectBrowserPlatform,
+		Browser,
+	} = require("@puppeteer/browsers");
+	// Keep this build id in lockstep with the puppeteer version pinned in
+	// package.json (puppeteer 25.1.0 ships Chrome 149.0.7827.22).
+	const CHROME_BUILD = "149.0.7827.22";
+	const MAX_ATTEMPTS = 3;
+	const RED = "\x1b[31m";
+	const YELLOW = "\x1b[33m";
+	const GREEN = "\x1b[32m";
+	const RESET = "\x1b[0m";
+	const log = (msg) => console.log(`[hdoc-tools] ${msg}`);
+	// Resolve the cache directory the same way Puppeteer does at runtime:
+	// PUPPETEER_CACHE_DIR wins, otherwise the documented default. Keeping this
+	// aligned with runtime means we install to exactly where the browser is
+	// later launched from.
+	const cacheDir =
+		process.env.PUPPETEER_CACHE_DIR ||
+		path.join(os.homedir(), ".cache", "puppeteer");
+	let platform;
+	try {
+		platform = detectBrowserPlatform();
+	} catch (err) {
+		console.error(
+			`${RED}Unable to detect browser platform: ${err.message}${RESET}`,
+		);
+		process.exit(1);
+	}
+	const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
+	const rmrf = (target) => {
+		try {
+			fs.rmSync(target, { recursive: true, force: true });
+		} catch {
+			/* best effort */
+		}
+	};
+	// A usable install is one where the executable exists and is non-empty.
+	const isUsable = (exePath) => {
+		try {
+			return fs.statSync(exePath).size > 0;
+		} catch {
+			return false;
+		}
+	};
+	const provision = async (browser, label) => {
+		const exePath = computeExecutablePath({
+			browser,
+			buildId: CHROME_BUILD,
+			cacheDir,
+			platform,
+		});
+		// chrome.exe -> chrome-win64 -> win64-<build>; nuke the whole build folder
+		// so a partial extraction can never block a clean re-extract.
+		const versionFolder = path.dirname(path.dirname(exePath));
+		if (isUsable(exePath)) {
+			log(`${label} ${CHROME_BUILD} already present, skipping download.`);
+			return true;
+		}
+		for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
+			rmrf(versionFolder); // clear any stale/partial extract first
+			try {
+				log(
+					`Installing ${label} ${CHROME_BUILD} (attempt ${attempt}/${MAX_ATTEMPTS})...`,
+				);
+				await install({
+					browser,
+					buildId: CHROME_BUILD,
+					cacheDir,
+					platform,
+					unpack: true,
+				});
+			} catch (err) {
+				console.error(
+					`${YELLOW}  attempt ${attempt} failed: ${err.message}${RESET}`,
+				);
+			}
+			if (isUsable(exePath)) {
+				log(`${GREEN}${label} ${CHROME_BUILD} ready.${RESET}`);
+				return true;
+			}
+			if (attempt < MAX_ATTEMPTS) await sleep(2000);
+		}
+		return false;
+	};
+	const chromeOk = await provision(Browser.CHROME, "Chrome");
+	const shellOk = await provision(
+		Browser.CHROMEHEADLESSSHELL,
+		"chrome-headless-shell",
+	);
+	if (chromeOk && shellOk) process.exit(0);
+	console.error(
+		`\n${RED}Failed to provision a complete browser into:${RESET}\n  ${cacheDir}\n\n` +
+			"This is almost always one of:\n" +
+			"  1. Antivirus (e.g. Windows Defender) quarantining Chrome files mid-extract.\n" +
+			`     Fix: Add-MpPreference -ExclusionPath "${cacheDir}"\n` +
+			"  2. A stale/partial cache folder. Fix: delete the chrome / chrome-headless-shell\n" +
+			`     sub-folders under "${cacheDir}" and reinstall.\n` +
+			"  3. A truncated download behind a proxy / TLS inspection. Check npm/HTTPS proxy config.\n",
+	);
+	process.exit(1);
+})();

package/hdoc-mermaid.js ADDED Viewed

@@ -0,0 +1,204 @@
+// In-process Mermaid -> SVG renderer.
+//
+// Replaces the previous `@mermaid-js/mermaid-cli` (mmdc) integration. Instead of
+// shelling out to the CLI (which spawned a fresh Chromium per diagram and pinned
+// puppeteer to <=24 via its peerDependency), this renders Mermaid using the
+// `mermaid` library directly inside a puppeteer browser supplied by the caller —
+// the very same browser hdoc-build already launches for PDF/link work.
+//
+// The render path (Interceptor + page evaluation) is ported from mermaid-cli
+// (MIT licensed): https://github.com/mermaid-js/mermaid-cli
+// Trimmed to the SVG-only case we need, plus ELK layout support (required by
+// books that use `layout: elk`). zenuml / tidy-tree / icon-packs / png / pdf are
+// intentionally dropped — no hdocbook content uses them.
+const { readFile, realpath } = require("node:fs/promises");
+const path = require("node:path");
+const url = require("node:url");
+// ESM bundle locations inside the installed packages. Both packages expose
+// "./*": "./*" in their exports map, so the dist files resolve directly.
+const mermaidESMPath = require.resolve("mermaid/dist/mermaid.esm.mjs");
+const elkESMPath = require.resolve(
+	"@mermaid-js/layout-elk/dist/mermaid-layout-elk.esm.mjs",
+);
+/**
+ * Guesses the MIME-type of a file based on its extension. Only the handful of
+ * types the Mermaid ESM bundles actually request are supported.
+ */
+function getContentTypeFromFileExtension(filePath) {
+	const ext = path.extname(filePath).toLowerCase();
+	switch (ext) {
+		case ".css":
+			// Force UTF-8 so puppeteer doesn't mis-parse as Latin-1.
+			return "text/css;charset=UTF-8";
+		case ".js":
+		case ".mjs":
+			return "application/javascript";
+		case ".woff2":
+			return "font/woff2";
+		default:
+			throw new Error(`Unsupported file extension for intercept: ${ext}`);
+	}
+}
+/**
+ * Puppeteer can't `import` ESM modules from `file://` URLs. Rather than stand up
+ * a real HTTP server (ports/firewalls), we intercept requests to a dummy
+ * `https://hdoc-mermaid-intercept.invalid` origin and serve the corresponding
+ * local file. Ported verbatim from mermaid-cli's puppeteerIntercept.js.
+ */
+class Interceptor {
+	#INTERCEPT_ORIGIN = "https://hdoc-mermaid-intercept.invalid";
+	/** @type {Set<string>} resolved (realpath) dirs allowed to be served. */
+	#allowedDirs = new Set();
+	async fileUrlToInterceptUrl(fileUrl, { allowParentDirectoryLevel = 1 } = {}) {
+		fileUrl = new URL(fileUrl);
+		if (fileUrl.protocol !== "file:") {
+			throw new Error(`Invalid file URL: ${fileUrl}`);
+		}
+		let parentDirectory = await realpath(url.fileURLToPath(fileUrl));
+		while (allowParentDirectoryLevel-- >= 0) {
+			parentDirectory = path.dirname(parentDirectory);
+		}
+		this.#allowedDirs.add(parentDirectory);
+		return `${this.#INTERCEPT_ORIGIN}${fileUrl.pathname}`;
+	}
+	async interceptUrlToFileUrl(interceptUrl) {
+		interceptUrl = new URL(interceptUrl);
+		if (interceptUrl.origin !== this.#INTERCEPT_ORIGIN) {
+			throw new Error(`Invalid intercept URL: ${interceptUrl}`);
+		}
+		const fileUrl = new URL(
+			interceptUrl.href.slice(this.#INTERCEPT_ORIGIN.length),
+			"file://",
+		);
+		const filePath = await realpath(url.fileURLToPath(fileUrl));
+		if (
+			![...this.#allowedDirs].some((dir) =>
+				path.relative(filePath, dir).startsWith(".."),
+			)
+		) {
+			throw new Error(
+				`Intercept URL is not in an allowed directory: ${interceptUrl}`,
+			);
+		}
+		return fileUrl;
+	}
+	async #interceptRequestHandler(request) {
+		try {
+			if (request.url().startsWith(this.#INTERCEPT_ORIGIN)) {
+				const fileUrl = await this.interceptUrlToFileUrl(request.url());
+				return request.respond({
+					status: 200,
+					headers: { "Access-Control-Allow-Origin": "*" },
+					contentType: getContentTypeFromFileExtension(
+						url.fileURLToPath(fileUrl),
+					),
+					body: await readFile(fileUrl),
+				});
+			}
+		} catch (error) {
+			console.error(
+				`Error handling intercept request for ${request.url()}:`,
+				error,
+			);
+			request.abort();
+			return;
+		}
+		request.continue();
+	}
+	get interceptRequestHandler() {
+		return this.#interceptRequestHandler.bind(this);
+	}
+}
+/**
+ * Render a single Mermaid definition to an SVG string.
+ *
+ * @param {import('puppeteer').Browser} browser - An open puppeteer browser.
+ * @param {string} definition - Mermaid source (may include `--- config: ---` frontmatter).
+ * @param {{ backgroundColor?: string }} [opts]
+ * @returns {Promise<string>} the serialized `<svg>`.
+ */
+async function render_svg(browser, definition, { backgroundColor = "transparent" } = {}) {
+	const page = await browser.newPage();
+	// Surface in-page warnings/errors (e.g. mermaid parse errors) to the build log.
+	page.on("console", (msg) => {
+		if (msg.type() === "error" || msg.type() === "warning") {
+			console.warn(msg.text());
+		}
+	});
+	try {
+		await page.setContent(
+			"<!doctype html><html><body><div id=\"container\"></div></body></html>",
+		);
+		await page.$eval(
+			"body",
+			(body, bg) => {
+				body.style.background = bg;
+			},
+			backgroundColor,
+		);
+		const interceptor = new Interceptor();
+		const mermaidUrl = await interceptor.fileUrlToInterceptUrl(
+			url.pathToFileURL(mermaidESMPath),
+		);
+		const elkUrl = await interceptor.fileUrlToInterceptUrl(
+			url.pathToFileURL(elkESMPath),
+		);
+		page.on("request", interceptor.interceptRequestHandler);
+		await page.setRequestInterception(true);
+		await page.$eval(
+			"#container",
+			async (container, { definition, mermaidUrl, elkUrl, backgroundColor }) => {
+				const { default: mermaid } = await import(mermaidUrl);
+				const { default: elkLayouts } = await import(elkUrl);
+				await Promise.all(
+					Array.from(document.fonts, (font) => font.load()),
+				);
+				mermaid.registerLayoutLoaders(elkLayouts);
+				mermaid.initialize({ startOnLoad: false });
+				// Throws on invalid diagrams — propagates out of $eval as a rejection.
+				const { svg: svgText } = await mermaid.render(
+					"hdoc-svg",
+					definition,
+					container,
+				);
+				container.innerHTML = svgText;
+				const svg = container.getElementsByTagName?.("svg")?.[0];
+				if (svg?.style) {
+					svg.style.backgroundColor = backgroundColor;
+				}
+			},
+			{ definition, mermaidUrl, elkUrl, backgroundColor },
+		);
+		// Serialize via XMLSerializer so HTML <foreignObject> content (e.g. <br>)
+		// becomes valid XML in the saved .svg file.
+		const svgXML = await page.$eval("svg", (svg) => {
+			// eslint-disable-next-line no-undef
+			const xmlSerializer = new XMLSerializer();
+			return xmlSerializer.serializeToString(svg);
+		});
+		return svgXML;
+	} finally {
+		await page.close();
+	}
+}
+module.exports = { render_svg, Interceptor };

package/hdoc-module.js CHANGED Viewed

@@ -9,10 +9,48 @@
 	let retried = false;
+	const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));
+	// GitHub enforces a "secondary" (burst) rate limit on top of the hourly one:
+	// too many requests in a short window are rejected with 403/429 even when the
+	// hourly budget is intact. The contributor pull fires one call per file, so a
+	// fast run trips it. Historically `hdoc validate` got away with this only
+	// because verbose console output slowed the loop down between calls; the
+	// --quiet mode removed that incidental spacing and started hitting 403s.
+	// Throttle GitHub API calls to a fixed minimum interval so behaviour no longer
+	// depends on how chatty the console is.
+	let _last_github_call_at = 0;
+	const GITHUB_HOST = "api.github.com";
+	const GITHUB_MIN_INTERVAL_MS = 800;
+	// Never block the whole build waiting on a single rate-limited call.
+	const GITHUB_MAX_BACKOFF_MS = 60000;
+	// For a rate-limited response, work out how long to wait before retrying.
+	// Returns null when the response is NOT a retryable rate limit (e.g. a genuine
+	// 403 auth/forbidden), in which case the caller should give up immediately.
+	const rate_limit_wait_ms = (response) => {
+		if (response.status !== 403 && response.status !== 429) return null;
+		const retry_after = response.headers.get("retry-after");
+		if (retry_after !== null) {
+			const secs = Number(retry_after);
+			if (!Number.isNaN(secs)) return Math.max(0, secs * 1000);
+		}
+		const remaining = response.headers.get("x-ratelimit-remaining");
+		const reset = response.headers.get("x-ratelimit-reset");
+		if (remaining === "0" && reset !== null) {
+			const reset_ms = Number(reset) * 1000 - Date.now();
+			if (!Number.isNaN(reset_ms)) return Math.max(0, reset_ms);
+		}
+		// 403/429 with no rate-limit signal => auth/forbidden, not worth retrying.
+		return null;
+	};
 	// Wraps the built-in fetch() with automatic retry for transient errors.
 	// Retries up to maxRetries times when the server returns an HTTP error status
-	// >= 400, except for 401 (Unauthorized) and 403 (Forbidden) which are auth
-	// failures that won't be resolved by retrying. Network errors (where fetch
+	// >= 400, except for 401 (Unauthorized) and non-rate-limit 403 (Forbidden)
+	// which are auth failures that won't be resolved by retrying. GitHub
+	// rate-limit responses (403/429 carrying Retry-After or X-RateLimit-Reset) ARE
+	// retried after honouring the indicated wait. Network errors (where fetch
 	// itself throws) are also retried. Sets the module-level `retried` flag so
 	// callers can detect and log a success-after-retry message.
 	//
@@ -21,8 +59,18 @@
 	// does not leave an already-aborted signal in place for the retries.
 	const fetchWithRetry = async (url, options = {}, maxRetries = 5) => {
 		const { timeoutMs, ...fetchOptions } = options;
+		const is_github = typeof url === "string" && url.includes(GITHUB_HOST);
 		let retryCount = 0;
 		while (true) {
+			// Space out GitHub API calls so a fast (quiet) run does not burst past
+			// the secondary rate limit. No effect on non-GitHub requests.
+			if (is_github) {
+				const since = Date.now() - _last_github_call_at;
+				if (since < GITHUB_MIN_INTERVAL_MS) {
+					await sleep(GITHUB_MIN_INTERVAL_MS - since);
+				}
+				_last_github_call_at = Date.now();
+			}
 			// Create a fresh signal for each attempt; reusing an already-aborted
 			// signal would cause every subsequent retry to abort immediately.
 			const attemptOptions = timeoutMs
@@ -38,10 +86,19 @@
 				retried = true;
 				continue;
 			}
-			// Auth failures and successes are not retried
-			if (response.ok || response.status === 401 || response.status === 403) {
+			if (response.ok || response.status === 401) {
 				return response;
 			}
+			// 403/429: retry only when it is a rate limit we can wait out.
+			if (response.status === 403 || response.status === 429) {
+				const wait_ms = rate_limit_wait_ms(response);
+				if (wait_ms === null) return response; // auth/forbidden — give up
+				retryCount++;
+				if (retryCount > maxRetries) return response;
+				retried = true;
+				await sleep(Math.min(wait_ms, GITHUB_MAX_BACKOFF_MS));
+				continue;
+			}
 			retryCount++;
 			if (retryCount > maxRetries) return response;
 			retried = true;