npm - hdoc-tools - Versions diffs - 0.52.0 → 0.53.0 - Mend

hdoc-tools 0.52.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/.puppeteerrc.cjs ADDED Viewed

@@ -0,0 +1,12 @@
+// Disable Puppeteer's own bundled browser download (install.mjs). hdoc-tools
+// provisions Chrome + chrome-headless-shell itself via hdoc-install-browser.js,
+// which is idempotent, self-healing and retries — unlike the bundled download,
+// whose single partial-extract failure on some Windows Server boxes aborts the
+// whole `npm install`. Browsers are still fetched, just by our controlled path.
+//
+// Discovered by Puppeteer (cosmiconfig) walking up from node_modules/.../puppeteer
+// to this package root during install. skipDownload only affects provisioning,
+// not where the browser is launched from at runtime.
+module.exports = {
+	skipDownload: true,
+};

package/hdoc-build.js CHANGED Viewed

@@ -102,6 +102,23 @@
 	let currentMdFilePath = "";
 	let currentFrontmatter = "";
+	// Escape a Mermaid definition for safe embedding as the text content of a
+	// <pre class="mermaid"> element. The viewer's client-side plugin reads the
+	// element's textContent, so only HTML-text safety is required here.
+	const escape_html_text = (str) =>
+		str.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
+	// PDF-only: replace each client-render <pre class="mermaid" data-pdf-svg="…">
+	// block with the baked <img> pointing at its server-rendered SVG. Runs on the
+	// PDF HTML variant just before generate_pdf; the published HTML keeps the
+	// <pre> block for client rendering.
+	const pdfify_mermaid_blocks = (html) =>
+		html.replace(
+			/<pre class="mermaid" data-pdf-svg="([^"]+)">[\s\S]*?<\/pre>/g,
+			(_m, src) =>
+				`<img class="mermaid-diagram" src="${src}" alt="Mermaid Diagram">`,
+		);
 	// Shared markdown-it instance — created once, reused for every file.
 	// Previously recreated per file because the highlight callback closed over
 	// the file_path parameter; it now reads currentMdFilePath instead.
@@ -111,12 +128,28 @@
 		linkify: true,
 		typographer: true,
 		highlight: function (str, lang) {
-			if (lang === "mermaid" && process.env.GITHUB_ACTIONS !== 'true') {
-				// markdown-it's highlight callback is synchronous, but Mermaid
-				// rendering is async (puppeteer). So here we only *queue* the
-				// diagram and emit the <img> tag pointing at its eventual SVG;
-				// the actual rendering happens in a batch after all markdown has
-				// been processed (see render_mermaid_queue, before validation/zip).
+			if (lang === "mermaid") {
+				// Published HTML (and serve) render Mermaid client-side via the
+				// viewer's Mermaid plugin: we emit the raw definition inside
+				// <pre class="mermaid"> and the viewer calls mermaid.run() after
+				// injecting the fragment (theme applied via mermaid.initialize).
+				//
+				// PDFs have no JS runtime, so when PDF generation is enabled we ALSO
+				// queue a server-rendered SVG (headless puppeteer, batched after all
+				// markdown — see flush_mermaid_queue) and tag the block with
+				// data-pdf-svg. The PDF-only transform (pdfify_mermaid_blocks) swaps
+				// those tagged blocks for the baked <img> before generate_pdf; the
+				// attribute is inert for the client-rendered HTML.
+				//
+				// Skip the SVG bake entirely when this file won't produce a PDF
+				// (PDF disabled, or this path is PDF-excluded) — otherwise we'd
+				// render headless SVGs that nothing ever consumes.
+				const needs_pdf_svg =
+					pdf_enable && !pdf_path_excluded(currentMdFilePath.relativePath);
+				if (!needs_pdf_svg) {
+					return `<pre class="mermaid">${escape_html_text(str)}</pre>`;
+				}
 				const outputFileName = `mermaid-${crypto.createHash("sha256").update(str).digest("hex").slice(0, 16)}.svg`;
 				const outputPath = path.join(mermaid_images_path, outputFileName);
 				const outputLink = `/_books/${doc_id}/mermaid-images/${outputFileName}`;
@@ -135,7 +168,7 @@
 					});
 				}
-				return `<img class="mermaid-diagram" src="${outputLink}" alt="Mermaid Diagram">`;
+				return `<pre class="mermaid" data-pdf-svg="${outputLink}">${escape_html_text(str)}</pre>`;
 			}
 		}
 	});
@@ -672,7 +705,12 @@
 		let pdf_size = 0;
 		if (pdf_enable && !pdf_path_excluded(file_path.relativePath)) {
-			let pdf_txt = await hdoc_build_pdf.process_images(file_path, html_txt);
+			// Swap client-render Mermaid <pre> blocks for baked SVG <img> (PDF has
+			// no JS runtime), then inline the SVG files via process_images.
+			let pdf_txt = await hdoc_build_pdf.process_images(
+				file_path,
+				pdfify_mermaid_blocks(html_txt),
+			);
 			pdf_txt = `${pdf_header}\n${pdf_txt}`;
 			// Generate PDF file from HTML
@@ -696,6 +734,11 @@
 		if (inline_content) html_txt = `${fm_header_str}\n${html_txt}`;
 		else html_txt = `${fm_header_str}\n${doc_header}\n${html_txt}`;
+		// The data-pdf-svg attr was only needed by the PDF transform (already run
+		// above). Strip it from the published HTML so the markup is clean and has no
+		// dangling reference to the mermaid-images/ dir (which is deleted before zip).
+		html_txt = html_txt.replace(/ data-pdf-svg="[^"]*"/g, "");
 		// Determine output file path (.md → .html for markdown; same path for static HTML)
 		const target_file = is_markdown
 			? file_path.path.replace(path.extname(file_path.path), ".html")
@@ -716,7 +759,13 @@
 			relative_path = relative_path.replace("/index.html", "");
 		}
-		const index_data = hdoc_index.transform_html_for_index(html_txt);
+		// Drop Mermaid source from the search index — it's diagram markup, not
+		// prose, and would otherwise pollute results.
+		const index_html = html_txt.replace(
+			/<pre class="mermaid"[^>]*>[\s\S]*?<\/pre>/g,
+			"",
+		);
+		const index_data = hdoc_index.transform_html_for_index(index_html);
 		for (const section of index_data.sections) {
 			index_records.push({
 				relative_path: relative_path,
@@ -1304,6 +1353,14 @@
 		// transform_file (no-op if all are done — rendering is memoized per diagram).
 		await flush_mermaid_queue();
+		// Mermaid SVGs are only an intermediate for baking diagrams into PDFs
+		// (process_images inlines them as base64 data URIs at PDF-gen time, already
+		// done above). Published HTML renders Mermaid client-side and references no
+		// SVG, so the directory is dead weight in the output — drop it before zip.
+		if (mermaid_images_path && fs.existsSync(mermaid_images_path)) {
+			fs.rmSync(mermaid_images_path, { recursive: true, force: true });
+		}
 		// Output to console
 		console.log(`\n                MD files found: ${conversion_attempted}`);
 		console.log(`Successfully converted to HTML: ${conversion_success}`);

package/hdoc-content-routes.js ADDED Viewed

@@ -0,0 +1,343 @@
+// Shared /_books/* content + render routes.
+//
+// Extracted from hdoc-serve.js so that both `hdoc serve` (read-only preview)
+// and `hdoc edit` (the upcoming editor surface) mount the SAME content
+// pipeline and renderer. This guarantees preview fidelity == published output
+// for any UI built on top of it.
+//
+// Usage:
+//   const { create_content_handler } = require("./hdoc-content-routes.js");
+//   const content = create_content_handler({
+//     source_path, docId, hdocbook_config, hdocbook_project, nav_inline,
+//   });
+//   content.register(app);
+//   // ...and reuse content.send_content_file / content.send_content_resource_404
+//   //    for your own catch-all (SPA) route.
+const fs = require("node:fs");
+const path = require("node:path");
+const stream = require("node:stream");
+const hdoc = require(path.join(__dirname, "hdoc-module.js"));
+const mdfm = require("markdown-it-front-matter");
+// Escape a Mermaid definition for safe embedding as the text content of a
+// <pre> element. The viewer's client-side Mermaid plugin reads the element's
+// textContent and renders it in the browser, so we only need HTML-text safety
+// here (no theme/frontmatter injection — theme is applied via mermaid.initialize
+// in the viewer).
+function escape_html(str) {
+	return str
+		.replace(/&/g, "&amp;")
+		.replace(/</g, "&lt;")
+		.replace(/>/g, "&gt;");
+}
+// Build a content handler bound to a single book's context.
+//
+// ctx:
+//   source_path      - absolute path to the document source root
+//   docId            - id of the hdocbook being served
+//   hdocbook_config  - parsed hdocbook.json
+//   hdocbook_project - parsed hdocbook-project.json (used for redirects)
+//   nav_inline       - inline-help nav fragment for library.json
+exports.create_content_handler = (ctx) => {
+	const {
+		source_path,
+		docId,
+		hdocbook_config,
+		hdocbook_project,
+		nav_inline,
+	} = ctx;
+	// process_includes resolves include paths relative to the source root.
+	const global_source_path = source_path;
+	// Render markdown SOURCE to HTML through the same pipeline as published output:
+	// expand_variables -> process_includes -> markdown-it (+ mermaid, tips, frontmatter).
+	//
+	// file_path anchors include resolution (relative to it) and mermaid logging.
+	// md_source is the raw markdown to render — read from disk for serve/publish,
+	// or supplied from the editor buffer for live preview (so preview == published).
+	// Returns { html, frontmatter } where frontmatter is the parsed object or null.
+	async function render_markdown(file_path, md_source) {
+		let md_txt = hdoc.expand_variables(md_source.toString(), docId);
+		const includes_processed = await hdoc.process_includes(
+			file_path,
+			md_txt,
+			global_source_path,
+		);
+		md_txt = includes_processed.body;
+		// Mermaid diagrams are rendered client-side by the viewer's Mermaid plugin.
+		// We emit the raw definition inside <pre class="mermaid"> and the viewer
+		// calls mermaid.run() after injecting the fragment (theme applied there via
+		// mermaid.initialize). This keeps serve == published and drops the
+		// per-request headless browser entirely.
+		const md = require("markdown-it")({
+			html: true,
+			linkify: true,
+			typographer: true,
+			highlight: function (str, lang) {
+				if (lang === "mermaid") {
+					return `<pre class="mermaid">${escape_html(str)}</pre>`;
+				}
+			}
+		});
+		md.linkify.set({
+			fuzzyEmail: false,
+			fuzzyLink: false,
+			fuzzyIP: false,
+		});
+		let frontmatter_content = "";
+		md.use(mdfm, (fm) => {
+			frontmatter_content = fm;
+		});
+		const tips = require(`${__dirname}/custom_modules/tips.js`);
+		md.use(tips, { links: true });
+		const html = md.render(md_txt.toString());
+		const frontmatter = frontmatter_content.length
+			? hdoc.parse_yaml(frontmatter_content)
+			: null;
+		return { html, frontmatter };
+	}
+	async function transform_markdown_and_send_html(req, res, file_path) {
+		if (!fs.existsSync(file_path)) return false;
+		const { html, frontmatter } = await render_markdown(
+			file_path,
+			fs.readFileSync(file_path).toString(),
+		);
+		if (frontmatter) {
+			const base64 = Buffer.from(JSON.stringify(frontmatter), "utf-8").toString(
+				"base64",
+			);
+			res.setHeader("X-frontmatter", base64);
+		}
+		res.setHeader("Content-Type", "text/html");
+		res.send(html);
+		return true;
+	}
+	function send_content_file(req, res, file_path, redirected = false) {
+		let content_txt = hdoc.expand_variables(
+			fs.readFileSync(file_path).toString(),
+			docId,
+		);
+		if (redirected)
+			content_txt = `Redirected from ${redirected}\n\n${content_txt}`;
+		const contentType = hdoc.content_type_for_ext(path.extname(file_path));
+		if (path.extname(file_path) === ".md") {
+			res.setHeader("Content-Disposition", "inline");
+		}
+		res.setHeader("Content-Type", contentType);
+		res.send(content_txt);
+	}
+	function send_file(req, res, file_path) {
+		// Need to set the content type here??
+		const contentType = hdoc.content_type_for_ext(path.extname(file_path));
+		res.setHeader("Content-Type", contentType);
+		// The vendored Mermaid bundle is large (~4.5MB) and content-stable, so cache
+		// it hard — without this the browser re-downloads it on every full page load
+		// when clicking through to diagram pages. Other dev assets stay uncached so
+		// edits to the viewer JS are picked up on refresh.
+		if (path.basename(file_path) === "mermaid.min.js") {
+			res.setHeader("Cache-Control", "public, max-age=604800, immutable");
+		}
+		const r = fs.createReadStream(file_path);
+		const ps = new stream.PassThrough();
+		stream.pipeline(r, ps, (err) => {
+			if (err) {
+				console.error(err); // No such file or any other kind of error
+				return res.sendStatus(400).send("Unexpected error");
+			}
+		});
+		ps.pipe(res);
+	}
+	function send_content_resource_404(req, res) {
+		res.setHeader("Content-Type", "text/html");
+		res.status(404).send("Content resource not found");
+	}
+	// 1. If we request a file with a .html file extension, and that file DOES NOT exist,
+	// we will look for the same file but with a .md extension. If we find that
+	// corresponding markdown file, we will transform that markdown file to HTML and
+	// return the HTML content
+	//
+	// 2. If we request a file, without any file extension then we will look for that file
+	// with a .md extension, and if that file exists, we will transform that markdown
+	// file to HTML and return that file.
+	//
+	// For all other requests, we are going to look on the filesystem.  If we request
+	// a specific file with its extension (including .md files), then we will simply
+	// return the file verbatim as a static file.
+	//
+	// If we request a file without an extension and that file does not exist, we will
+	// assume that is a folder, will append index.html and look for that file, if present
+	// we will send it, if not present we will look for index.md, and if thats present
+	// we will transform to HTML and return that
+	//
+	// Anything else in this handler will return a 404 error
+	function handle_books_request(req, res) {
+		let url = req.url.replace("/_books/", "/");
+		console.log("URL Requested:", url);
+		// Process redirect
+		if (
+			hdocbook_project.redirects &&
+			Array.isArray(hdocbook_project.redirects) &&
+			hdocbook_project.redirects.length > 0
+		) {
+			const source_url = url.indexOf("/") === 0 ? url : `/${url}`;
+			for (const redir of hdocbook_project.redirects) {
+				redir.url =
+					redir.url.indexOf("/") === 0 ? redir.url : `/${redir.url}`;
+				if (
+					redir.url === source_url &&
+					redir.location &&
+					redir.location !== ""
+				) {
+					url = `${redir.location}`;
+					console.log(`Redirecting to ${url}`);
+				}
+			}
+		}
+		const file_path = path.join(source_path, url);
+		if (path.extname(file_path) === ".html") {
+			// 1a. check for html files, and send/transform as required
+			if (fs.existsSync(file_path)) {
+				// HTML file exists on disk, just return it verbatim
+				res.setHeader("Content-Type", "text/html");
+				send_file(req, res, file_path);
+				return true;
+			}
+			if (fs.existsSync(file_path.replace(".html", ".md"))) {
+				if (
+					transform_markdown_and_send_html(
+						req,
+						res,
+						file_path.replace(".html", ".md"),
+					)
+				) {
+					return;
+				}
+			}
+		} else if (path.extname(file_path) === ".md") {
+			// If the markdown file exists, just send to caller as is
+			if (fs.existsSync(file_path)) {
+				send_content_file(req, res, file_path);
+				return true;
+			}
+		} else if (path.extname(file_path).length === 0) {
+			// 2. If we request a file, without any file extension
+			if (fs.existsSync(`${file_path}.md`)) {
+				if (transform_markdown_and_send_html(req, res, `${file_path}.md`)) {
+					return;
+				}
+			} else if (fs.existsSync(path.join(`${file_path}index.md`))) {
+				if (
+					transform_markdown_and_send_html(
+						req,
+						res,
+						path.join(file_path, "index.md"),
+					)
+				) {
+					return;
+				}
+			} else if (fs.existsSync(path.join(`${file_path}index.html`))) {
+				res.setHeader("Content-Type", "text/html");
+				send_content_file(req, res, path.join(`${file_path}index.html`));
+				return;
+			} else if (fs.existsSync(`${file_path}/index.md`)) {
+				if (
+					transform_markdown_and_send_html(req, res, `${file_path}/index.md`)
+				) {
+					return;
+				}
+			} else if (fs.existsSync(path.join(`${file_path}/index.html`))) {
+				res.setHeader("Content-Type", "text/html");
+				send_content_file(req, res, path.join(`${file_path}/index.html`));
+				return;
+			} else if (fs.existsSync(path.join(`${file_path}.html`))) {
+				res.setHeader("Content-Type", "text/html");
+				send_content_file(req, res, path.join(`${file_path}.html`));
+				return;
+			} else if (fs.existsSync(path.join(`${file_path}.htm`))) {
+				res.setHeader("Content-Type", "text/html");
+				send_content_file(req, res, path.join(`${file_path}.htm`));
+				return;
+			}
+		} else if (fs.existsSync(file_path)) {
+			if (
+				file_path.endsWith("hdocbook.json") ||
+				file_path.endsWith("hdocbook_project.json")
+			) {
+				try {
+					// Read & parse file
+					JSON.parse(fs.readFileSync(file_path));
+				} catch (e) {
+					console.error(`Error parsing hdocbook.json: ${e}`);
+				}
+			}
+			send_file(req, res, file_path);
+			return;
+		}
+		// Return a 404 error here
+		send_content_resource_404(req, res);
+	}
+	function handle_library_request(req, res) {
+		const library = {
+			books: [
+				{
+					docId: hdocbook_config.docId,
+					title: hdocbook_config.title,
+					nav_inline: nav_inline,
+				},
+			],
+		};
+		res.setHeader("Content-Type", "application/json");
+		res.send(JSON.stringify(library, null, 3));
+	}
+	function register(app) {
+		app.get("/_books/library.json", handle_library_request);
+		app.get("/_books/*splat", handle_books_request);
+	}
+	return {
+		register,
+		// Exposed so host servers can reuse the same helpers for their own
+		// (SPA / editor) catch-all routes, and the editor's live-preview endpoint.
+		// handle_books_request / handle_library_request are exposed so the editor
+		// can register delegating /_books routes that follow a workspace switch.
+		render_markdown,
+		transform_markdown_and_send_html,
+		send_content_file,
+		send_file,
+		send_content_resource_404,
+		handle_books_request,
+		handle_library_request,
+	};
+};