hdoc-tools 0.52.0 → 0.53.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ // Disable Puppeteer's own bundled browser download (install.mjs). hdoc-tools
2
+ // provisions Chrome + chrome-headless-shell itself via hdoc-install-browser.js,
3
+ // which is idempotent, self-healing and retries — unlike the bundled download,
4
+ // whose single partial-extract failure on some Windows Server boxes aborts the
5
+ // whole `npm install`. Browsers are still fetched, just by our controlled path.
6
+ //
7
+ // Discovered by Puppeteer (cosmiconfig) walking up from node_modules/.../puppeteer
8
+ // to this package root during install. skipDownload only affects provisioning,
9
+ // not where the browser is launched from at runtime.
10
// Puppeteer configuration for this package: skipDownload turns off
// Puppeteer's own browser download at install time.
const puppeteer_config = { skipDownload: true };

module.exports = puppeteer_config;
package/hdoc-build.js CHANGED
@@ -102,6 +102,23 @@
102
102
  let currentMdFilePath = "";
103
103
  let currentFrontmatter = "";
104
104
 
105
// Make a Mermaid definition safe to embed as the text content of a
// <pre class="mermaid"> element by escaping &, < and >. Ampersands are
// escaped first so the entities produced for < and > are not re-escaped.
const escape_html_text = (str) => {
  const ampersands_escaped = str.replace(/&/g, "&amp;");
  const opening_escaped = ampersands_escaped.replace(/</g, "&lt;");
  return opening_escaped.replace(/>/g, "&gt;");
};
110
+
111
// PDF-only transform: every <pre class="mermaid" data-pdf-svg="…"> block is
// swapped for an <img class="mermaid-diagram"> whose src is the value of the
// data-pdf-svg attribute. <pre class="mermaid"> blocks without that attribute
// do not match and are left as they are.
const pdfify_mermaid_blocks = (html) => {
  const tagged_block =
    /<pre class="mermaid" data-pdf-svg="([^"]+)">[\s\S]*?<\/pre>/g;
  const as_image = (_block, svg_src) =>
    `<img class="mermaid-diagram" src="${svg_src}" alt="Mermaid Diagram">`;
  return html.replace(tagged_block, as_image);
};
121
+
105
122
  // Shared markdown-it instance — created once, reused for every file.
106
123
  // Previously recreated per file because the highlight callback closed over
107
124
  // the file_path parameter; it now reads currentMdFilePath instead.
@@ -111,12 +128,28 @@
111
128
  linkify: true,
112
129
  typographer: true,
113
130
  highlight: function (str, lang) {
114
- if (lang === "mermaid" && process.env.GITHUB_ACTIONS !== 'true') {
115
- // markdown-it's highlight callback is synchronous, but Mermaid
116
- // rendering is async (puppeteer). So here we only *queue* the
117
- // diagram and emit the <img> tag pointing at its eventual SVG;
118
- // the actual rendering happens in a batch after all markdown has
119
- // been processed (see render_mermaid_queue, before validation/zip).
131
+ if (lang === "mermaid") {
132
+ // Published HTML (and serve) render Mermaid client-side via the
133
+ // viewer's Mermaid plugin: we emit the raw definition inside
134
+ // <pre class="mermaid"> and the viewer calls mermaid.run() after
135
+ // injecting the fragment (theme applied via mermaid.initialize).
136
+ //
137
+ // PDFs have no JS runtime, so when PDF generation is enabled we ALSO
138
+ // queue a server-rendered SVG (headless puppeteer, batched after all
139
+ // markdown — see flush_mermaid_queue) and tag the block with
140
+ // data-pdf-svg. The PDF-only transform (pdfify_mermaid_blocks) swaps
141
+ // those tagged blocks for the baked <img> before generate_pdf; the
142
+ // attribute is inert for the client-rendered HTML.
143
+ //
144
+ // Skip the SVG bake entirely when this file won't produce a PDF
145
+ // (PDF disabled, or this path is PDF-excluded) — otherwise we'd
146
+ // render headless SVGs that nothing ever consumes.
147
+ const needs_pdf_svg =
148
+ pdf_enable && !pdf_path_excluded(currentMdFilePath.relativePath);
149
+ if (!needs_pdf_svg) {
150
+ return `<pre class="mermaid">${escape_html_text(str)}</pre>`;
151
+ }
152
+
120
153
  const outputFileName = `mermaid-${crypto.createHash("sha256").update(str).digest("hex").slice(0, 16)}.svg`;
121
154
  const outputPath = path.join(mermaid_images_path, outputFileName);
122
155
  const outputLink = `/_books/${doc_id}/mermaid-images/${outputFileName}`;
@@ -135,7 +168,7 @@
135
168
  });
136
169
  }
137
170
 
138
- return `<img class="mermaid-diagram" src="${outputLink}" alt="Mermaid Diagram">`;
171
+ return `<pre class="mermaid" data-pdf-svg="${outputLink}">${escape_html_text(str)}</pre>`;
139
172
  }
140
173
  }
141
174
  });
@@ -672,7 +705,12 @@
672
705
 
673
706
  let pdf_size = 0;
674
707
  if (pdf_enable && !pdf_path_excluded(file_path.relativePath)) {
675
- let pdf_txt = await hdoc_build_pdf.process_images(file_path, html_txt);
708
+ // Swap client-render Mermaid <pre> blocks for baked SVG <img> (PDF has
709
+ // no JS runtime), then inline the SVG files via process_images.
710
+ let pdf_txt = await hdoc_build_pdf.process_images(
711
+ file_path,
712
+ pdfify_mermaid_blocks(html_txt),
713
+ );
676
714
  pdf_txt = `${pdf_header}\n${pdf_txt}`;
677
715
 
678
716
  // Generate PDF file from HTML
@@ -696,6 +734,11 @@
696
734
  if (inline_content) html_txt = `${fm_header_str}\n${html_txt}`;
697
735
  else html_txt = `${fm_header_str}\n${doc_header}\n${html_txt}`;
698
736
 
737
+ // The data-pdf-svg attr was only needed by the PDF transform (already run
738
+ // above). Strip it from the published HTML so the markup is clean and has no
739
+ // dangling reference to the mermaid-images/ dir (which is deleted before zip).
740
+ html_txt = html_txt.replace(/ data-pdf-svg="[^"]*"/g, "");
741
+
699
742
  // Determine output file path (.md → .html for markdown; same path for static HTML)
700
743
  const target_file = is_markdown
701
744
  ? file_path.path.replace(path.extname(file_path.path), ".html")
@@ -716,7 +759,13 @@
716
759
  relative_path = relative_path.replace("/index.html", "");
717
760
  }
718
761
 
719
- const index_data = hdoc_index.transform_html_for_index(html_txt);
762
+ // Drop Mermaid source from the search index — it's diagram markup, not
763
+ // prose, and would otherwise pollute results.
764
+ const index_html = html_txt.replace(
765
+ /<pre class="mermaid"[^>]*>[\s\S]*?<\/pre>/g,
766
+ "",
767
+ );
768
+ const index_data = hdoc_index.transform_html_for_index(index_html);
720
769
  for (const section of index_data.sections) {
721
770
  index_records.push({
722
771
  relative_path: relative_path,
@@ -1304,6 +1353,14 @@
1304
1353
  // transform_file (no-op if all are done — rendering is memoized per diagram).
1305
1354
  await flush_mermaid_queue();
1306
1355
 
1356
+ // Mermaid SVGs are only an intermediate for baking diagrams into PDFs
1357
+ // (process_images inlines them as base64 data URIs at PDF-gen time, already
1358
+ // done above). Published HTML renders Mermaid client-side and references no
1359
+ // SVG, so the directory is dead weight in the output — drop it before zip.
1360
+ if (mermaid_images_path && fs.existsSync(mermaid_images_path)) {
1361
+ fs.rmSync(mermaid_images_path, { recursive: true, force: true });
1362
+ }
1363
+
1307
1364
  // Output to console
1308
1365
  console.log(`\n MD files found: ${conversion_attempted}`);
1309
1366
  console.log(`Successfully converted to HTML: ${conversion_success}`);
@@ -0,0 +1,343 @@
1
+ // Shared /_books/* content + render routes.
2
+ //
3
+ // Extracted from hdoc-serve.js so that both `hdoc serve` (read-only preview)
4
+ // and `hdoc edit` (the upcoming editor surface) mount the SAME content
5
+ // pipeline and renderer. This guarantees preview fidelity == published output
6
+ // for any UI built on top of it.
7
+ //
8
+ // Usage:
9
+ // const { create_content_handler } = require("./hdoc-content-routes.js");
10
+ // const content = create_content_handler({
11
+ // source_path, docId, hdocbook_config, hdocbook_project, nav_inline,
12
+ // });
13
+ // content.register(app);
14
+ // // ...and reuse content.send_content_file / content.send_content_resource_404
15
+ // // for your own catch-all (SPA) route.
16
+
17
+ const fs = require("node:fs");
18
+ const path = require("node:path");
19
+ const stream = require("node:stream");
20
+
21
+ const hdoc = require(path.join(__dirname, "hdoc-module.js"));
22
+ const mdfm = require("markdown-it-front-matter");
23
+
24
// Escape &, < and > in a Mermaid definition so it can be embedded as the text
// content of a <pre> element. Only HTML-text safety is handled here; no theme
// or frontmatter is injected. Ampersands are escaped first so the entities
// produced for < and > are not escaped a second time.
function escape_html(str) {
  const ampersands_escaped = str.replace(/&/g, "&amp;");
  const opening_escaped = ampersands_escaped.replace(/</g, "&lt;");
  return opening_escaped.replace(/>/g, "&gt;");
}
35
+
36
+ // Build a content handler bound to a single book's context.
37
+ //
38
+ // ctx:
39
+ // source_path - absolute path to the document source root
40
+ // docId - id of the hdocbook being served
41
+ // hdocbook_config - parsed hdocbook.json
42
+ // hdocbook_project - parsed hdocbook-project.json (used for redirects)
43
+ // nav_inline - inline-help nav fragment for library.json
44
+ exports.create_content_handler = (ctx) => {
45
+ const {
46
+ source_path,
47
+ docId,
48
+ hdocbook_config,
49
+ hdocbook_project,
50
+ nav_inline,
51
+ } = ctx;
52
+
53
+ // process_includes resolves include paths relative to the source root.
54
+ const global_source_path = source_path;
55
+
56
+ // Render markdown SOURCE to HTML through the same pipeline as published output:
57
+ // expand_variables -> process_includes -> markdown-it (+ mermaid, tips, frontmatter).
58
+ //
59
+ // file_path anchors include resolution (relative to it) and mermaid logging.
60
+ // md_source is the raw markdown to render — read from disk for serve/publish,
61
+ // or supplied from the editor buffer for live preview (so preview == published).
62
+ // Returns { html, frontmatter } where frontmatter is the parsed object or null.
63
// Render markdown source to HTML.
//
// file_path - passed to hdoc.process_includes together with the source root
// md_source - raw markdown (anything with toString())
//
// Resolves to { html, frontmatter }: frontmatter is the result of
// hdoc.parse_yaml on the captured front-matter block, or null when the
// document has none.
async function render_markdown(file_path, md_source) {
  const expanded_txt = hdoc.expand_variables(md_source.toString(), docId);
  const { body: included_txt } = await hdoc.process_includes(
    file_path,
    expanded_txt,
    global_source_path,
  );

  // A fresh markdown-it instance per call: the front-matter callback below
  // writes into a local variable, so instances are not shared across calls.
  const markdown_it = require("markdown-it");
  const renderer = markdown_it({
    html: true,
    linkify: true,
    typographer: true,
    // Mermaid fences are not rendered here; the escaped definition is emitted
    // inside <pre class="mermaid">. Every other language returns undefined.
    highlight(str, lang) {
      if (lang !== "mermaid") return undefined;
      return `<pre class="mermaid">${escape_html(str)}</pre>`;
    },
  });
  renderer.linkify.set({ fuzzyEmail: false, fuzzyLink: false, fuzzyIP: false });

  // The front-matter plugin hands the raw block to this callback during render.
  let captured_frontmatter = "";
  renderer.use(mdfm, (fm) => {
    captured_frontmatter = fm;
  });

  const tips_plugin = require(`${__dirname}/custom_modules/tips.js`);
  renderer.use(tips_plugin, { links: true });

  const html = renderer.render(included_txt.toString());

  let frontmatter = null;
  if (captured_frontmatter.length) {
    frontmatter = hdoc.parse_yaml(captured_frontmatter);
  }
  return { html, frontmatter };
}
109
+
110
// Render the markdown file at file_path and send the resulting HTML.
//
// Resolves to false (nothing sent) when the file does not exist, otherwise to
// true after the response has been sent. When the document has front matter,
// it is JSON-serialised, base64-encoded and sent in the X-frontmatter header.
async function transform_markdown_and_send_html(req, res, file_path) {
  if (!fs.existsSync(file_path)) {
    return false;
  }

  const md_source = fs.readFileSync(file_path).toString();
  const rendered = await render_markdown(file_path, md_source);

  if (rendered.frontmatter) {
    const frontmatter_json = JSON.stringify(rendered.frontmatter);
    const encoded = Buffer.from(frontmatter_json, "utf-8").toString("base64");
    res.setHeader("X-frontmatter", encoded);
  }

  res.setHeader("Content-Type", "text/html");
  res.send(rendered.html);
  return true;
}
129
+
130
+
131
// Send a file's text after running it through hdoc.expand_variables.
//
// redirected - when truthy, a "Redirected from …" line is prepended to the body
//
// The Content-Type comes from hdoc.content_type_for_ext; .md files also get
// Content-Disposition: inline.
function send_content_file(req, res, file_path, redirected = false) {
  const raw_txt = fs.readFileSync(file_path).toString();
  const expanded_txt = hdoc.expand_variables(raw_txt, docId);
  const content_txt = redirected
    ? `Redirected from ${redirected}\n\n${expanded_txt}`
    : expanded_txt;

  const contentType = hdoc.content_type_for_ext(path.extname(file_path));
  const is_markdown = path.extname(file_path) === ".md";
  if (is_markdown) {
    res.setHeader("Content-Disposition", "inline");
  }
  res.setHeader("Content-Type", contentType);

  res.send(content_txt);
}
149
+
150
// Stream a file from disk into the response, with a Content-Type derived from
// its extension.
//
// req       - incoming request (unused)
// res       - Express-style response the file is streamed into
// file_path - path of the file to send
//
// On a read failure the error is logged and, if no headers have gone out yet,
// a 400 "Unexpected error" response is sent; otherwise the response is torn
// down because its status can no longer be changed.
function send_file(req, res, file_path) {
  const contentType = hdoc.content_type_for_ext(path.extname(file_path));
  res.setHeader("Content-Type", contentType);

  // The vendored Mermaid bundle is large (~4.5MB) and content-stable, so cache
  // it hard — without this the browser re-downloads it on every full page load
  // when clicking through to diagram pages. Other dev assets stay uncached so
  // edits to the viewer JS are picked up on refresh.
  if (path.basename(file_path) === "mermaid.min.js") {
    res.setHeader("Cache-Control", "public, max-age=604800, immutable");
  }

  // Read through a PassThrough rather than putting res in the pipeline:
  // pipeline() destroys every stream it manages on failure, and keeping res
  // outside of it leaves the response usable for the error reply below.
  const reader = fs.createReadStream(file_path);
  const relay = new stream.PassThrough();
  stream.pipeline(reader, relay, (err) => {
    if (!err) return;
    console.error(err); // No such file or any other kind of error
    if (res.headersSent) {
      // Part of the body is already on the wire; end the response instead of
      // leaving it hanging (relay never emits 'end' after a failure).
      res.destroy();
      return;
    }
    // status() + send() rather than sendStatus() + send(): sendStatus already
    // sends a response, so a following send() would throw in this callback.
    res.status(400).send("Unexpected error");
  });
  relay.pipe(res);
}
173
+
174
// Reply with HTTP 404 and a short body for a content resource that could not
// be found. The request object is not used.
function send_content_resource_404(req, res) {
  const not_found_message = "Content resource not found";
  res.setHeader("Content-Type", "text/html");
  res.status(404).send(not_found_message);
}
178
+
179
+ // 1. If we request a file with a .html file extension, and that file DOES NOT exist,
180
+ // we will look for the same file but with a .md extension. If we find that
181
+ // corresponding markdown file, we will transform that markdown file to HTML and
182
+ // return the HTML content
183
+ //
184
+ // 2. If we request a file, without any file extension then we will look for that file
185
+ // with a .md extension, and if that file exists, we will transform that markdown
186
+ // file to HTML and return that file.
187
+ //
188
+ // For all other requests, we are going to look on the filesystem. If we request
189
+ // a specific file with its extension (including .md files), then we will simply
190
+ // return the file verbatim as a static file.
191
+ //
192
+ // If we request a path without an extension and no matching .md file exists, we
192
+ // treat it as a folder: we look for index.md and, if that's present, transform it
193
+ // to HTML and return it; otherwise we look for index.html (then <path>.html and
194
+ // <path>.htm) and, if present, send that file as-is
196
+ //
197
+ // Anything else in this handler will return a 404 error
198
// Route handler for /_books/*: maps the request URL onto a file under
// source_path and sends it, rendering markdown to HTML where applicable.
//
// req - request; only req.url is read
// res - Express-style response
//
// Lookup order:
//   *.html        -> the file itself, else the sibling .md rendered to HTML
//   *.md          -> the markdown file verbatim
//   no extension  -> <path>.md (rendered), <path>/index.md (rendered),
//                    <path>/index.html, <path>.html, <path>.htm
//   anything else -> the file itself if it exists
// Everything that falls through gets a 404.
//
// The handler is async so the markdown render is awaited: a failed render now
// rejects the returned promise instead of becoming an unhandled rejection, and
// a render that reports "nothing sent" falls through to the 404.
async function handle_books_request(req, res) {
  let url = req.url.replace("/_books/", "/");

  console.log("URL Requested:", url);

  // Process redirects. Every entry is checked against the originally requested
  // URL, so the last matching entry wins. Entries are compared through a
  // normalised copy; the shared project config itself is not modified.
  const redirects = hdocbook_project.redirects;
  if (Array.isArray(redirects) && redirects.length > 0) {
    const with_leading_slash = (u) => (u.startsWith("/") ? u : `/${u}`);
    const source_url = with_leading_slash(url);
    for (const redir of redirects) {
      if (
        with_leading_slash(redir.url) === source_url &&
        redir.location &&
        redir.location !== ""
      ) {
        url = `${redir.location}`;
        console.log(`Redirecting to ${url}`);
      }
    }
  }

  const file_path = path.join(source_path, url);

  // Refuse anything that resolves outside the source root (e.g. "../" segments
  // in the request URL) rather than serving arbitrary files from disk.
  const from_root = path.relative(
    path.resolve(source_path),
    path.resolve(file_path),
  );
  if (
    from_root === ".." ||
    from_root.startsWith(`..${path.sep}`) ||
    path.isAbsolute(from_root)
  ) {
    send_content_resource_404(req, res);
    return;
  }

  const ext = path.extname(file_path);

  if (ext === ".html") {
    // 1a. check for html files, and send/transform as required
    if (fs.existsSync(file_path)) {
      // HTML file exists on disk, just return it verbatim
      res.setHeader("Content-Type", "text/html");
      send_file(req, res, file_path);
      return true;
    }
    // Swap only the trailing extension; a ".html" earlier in the path (for
    // instance in a directory name) must stay untouched.
    const md_path = `${file_path.slice(0, -ext.length)}.md`;
    if (
      fs.existsSync(md_path) &&
      (await transform_markdown_and_send_html(req, res, md_path))
    ) {
      return;
    }
  } else if (ext === ".md") {
    // If the markdown file exists, just send to caller as is
    if (fs.existsSync(file_path)) {
      send_content_file(req, res, file_path);
      return true;
    }
  } else if (ext.length === 0) {
    // 2. Request without a file extension: the first candidate that exists on
    // disk is used. path.join gives the same index path whether or not the
    // URL ended with a slash, so the path probed is always the path sent.
    const candidates = [
      { target: `${file_path}.md`, render: true },
      { target: path.join(file_path, "index.md"), render: true },
      { target: path.join(file_path, "index.html"), render: false },
      { target: `${file_path}.html`, render: false },
      { target: `${file_path}.htm`, render: false },
    ];
    const match = candidates.find(({ target }) => fs.existsSync(target));
    if (match) {
      if (!match.render) {
        res.setHeader("Content-Type", "text/html");
        send_content_file(req, res, match.target);
        return;
      }
      if (await transform_markdown_and_send_html(req, res, match.target)) {
        return;
      }
    }
  } else if (fs.existsSync(file_path)) {
    // Book definition files are parsed first purely to log malformed JSON;
    // the file is sent regardless of the outcome.
    const is_book_json =
      file_path.endsWith("hdocbook.json") ||
      file_path.endsWith("hdocbook_project.json") ||
      file_path.endsWith("hdocbook-project.json");
    if (is_book_json) {
      try {
        JSON.parse(fs.readFileSync(file_path));
      } catch (e) {
        console.error(`Error parsing ${path.basename(file_path)}: ${e}`);
      }
    }
    send_file(req, res, file_path);
    return;
  }

  // Return a 404 error here
  send_content_resource_404(req, res);
}
309
+
310
// Route handler for /_books/library.json: responds with a JSON document
// listing a single book (docId and title taken from hdocbook_config, plus the
// nav_inline value), pretty-printed with a 3-space indent.
function handle_library_request(req, res) {
  const book_entry = {
    docId: hdocbook_config.docId,
    title: hdocbook_config.title,
    nav_inline,
  };
  const payload = JSON.stringify({ books: [book_entry] }, null, 3);

  res.setHeader("Content-Type", "application/json");
  res.send(payload);
}
323
+
324
// Mount the GET routes on the given app. library.json is registered before
// the /_books/* wildcard, in that order.
function register(app) {
  const routes = [
    ["/_books/library.json", handle_library_request],
    ["/_books/*splat", handle_books_request],
  ];
  for (const [route, handler] of routes) {
    app.get(route, handler);
  }
}
328
+
329
+ return {
330
+ register,
331
+ // Exposed so host servers can reuse the same helpers for their own
332
+ // (SPA / editor) catch-all routes, and the editor's live-preview endpoint.
333
+ // handle_books_request / handle_library_request are exposed so the editor
334
+ // can register delegating /_books routes that follow a workspace switch.
335
+ render_markdown,
336
+ transform_markdown_and_send_html,
337
+ send_content_file,
338
+ send_file,
339
+ send_content_resource_404,
340
+ handle_books_request,
341
+ handle_library_request,
342
+ };
343
+ };