hdoc-tools 0.51.0 → 0.52.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-build.js CHANGED
@@ -11,10 +11,9 @@
11
11
  const hdoc_build_onyx = require(path.join(__dirname, "hdoc-build-onyx.js"));
12
12
  const hdoc_build_pdf = require(path.join(__dirname, "hdoc-build-pdf.js"));
13
13
  const hdoc_index = require(path.join(__dirname, "hdoc-db.js"));
14
+ const hdoc_mermaid = require(path.join(__dirname, "hdoc-mermaid.js"));
14
15
  const archiver = require("archiver");
15
16
 
16
- const { execSync } = require("child_process");
17
-
18
17
  const h_tags_to_search = ["h1", "h2", "h3"];
19
18
  const image_extensions = ["png", "svg", "jpg"];
20
19
 
@@ -44,14 +43,6 @@
44
43
  "mermaid-theme.yaml",
45
44
  );
46
45
 
47
- let mermaid_puppeteer_config_path = path.resolve(
48
- templates_path,
49
- "mermaid-puppeteer-config.json",
50
- );
51
- if (process.platform === "win32") {
52
- mermaid_puppeteer_config_path = `"${mermaid_puppeteer_config_path}"`
53
- }
54
-
55
46
  const pdf_template_path = path.join(__dirname, "templates", "pdf");
56
47
  const ui_css_path = path.join(__dirname, "ui", "css");
57
48
  const pdf_template_file_path = path.join(pdf_template_path, "template.html");
@@ -70,6 +61,10 @@
70
61
  const redirects = {};
71
62
  const static_html_files = [];
72
63
  const mermaid_failures = [];
64
+ // Mermaid diagrams collected during (synchronous) markdown rendering, then
65
+ // rendered to SVG in a batch afterwards (puppeteer is async; the markdown-it
66
+ // highlight callback is not). Keyed by output path to dedupe identical diagrams.
67
+ const mermaid_queue = new Map();
73
68
 
74
69
  let bc = {}; // Breadcrumbs map
75
70
  let book_read_time = 0;
@@ -117,38 +112,30 @@
117
112
  typographer: true,
118
113
  highlight: function (str, lang) {
119
114
  if (lang === "mermaid" && process.env.GITHUB_ACTIONS !== 'true') {
120
- try {
121
- const tmpInput = hdoc.tmp_file_sync({ postfix: ".mmd" });
122
- const outputFileName = `mermaid-${crypto.createHash("sha256").update(str).digest("hex").slice(0, 16)}.svg`;
123
- const outputPath = path.join(mermaid_images_path, outputFileName);
124
- const outputLink = `/_books/${doc_id}/mermaid-images/${outputFileName}`;
125
-
126
- if (!str.startsWith('---')) {
127
- str = '---\n' + fs.readFileSync(mermaid_theme_path, {encoding: 'utf-8'}) + `\n---\n${str}`;
128
- }
129
-
130
- fs.writeFileSync(tmpInput.name, str);
131
- let cmd = `${__dirname}/node_modules/.bin/mmdc`;
132
-
133
- if (process.platform === "win32") {
134
- cmd = `"${cmd}.cmd"`;
135
- }
136
-
137
- cmd = `${cmd} -i "${tmpInput.name}" -o "${outputPath}" --backgroundColor transparent --puppeteerConfigFile ${mermaid_puppeteer_config_path}`;
138
- console.log(`Generating Mermaid SVG found in ${currentMdFilePath.relativePath} - ${outputPath}`);
139
- execSync(cmd);
140
-
141
- if (!fs.existsSync(outputPath)) {
142
- throw new Error("mmdc did not generate output");
143
- }
144
-
145
- tmpInput.removeCallback();
115
+ // markdown-it's highlight callback is synchronous, but Mermaid
116
+ // rendering is async (puppeteer). So here we only *queue* the
117
+ // diagram and emit the <img> tag pointing at its eventual SVG;
118
+ // the actual rendering happens right after this file's markdown has
119
+ // been rendered (see flush_mermaid_queue, run before PDF generation/zip).
120
+ const outputFileName = `mermaid-${crypto.createHash("sha256").update(str).digest("hex").slice(0, 16)}.svg`;
121
+ const outputPath = path.join(mermaid_images_path, outputFileName);
122
+ const outputLink = `/_books/${doc_id}/mermaid-images/${outputFileName}`;
123
+
124
+ let definition = str;
125
+ if (!definition.startsWith('---')) {
126
+ definition = '---\n' + fs.readFileSync(mermaid_theme_path, {encoding: 'utf-8'}) + `\n---\n${definition}`;
127
+ }
146
128
 
147
- return `<img class="mermaid-diagram" src="${outputLink}" alt="Mermaid Diagram">`;
148
- } catch (err) {
149
- mermaid_failures.push({path: currentMdFilePath.relativePath, error: err.message});
150
- return ``;
129
+ // Dedupe identical diagrams (same hash -> same output file).
130
+ if (!mermaid_queue.has(outputPath)) {
131
+ mermaid_queue.set(outputPath, {
132
+ definition,
133
+ outputPath,
134
+ relativePath: currentMdFilePath.relativePath,
135
+ });
151
136
  }
137
+
138
+ return `<img class="mermaid-diagram" src="${outputLink}" alt="Mermaid Diagram">`;
152
139
  }
153
140
  }
154
141
  });
@@ -197,6 +184,35 @@
197
184
  // generates a PDF, and writes the final HTML to disk ready for indexing.
198
185
  // Markdown files are rendered to HTML first; static HTML files are processed
199
186
  // in place. Both paths share the same post-processing pipeline.
187
/**
 * Renders every queued Mermaid diagram that has not been rendered yet and
 * writes each result to its SVG output path, using the shared `browser`.
 *
 * Each render is memoized on its queue entry (`item.promise`), so repeated or
 * concurrent calls never render the same diagram twice. Failures are appended
 * to `mermaid_failures` instead of being thrown, so this function never
 * rejects because of a bad diagram.
 *
 * @returns {Promise<void>} resolves once every queued diagram has settled.
 */
const flush_mermaid_queue = async () => {
  // Renders one queue entry; always resolves, recording failures on the side.
  const render_item = async (item) => {
    try {
      console.log(`Generating Mermaid SVG found in ${item.relativePath} - ${item.outputPath}`);
      const svg = await hdoc_mermaid.render_svg(browser, item.definition, {
        backgroundColor: "transparent",
      });
      // Guard the type first so a non-string result reports the real problem
      // rather than a TypeError from calling a string method on it.
      if (typeof svg !== "string" || !svg.includes("<svg")) {
        throw new Error("Mermaid produced no SVG output");
      }
      fs.writeFileSync(item.outputPath, svg);
    } catch (err) {
      // Rejections are not guaranteed to be Error instances; keep the recorded
      // message a usable string either way.
      const message = err instanceof Error ? err.message : String(err);
      mermaid_failures.push({ path: item.relativePath, error: message });
    }
  };

  const pending = [];
  for (const item of mermaid_queue.values()) {
    if (!item.promise) {
      item.promise = render_item(item);
    }
    pending.push(item.promise);
  }
  await Promise.all(pending);
};
215
+
200
216
  const transform_file = async (file_path) => {
201
217
  const is_markdown = path.extname(file_path.path) === '.md';
202
218
  if (is_markdown) conversion_attempted++;
@@ -254,6 +270,11 @@
254
270
  // Render markdown into HTML
255
271
  html_txt = md.render(md_txt);
256
272
 
273
+ // md.render() synchronously queued any Mermaid diagrams in this file.
274
+ // Render them to SVG now (memoized/deduped) so the files exist before
275
+ // PDF generation below reads them, and before validation/zipping.
276
+ await flush_mermaid_queue();
277
+
257
278
  // Single pass: wrap h2/h3 divs + extract heading, paragraph, read-time.
258
279
  // Replaces separate wrapHContent + getFirstHTMLHeading + get_html_read_time calls.
259
280
  const extracted = hdoc.wrapAndExtract(html_txt, h_tags_to_search);
@@ -1279,6 +1300,10 @@
1279
1300
  );
1280
1301
  }
1281
1302
 
1303
+ // Safety net: render any Mermaid diagrams not already flushed during
1304
+ // transform_file (no-op if all are done — rendering is memoized per diagram).
1305
+ await flush_mermaid_queue();
1306
+
1282
1307
  // Output to console
1283
1308
  console.log(`\n MD files found: ${conversion_attempted}`);
1284
1309
  console.log(`Successfully converted to HTML: ${conversion_success}`);
@@ -1418,7 +1443,7 @@
1418
1443
  const zip_path = path.join(work_path, `${doc_id}.zip`);
1419
1444
 
1420
1445
  const output = fs.createWriteStream(zip_path);
1421
- const archive = archiver("zip");
1446
+ const archive = new archiver.ZipArchive();
1422
1447
  archive.on("error", (err) => {
1423
1448
  throw err;
1424
1449
  });
package/hdoc-help.js CHANGED
@@ -15,7 +15,10 @@ Commands
15
15
  - Use the '--no-links' argument to skip link output to CLI during validation.
16
16
 
17
17
  - createDocs
18
- Creates folder structure and markdown documents as defined in the HDocBook navigation item links
18
+ Creates folder structure and markdown documents as defined in the HDocBook navigation item links
19
+
20
+ - edit
21
+ Starts the local structure/content editor on port 3000 (bound to 127.0.0.1), serving the editor UI alongside a live preview of the content. Use the '-port N' argument to use a different port.
19
22
 
20
23
  - help
21
24
  Outputs available arguments and switches
@@ -0,0 +1,204 @@
1
+ // In-process Mermaid -> SVG renderer.
2
+ //
3
+ // Replaces the previous `@mermaid-js/mermaid-cli` (mmdc) integration. Instead of
4
+ // shelling out to the CLI (which spawned a fresh Chromium per diagram and pinned
5
+ // puppeteer to <=24 via its peerDependency), this renders Mermaid using the
6
+ // `mermaid` library directly inside a puppeteer browser supplied by the caller —
7
+ // the very same browser hdoc-build already launches for PDF/link work.
8
+ //
9
+ // The render path (Interceptor + page evaluation) is ported from mermaid-cli
10
+ // (MIT licensed): https://github.com/mermaid-js/mermaid-cli
11
+ // Trimmed to the SVG-only case we need, plus ELK layout support (required by
12
+ // books that use `layout: elk`). zenuml / tidy-tree / icon-packs / png / pdf are
13
+ // intentionally dropped — no hdocbook content uses them.
14
+
15
+ const { readFile, realpath } = require("node:fs/promises");
16
+ const path = require("node:path");
17
+ const url = require("node:url");
18
+
19
+ // ESM bundle locations inside the installed packages. Both packages expose
20
+ // "./*": "./*" in their exports map, so the dist files resolve directly.
21
+ const mermaidESMPath = require.resolve("mermaid/dist/mermaid.esm.mjs");
22
+ const elkESMPath = require.resolve(
23
+ "@mermaid-js/layout-elk/dist/mermaid-layout-elk.esm.mjs",
24
+ );
25
+
26
/**
 * Picks the Content-Type header value for a file served through the request
 * interceptor, based purely on the (case-insensitive) file extension.
 *
 * @param {string} filePath - path of the file about to be served.
 * @returns {string} the MIME type for `.css`, `.js`, `.mjs` or `.woff2` files.
 * @throws {Error} for any other extension.
 */
function getContentTypeFromFileExtension(filePath) {
  const ext = path.extname(filePath).toLowerCase();

  if (ext === ".css") {
    // Explicit UTF-8 so the stylesheet is not decoded as Latin-1.
    return "text/css;charset=UTF-8";
  }
  if (ext === ".js" || ext === ".mjs") {
    return "application/javascript";
  }
  if (ext === ".woff2") {
    return "font/woff2";
  }

  throw new Error(`Unsupported file extension for intercept: ${ext}`);
}
45
+
46
/**
 * Puppeteer can't `import` ESM modules from `file://` URLs. Rather than stand up
 * a real HTTP server (ports/firewalls), we intercept requests to a dummy
 * `https://hdoc-mermaid-intercept.invalid` origin and serve the corresponding
 * local file.
 *
 * Ported from mermaid-cli's puppeteerIntercept.js, with two corrections:
 *  - the allowed-directory check now tests that the requested file lies
 *    INSIDE a registered directory (the relative-path test was reversed, which
 *    let almost any readable file through);
 *  - respond/continue/abort are awaited inside the handler so a rejection is
 *    logged instead of surfacing as an unhandled promise rejection.
 */
class Interceptor {
  #INTERCEPT_ORIGIN = "https://hdoc-mermaid-intercept.invalid";

  /** @type {Set<string>} resolved (realpath) dirs allowed to be served. */
  #allowedDirs = new Set();

  /**
   * Registers the directory tree around a local file as servable and returns
   * the intercept-origin URL that maps to that file.
   *
   * @param {string | URL} fileUrl - a `file:` URL of an existing file.
   * @param {{ allowParentDirectoryLevel?: number }} [options] - how many levels
   *   above the file's own directory to allow (0 = only the file's directory).
   * @returns {Promise<string>} the URL to request from inside the page.
   * @throws {Error} if `fileUrl` is not a `file:` URL or cannot be resolved.
   */
  async fileUrlToInterceptUrl(fileUrl, { allowParentDirectoryLevel = 1 } = {}) {
    const parsedFileUrl = new URL(fileUrl);
    if (parsedFileUrl.protocol !== "file:") {
      throw new Error(`Invalid file URL: ${parsedFileUrl}`);
    }
    let allowedDirectory = await realpath(url.fileURLToPath(parsedFileUrl));
    // First step goes from the file to its directory; each further step goes
    // up one more level.
    for (let level = allowParentDirectoryLevel; level >= 0; level--) {
      allowedDirectory = path.dirname(allowedDirectory);
    }
    this.#allowedDirs.add(allowedDirectory);
    return `${this.#INTERCEPT_ORIGIN}${parsedFileUrl.pathname}`;
  }

  /**
   * Maps an intercept-origin URL back to the `file:` URL it stands for,
   * refusing anything outside the registered directories.
   *
   * @param {string | URL} interceptUrl - URL requested by the page.
   * @returns {Promise<URL>} the corresponding `file:` URL.
   * @throws {Error} if the origin is wrong, the file does not exist, or the
   *   resolved file is not inside an allowed directory.
   */
  async interceptUrlToFileUrl(interceptUrl) {
    const parsedInterceptUrl = new URL(interceptUrl);
    if (parsedInterceptUrl.origin !== this.#INTERCEPT_ORIGIN) {
      throw new Error(`Invalid intercept URL: ${parsedInterceptUrl}`);
    }
    const fileUrl = new URL(
      parsedInterceptUrl.href.slice(this.#INTERCEPT_ORIGIN.length),
      "file://",
    );
    // Resolve symlinks before checking so a link cannot point outside.
    const filePath = await realpath(url.fileURLToPath(fileUrl));
    if (!this.#isInsideAllowedDir(filePath)) {
      throw new Error(
        `Intercept URL is not in an allowed directory: ${parsedInterceptUrl}`,
      );
    }
    return fileUrl;
  }

  /** True when `filePath` is strictly inside one of the allowed directories. */
  #isInsideAllowedDir(filePath) {
    for (const dir of this.#allowedDirs) {
      const relativePath = path.relative(dir, filePath);
      const escapesDir =
        relativePath === ".." ||
        relativePath.startsWith(`..${path.sep}`) ||
        path.isAbsolute(relativePath); // e.g. a different drive on Windows
      if (relativePath !== "" && !escapesDir) {
        return true;
      }
    }
    return false;
  }

  /**
   * Puppeteer "request" listener: serves intercept-origin requests from disk
   * and lets every other request continue. Never rejects.
   */
  async #interceptRequestHandler(request) {
    try {
      const requestUrl = request.url();
      if (!requestUrl.startsWith(this.#INTERCEPT_ORIGIN)) {
        await request.continue();
        return;
      }
      const fileUrl = await this.interceptUrlToFileUrl(requestUrl);
      await request.respond({
        status: 200,
        headers: { "Access-Control-Allow-Origin": "*" },
        contentType: getContentTypeFromFileExtension(
          url.fileURLToPath(fileUrl),
        ),
        body: await readFile(fileUrl),
      });
    } catch (error) {
      console.error(
        `Error handling intercept request for ${request.url()}:`,
        error,
      );
      try {
        await request.abort();
      } catch (abortError) {
        // The request may already be resolved or its page closed.
        console.error(
          `Could not abort intercept request for ${request.url()}:`,
          abortError,
        );
      }
    }
  }

  /** The request listener, bound to this instance, ready for `page.on`. */
  get interceptRequestHandler() {
    return this.#interceptRequestHandler.bind(this);
  }
}
121
+
122
/**
 * Renders one Mermaid definition to a serialized SVG document.
 *
 * Opens a fresh page on the supplied browser, loads the Mermaid and ELK-layout
 * ESM bundles through the request interceptor, renders the diagram in the
 * page, and serializes the resulting `<svg>` element. The page is always
 * closed, whether rendering succeeds or fails.
 *
 * @param {import('puppeteer').Browser} browser - An open puppeteer browser.
 * @param {string} definition - Mermaid source (may include `--- config: ---` frontmatter).
 * @param {{ backgroundColor?: string }} [opts] - background applied to the page body and the SVG.
 * @returns {Promise<string>} the serialized `<svg>`.
 */
async function render_svg(browser, definition, { backgroundColor = "transparent" } = {}) {
  const page = await browser.newPage();

  // Forward in-page warnings and errors (e.g. Mermaid parse errors) to the build log.
  page.on("console", (msg) => {
    const level = msg.type();
    if (level !== "error" && level !== "warning") {
      return;
    }
    console.warn(msg.text());
  });

  try {
    await page.setContent(
      "<!doctype html><html><body><div id=\"container\"></div></body></html>",
    );
    await page.$eval(
      "body",
      (body, bg) => {
        body.style.background = bg;
      },
      backgroundColor,
    );

    // Map the local ESM bundles onto the dummy intercept origin.
    const interceptor = new Interceptor();
    const mermaidFileUrl = url.pathToFileURL(mermaidESMPath);
    const mermaidUrl = await interceptor.fileUrlToInterceptUrl(mermaidFileUrl);
    const elkFileUrl = url.pathToFileURL(elkESMPath);
    const elkUrl = await interceptor.fileUrlToInterceptUrl(elkFileUrl);

    page.on("request", interceptor.interceptRequestHandler);
    await page.setRequestInterception(true);

    // Runs inside the page: everything it needs is passed in through `params`.
    await page.$eval(
      "#container",
      async (container, params) => {
        const mermaid = (await import(params.mermaidUrl)).default;
        const elkLayouts = (await import(params.elkUrl)).default;

        const fontLoads = [];
        for (const font of document.fonts) {
          fontLoads.push(font.load());
        }
        await Promise.all(fontLoads);

        mermaid.registerLayoutLoaders(elkLayouts);
        mermaid.initialize({ startOnLoad: false });

        // An invalid diagram makes render() throw, which rejects the $eval call.
        const rendered = await mermaid.render(
          "hdoc-svg",
          params.definition,
          container,
        );
        container.innerHTML = rendered.svg;

        const svg = container.getElementsByTagName?.("svg")?.[0];
        if (svg?.style) {
          svg.style.backgroundColor = params.backgroundColor;
        }
      },
      { definition, mermaidUrl, elkUrl, backgroundColor },
    );

    // XMLSerializer turns HTML inside <foreignObject> (e.g. <br>) into valid
    // XML for the saved .svg file.
    return await page.$eval("svg", (svg) => {
      // eslint-disable-next-line no-undef
      return new XMLSerializer().serializeToString(svg);
    });
  } finally {
    await page.close();
  }
}
203
+
204
+ module.exports = { render_svg, Interceptor };
package/hdoc-module.js CHANGED
@@ -9,10 +9,48 @@
9
9
 
10
10
  let retried = false;
11
11
 
12
// Resolves (with no value) after roughly `ms` milliseconds.
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });
13
+
14
+ // GitHub enforces a "secondary" (burst) rate limit on top of the hourly one:
15
+ // too many requests in a short window are rejected with 403/429 even when the
16
+ // hourly budget is intact. The contributor pull fires one call per file, so a
17
+ // fast run trips it. Historically `hdoc validate` got away with this only
18
+ // because verbose console output slowed the loop down between calls; the
19
+ // --quiet mode removed that incidental spacing and started hitting 403s.
20
+ // Throttle GitHub API calls to a fixed minimum interval so behaviour no longer
21
+ // depends on how chatty the console is.
22
+ let _last_github_call_at = 0;
23
+ const GITHUB_HOST = "api.github.com";
24
+ const GITHUB_MIN_INTERVAL_MS = 800;
25
+ // Never block the whole build waiting on a single rate-limited call.
26
+ const GITHUB_MAX_BACKOFF_MS = 60000;
27
+
28
/**
 * For a rate-limited response, works out how long to wait before retrying.
 *
 * Signals are checked in this order:
 *  1. `Retry-After`, as either delay-seconds or an HTTP-date;
 *  2. `X-RateLimit-Remaining: 0` together with `X-RateLimit-Reset`
 *     (epoch seconds).
 *
 * @param {{ status: number, headers: { get(name: string): string | null } }} response
 * @returns {number | null} milliseconds to wait (never negative), or null when
 *   the response is NOT a retryable rate limit (e.g. a genuine 403
 *   auth/forbidden), in which case the caller should give up immediately.
 */
const rate_limit_wait_ms = (response) => {
  if (response.status !== 403 && response.status !== 429) return null;

  const retry_after = response.headers.get("retry-after");
  // An empty header carries no information; Number("") would otherwise read
  // as 0 and turn it into "retry immediately".
  const retry_after_value = retry_after === null ? "" : retry_after.trim();
  if (retry_after_value !== "") {
    const secs = Number(retry_after_value);
    if (!Number.isNaN(secs)) return Math.max(0, secs * 1000);
    // Retry-After may also be an HTTP-date instead of a number of seconds.
    const retry_at = Date.parse(retry_after_value);
    if (!Number.isNaN(retry_at)) return Math.max(0, retry_at - Date.now());
  }

  const remaining = response.headers.get("x-ratelimit-remaining");
  const reset = response.headers.get("x-ratelimit-reset");
  if (remaining === "0" && reset !== null) {
    const reset_ms = Number(reset) * 1000 - Date.now();
    if (!Number.isNaN(reset_ms)) return Math.max(0, reset_ms);
  }

  // 403/429 with no rate-limit signal => auth/forbidden, not worth retrying.
  return null;
};
47
+
12
48
  // Wraps the built-in fetch() with automatic retry for transient errors.
13
49
  // Retries up to maxRetries times when the server returns an HTTP error status
14
- // >= 400, except for 401 (Unauthorized) and 403 (Forbidden) which are auth
15
- // failures that won't be resolved by retrying. Network errors (where fetch
50
+ // >= 400, except for 401 (Unauthorized) and non-rate-limit 403 (Forbidden)
51
+ // which are auth failures that won't be resolved by retrying. GitHub
52
+ // rate-limit responses (403/429 carrying Retry-After or X-RateLimit-Reset) ARE
53
+ // retried after honouring the indicated wait. Network errors (where fetch
16
54
  // itself throws) are also retried. Sets the module-level `retried` flag so
17
55
  // callers can detect and log a success-after-retry message.
18
56
  //
@@ -21,8 +59,18 @@
21
59
  // does not leave an already-aborted signal in place for the retries.
22
60
  const fetchWithRetry = async (url, options = {}, maxRetries = 5) => {
23
61
  const { timeoutMs, ...fetchOptions } = options;
62
+ const is_github = typeof url === "string" && url.includes(GITHUB_HOST);
24
63
  let retryCount = 0;
25
64
  while (true) {
65
+ // Space out GitHub API calls so a fast (quiet) run does not burst past
66
+ // the secondary rate limit. No effect on non-GitHub requests.
67
+ if (is_github) {
68
+ const since = Date.now() - _last_github_call_at;
69
+ if (since < GITHUB_MIN_INTERVAL_MS) {
70
+ await sleep(GITHUB_MIN_INTERVAL_MS - since);
71
+ }
72
+ _last_github_call_at = Date.now();
73
+ }
26
74
  // Create a fresh signal for each attempt; reusing an already-aborted
27
75
  // signal would cause every subsequent retry to abort immediately.
28
76
  const attemptOptions = timeoutMs
@@ -38,10 +86,19 @@
38
86
  retried = true;
39
87
  continue;
40
88
  }
41
- // Auth failures and successes are not retried
42
- if (response.ok || response.status === 401 || response.status === 403) {
89
+ if (response.ok || response.status === 401) {
43
90
  return response;
44
91
  }
92
+ // 403/429: retry only when it is a rate limit we can wait out.
93
+ if (response.status === 403 || response.status === 429) {
94
+ const wait_ms = rate_limit_wait_ms(response);
95
+ if (wait_ms === null) return response; // auth/forbidden — give up
96
+ retryCount++;
97
+ if (retryCount > maxRetries) return response;
98
+ retried = true;
99
+ await sleep(Math.min(wait_ms, GITHUB_MAX_BACKOFF_MS));
100
+ continue;
101
+ }
45
102
  retryCount++;
46
103
  if (retryCount > maxRetries) return response;
47
104
  retried = true;