hdoc-tools 0.51.0 → 0.52.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/hdoc-help.js CHANGED
@@ -15,7 +15,10 @@ Commands
15
15
  - Use the '--no-links' argument to skip link output to CLI during validation.
16
16
 
17
17
  - createDocs
18
- Creates folder structure and markdown documents as defined in the HDocBook navigation item links
18
+ Creates folder structure and markdown documents as defined in the HDocBook navigation item links
19
+
20
+ - edit
21
+ Starts the local structure/content editor on port 3000 (bound to 127.0.0.1), serving the editor UI alongside a live preview of the content. Supports a -port N to use a different port.
19
22
 
20
23
  - help
21
24
  Outputs available arguments and switches
@@ -0,0 +1,145 @@
1
+ // Resilient browser provisioning for hdoc-tools.
2
+ //
3
+ // Replaces the old `puppeteer browsers install ...` one-liner postinstall.
4
+ // Puppeteer's own bundled download (install.mjs) is disabled via
5
+ // .puppeteerrc.cjs (skipDownload), so this script is the single, controlled
6
+ // place Chrome + chrome-headless-shell are fetched.
7
+ //
8
+ // Why: on some Windows Server 2019 build agents the Chrome archive extracts
9
+ // only partially (Defender quarantining binaries mid-extract, or a truncated
10
+ // download behind a proxy). The first failure leaves the version folder on
11
+ // disk, after which Puppeteer refuses to re-extract and every later install
12
+ // reports "folder exists but executable missing" — a permanent dead end.
13
+ //
14
+ // This script makes provisioning idempotent and self-healing:
15
+ // * skip when a valid executable already exists,
16
+ // * delete any stale/partial version folder before (re)installing,
17
+ // * retry the download a few times,
18
+ // * verify the executable exists afterwards and fail loudly with concrete
19
+ // remediation if it still does not.
20
+
21
(async () => {
  const fs = require("node:fs");
  const path = require("node:path");
  const os = require("node:os");
  const {
    install,
    computeExecutablePath,
    detectBrowserPlatform,
    Browser,
  } = require("@puppeteer/browsers");

  // Keep this build id in lockstep with the puppeteer version pinned in
  // package.json (puppeteer 25.1.0 ships Chrome 149.0.7827.22).
  const CHROME_BUILD = "149.0.7827.22";
  const MAX_ATTEMPTS = 3;
  // Pause between failed attempts so a transient lock/scan can clear.
  const RETRY_DELAY_MS = 2000;

  const RED = "\x1b[31m";
  const YELLOW = "\x1b[33m";
  const GREEN = "\x1b[32m";
  const RESET = "\x1b[0m";

  const log = (msg) => console.log(`[hdoc-tools] ${msg}`);

  // Resolve the cache directory the same way Puppeteer does at runtime:
  // PUPPETEER_CACHE_DIR wins, otherwise the documented default. Keeping this
  // aligned with runtime means we install to exactly where the browser is
  // later launched from.
  const cacheDir =
    process.env.PUPPETEER_CACHE_DIR ||
    path.join(os.homedir(), ".cache", "puppeteer");

  let platform;
  try {
    platform = detectBrowserPlatform();
  } catch (err) {
    console.error(
      `${RED}Unable to detect browser platform: ${err.message}${RESET}`,
    );
    process.exit(1);
  }
  // detectBrowserPlatform() reports an unsupported OS/architecture by
  // returning undefined rather than throwing; fail with a clear message
  // instead of crashing later inside computeExecutablePath().
  if (!platform) {
    console.error(
      `${RED}Unable to detect browser platform: unsupported OS/architecture (${os.platform()}/${os.arch()})${RESET}`,
    );
    process.exit(1);
  }

  const sleep = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

  // Best-effort recursive delete: a failure here is not fatal because the
  // post-install isUsable() check decides success.
  const rmrf = (target) => {
    try {
      fs.rmSync(target, { recursive: true, force: true });
    } catch {
      /* best effort */
    }
  };

  // A usable install is one where the executable exists and is non-empty.
  const isUsable = (exePath) => {
    try {
      return fs.statSync(exePath).size > 0;
    } catch {
      return false;
    }
  };

  // Locate the per-build folder (<cacheDir>/<browser>/<platform>-<build>) that
  // contains exePath. It is derived from the path relative to cacheDir rather
  // than by walking up a fixed number of levels, because the executable's
  // depth differs per platform: chrome.exe sits two levels below the build
  // folder, whereas the macOS Chrome binary is nested inside an .app bundle
  // (…/Contents/MacOS/…). Walking up two levels there would only remove part of
  // the bundle and leave the stale build folder in place.
  const versionFolderFor = (exePath) => {
    const rel = path.relative(cacheDir, exePath);
    const segments = rel.split(path.sep);
    const outsideCache = path.isAbsolute(rel) || segments[0] === "..";
    if (outsideCache || segments.length < 3) {
      // Unexpected layout: fall back to the two-levels-up heuristic.
      return path.dirname(path.dirname(exePath));
    }
    return path.join(cacheDir, segments[0], segments[1]);
  };

  // Ensure `browser` at CHROME_BUILD is installed in cacheDir.
  // Resolves true when a usable executable is present (already or after an
  // install), false when every attempt failed.
  const provision = async (browser, label) => {
    const exePath = computeExecutablePath({
      browser,
      buildId: CHROME_BUILD,
      cacheDir,
      platform,
    });
    // Nuke the whole build folder before each attempt so a partial
    // extraction can never block a clean re-extract.
    const versionFolder = versionFolderFor(exePath);

    if (isUsable(exePath)) {
      log(`${label} ${CHROME_BUILD} already present, skipping download.`);
      return true;
    }

    for (let attempt = 1; attempt <= MAX_ATTEMPTS; attempt++) {
      rmrf(versionFolder); // clear any stale/partial extract first
      try {
        log(
          `Installing ${label} ${CHROME_BUILD} (attempt ${attempt}/${MAX_ATTEMPTS})...`,
        );
        await install({
          browser,
          buildId: CHROME_BUILD,
          cacheDir,
          platform,
          unpack: true,
        });
      } catch (err) {
        console.error(
          `${YELLOW} attempt ${attempt} failed: ${err.message}${RESET}`,
        );
      }

      // Trust the filesystem, not install()'s return: a "successful" install
      // can still leave the executable missing (e.g. quarantined mid-extract).
      if (isUsable(exePath)) {
        log(`${GREEN}${label} ${CHROME_BUILD} ready.${RESET}`);
        return true;
      }

      if (attempt < MAX_ATTEMPTS) await sleep(RETRY_DELAY_MS);
    }

    return false;
  };

  const chromeOk = await provision(Browser.CHROME, "Chrome");
  const shellOk = await provision(
    Browser.CHROMEHEADLESSSHELL,
    "chrome-headless-shell",
  );

  if (chromeOk && shellOk) process.exit(0);

  console.error(
    `\n${RED}Failed to provision a complete browser into:${RESET}\n ${cacheDir}\n\n` +
      "This is almost always one of:\n" +
      " 1. Antivirus (e.g. Windows Defender) quarantining Chrome files mid-extract.\n" +
      ` Fix: Add-MpPreference -ExclusionPath "${cacheDir}"\n` +
      " 2. A stale/partial cache folder. Fix: delete the chrome / chrome-headless-shell\n" +
      ` sub-folders under "${cacheDir}" and reinstall.\n` +
      " 3. A truncated download behind a proxy / TLS inspection. Check npm/HTTPS proxy config.\n",
  );
  process.exit(1);
})().catch((err) => {
  // Anything that escapes the steps above (e.g. computeExecutablePath throwing)
  // must still fail the install with a readable message and non-zero exit.
  console.error(
    `[hdoc-tools] Browser provisioning failed unexpectedly: ${err?.stack || err}`,
  );
  process.exit(1);
});
@@ -0,0 +1,204 @@
1
+ // In-process Mermaid -> SVG renderer.
2
+ //
3
+ // Replaces the previous `@mermaid-js/mermaid-cli` (mmdc) integration. Instead of
4
+ // shelling out to the CLI (which spawned a fresh Chromium per diagram and pinned
5
+ // puppeteer to <=24 via its peerDependency), this renders Mermaid using the
6
+ // `mermaid` library directly inside a puppeteer browser supplied by the caller —
7
+ // the very same browser hdoc-build already launches for PDF/link work.
8
+ //
9
+ // The render path (Interceptor + page evaluation) is ported from mermaid-cli
10
+ // (MIT licensed): https://github.com/mermaid-js/mermaid-cli
11
+ // Trimmed to the SVG-only case we need, plus ELK layout support (required by
12
+ // books that use `layout: elk`). zenuml / tidy-tree / icon-packs / png / pdf are
13
+ // intentionally dropped — no hdocbook content uses them.
14
+
15
+ const { readFile, realpath } = require("node:fs/promises");
16
+ const path = require("node:path");
17
+ const url = require("node:url");
18
+
19
+ // ESM bundle locations inside the installed packages. Both packages expose
20
+ // "./*": "./*" in their exports map, so the dist files resolve directly.
21
+ const mermaidESMPath = require.resolve("mermaid/dist/mermaid.esm.mjs");
22
+ const elkESMPath = require.resolve(
23
+ "@mermaid-js/layout-elk/dist/mermaid-layout-elk.esm.mjs",
24
+ );
25
+
26
/**
 * Picks the Content-Type to send for an intercepted file, keyed on the file's
 * extension (case-insensitive). Deliberately limited to the few types the
 * Mermaid ESM bundles request.
 *
 * @param {string} filePath - Path of the file about to be served.
 * @returns {string} The Content-Type header value.
 * @throws {Error} When the extension is not one of .css, .js, .mjs, .woff2.
 */
function getContentTypeFromFileExtension(filePath) {
  const extension = path.extname(filePath).toLowerCase();

  if (extension === ".css") {
    // Force UTF-8 so puppeteer doesn't mis-parse as Latin-1.
    return "text/css;charset=UTF-8";
  }
  if (extension === ".js" || extension === ".mjs") {
    return "application/javascript";
  }
  if (extension === ".woff2") {
    return "font/woff2";
  }

  throw new Error(`Unsupported file extension for intercept: ${extension}`);
}
45
+
46
/**
 * Puppeteer can't `import` ESM modules from `file://` URLs. Rather than stand up
 * a real HTTP server (ports/firewalls), we intercept requests to a dummy
 * `https://hdoc-mermaid-intercept.invalid` origin and serve the corresponding
 * local file. Ported from mermaid-cli's puppeteerIntercept.js, with the
 * allowed-directory check corrected so that only files located inside a
 * registered directory can be served.
 */
class Interceptor {
  #INTERCEPT_ORIGIN = "https://hdoc-mermaid-intercept.invalid";

  /** @type {Set<string>} resolved (realpath) dirs allowed to be served. */
  #allowedDirs = new Set();

  /**
   * Registers a local file for serving and returns the intercept URL the page
   * should request instead of the `file://` URL.
   *
   * @param {string | URL} fileUrl - A `file:` URL of an existing file.
   * @param {{ allowParentDirectoryLevel?: number }} [options] - How many levels
   *   above the file's own directory to allow (0 = only the file's directory,
   *   1 = its parent directory, ...). Sibling files requested by the bundle
   *   must live under the allowed directory.
   * @returns {Promise<string>} The intercept URL for `fileUrl`.
   * @throws {Error} If `fileUrl` is not a `file:` URL, or the file cannot be
   *   resolved by `realpath`.
   */
  async fileUrlToInterceptUrl(fileUrl, { allowParentDirectoryLevel = 1 } = {}) {
    fileUrl = new URL(fileUrl);
    if (fileUrl.protocol !== "file:") {
      throw new Error(`Invalid file URL: ${fileUrl}`);
    }
    let parentDirectory = await realpath(url.fileURLToPath(fileUrl));
    // One dirname() to get from the file to its directory, then one more per
    // requested parent level.
    for (let level = 0; level <= allowParentDirectoryLevel; level++) {
      parentDirectory = path.dirname(parentDirectory);
    }
    this.#allowedDirs.add(parentDirectory);
    return `${this.#INTERCEPT_ORIGIN}${fileUrl.pathname}`;
  }

  /**
   * Maps an intercept URL back to the `file:` URL it stands for, refusing
   * anything outside the registered directories.
   *
   * @param {string | URL} interceptUrl - URL on the intercept origin.
   * @returns {Promise<URL>} The corresponding `file:` URL.
   * @throws {Error} If the origin is wrong, the file does not exist, or its
   *   real path is not inside an allowed directory.
   */
  async interceptUrlToFileUrl(interceptUrl) {
    interceptUrl = new URL(interceptUrl);
    if (interceptUrl.origin !== this.#INTERCEPT_ORIGIN) {
      throw new Error(`Invalid intercept URL: ${interceptUrl}`);
    }
    const fileUrl = new URL(
      interceptUrl.href.slice(this.#INTERCEPT_ORIGIN.length),
      "file://",
    );
    // realpath() resolves symlinks and `..` so the containment test below
    // cannot be bypassed through either.
    const filePath = await realpath(url.fileURLToPath(fileUrl));
    if (!this.#isInsideAllowedDir(filePath)) {
      throw new Error(
        `Intercept URL is not in an allowed directory: ${interceptUrl}`,
      );
    }
    return fileUrl;
  }

  /**
   * True when `filePath` lies strictly inside one of the allowed directories.
   * The relative path is computed FROM the allowed directory TO the file: the
   * file is inside only if that path does not climb out (`..`) and is not
   * absolute (a different drive on Windows).
   */
  #isInsideAllowedDir(filePath) {
    for (const dir of this.#allowedDirs) {
      const rel = path.relative(dir, filePath);
      const climbsOut = rel === ".." || rel.startsWith(`..${path.sep}`);
      if (rel !== "" && !climbsOut && !path.isAbsolute(rel)) {
        return true;
      }
    }
    return false;
  }

  /**
   * Puppeteer `request` event handler: serves intercept-origin requests from
   * disk, aborts them on any error, and lets every other request continue.
   */
  async #interceptRequestHandler(request) {
    try {
      if (request.url().startsWith(this.#INTERCEPT_ORIGIN)) {
        const fileUrl = await this.interceptUrlToFileUrl(request.url());
        return request.respond({
          status: 200,
          // The page's own origin differs from the intercept origin, so module
          // imports need a permissive CORS header.
          headers: { "Access-Control-Allow-Origin": "*" },
          contentType: getContentTypeFromFileExtension(
            url.fileURLToPath(fileUrl),
          ),
          body: await readFile(fileUrl),
        });
      }
    } catch (error) {
      console.error(
        `Error handling intercept request for ${request.url()}:`,
        error,
      );
      request.abort();
      return;
    }
    request.continue();
  }

  /** The request handler bound to this instance, ready for `page.on("request", …)`. */
  get interceptRequestHandler() {
    return this.#interceptRequestHandler.bind(this);
  }
}
121
+
122
/**
 * Render a single Mermaid definition to an SVG string.
 *
 * Opens a new page in the supplied browser, loads the Mermaid and ELK layout
 * ESM bundles through an Interceptor, renders the diagram into the page and
 * returns the XML-serialized `<svg>`. The page is always closed afterwards.
 *
 * @param {import('puppeteer').Browser} browser - An open puppeteer browser.
 * @param {string} definition - Mermaid source (may include `--- config: ---` frontmatter).
 * @param {{ backgroundColor?: string }} [opts] - `backgroundColor` is applied to
 *   both the page body and the rendered `<svg>`; defaults to "transparent".
 * @returns {Promise<string>} the serialized `<svg>`.
 * @throws Rejects when Mermaid cannot render the definition (the in-page error
 *   propagates out of `$eval`) or when a bundle fails to load.
 */
async function render_svg(browser, definition, { backgroundColor = "transparent" } = {}) {
  const page = await browser.newPage();
  // Surface in-page warnings/errors (e.g. mermaid parse errors) to the build log.
  // Both "warn" and "warning" are accepted: the spelling reported for
  // console.warn() has differed between Puppeteer releases, and matching only
  // one of them silently drops every warning on the others.
  page.on("console", (msg) => {
    const type = msg.type();
    if (type === "error" || type === "warn" || type === "warning") {
      console.warn(msg.text());
    }
  });
  try {
    await page.setContent(
      "<!doctype html><html><body><div id=\"container\"></div></body></html>",
    );
    await page.$eval(
      "body",
      (body, bg) => {
        body.style.background = bg;
      },
      backgroundColor,
    );

    // Register the two bundles with a fresh interceptor; the returned URLs
    // are what the page imports instead of file:// paths.
    const interceptor = new Interceptor();
    const mermaidUrl = await interceptor.fileUrlToInterceptUrl(
      url.pathToFileURL(mermaidESMPath),
    );
    const elkUrl = await interceptor.fileUrlToInterceptUrl(
      url.pathToFileURL(elkESMPath),
    );

    page.on("request", interceptor.interceptRequestHandler);
    await page.setRequestInterception(true);

    // Everything inside this callback runs in the browser page, so it only
    // sees the values passed explicitly as the last argument.
    await page.$eval(
      "#container",
      async (container, { definition, mermaidUrl, elkUrl, backgroundColor }) => {
        const { default: mermaid } = await import(mermaidUrl);
        const { default: elkLayouts } = await import(elkUrl);

        // Make sure every declared font is loaded before rendering.
        await Promise.all(
          Array.from(document.fonts, (font) => font.load()),
        );

        mermaid.registerLayoutLoaders(elkLayouts);
        mermaid.initialize({ startOnLoad: false });

        // Throws on invalid diagrams — propagates out of $eval as a rejection.
        const { svg: svgText } = await mermaid.render(
          "hdoc-svg",
          definition,
          container,
        );
        container.innerHTML = svgText;

        const svg = container.getElementsByTagName?.("svg")?.[0];
        if (svg?.style) {
          svg.style.backgroundColor = backgroundColor;
        }
      },
      { definition, mermaidUrl, elkUrl, backgroundColor },
    );

    // Serialize via XMLSerializer so HTML <foreignObject> content (e.g. <br>)
    // becomes valid XML in the saved .svg file.
    const svgXML = await page.$eval("svg", (svg) => {
      // eslint-disable-next-line no-undef
      const xmlSerializer = new XMLSerializer();
      return xmlSerializer.serializeToString(svg);
    });

    return svgXML;
  } finally {
    // Close the page on success and failure alike so pages do not accumulate
    // in the shared browser.
    await page.close();
  }
}
203
+
204
+ module.exports = { render_svg, Interceptor };
package/hdoc-module.js CHANGED
@@ -9,10 +9,48 @@
9
9
 
10
10
  let retried = false;
11
11
 
12
// Promise-based delay: resolves (with no value) once `ms` milliseconds elapse.
const sleep = (ms) =>
  new Promise((resolve) => {
    setTimeout(resolve, ms);
  });

// GitHub applies a "secondary" (burst) rate limit in addition to the hourly
// quota: a rapid run of requests is rejected with 403/429 even while hourly
// budget remains. The contributor pull issues one call per file, so a fast run
// trips it. `hdoc validate` used to avoid this only by accident — verbose
// console output slowed the loop between calls — and --quiet mode removed that
// spacing, which is when the 403s started. GitHub API calls are therefore
// throttled to a fixed minimum interval, independent of console verbosity.

// Timestamp (ms since epoch) of the most recent GitHub API call; 0 = none yet.
let _last_github_call_at = 0;
// Host substring that identifies a GitHub API request.
const GITHUB_HOST = "api.github.com";
// Minimum spacing between consecutive GitHub API calls.
const GITHUB_MIN_INTERVAL_MS = 800;
// Upper bound on a single rate-limit wait: never block the whole build
// waiting on one rate-limited call.
const GITHUB_MAX_BACKOFF_MS = 60 * 1000;
27
+
28
// For a rate-limited response, work out how long to wait before retrying.
// Returns the wait in milliseconds (never negative), or null when the response
// is NOT a retryable rate limit (e.g. a genuine 403 auth/forbidden), in which
// case the caller should give up immediately.
//
// Signals checked, in order:
//   1. Retry-After, either as a number of seconds or as an HTTP-date (both
//      forms are permitted by the HTTP specification). A blank header is
//      treated as absent rather than as "retry now".
//   2. X-RateLimit-Remaining: 0 together with X-RateLimit-Reset (epoch
//      seconds).
const rate_limit_wait_ms = (response) => {
  if (response.status !== 403 && response.status !== 429) return null;

  const retry_after = response.headers.get("retry-after")?.trim();
  if (retry_after) {
    const secs = Number(retry_after);
    if (!Number.isNaN(secs)) return Math.max(0, secs * 1000);
    // Not numeric: try the HTTP-date form and wait until that instant.
    const retry_at = Date.parse(retry_after);
    if (!Number.isNaN(retry_at)) return Math.max(0, retry_at - Date.now());
  }

  const remaining = response.headers.get("x-ratelimit-remaining");
  const reset = response.headers.get("x-ratelimit-reset");
  if (remaining === "0" && reset !== null) {
    const reset_ms = Number(reset) * 1000 - Date.now();
    if (!Number.isNaN(reset_ms)) return Math.max(0, reset_ms);
  }

  // 403/429 with no rate-limit signal => auth/forbidden, not worth retrying.
  return null;
};
47
+
12
48
  // Wraps the built-in fetch() with automatic retry for transient errors.
13
49
  // Retries up to maxRetries times when the server returns an HTTP error status
14
- // >= 400, except for 401 (Unauthorized) and 403 (Forbidden) which are auth
15
- // failures that won't be resolved by retrying. Network errors (where fetch
50
+ // >= 400, except for 401 (Unauthorized) and non-rate-limit 403 (Forbidden)
51
+ // which are auth failures that won't be resolved by retrying. GitHub
52
+ // rate-limit responses (403/429 carrying Retry-After or X-RateLimit-Reset) ARE
53
+ // retried after honouring the indicated wait. Network errors (where fetch
16
54
  // itself throws) are also retried. Sets the module-level `retried` flag so
17
55
  // callers can detect and log a success-after-retry message.
18
56
  //
@@ -21,8 +59,18 @@
21
59
  // does not leave an already-aborted signal in place for the retries.
22
60
  const fetchWithRetry = async (url, options = {}, maxRetries = 5) => {
23
61
  const { timeoutMs, ...fetchOptions } = options;
62
+ const is_github = typeof url === "string" && url.includes(GITHUB_HOST);
24
63
  let retryCount = 0;
25
64
  while (true) {
65
+ // Space out GitHub API calls so a fast (quiet) run does not burst past
66
+ // the secondary rate limit. No effect on non-GitHub requests.
67
+ if (is_github) {
68
+ const since = Date.now() - _last_github_call_at;
69
+ if (since < GITHUB_MIN_INTERVAL_MS) {
70
+ await sleep(GITHUB_MIN_INTERVAL_MS - since);
71
+ }
72
+ _last_github_call_at = Date.now();
73
+ }
26
74
  // Create a fresh signal for each attempt; reusing an already-aborted
27
75
  // signal would cause every subsequent retry to abort immediately.
28
76
  const attemptOptions = timeoutMs
@@ -38,10 +86,19 @@
38
86
  retried = true;
39
87
  continue;
40
88
  }
41
- // Auth failures and successes are not retried
42
- if (response.ok || response.status === 401 || response.status === 403) {
89
+ if (response.ok || response.status === 401) {
43
90
  return response;
44
91
  }
92
+ // 403/429: retry only when it is a rate limit we can wait out.
93
+ if (response.status === 403 || response.status === 429) {
94
+ const wait_ms = rate_limit_wait_ms(response);
95
+ if (wait_ms === null) return response; // auth/forbidden — give up
96
+ retryCount++;
97
+ if (retryCount > maxRetries) return response;
98
+ retried = true;
99
+ await sleep(Math.min(wait_ms, GITHUB_MAX_BACKOFF_MS));
100
+ continue;
101
+ }
45
102
  retryCount++;
46
103
  if (retryCount > maxRetries) return response;
47
104
  retried = true;