hdoc-tools 0.51.0 → 0.52.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,12 @@
1
+ // Disable Puppeteer's own bundled browser download (install.mjs). hdoc-tools
2
+ // provisions Chrome + chrome-headless-shell itself via hdoc-install-browser.js,
3
+ // which is idempotent and self-healing, and retries — unlike the bundled download,
4
+ // whose single partial-extract failure on some Windows Server boxes aborts the
5
+ // whole `npm install`. Browsers are still fetched, just by our controlled path.
6
+ //
7
+ // This file is discovered by Puppeteer (cosmiconfig) walking up from node_modules/.../puppeteer
8
+ // to this package root during install. skipDownload only affects provisioning,
9
+ // not where the browser is launched from at runtime.
10
+ module.exports = {
11
+ skipDownload: true,
12
+ };
package/hdoc-build.js CHANGED
@@ -11,10 +11,9 @@
11
11
  const hdoc_build_onyx = require(path.join(__dirname, "hdoc-build-onyx.js"));
12
12
  const hdoc_build_pdf = require(path.join(__dirname, "hdoc-build-pdf.js"));
13
13
  const hdoc_index = require(path.join(__dirname, "hdoc-db.js"));
14
+ const hdoc_mermaid = require(path.join(__dirname, "hdoc-mermaid.js"));
14
15
  const archiver = require("archiver");
15
16
 
16
- const { execSync } = require("child_process");
17
-
18
17
  const h_tags_to_search = ["h1", "h2", "h3"];
19
18
  const image_extensions = ["png", "svg", "jpg"];
20
19
 
@@ -44,14 +43,6 @@
44
43
  "mermaid-theme.yaml",
45
44
  );
46
45
 
47
- let mermaid_puppeteer_config_path = path.resolve(
48
- templates_path,
49
- "mermaid-puppeteer-config.json",
50
- );
51
- if (process.platform === "win32") {
52
- mermaid_puppeteer_config_path = `"${mermaid_puppeteer_config_path}"`
53
- }
54
-
55
46
  const pdf_template_path = path.join(__dirname, "templates", "pdf");
56
47
  const ui_css_path = path.join(__dirname, "ui", "css");
57
48
  const pdf_template_file_path = path.join(pdf_template_path, "template.html");
@@ -70,6 +61,10 @@
70
61
  const redirects = {};
71
62
  const static_html_files = [];
72
63
  const mermaid_failures = [];
64
+ // Mermaid diagrams collected during (synchronous) markdown rendering, then
65
+ // rendered to SVG in a batch afterwards (puppeteer is async; the markdown-it
66
+ // highlight callback is not). Keyed by output path to dedupe identical diagrams.
67
+ const mermaid_queue = new Map();
73
68
 
74
69
  let bc = {}; // Breadcrumbs map
75
70
  let book_read_time = 0;
@@ -117,38 +112,30 @@
117
112
  typographer: true,
118
113
  highlight: function (str, lang) {
119
114
  if (lang === "mermaid" && process.env.GITHUB_ACTIONS !== 'true') {
120
- try {
121
- const tmpInput = hdoc.tmp_file_sync({ postfix: ".mmd" });
122
- const outputFileName = `mermaid-${crypto.createHash("sha256").update(str).digest("hex").slice(0, 16)}.svg`;
123
- const outputPath = path.join(mermaid_images_path, outputFileName);
124
- const outputLink = `/_books/${doc_id}/mermaid-images/${outputFileName}`;
125
-
126
- if (!str.startsWith('---')) {
127
- str = '---\n' + fs.readFileSync(mermaid_theme_path, {encoding: 'utf-8'}) + `\n---\n${str}`;
128
- }
129
-
130
- fs.writeFileSync(tmpInput.name, str);
131
- let cmd = `${__dirname}/node_modules/.bin/mmdc`;
132
-
133
- if (process.platform === "win32") {
134
- cmd = `"${cmd}.cmd"`;
135
- }
136
-
137
- cmd = `${cmd} -i "${tmpInput.name}" -o "${outputPath}" --backgroundColor transparent --puppeteerConfigFile ${mermaid_puppeteer_config_path}`;
138
- console.log(`Generating Mermaid SVG found in ${currentMdFilePath.relativePath} - ${outputPath}`);
139
- execSync(cmd);
140
-
141
- if (!fs.existsSync(outputPath)) {
142
- throw new Error("mmdc did not generate output");
143
- }
144
-
145
- tmpInput.removeCallback();
115
+ // markdown-it's highlight callback is synchronous, but Mermaid
116
+ // rendering is async (puppeteer). So here we only *queue* the
117
+ // diagram and emit the <img> tag pointing at its eventual SVG;
118
+ // the actual rendering happens afterwards in flush_mermaid_queue, which
119
+ // runs after each file's md.render() and again before validation/zip.
120
+ const outputFileName = `mermaid-${crypto.createHash("sha256").update(str).digest("hex").slice(0, 16)}.svg`;
121
+ const outputPath = path.join(mermaid_images_path, outputFileName);
122
+ const outputLink = `/_books/${doc_id}/mermaid-images/${outputFileName}`;
123
+
124
+ let definition = str;
125
+ if (!definition.startsWith('---')) {
126
+ definition = '---\n' + fs.readFileSync(mermaid_theme_path, {encoding: 'utf-8'}) + `\n---\n${definition}`;
127
+ }
146
128
 
147
- return `<img class="mermaid-diagram" src="${outputLink}" alt="Mermaid Diagram">`;
148
- } catch (err) {
149
- mermaid_failures.push({path: currentMdFilePath.relativePath, error: err.message});
150
- return ``;
129
+ // Dedupe identical diagrams (same hash -> same output file).
130
+ if (!mermaid_queue.has(outputPath)) {
131
+ mermaid_queue.set(outputPath, {
132
+ definition,
133
+ outputPath,
134
+ relativePath: currentMdFilePath.relativePath,
135
+ });
151
136
  }
137
+
138
+ return `<img class="mermaid-diagram" src="${outputLink}" alt="Mermaid Diagram">`;
152
139
  }
153
140
  }
154
141
  });
@@ -197,6 +184,35 @@
197
184
  // generates a PDF, and writes the final HTML to disk ready for indexing.
198
185
  // Markdown files are rendered to HTML first; static HTML files are processed
199
186
  // in place. Both paths share the same post-processing pipeline.
187
+ // flush_mermaid_queue: render every queued-but-not-yet-rendered Mermaid diagram to its SVG file,
188
+ // reusing the already-open browser. Each diagram's render is memoized on the
189
+ // queue entry (item.promise), so concurrent transform_file calls and the final
190
+ // safety flush never render the same diagram twice. Failures are collected in
191
+ // mermaid_failures rather than thrown, matching the previous mmdc behavior.
192
+ const flush_mermaid_queue = async () => {
193
+ await Promise.all(
194
+ [...mermaid_queue.values()].map((item) => {
195
+ if (!item.promise) {
196
+ item.promise = (async () => {
197
+ try {
198
+ console.log(`Generating Mermaid SVG found in ${item.relativePath} - ${item.outputPath}`);
199
+ const svg = await hdoc_mermaid.render_svg(browser, item.definition, {
200
+ backgroundColor: "transparent",
201
+ });
202
+ if (!svg || svg.indexOf("<svg") === -1) {
203
+ throw new Error("Mermaid produced no SVG output");
204
+ }
205
+ fs.writeFileSync(item.outputPath, svg);
206
+ } catch (err) {
207
+ mermaid_failures.push({ path: item.relativePath, error: err.message });
208
+ }
209
+ })();
210
+ }
211
+ return item.promise;
212
+ }),
213
+ );
214
+ };
215
+
200
216
  const transform_file = async (file_path) => {
201
217
  const is_markdown = path.extname(file_path.path) === '.md';
202
218
  if (is_markdown) conversion_attempted++;
@@ -254,6 +270,11 @@
254
270
  // Render markdown into HTML
255
271
  html_txt = md.render(md_txt);
256
272
 
273
+ // md.render() synchronously queued any Mermaid diagrams in this file.
274
+ // Render them to SVG now (memoized/deduped) so the files exist before
275
+ // PDF generation below reads them, and before validation/zipping.
276
+ await flush_mermaid_queue();
277
+
257
278
  // Single pass: wrap h2/h3 divs + extract heading, paragraph, read-time.
258
279
  // Replaces separate wrapHContent + getFirstHTMLHeading + get_html_read_time calls.
259
280
  const extracted = hdoc.wrapAndExtract(html_txt, h_tags_to_search);
@@ -1279,6 +1300,10 @@
1279
1300
  );
1280
1301
  }
1281
1302
 
1303
+ // Safety net: render any Mermaid diagrams not already flushed during
1304
+ // transform_file (no-op if all are done — rendering is memoized per diagram).
1305
+ await flush_mermaid_queue();
1306
+
1282
1307
  // Output to console
1283
1308
  console.log(`\n MD files found: ${conversion_attempted}`);
1284
1309
  console.log(`Successfully converted to HTML: ${conversion_success}`);
@@ -1418,7 +1443,7 @@
1418
1443
  const zip_path = path.join(work_path, `${doc_id}.zip`);
1419
1444
 
1420
1445
  const output = fs.createWriteStream(zip_path);
1421
- const archive = archiver("zip");
1446
+ const archive = new archiver.ZipArchive();
1422
1447
  archive.on("error", (err) => {
1423
1448
  throw err;
1424
1449
  });