@udx/md2html 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/index.js ADDED
@@ -0,0 +1,738 @@
1
+ #!/usr/bin/env node
2
+
3
+ /**
4
+ * UDX Markdown to HTML Converter (md2html)
5
+ * ========================================
6
+ *
7
+ * ## Usage:
8
+ *
9
+ * Basic conversion:
10
+ * node tools/md2html/index.js --src content/architecture/deploying-with-impunity --out site/static/docs/architecture/deploying-with-impunity/index.html
11
+ *
12
+ * Watch mode:
13
+ * node tools/md2html/index.js --src content/architecture/devsoc-2025 --out site/static/docs/architecture/devsoc-2025/index.html --watch
14
+ *
15
+ * Debug mode:
16
+ * node tools/md2html/index.js --src content/architecture/decentralized-devops --out site/static/docs/architecture/decentralized-devops/index.html --debug
17
+ *
18
+ * ## Purpose:
19
+ * - Convert a directory of markdown files or a single markdown file into a styled HTML document
20
+ * - Generate documentation with Google Docs-like formatting for professional presentations
21
+ * - Support automatic rebuilding of documents when source files change (watch mode)
22
+ * - Automate documentation generation in CI/CD pipelines
23
+ *
24
+ * ## Inputs:
25
+ * - Source directory containing markdown files OR a single markdown file
26
+ * - Output file path for the generated HTML document
27
+ * - Optional flags for watch mode and debug logging
28
+ *
29
+ * ## Outputs:
30
+ * - A single HTML document with properly styled content, table of contents, and formatted code blocks
31
+ * - Debug logs when debug mode is enabled
32
+ *
33
+ * ## Key Features:
34
+ * - Combines multiple markdown files into a single document, sorted by numeric prefixes
35
+ * - Extracts metadata from markdown comments (title, author, description, date, version)
36
+ * - Transforms internal links between markdown files into working anchor links
37
+ * - Handles images with support for various path resolution strategies
38
+ * - Generates a table of contents from headings
39
+ * - Applies syntax highlighting to code blocks
40
+ * - Supports watch mode for automatic rebuilding
41
+ *
42
+ * @version 1.0.0
43
+ * @author UDX Team
44
+ * @copyright 2025
45
+ */
46
+
47
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';

import chokidar from 'chokidar';
import { program } from 'commander';
import Handlebars from 'handlebars';
import { marked } from 'marked';
53
+
54
// Renderer instance handed to marked. Its `code` hook is overridden below so
// that fenced ```math blocks are emitted as `$$ ... $$` display-math
// containers instead of <pre><code> blocks.
const renderer = new marked.Renderer();

// General marked configuration (GitHub-flavoured markdown, no hard breaks).
// NOTE(review): `sanitize`, `smartLists` and `smartypants` look like options
// that newer marked releases no longer support - verify against the
// installed marked version before relying on them.
marked.setOptions({
  renderer,
  gfm: true,
  breaks: false,
  pedantic: false,
  sanitize: false,
  smartLists: true,
  smartypants: false,
});

/**
 * Escapes the characters that would otherwise be parsed as markup when a
 * formula is embedded in HTML (e.g. `a<b` opening a bogus tag). The text a
 * browser exposes for the element is still the original formula.
 * @param {string} source - Raw formula source
 * @returns {string} Formula that is safe to place inside an element body
 */
function escapeMathHtml(source) {
  return source
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

// Keep the stock implementation so every non-math block renders as before.
const originalCodeRenderer = renderer.code.bind(renderer);

/**
 * Renders fenced code blocks, turning ```math blocks into display math.
 *
 * Accepts both call shapes: `(code, language, escaped)` and a single token
 * object carrying `text` / `lang` (presumably what newer marked releases
 * pass - confirm against the installed version).
 * @returns {string} HTML for the block
 */
renderer.code = function (code, language, ...rest) {
  const isToken = code !== null && typeof code === 'object';
  const blockLanguage = isToken ? code.lang : language;

  if (blockLanguage === 'math') {
    const rawSource = isToken ? code.text : code;
    // Drop stray fence markers that may have leaked into the block content.
    const cleanCode = String(rawSource ?? '').replace(/^```math\s*|```$/g, '');
    return `<div class="math-block">$$\n${escapeMathHtml(cleanCode)}\n$$</div>`;
  }

  // Forward every argument untouched (including the `escaped` flag).
  return originalCodeRenderer(code, language, ...rest);
};
81
+
82
// File paths for external resources shipped next to this module.
// `fileURLToPath` is used instead of `new URL(...).pathname` because the raw
// pathname stays percent-encoded (spaces become %20) and carries a leading
// slash before Windows drive letters, both of which break `fs` lookups.
const STATIC_DIR = path.join(path.dirname(fileURLToPath(import.meta.url)), 'static');
const TEMPLATE_PATH = path.join(STATIC_DIR, 'view.hbs');
const STYLES_PATH = path.join(STATIC_DIR, 'styles.css');
const CHAPTER_NAV_STYLES_PATH = path.join(STATIC_DIR, 'chapter-navigation.css');
const SCRIPTS_PATH = path.join(STATIC_DIR, 'scripts.js');
87
+
88
// Command-line interface definition.
program
  .version('1.0.0')
  .description('Convert markdown files to a single HTML document with Google Docs styling')
  .option('-s, --src <directory>', 'Source directory containing markdown files')
  .option('-o, --out <file>', 'Output HTML file path')
  .option('-w, --watch', 'Watch for changes and rebuild automatically', false)
  .option('-d, --debug', 'Enable debug logging', false)
  .parse(process.argv);

const options = program.opts();

// Both --src and --out are mandatory.
if (!options.src || !options.out) {
  console.error('Error: Source directory and output file are required');
  // `program.help()` terminates the process itself with a success status,
  // which made the explicit failure exit below unreachable. `outputHelp()`
  // only prints, so the missing-argument case really exits with status 1.
  program.outputHelp();
  process.exit(1);
}

// Absolute paths, resolved against the current working directory.
const srcPath = path.resolve(options.src);
const outputFile = path.resolve(options.out);
107
+
108
/**
 * Prints a diagnostic line prefixed with "[DEBUG]"; silent unless the
 * --debug flag was passed on the command line.
 * @param {string} message - Text to log
 */
const debug = (message) => {
  if (!options.debug) {
    return;
  }
  console.log(`[DEBUG] ${message}`);
};
113
+
114
/** Sort position given to markdown files that have no numeric "NN_" prefix. */
const UNNUMBERED_FILE_ORDER = 999;

/** H2 titles (lower-cased) that are never listed as navigable chapter sections. */
const EXCLUDED_SECTION_TITLES = [
  'abstract', 'executive summary', 'introduction',
  'appendix', 'further reading', 'references',
];

/** Returns the numeric ordering prefix of a file name, or the unnumbered fallback. */
function fileOrder(fileName) {
  const prefix = fileName.match(/^(\d+)_/);
  return prefix ? Number.parseInt(prefix[1], 10) : UNNUMBERED_FILE_ORDER;
}

/** Strips a leading "NN_" prefix, keeping the name intact when nothing would remain. */
function stripNumericPrefix(name) {
  const prefixed = name.match(/^\d+_(.+)$/);
  return prefixed ? prefixed[1] : name;
}

/** Turns heading text into the anchor slug used for cross-file links. */
function slugifyHeading(text) {
  return text.toLowerCase()
    .replace(/\s+/g, '-') // Replace whitespace runs with hyphens
    .replace(/[^\w-]+/g, '') // Remove non-word chars except hyphens
    .replace(/--+/g, '-') // Collapse repeated hyphens
    .replace(/^-+|-+$/g, ''); // Trim hyphens from both ends
}

/**
 * Resolves the source argument to an ordered list of markdown files.
 * Returns null when the source is neither a directory nor a `.md` file.
 */
function resolveMarkdownSources(srcDir) {
  const srcStat = fs.statSync(srcDir);

  if (srcStat.isDirectory()) {
    const files = fs.readdirSync(srcDir)
      .filter((file) => file.endsWith('.md'))
      // Numeric prefix first; the name breaks ties so the order does not
      // depend on what order the file system lists entries in.
      .sort((a, b) => fileOrder(a) - fileOrder(b) || a.localeCompare(b))
      .map((file) => path.join(srcDir, file));
    debug(`Found ${files.length} markdown files`);
    return { files, isDirectory: true };
  }

  if (srcStat.isFile() && srcDir.endsWith('.md')) {
    debug('Processing single markdown file');
    return { files: [srcDir], isDirectory: false };
  }

  return null;
}

/**
 * Rewrites `$formula$` to `\(formula\)` outside of code.
 * Fenced blocks and inline code spans are skipped so shell variables such as
 * `$HOME ... $USER` survive, and empty matches are skipped so `$$` display
 * delimiters are not turned into `\(\)`.
 */
function convertInlineMath(markdown) {
  const codeOrMath = /(```[\s\S]*?```|~~~[\s\S]*?~~~|`[^`\n]*`)|\$(.*?)\$/g;
  return markdown.replace(codeOrMath, (match, code, formula) => {
    if (code !== undefined || formula === '') {
      return match;
    }
    // A bare number between dollar signs is most likely a currency amount.
    if (/^\s*\d+(\.\d+)?\s*$/.test(formula)) {
      return match;
    }
    return `\\(${formula}\\)`;
  });
}

/** Warns about local image references that cannot be found near the sources. */
function warnAboutMissingImages(markdown, srcDir, srcIsDirectory) {
  // For a single-file source the lookups are relative to the file's folder.
  const baseDir = srcIsDirectory ? srcDir : path.dirname(srcDir);
  const parentDir = path.dirname(baseDir);

  for (const [, imageAlt, rawTarget] of markdown.matchAll(/!\[(.*?)\]\((.*?)\)/g)) {
    // Drop an optional link title: ![alt](path "title")
    const imagePath = rawTarget.trim().replace(/\s+["'(].*$/, '');

    // Skip URLs (http:, https:, data:, protocol-relative); only local files are checked.
    if (imagePath === '' || /^(?:[a-z][a-z0-9+.-]*:|\/\/)/i.test(imagePath)) {
      continue;
    }

    const baseName = path.basename(imagePath);
    const candidates = [
      path.resolve(baseDir, imagePath),
      path.resolve(parentDir, imagePath),
      path.resolve(baseDir, baseName),
      path.resolve(baseDir, 'attachments', baseName),
      path.resolve(parentDir, 'attachments', baseName),
    ];

    const foundAt = candidates.find((candidate) => fs.existsSync(candidate));
    if (foundAt) {
      debug(`Found image at: ${foundAt}`);
    } else {
      console.warn(`Warning: Image not found: ${imagePath} (${imageAlt})`);
    }
  }
}

/** Maps each markdown file name (with extension) to the anchor of its first heading. */
function buildSectionIdMap(documents) {
  const fileToSectionMap = new Map();

  for (const { baseName, content } of documents) {
    const headingMatch = content.match(/^#\s+(.*?)$/m);
    if (headingMatch) {
      const sectionId = slugifyHeading(headingMatch[1]);
      fileToSectionMap.set(baseName, sectionId);
      debug(`Mapped file ${baseName} to section ID ${sectionId}`);
      continue;
    }

    // No heading: fall back to the file name without its numeric prefix.
    const fileNameMatch = baseName.match(/\d+_([\w-]+)\.md$/);
    const rawId = fileNameMatch ? fileNameMatch[1] : baseName.replace(/\.md$/, '');
    const fallbackId = rawId.toLowerCase().replace(/[^\w-]+/g, '-');
    fileToSectionMap.set(baseName, fallbackId);
    debug(`Mapped file ${baseName} to fallback ID ${fallbackId}`);
  }

  return fileToSectionMap;
}

/**
 * Rewrites `[text](file.md)` links that point at one of the combined files
 * into in-document anchor links. Unknown and external links are left alone.
 * NOTE(review): targets are slugs of each file's first heading; whether an
 * element with that id exists depends on the heading ids marked emits.
 */
function rewriteInternalLinks(markdown, fileToSectionMap) {
  // Secondary index that ignores numeric prefixes (first file wins).
  const sectionByUnprefixedName = new Map();
  for (const [fileName, sectionId] of fileToSectionMap) {
    const key = stripNumericPrefix(fileName);
    if (!sectionByUnprefixedName.has(key)) {
      sectionByUnprefixedName.set(key, sectionId);
    }
  }

  return markdown.replace(/\[([^\]]+)\]\(([^)]+\.md)\)/g, (match, linkText, href) => {
    // Links carrying a URL scheme point outside the document set.
    if (/^[a-z][a-z0-9+.-]*:/i.test(href)) {
      return match;
    }

    const linkedFileName = path.basename(href);
    let targetSectionId = fileToSectionMap.get(linkedFileName);
    if (targetSectionId !== undefined) {
      debug(`Direct match: ${linkedFileName} -> #${targetSectionId}`);
    } else {
      targetSectionId = sectionByUnprefixedName.get(stripNumericPrefix(linkedFileName));
      if (targetSectionId !== undefined) {
        debug(`Fuzzy match: ${linkedFileName} -> #${targetSectionId}`);
      }
    }

    return targetSectionId ? `[${linkText}](#${targetSectionId})` : match;
  });
}

/** Collects the H2 headings of every file as chapter-section descriptors. */
function collectChapterSections(documents) {
  const chaptersInfo = [];

  for (const { fileName, chapterId, content } of documents) {
    for (const [, , rawTitle] of content.matchAll(/^(#{2})\s+(.+)$/gm)) {
      // Trimming also removes the "\r" that CRLF files leave on the capture.
      const title = rawTitle.trim();
      const normalizedTitle = title.toLowerCase();
      if (EXCLUDED_SECTION_TITLES.includes(normalizedTitle) ||
          normalizedTitle === chapterId.toLowerCase()) {
        continue;
      }

      const headingId = normalizedTitle
        .replace(/[^\w]+/g, '-')
        .replace(/^-|-$/g, '');
      chaptersInfo.push({
        id: `${chapterId}-${headingId}`,
        title,
        level: 2,
        file: fileName,
      });
    }
  }

  return chaptersInfo;
}

/** Applies the HTML clean-ups that run directly on marked's output. */
function postProcessHtml(html) {
  let processed = html;

  // Expose the language of highlighted code blocks on the <pre> element.
  processed = processed.replace(
    /<pre><code class="language-(\w+)">/g,
    (match, lang) => `<pre data-file-type="${lang}"><code class="language-${lang}">`
  );

  // Code blocks whose content still carries literal ``` fences.
  processed = processed.replace(
    /<pre><code>(```(\w+)[\s\S]*?```)<\/code><\/pre>/g,
    (match, content, lang) =>
      `<pre data-file-type="${lang}"><code class="language-${lang}">${content.replace(/```\w+\n|```$/g, '')}</code></pre>`
  );

  // Generic code blocks without a language.
  processed = processed.replace(/<pre><code>(?!<)/g, '<pre data-file-type="code"><code>');

  // An image followed by an emphasised paragraph becomes a captioned figure.
  processed = processed.replace(
    /<p><img src="([^"]+)"([^>]*)>\s*<\/p>\s*<p><em>([^<]+)<\/em><\/p>/g,
    (match, src, attrs, caption) => {
      const altMatch = attrs.match(/alt="([^"]*)"/);
      const alt = altMatch ? altMatch[1] : '';
      return `<figure><img src="${src}"${attrs}><figcaption>${caption}</figcaption><span class="visually-hidden">${alt}</span></figure>`;
    }
  );

  // NOTE(review): aria-hidden="true" hides table content from assistive
  // technology - confirm this is intended for data tables.
  processed = processed.replace(/<table>/g, '<table aria-hidden="true" role="presentation">');

  // Ask imgix for a 1600px rendition unless a width is already requested.
  // The query string is excluded from the URL capture so that an existing
  // "?..." is extended with "&" rather than receiving a second "?".
  processed = processed.replace(
    /src="(https:\/\/[^"?]*imgix\.net\/[^"?]*)(?:\?([^"]*))?"/g,
    (match, url, params) => {
      if (!params) {
        return `src="${url}?w=1600"`;
      }
      if (/(?:^|&|;)w=/.test(params)) {
        return match;
      }
      return `src="${url}?${params}&amp;w=1600"`;
    }
  );

  return processed;
}

/**
 * Wraps H1 headings that match a chapter title in a <section>, then gives
 * every plain-text h2-h6 heading an id and a data-heading attribute.
 */
function structureHeadings(html, chaptersInfo) {
  let structured = html;
  let headingCounter = 0;

  for (const chapter of chaptersInfo) {
    const chapterHeadingRegex = new RegExp(
      `<h1[^>]*>(${chapter.title.replace(/[.*+?^${}()|[\]\\]/g, '\\$&')})</h1>`,
      'i'
    );
    if (!chapterHeadingRegex.test(structured)) {
      continue;
    }

    headingCounter += 1;
    const openingHeading = `<h1 id="heading-${headingCounter}">`;
    // A replacer function keeps "$" sequences in ids/titles literal.
    structured = structured.replace(
      chapterHeadingRegex,
      (whole, headingText) =>
        `<section id="${chapter.id}" class="content-section" data-chapter-id="${chapter.id}" data-file="${chapter.file}">\n${openingHeading}${headingText}</h1>`
    );

    // Close the section before the next h1, or at the end of the content.
    const nextH1Index = structured.indexOf('<h1', structured.indexOf(openingHeading) + 1);
    if (nextH1Index !== -1) {
      structured = `${structured.slice(0, nextH1Index)}</section>\n${structured.slice(nextH1Index)}`;
    } else {
      structured += '\n</section>';
    }
  }

  // First chapter section wins when several share a title.
  const idByTitle = new Map();
  for (const { title, id } of chaptersInfo) {
    if (!idByTitle.has(title)) {
      idByTitle.set(title, id);
    }
  }

  return structured.replace(
    /<h([2-6])(?:[^>]*)>([^<]+)<\/h\1>/g,
    (match, level, text) => {
      const textContent = text.trim();
      let headingId = idByTitle.get(textContent);
      if (!headingId) {
        headingCounter += 1;
        headingId = `heading-${headingCounter}`;
      }
      return `<h${level} id="${headingId}" data-heading="${textContent.replace(/"/g, '&quot;')}"><span>${textContent}</span></h${level}>`;
    }
  );
}

/** Builds the table of contents from the h2/h3 headings that carry an id. */
function buildTableOfContents(html) {
  let tocHTML = '<div class="toc">\n<h2>Table of Contents</h2>\n<ul>';
  const headingPattern = /<h([23])\b[^>]*?\sid="([^"]+)"[^>]*>([\s\S]*?)<\/h\1>/g;

  for (const [, level, id, inner] of html.matchAll(headingPattern)) {
    const text = inner.replace(/<[^>]+>/g, '').trim();
    const indent = level === '3' ? '  ' : '';
    tocHTML += `\n${indent}<li><a href="#${id}">${text}</a></li>`;
  }

  return `${tocHTML}\n</ul>\n</div>\n`;
}

/**
 * Builds the HTML document from markdown files.
 *
 * Reads the template, styles and scripts, combines the markdown sources into
 * one document (one <section> per file), converts it with marked, applies the
 * HTML post-processing steps and writes the rendered template to disk.
 * Every failure is reported on the console and turned into a `false` result;
 * the returned promise does not reject.
 *
 * @param {string} srcDir - Source directory containing markdown files, or a single `.md` file
 * @param {string} outputFile - Output HTML file path
 * @returns {Promise<boolean>} Success status
 */
async function buildHtml(srcDir, outputFile) {
  debug(`Building HTML from ${srcDir} to ${outputFile}`);

  try {
    // Load external templates and styles
    const templateSource = fs.readFileSync(TEMPLATE_PATH, 'utf8');
    const cssStyles = fs.readFileSync(STYLES_PATH, 'utf8');
    const chapterNavStyles = fs.readFileSync(CHAPTER_NAV_STYLES_PATH, 'utf8');
    const jsScripts = fs.readFileSync(SCRIPTS_PATH, 'utf8');

    // Register custom Handlebars helpers
    Handlebars.registerHelper('slugify', (text) => String(text ?? '')
      .toLowerCase()
      .replace(/\s+/g, '-') // Replace spaces with -
      .replace(/[^\w-]+/g, '') // Remove all non-word chars
      .replace(/--+/g, '-') // Replace multiple - with single -
      .replace(/^-+/, '') // Trim - from start of text
      .replace(/-+$/, '') // Trim - from end of text
      .substring(0, 64)); // Limit length
    Handlebars.registerHelper('eq', (a, b) => a === b);

    const template = Handlebars.compile(templateSource);

    const sources = resolveMarkdownSources(srcDir);
    if (sources === null) {
      console.error('Error: Source must be a markdown file or directory containing markdown files');
      return false;
    }
    if (sources.files.length === 0) {
      console.error('Error: No markdown files found');
      return false;
    }

    // Read every file exactly once so all later steps see the same content.
    const documents = sources.files.map((file) => {
      debug(`Processing file: ${file}`);
      const fileName = path.basename(file, '.md');
      return {
        baseName: path.basename(file),
        fileName,
        chapterId: fileName.replace(/^\d+_/, ''),
        content: fs.readFileSync(file, 'utf8'),
      };
    });

    let title = '';
    let description = 'Generated documentation';
    let author = 'UDX';
    let date = new Date().toLocaleDateString('en-US', {
      year: 'numeric',
      month: 'long',
      day: 'numeric',
    });
    let version = 'v8';
    let combinedMarkdown = '';

    for (const { fileName, chapterId, content } of documents) {
      // Metadata lives in HTML comments such as <!-- title: ... -->
      const readMeta = (key) =>
        content.match(new RegExp(`<!--\\s*${key}:\\s*(.*?)\\s*-->`, 'i'))?.[1];

      // The first declared title wins; the other fields use the last value seen.
      const metaTitle = readMeta('title');
      if (!title && metaTitle) title = metaTitle;
      description = readMeta('description') ?? description;
      author = readMeta('author') ?? author;
      date = readMeta('date') ?? date;
      version = readMeta('version') ?? version;

      // Wrap the file content in a section identified by its chapter id.
      combinedMarkdown += `<!-- START SECTION: ${fileName} -->\n`;
      combinedMarkdown += `<section id="${chapterId}" class="content-section" data-chapter-id="${chapterId}" data-file="${fileName}">\n\n`;
      combinedMarkdown += content;
      combinedMarkdown += `\n\n</section>\n<!-- END SECTION: ${fileName} -->\n\n`;
    }

    // Without title metadata, use the first top-level heading.
    if (!title) {
      const headingMatch = combinedMarkdown.match(/^#\s+(.*?)$/m);
      title = headingMatch ? headingMatch[1] : 'Documentation';
    }

    combinedMarkdown = convertInlineMath(combinedMarkdown);
    warnAboutMissingImages(combinedMarkdown, srcDir, sources.isDirectory);
    combinedMarkdown = rewriteInternalLinks(combinedMarkdown, buildSectionIdMap(documents));

    const chaptersInfo = collectChapterSections(documents);

    // Convert once, then post-process the resulting HTML.
    let processedContent = postProcessHtml(marked.parse(combinedMarkdown));
    processedContent = structureHeadings(processedContent, chaptersInfo);

    // The TOC is generated after heading ids are final so its links resolve.
    if (processedContent.includes('<!-- TOC -->')) {
      const tocHTML = buildTableOfContents(processedContent);
      processedContent = processedContent.replace('<!-- TOC -->', () => tocHTML);
    }

    // Chapter structure as JSON in a non-executed script tag. "<" is escaped
    // so a heading containing "</script>" cannot terminate the tag early.
    const chaptersJSON = JSON.stringify(chaptersInfo, null, 2).replace(/</g, '\\u003c');
    processedContent += `\n<script type="application/json" id="document-chapters-data">${chaptersJSON}</script>\n`;

    const html = template({
      title,
      description,
      author,
      date,
      version,
      content: processedContent,
      styles: `${cssStyles}\n${chapterNavStyles}`,
      scripts: jsScripts,
      chapters: chaptersInfo,
    });

    // `recursive: true` makes this a no-op when the directory already exists.
    fs.mkdirSync(path.dirname(outputFile), { recursive: true });
    fs.writeFileSync(outputFile, html);

    console.log(`HTML document generated successfully: ${outputFile}`);
    return true;
  } catch (error) {
    console.error(`Error building HTML: ${error.message}`);
    return false;
  }
}
649
+
650
/**
 * Watches for changes in markdown files and rebuilds HTML.
 *
 * Runs one build immediately, then rebuilds (debounced) whenever a markdown
 * file under the source is changed, added or removed.
 *
 * @param {string} srcDir - Source directory containing markdown files, or a single markdown file
 * @param {string} outputFile - Output HTML file path
 */
function watchMarkdown(srcDir, outputFile) {
  console.log(`Watching for changes in ${srcDir}...`);

  // Build HTML initially. buildHtml reports its own failures; the catch only
  // guards against an unexpected rejection being left unhandled.
  buildHtml(srcDir, outputFile).catch((error) => {
    console.error(`❌ Error building HTML: ${error.message}\n`);
  });

  // Determine whether a single file or a whole directory is being watched
  const resolvedSrcDir = path.resolve(srcDir);
  let isDirectory;
  try {
    isDirectory = fs.statSync(resolvedSrcDir).isDirectory();
  } catch (error) {
    // A missing source would otherwise surface as an uncaught exception.
    console.error(`❌ Cannot watch ${resolvedSrcDir}: ${error.message}`);
    process.exitCode = 1;
    return;
  }

  // Set up the watcher with reliable configuration
  const watcher = chokidar.watch(resolvedSrcDir, {
    persistent: true,
    ignoreInitial: true,
    usePolling: true, // More reliable but uses more CPU
    interval: 1000, // Poll every 1000ms = 1 second
    awaitWriteFinish: {
      stabilityThreshold: 2000, // Wait 2 seconds of stability before triggering
      pollInterval: 500, // Poll every 500ms during stability wait
    },
    // Ignore dotfiles and node_modules, with either path separator
    ignored: /(^|[\/\\])\..|node_modules/,
    depth: isDirectory ? undefined : 0,
    alwaysStat: false,
    atomic: true, // Handle atomic writes reliably
  });

  // Use debouncing to prevent multiple rebuilds for rapid changes
  let debounceTimer;
  const debouncedRebuild = () => {
    clearTimeout(debounceTimer);
    debounceTimer = setTimeout(async () => {
      console.log('\n⏳ Regenerating HTML document...');
      try {
        // buildHtml signals failure through its return value, not by
        // throwing, so success is only announced when it returns true.
        const succeeded = await buildHtml(srcDir, outputFile);
        if (succeeded) {
          console.log(`✅ HTML document updated: ${outputFile}\n`);
        } else {
          console.error('❌ HTML document was not updated; see the error above.\n');
        }
      } catch (error) {
        console.error(`❌ Error rebuilding HTML: ${error.message}\n`);
      }
    }, 500); // 500ms debounce time
  };

  // File change event handler
  const handleChange = (filePath) => {
    // In directory mode only markdown files trigger a rebuild
    if (isDirectory && !filePath.endsWith('.md')) {
      return;
    }

    console.log(`📄 File changed: ${path.relative(process.cwd(), filePath)}`);
    debouncedRebuild();
  };

  // Set up event handlers
  watcher.on('change', handleChange);
  watcher.on('add', (filePath) => {
    if (filePath.endsWith('.md')) {
      console.log(`📄 File added: ${filePath}`);
      debouncedRebuild();
    }
  });
  watcher.on('unlink', (filePath) => {
    if (filePath.endsWith('.md')) {
      console.log(`📄 File removed: ${filePath}`);
      debouncedRebuild();
    }
  });

  // When ready, show a clear message
  watcher.on('ready', () => {
    console.log('👀 Watching for file changes. Press Ctrl+C to stop.\n');
  });

  // Handle watch errors
  watcher.on('error', (error) => {
    console.error(`❌ Watch error: ${error}`);
  });
}
732
+
733
// Entry point: keep watching when --watch is set, otherwise build once.
if (options.watch) {
  watchMarkdown(srcPath, outputFile);
} else {
  // buildHtml reports failure through its boolean result. Propagate it as a
  // non-zero exit status so scripts and CI pipelines can detect a failed build.
  buildHtml(srcPath, outputFile)
    .then((succeeded) => {
      if (!succeeded) {
        process.exitCode = 1;
      }
    })
    .catch((error) => {
      console.error(`Error building HTML: ${error.message}`);
      process.exitCode = 1;
    });
}