@udx/md2html 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +219 -0
- package/index.js +738 -0
- package/package.json +48 -0
- package/static/chapter-navigation.css +213 -0
- package/static/scripts.js +410 -0
- package/static/styles.css +484 -0
- package/static/view.hbs +341 -0
package/index.js
ADDED
|
@@ -0,0 +1,738 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
/**
|
|
4
|
+
* UDX Markdown to HTML Converter (md2html)
|
|
5
|
+
* ========================================
|
|
6
|
+
*
|
|
7
|
+
* ## Usage:
|
|
8
|
+
*
|
|
9
|
+
* Basic conversion:
|
|
10
|
+
* node tools/md2html/index.js --src content/architecture/deploying-with-impunity --out site/static/docs/architecture/deploying-with-impunity/index.html
|
|
11
|
+
*
|
|
12
|
+
* Watch mode:
|
|
13
|
+
* node tools/md2html/index.js --src content/architecture/devsoc-2025 --out site/static/docs/architecture/devsoc-2025/index.html --watch
|
|
14
|
+
*
|
|
15
|
+
* Debug mode:
|
|
16
|
+
* node tools/md2html/index.js --src content/architecture/decentralized-devops --out site/static/docs/architecture/decentralized-devops/index.html --debug
|
|
17
|
+
*
|
|
18
|
+
* ## Purpose:
|
|
19
|
+
* - Convert a directory of markdown files or a single markdown file into a styled HTML document
|
|
20
|
+
* - Generate documentation with Google Docs-like formatting for professional presentations
|
|
21
|
+
* - Support automatic rebuilding of documents when source files change (watch mode)
|
|
22
|
+
* - Automate documentation generation in CI/CD pipelines
|
|
23
|
+
*
|
|
24
|
+
* ## Inputs:
|
|
25
|
+
* - Source directory containing markdown files OR a single markdown file
|
|
26
|
+
* - Output file path for the generated HTML document
|
|
27
|
+
* - Optional flags for watch mode and debug logging
|
|
28
|
+
*
|
|
29
|
+
* ## Outputs:
|
|
30
|
+
* - A single HTML document with properly styled content, table of contents, and formatted code blocks
|
|
31
|
+
* - Debug logs when debug mode is enabled
|
|
32
|
+
*
|
|
33
|
+
* ## Key Features:
|
|
34
|
+
* - Combines multiple markdown files into a single document, sorted by numeric prefixes
|
|
35
|
+
* - Extracts metadata from markdown comments (title, author, description, date, version)
|
|
36
|
+
* - Transforms internal links between markdown files into working anchor links
|
|
37
|
+
* - Handles images with support for various path resolution strategies
|
|
38
|
+
* - Generates a table of contents from headings
|
|
39
|
+
* - Applies syntax highlighting to code blocks
|
|
40
|
+
* - Supports watch mode for automatic rebuilding
|
|
41
|
+
*
|
|
42
|
+
* @version 1.0.0
|
|
43
|
+
* @author UDX Team
|
|
44
|
+
* @copyright 2025
|
|
45
|
+
*/
|
|
46
|
+
|
|
47
|
+
import fs from 'fs';
import path from 'path';
import { fileURLToPath } from 'url';
import { marked } from 'marked';
import { program } from 'commander';
import chokidar from 'chokidar';
import Handlebars from 'handlebars';
|
|
53
|
+
|
|
54
|
+
// Custom renderer for handling math blocks
const renderer = new marked.Renderer();

// Configure marked to properly handle math blocks
marked.setOptions({
  renderer: renderer,
  gfm: true,
  breaks: false,
  pedantic: false,
  sanitize: false,
  smartLists: true,
  smartypants: false
});

/**
 * Escapes the characters the HTML parser would otherwise treat as markup,
 * so a formula such as `a < b` or an alignment `&` reaches the page as text.
 * @param {string} source - Raw formula source
 * @returns {string} Formula source safe to place inside an HTML element
 */
function escapeMathForHtml(source) {
  return source
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;');
}

// Keep a bound reference to the stock code renderer so every non-math block
// is still rendered exactly as marked would render it.
const originalCodeRenderer = renderer.code.bind(renderer);

/**
 * Code renderer that turns fenced blocks tagged `math` into a
 * `<div class="math-block">` wrapped in `$$ ... $$` delimiters and delegates
 * every other block to the original renderer.
 * @param {string} code - Content of the fenced block
 * @param {string} [language] - Info string following the opening fence
 * @param {...*} rest - Any further arguments marked supplies; forwarded untouched
 * @returns {string} Rendered HTML
 */
renderer.code = function (code, language, ...rest) {
  // Only the first word of the info string names the language.
  const lang = typeof language === 'string' ? language.match(/^\S*/)[0] : language;

  if (lang === 'math') {
    // Clean up the code by removing any extra backticks or 'math' markers
    const cleanCode = code.replace(/^```math\s*|```$/g, '');
    return `<div class="math-block">$$\n${escapeMathForHtml(cleanCode)}\n$$</div>`;
  }

  return originalCodeRenderer(code, language, ...rest);
};
|
|
81
|
+
|
|
82
|
+
// File paths for external resources, resolved relative to this module's directory.
// fileURLToPath is used instead of URL.pathname because pathname keeps
// percent-escapes (e.g. "%20" for a space) and yields "/C:/..." on Windows,
// neither of which is a usable filesystem path.
const MODULE_DIR = path.dirname(fileURLToPath(import.meta.url));
const TEMPLATE_PATH = path.join(MODULE_DIR, 'static/view.hbs');
const STYLES_PATH = path.join(MODULE_DIR, 'static/styles.css');
const CHAPTER_NAV_STYLES_PATH = path.join(MODULE_DIR, 'static/chapter-navigation.css');
const SCRIPTS_PATH = path.join(MODULE_DIR, 'static/scripts.js');
|
|
87
|
+
|
|
88
|
+
// Command-line interface definition.
program
  .version('1.0.0')
  .description('Convert markdown files to a single HTML document with Google Docs styling')
  .option('-s, --src <directory>', 'Source directory containing markdown files')
  .option('-o, --out <file>', 'Output HTML file path')
  .option('-w, --watch', 'Watch for changes and rebuild automatically', false)
  .option('-d, --debug', 'Enable debug logging', false)
  .parse(process.argv);

const options = program.opts();

if (!options.src || !options.out) {
  console.error('Error: Source directory and output file are required');
  // outputHelp() only prints the help text. program.help() would terminate the
  // process itself with a success status, so the failing exit code below
  // would never be reached.
  program.outputHelp();
  process.exit(1);
}

// Absolute forms of the two required paths.
const srcPath = path.resolve(options.src);
const outputFile = path.resolve(options.out);
|
|
107
|
+
|
|
108
|
+
/**
 * Writes a "[DEBUG]"-prefixed line to stdout when the --debug flag is set;
 * does nothing otherwise.
 * @param {string} message - Text to log
 */
const debug = (message) => {
  if (!options.debug) {
    return;
  }
  console.log(`[DEBUG] ${message}`);
};
|
|
113
|
+
|
|
114
|
+
/** Level-2 heading titles that are never listed as navigable chapter sections. */
const EXCLUDED_SECTION_TITLES = [
  'abstract', 'executive summary', 'introduction',
  'appendix', 'further reading', 'references'
];

/** Returns the numeric ordering prefix of a file name ("03_intro.md" -> 3), or 999 when it has none. */
function chapterOrder(fileName) {
  const prefix = fileName.match(/^(\d+)_/);
  return Number.parseInt(prefix ? prefix[1] : '999', 10);
}

/** Removes a leading "<digits>_" ordering prefix from a file name, if present. */
function stripOrderPrefix(fileName) {
  const prefixed = fileName.match(/^\d+_(.+)$/);
  return prefixed ? prefixed[1] : fileName;
}

/** Resolves the source path to the ordered list of markdown files, or null (after logging) when the source is neither a directory nor a .md file. */
function collectMarkdownFiles(srcDir) {
  const srcStat = fs.statSync(srcDir);

  if (srcStat.isDirectory()) {
    const markdownFiles = fs.readdirSync(srcDir)
      .filter((file) => file.endsWith('.md'))
      .sort((a, b) => chapterOrder(a) - chapterOrder(b))
      .map((file) => path.join(srcDir, file));
    debug(`Found ${markdownFiles.length} markdown files`);
    return markdownFiles;
  }

  if (srcStat.isFile() && srcDir.endsWith('.md')) {
    debug('Processing single markdown file');
    return [srcDir];
  }

  console.error('Error: Source must be a markdown file or directory containing markdown files');
  return null;
}

/** Returns the value of a `<!-- key: value -->` metadata comment, or null when the comment is absent. */
function readMetadata(content, key) {
  const match = content.match(new RegExp(`<!--\\s*${key}:\\s*(.*?)\\s*-->`, 'i'));
  return match ? match[1] : null;
}

/** Rewrites `$formula$` as `\(formula\)`, leaving plain amounts such as `$5$` and the `$$` display delimiter untouched. */
function convertInlineMath(markdown) {
  return markdown.replace(/\$(.*?)\$/g, (match, formula) => {
    // Two adjacent dollar signs are a display-math delimiter, not an empty formula.
    if (formula === '') {
      return match;
    }
    // Skip if it's likely a currency amount
    if (/^\s*\d+(\.\d+)?\s*$/.test(formula)) {
      return match;
    }
    return `\\(${formula}\\)`;
  });
}

/** Warns about every local image reference that cannot be found under any of the known lookup locations; does not modify the markdown. */
function warnAboutMissingImages(markdown, srcDir) {
  for (const [, imageAlt, imagePath] of markdown.matchAll(/!\[(.*?)\]\((.*?)\)/g)) {
    // Skip external URLs
    if (imagePath.startsWith('http')) {
      continue;
    }

    const imageFile = path.basename(imagePath);
    const possiblePaths = [
      // Strategy 1: Relative to parent directory of srcDir
      path.resolve(path.dirname(srcDir), imagePath),
      // Strategy 2: Directly in srcDir
      path.resolve(srcDir, imageFile),
      // Strategy 3: Looking in attachments folder inside srcDir
      path.resolve(srcDir, 'attachments', imageFile),
      // Strategy 4: Same folder, with any remaining "/" or "\" directory part stripped
      path.resolve(srcDir, 'attachments', imageFile.replace(/^.*[\\\/]/, '')),
      // Strategy 5: Looking in srcDir parent's attachments folder
      path.resolve(path.dirname(srcDir), 'attachments', imageFile),
    ];

    const foundAt = possiblePaths.find((candidate) => fs.existsSync(candidate));
    if (foundAt) {
      debug(`Found image at: ${foundAt}`);
    } else {
      console.warn(`Warning: Image not found: ${imagePath} (${imageAlt})`);
    }
  }
}

/** Maps each markdown file name (including ".md") to the anchor derived from its first H1, or from the file name when it has no H1. */
function buildFileToSectionMap(chapters) {
  const fileToSectionMap = new Map();

  for (const { file, content } of chapters) {
    const fileName = path.basename(file);
    const headingMatch = content.match(/^#\s+(.*?)$/m);

    if (headingMatch) {
      const sectionId = headingMatch[1].toLowerCase()
        .replace(/[\s]+/g, '-')     // Replace spaces with hyphens
        .replace(/[^\w\-]+/g, '')   // Remove non-word chars except hyphens
        .replace(/--+/g, '-')       // Replace multiple hyphens with single
        .replace(/^-+|-+$/g, '');   // Trim hyphens from start and end

      fileToSectionMap.set(fileName, sectionId);
      debug(`Mapped file ${fileName} to section ID ${sectionId}`);
      continue;
    }

    // If no heading found, use the filename without number prefix as fallback
    const fileNameMatch = fileName.match(/\d+_([\w-]+)\.md$/);
    let fallbackId = fileName.replace(/\.md$/, '');
    if (fileNameMatch && fileNameMatch[1]) {
      fallbackId = fileNameMatch[1];
    }
    fallbackId = fallbackId.toLowerCase().replace(/[^\w\-]+/g, '-');
    fileToSectionMap.set(fileName, fallbackId);
    debug(`Mapped file ${fileName} to fallback ID ${fallbackId}`);
  }

  return fileToSectionMap;
}

/** Replaces `[text](some-file.md)` links that point at one of the combined files with `[text](#anchor)` links. */
function rewriteInternalLinks(markdown, fileToSectionMap) {
  return markdown.replace(/\[([^\]]+)\]\(([^\)]+\.md)\)/g, (match, linkText, href) => {
    const linkedFileName = path.basename(href);
    let targetSectionId = null;

    if (fileToSectionMap.has(linkedFileName)) {
      // Exact file name match (map keys include the ".md" extension)
      targetSectionId = fileToSectionMap.get(linkedFileName);
      debug(`Direct match: ${linkedFileName} -> #${targetSectionId}`);
    } else {
      // Fuzzy match by ignoring numeric prefixes on both sides
      const baseName = stripOrderPrefix(linkedFileName);
      for (const [fileName, sectionId] of fileToSectionMap) {
        if (stripOrderPrefix(fileName) === baseName) {
          targetSectionId = sectionId;
          debug(`Fuzzy match: ${linkedFileName} -> #${targetSectionId}`);
          break;
        }
      }
    }

    // Leave the link unchanged when no section could be identified
    return targetSectionId ? `[${linkText}](#${targetSectionId})` : match;
  });
}

/** Lists the level-2 headings of every file as `{ id, title, level, file }` records, skipping excluded titles and headings equal to the chapter id. */
function collectChapterSections(chapters) {
  const sections = [];

  for (const { fileName, chapterId, content } of chapters) {
    for (const [, , title] of content.matchAll(/^(#{2})\s+(.+)$/gm)) {
      const lowerTitle = title.toLowerCase();
      if (EXCLUDED_SECTION_TITLES.includes(lowerTitle) || lowerTitle === chapterId.toLowerCase()) {
        continue;
      }

      const headingId = lowerTitle
        .replace(/[^\w]+/g, '-')
        .replace(/^-|-$/g, '');

      sections.push({
        id: `${chapterId}-${headingId}`,
        title: title,
        level: 2,
        file: fileName
      });
    }
  }

  return sections;
}

/** Post-processes rendered HTML: tags code blocks with their file type, turns captioned images into figures, marks tables and adds a width to imgix URLs. */
function decorateRenderedHtml(renderedHtml) {
  let html = renderedHtml;

  // Add file type indicators to code blocks
  html = html.replace(/<pre><code class="language-(\w+)">/g, (match, lang) => {
    return `<pre data-file-type="${lang}"><code class="language-${lang}">`;
  });

  // Code blocks whose content still carries literal fences: lift the language out of the fence
  html = html.replace(/<pre><code>(```(\w+)[\s\S]*?```)<\/code><\/pre>/g, (match, content, lang) => {
    return `<pre data-file-type="${lang}"><code class="language-${lang}">${content.replace(/```\w+\n|```$/g, '')}</code></pre>`;
  });

  // Process generic code blocks without language specification
  html = html.replace(/<pre><code>(?!<)/g, '<pre data-file-type="code"><code>');

  // An image paragraph directly followed by an emphasised paragraph becomes a captioned figure
  html = html.replace(/<p><img src="([^"]+)"([^>]*)>\s*<\/p>\s*<p><em>([^<]+)<\/em><\/p>/g, (match, src, attrs, caption) => {
    const altMatch = attrs.match(/alt="([^"]*)"/);
    const alt = altMatch ? altMatch[1] : '';
    return `<figure><img src="${src}"${attrs}><figcaption>${caption}</figcaption><span class="visually-hidden">${alt}</span></figure>`;
  });

  // NOTE(review): aria-hidden="true" removes tables from the accessibility tree;
  // confirm this is intended for content tables.
  html = html.replace(/<table>/g, '<table aria-hidden="true" role="presentation">');

  // If it's an imgix URL without a width, add one. The path part excludes "?" so an
  // existing query string is captured separately instead of being swallowed by the path.
  html = html.replace(/src="(https:\/\/[^"?]*imgix\.net\/[^"?]+)(?:\?([^"]*))?"/g, (match, url, params) => {
    if (params && /(?:^|&(?:amp;)?)w=/.test(params)) {
      return match;
    }
    return `src="${url}?${params ? `${params}&amp;` : ''}w=1600"`;
  });

  return html;
}

/** Builds the table-of-contents markup from the h2/h3 headings of the rendered HTML that carry an id attribute. */
function buildTocHtml(renderedHtml) {
  let tocHTML = '<div class="toc">\n<h2>Table of Contents</h2>\n<ul>';
  for (const [, level, id, text] of renderedHtml.matchAll(/<h([2-3])\s+id="([^"]+)">([^<]+)<\/h\1>/g)) {
    const indent = Number(level) > 2 ? ' ' : '';
    tocHTML += `\n${indent}<li><a href="#${id}">${text}</a></li>`;
  }
  tocHTML += '\n</ul>\n</div>\n';
  return tocHTML;
}

/** Wraps matching h1 headings in chapter sections and gives every text-only h2-h6 heading an id, a data-heading attribute and an inner span. */
function addChapterStructure(renderedHtml, chaptersInfo) {
  let chapterAwareContent = renderedHtml;
  let headingCounter = 0;

  for (const chapter of chaptersInfo) {
    const escapedTitle = chapter.title.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
    const chapterHeadingRegex = new RegExp(`<h1[^>]*>(${escapedTitle})</h1>`, 'i');
    if (!chapterHeadingRegex.test(chapterAwareContent)) {
      continue;
    }

    headingCounter += 1;
    const headingTag = `<h1 id="heading-${headingCounter}">`;

    // A function replacer keeps "$" sequences in ids or file names literal.
    chapterAwareContent = chapterAwareContent.replace(
      chapterHeadingRegex,
      (match, headingText) =>
        `<section id="${chapter.id}" class="content-section" data-chapter-id="${chapter.id}" data-file="${chapter.file}">\n${headingTag}${headingText}</h1>`
    );

    // Close the section before the next h1, or at the end of the content
    const nextH1Index = chapterAwareContent.indexOf('<h1', chapterAwareContent.indexOf(headingTag) + 1);
    if (nextH1Index !== -1) {
      chapterAwareContent = chapterAwareContent.slice(0, nextH1Index) + '</section>\n' + chapterAwareContent.slice(nextH1Index);
    } else {
      chapterAwareContent += '\n</section>';
    }
  }

  // First chapter record wins when several share a title.
  const idByTitle = new Map();
  for (const { title, id } of chaptersInfo) {
    if (!idByTitle.has(title)) {
      idByTitle.set(title, id);
    }
  }

  return chapterAwareContent.replace(
    /<h([2-6])(?:[^>]*)>([^<]+)<\/h\1>/g,
    (match, level, text) => {
      const textContent = text.trim();
      const headingId = idByTitle.get(textContent) || `heading-${++headingCounter}`;
      return `<h${level} id="${headingId}" data-heading="${textContent.replace(/"/g, '&quot;')}"><span>${textContent}</span></h${level}>`;
    }
  );
}

/**
 * Builds the HTML document from markdown files.
 *
 * Reads the template, styles and scripts, concatenates the markdown sources
 * (each wrapped in its own <section>), applies the markdown-level transforms
 * (inline math, internal links), renders with marked, post-processes the HTML
 * and writes the Handlebars-rendered page to `outputFile`, creating the output
 * directory when needed. Failures are logged and reported through the return
 * value; nothing is thrown.
 *
 * @param {string} srcDir - Source directory containing markdown files, or a single .md file
 * @param {string} outputFile - Output HTML file path
 * @returns {Promise<boolean>} Success status
 */
async function buildHtml(srcDir, outputFile) {
  debug(`Building HTML from ${srcDir} to ${outputFile}`);

  try {
    // Load external templates and styles
    const templateSource = fs.readFileSync(TEMPLATE_PATH, 'utf8');
    const cssStyles = fs.readFileSync(STYLES_PATH, 'utf8');
    const chapterNavStyles = fs.readFileSync(CHAPTER_NAV_STYLES_PATH, 'utf8');
    const jsScripts = fs.readFileSync(SCRIPTS_PATH, 'utf8');

    // Register custom Handlebars helpers
    Handlebars.registerHelper('slugify', function(text) {
      return text
        .toString()
        .toLowerCase()
        .replace(/\s+/g, '-')       // Replace spaces with -
        .replace(/[^\w\-]+/g, '')   // Remove all non-word chars
        .replace(/\-\-+/g, '-')     // Replace multiple - with single -
        .replace(/^-+/, '')         // Trim - from start of text
        .replace(/-+$/, '')         // Trim - from end of text
        .substring(0, 64);          // Limit length
    });

    Handlebars.registerHelper('eq', function(a, b) {
      return a === b;
    });

    // Compile Handlebars template
    const template = Handlebars.compile(templateSource);

    const markdownFiles = collectMarkdownFiles(srcDir);
    if (markdownFiles === null) {
      return false;
    }
    if (markdownFiles.length === 0) {
      console.error('Error: No markdown files found');
      return false;
    }

    let title = 'Documentation';
    let titleFromMetadata = false;
    let description = 'Generated documentation';
    let author = 'UDX';
    let date = new Date().toLocaleDateString('en-US', {
      year: 'numeric',
      month: 'long',
      day: 'numeric'
    });
    let version = 'v8';

    // Read each file once, collect its metadata and wrap its content in a section
    const chapters = [];
    let combinedMarkdown = '';
    for (const file of markdownFiles) {
      debug(`Processing file: ${file}`);
      const content = fs.readFileSync(file, 'utf8');
      const fileName = path.basename(file, '.md');
      const chapterId = fileName.replace(/^\d+_/, '');
      chapters.push({ file, fileName, chapterId, content });

      // The first file that declares a title wins; for the other keys the last file wins.
      const metaTitle = readMetadata(content, 'title');
      if (metaTitle && !titleFromMetadata) {
        title = metaTitle;
        titleFromMetadata = true;
      }
      description = readMetadata(content, 'description') ?? description;
      author = readMetadata(content, 'author') ?? author;
      date = readMetadata(content, 'date') ?? date;
      version = readMetadata(content, 'version') ?? version;

      combinedMarkdown += `<!-- START SECTION: ${fileName} -->\n`;
      combinedMarkdown += `<section id="${chapterId}" class="content-section" data-chapter-id="${chapterId}" data-file="${fileName}">\n\n`;
      combinedMarkdown += content;
      combinedMarkdown += `\n\n</section>\n<!-- END SECTION: ${fileName} -->\n\n`;
    }

    // Without a title comment, fall back to the first H1 of the combined document
    if (!titleFromMetadata) {
      const headingMatch = combinedMarkdown.match(/^#\s+(.*?)$/m);
      if (headingMatch) {
        title = headingMatch[1];
      }
    }

    // Markdown-level transforms must run before rendering
    combinedMarkdown = convertInlineMath(combinedMarkdown);
    warnAboutMissingImages(combinedMarkdown, srcDir);
    combinedMarkdown = rewriteInternalLinks(combinedMarkdown, buildFileToSectionMap(chapters));

    const chaptersInfo = collectChapterSections(chapters);

    // Convert Markdown to HTML and process it
    let processedContent = decorateRenderedHtml(marked.parse(combinedMarkdown));

    // Replace the TOC placeholder, if the document has one.
    // NOTE(review): heading ids are rewritten by addChapterStructure below, after
    // these TOC links are generated - confirm the anchors still resolve.
    if (combinedMarkdown.includes('<!-- TOC -->')) {
      const tocHTML = buildTocHtml(processedContent);
      processedContent = processedContent.replace('<!-- TOC -->', () => tocHTML);
    }

    let processedWithHeadingAttrs = addChapterStructure(processedContent, chaptersInfo);

    // Embed the chapter structure as JSON. "<" is escaped so heading text can
    // never terminate the script element early; JSON.parse reads \u003c as "<".
    const chaptersJSON = JSON.stringify(chaptersInfo, null, 2).replace(/</g, '\\u003c');
    processedWithHeadingAttrs += `\n<script type="application/json" id="document-chapters-data">${chaptersJSON}</script>\n`;

    // Prepare data for template
    const templateData = {
      title,
      description,
      author,
      date,
      version,
      content: processedWithHeadingAttrs,
      styles: cssStyles + '\n' + chapterNavStyles,
      scripts: jsScripts,
      chapters: chaptersInfo
    };

    // Generate HTML using Handlebars template
    const html = template(templateData);

    // Create output directory if it doesn't exist
    const outputDir = path.dirname(outputFile);
    if (!fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }

    // Write HTML to output file
    fs.writeFileSync(outputFile, html);

    console.log(`HTML document generated successfully: ${outputFile}`);
    return true;
  } catch (error) {
    console.error(`Error building HTML: ${error.message}`);
    return false;
  }
}
|
|
649
|
+
|
|
650
|
+
/**
 * Watches for changes in markdown files and rebuilds HTML.
 *
 * Runs one build immediately, then rebuilds (debounced by 500 ms) whenever a
 * markdown file under the source is changed, added or removed. If the source
 * cannot be inspected, the error is logged, the process exit code is set to 1
 * and no watcher is started.
 *
 * @param {string} srcDir - Source directory containing markdown files, or a single .md file
 * @param {string} outputFile - Output HTML file path
 */
function watchMarkdown(srcDir, outputFile) {
  console.log(`Watching for changes in ${srcDir}...`);

  // Build HTML initially. buildHtml logs its own failures; the handler only
  // guards against an unexpected rejection being left unhandled.
  buildHtml(srcDir, outputFile).catch((error) => {
    console.error(`❌ Error building HTML: ${error.message}\n`);
  });

  // Determine whether srcDir is a file or directory
  const resolvedSrcDir = path.resolve(srcDir);
  let isDirectory;
  try {
    isDirectory = fs.statSync(resolvedSrcDir).isDirectory();
  } catch (error) {
    console.error(`❌ Cannot watch ${resolvedSrcDir}: ${error.message}`);
    process.exitCode = 1;
    return;
  }

  // Set up the watcher with reliable configuration
  const watcher = chokidar.watch(resolvedSrcDir, {
    persistent: true,
    ignoreInitial: true,
    usePolling: true,               // More reliable but uses more CPU
    interval: 1000,                 // Poll every 1000ms = 1 second
    awaitWriteFinish: {
      stabilityThreshold: 2000,     // Wait 2 seconds of stability before triggering
      pollInterval: 500             // Poll every 500ms during stability wait
    },
    ignored: /(^|\/)\..|node_modules/, // Ignore dotfiles and node_modules
    depth: isDirectory ? undefined : 0,
    alwaysStat: false,
    atomic: true                    // Handle atomic writes reliably
  });

  // Use debouncing to prevent multiple rebuilds for rapid changes
  let debounceTimer;
  const debouncedRebuild = () => {
    clearTimeout(debounceTimer);
    debounceTimer = setTimeout(async () => {
      console.log('\n⏳ Regenerating HTML document...');
      try {
        // buildHtml signals failure by resolving to false, so the success
        // message must depend on the result and not only on the absence of a throw.
        const succeeded = await buildHtml(srcDir, outputFile);
        if (succeeded) {
          console.log(`✅ HTML document updated: ${outputFile}\n`);
        } else {
          console.error('❌ HTML document was not updated (see error above)\n');
        }
      } catch (error) {
        console.error(`❌ Error rebuilding HTML: ${error.message}\n`);
      }
    }, 500); // 500ms debounce time
  };

  // File change event handler
  const handleChange = (filePath) => {
    // Only process markdown files
    if (isDirectory && !filePath.endsWith('.md')) {
      return;
    }

    console.log(`📝 File changed: ${path.relative(process.cwd(), filePath)}`);
    debouncedRebuild();
  };

  // Set up event handlers
  watcher.on('change', handleChange);
  watcher.on('add', (filePath) => {
    if (filePath.endsWith('.md')) {
      console.log(`📄 File added: ${filePath}`);
      debouncedRebuild();
    }
  });
  watcher.on('unlink', (filePath) => {
    if (filePath.endsWith('.md')) {
      console.log(`🗑️ File removed: ${filePath}`);
      debouncedRebuild();
    }
  });

  // When ready, show a clear message
  watcher.on('ready', () => {
    console.log('👀 Watching for file changes. Press Ctrl+C to stop.\n');
  });

  // Handle watch errors
  watcher.on('error', (error) => {
    console.error(`❌ Watch error: ${error}`);
  });
}
|
|
732
|
+
|
|
733
|
+
// Watch for changes if enabled; otherwise build once.
if (options.watch) {
  watchMarkdown(srcPath, outputFile);
} else {
  // buildHtml reports failure by resolving to false instead of throwing, so the
  // result has to be turned into a non-zero exit status for scripts and CI jobs.
  buildHtml(srcPath, outputFile).then((succeeded) => {
    if (!succeeded) {
      process.exitCode = 1;
    }
  });
}
|