@xjtlumedia/markdown-mcp-server 1.0.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -5,6 +5,7 @@
5
5
  [![npm version](https://img.shields.io/npm/v/@xjtlumedia/markdown-mcp-server.svg)](https://www.npmjs.com/package/@xjtlumedia/markdown-mcp-server)
6
6
  [![License: MIT](https://img.shields.io/badge/License-MIT-blue.svg)](https://opensource.org/licenses/MIT)
7
7
  [![MCP Registry](https://img.shields.io/badge/MCP-Registry-purple)](https://registry.modelcontextprotocol.io)
8
+ [![Glama](https://glama.ai/mcp/servers/XJTLUmedia/AI_answer_copier/badge)](https://glama.ai/mcp/servers/XJTLUmedia/AI_answer_copier)
8
9
  [![GitHub](https://img.shields.io/github/stars/XJTLUmedia/AI_answer_copier?style=social)](https://github.com/XJTLUmedia/AI_answer_copier)
9
10
 
10
11
  **A Model Context Protocol (MCP) server that gives your AI assistant the power to convert Markdown into 14 professional document formats** — PDF, DOCX, HTML, LaTeX, CSV, JSON, XML, XLSX, RTF, PNG, and more. Stop copy-pasting. Let the AI do the exporting.
@@ -0,0 +1,223 @@
1
+ // ── Document Analysis Functions ──────────────────────────────────────
2
+ // ── Code Block Extraction ────────────────────────────────────────────
3
/**
 * Extract every fenced code block from a Markdown document.
 *
 * A fence is a line made of three or more backticks or tildes, optionally
 * followed by a language tag. An open block is closed only by a bare fence
 * (no language tag) that uses the same character as the opening fence and is
 * at least as long; any other fence-looking line inside the block is kept as
 * part of its code.
 *
 * @param {string} md - Markdown source.
 * @returns {Array<{language: string, code: string, startLine: number, endLine: number}>}
 *   One entry per block. `language` falls back to 'text'. `startLine` is the
 *   1-indexed line of the first code line; `endLine` is the 1-indexed line of
 *   the closing fence, or the last line of the document for an unclosed
 *   block. An unclosed block is reported only if it has at least one line.
 */
export function extractCodeBlocks(md) {
    const blocks = [];
    const lines = md.split('\n');
    // The fence string that opened the current block, or null when outside a block.
    let openFence = null;
    let currentLang = '';
    let currentCode = [];
    let startLine = 0;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const fenceMatch = line.match(/^(`{3,}|~{3,})([\w+-]*)\s*$/);
        if (openFence === null) {
            if (fenceMatch !== null) {
                openFence = fenceMatch[1];
                currentLang = fenceMatch[2] || 'text';
                currentCode = [];
                startLine = i + 1;
            }
            continue;
        }
        // A closing fence carries no info string and must match the opener's
        // character and be at least as long.
        const closesBlock = fenceMatch !== null
            && fenceMatch[2] === ''
            && fenceMatch[1][0] === openFence[0]
            && fenceMatch[1].length >= openFence.length;
        if (closesBlock) {
            blocks.push({
                language: currentLang,
                code: currentCode.join('\n'),
                startLine: startLine + 1, // 1-indexed line of the first code line
                endLine: i + 1,
            });
            openFence = null;
        }
        else {
            currentCode.push(line);
        }
    }
    // Handle unclosed fence
    if (openFence !== null && currentCode.length > 0) {
        blocks.push({
            language: currentLang,
            code: currentCode.join('\n'),
            startLine: startLine + 1,
            endLine: lines.length,
        });
    }
    return blocks;
}
45
+ // ── Link Extraction ──────────────────────────────────────────────────
46
/**
 * Collect the links and images that appear outside fenced code blocks.
 *
 * Four forms are recognised on each line, reported in this order per line:
 * images `![alt](url)`, inline links `[text](url)`, reference links
 * `[text][ref]` and autolinks `<http(s)://...>`.
 *
 * For images and inline links the `url` is the bare destination: an optional
 * quoted title (`[a](url "Title")`) and angle-bracket wrappers
 * (`[a](<url>)`) are removed.
 *
 * @param {string} md - Markdown source.
 * @returns {Array<{text: string, url: string, line: number, type: 'image'|'inline'|'reference'|'autolink'}>}
 *   `line` is 1-indexed. For reference links `url` holds the reference label
 *   (or the link text when the label is empty).
 */
export function extractLinks(md) {
    const imagePattern = /!\[([^\]]*)\]\(([^)]+)\)/g;
    // Negative lookbehind keeps images out of the inline-link matches.
    const inlinePattern = /(?<!!)\[([^\]]+)\]\(([^)]+)\)/g;
    const referencePattern = /\[([^\]]+)\]\[([^\]]*)\]/g;
    const autolinkPattern = /<(https?:\/\/[^>]+)>/g;
    // Reduce the text between the parentheses to the link destination.
    const destinationOf = (raw) => {
        const dest = raw.trim();
        const bracketed = /^<([^<>]*)>/.exec(dest);
        if (bracketed) {
            return bracketed[1];
        }
        const titled = /^(\S+)\s+(?:"[^"]*"|'[^']*')$/.exec(dest);
        return titled ? titled[1] : dest;
    };
    const links = [];
    let inCodeBlock = false;
    for (const [index, line] of md.split('\n').entries()) {
        if (/^(`{3,}|~{3,})/.test(line.trim())) {
            inCodeBlock = !inCodeBlock;
            continue;
        }
        if (inCodeBlock) {
            continue;
        }
        const lineNumber = index + 1;
        for (const m of line.matchAll(imagePattern)) {
            links.push({ text: m[1], url: destinationOf(m[2]), line: lineNumber, type: 'image' });
        }
        for (const m of line.matchAll(inlinePattern)) {
            links.push({ text: m[1], url: destinationOf(m[2]), line: lineNumber, type: 'inline' });
        }
        for (const m of line.matchAll(referencePattern)) {
            links.push({ text: m[1], url: m[2] || m[1], line: lineNumber, type: 'reference' });
        }
        for (const m of line.matchAll(autolinkPattern)) {
            links.push({ text: m[1], url: m[1], line: lineNumber, type: 'autolink' });
        }
    }
    return links;
}
82
+ // ── Table of Contents Generation ─────────────────────────────────────
83
/**
 * Build a Markdown table of contents from the document's headings.
 *
 * Each heading becomes a `- [text](#slug)` bullet. Bullets are indented two
 * spaces per level below the shallowest heading present, because a `- `
 * item's content starts at column two and a shallower indent would not nest
 * the child under its parent.
 *
 * @param {string} md - Markdown source.
 * @param {number} [maxDepth=6] - Deepest heading level to include.
 * @returns {string} A `## Table of Contents` section ending in a newline, or
 *   an empty string when no heading qualifies.
 */
export function generateTOC(md, maxDepth = 6) {
    const entries = extractTocEntries(md, maxDepth);
    if (entries.length === 0) {
        return '';
    }
    const minLevel = entries.reduce((min, entry) => Math.min(min, entry.level), Infinity);
    const items = entries.map(({ level, text, slug }) => {
        const indent = '  '.repeat(level - minLevel);
        return `${indent}- [${text}](#${slug})`;
    });
    return '## Table of Contents\n\n' + items.join('\n') + '\n';
}
94
/**
 * List the ATX headings (`#` to `######`) found outside fenced code blocks.
 *
 * The heading text has an optional closing run of `#` removed, but only when
 * that run is preceded by whitespace (or is the whole content), so text that
 * merely ends in `#` such as "C#" is kept intact.
 *
 * Slugs are the lower-cased text with everything except letters, digits,
 * combining marks, `_`, `-` and whitespace removed (Unicode-aware, so
 * non-ASCII headings keep their letters), whitespace turned into `-`, runs
 * of `-` collapsed and edge hyphens trimmed. Repeated slugs get `-1`, `-2`,
 * ... appended in document order.
 *
 * @param {string} md - Markdown source.
 * @param {number} [maxDepth=6] - Deepest heading level to include.
 * @returns {Array<{level: number, text: string, slug: string, line: number}>}
 *   Headings in document order; `line` is 1-indexed.
 */
export function extractTocEntries(md, maxDepth = 6) {
    const entries = [];
    const slugCounts = new Map();
    let inCodeBlock = false;
    for (const [index, line] of md.split('\n').entries()) {
        if (/^(`{3,}|~{3,})/.test(line.trim())) {
            inCodeBlock = !inCodeBlock;
            continue;
        }
        if (inCodeBlock) {
            continue;
        }
        const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
        if (headingMatch === null) {
            continue;
        }
        const level = headingMatch[1].length;
        if (level > maxDepth) {
            continue;
        }
        // Strip a closing hash sequence only when it is set off by whitespace.
        const text = headingMatch[2].replace(/(?:^|\s+)#+\s*$/, '').trim();
        let slug = text.toLowerCase()
            .replace(/[^\p{L}\p{N}\p{M}_\s-]/gu, '')
            .replace(/\s+/g, '-')
            .replace(/-+/g, '-')
            .replace(/^-|-$/g, '');
        // Handle duplicate slugs
        const seen = slugCounts.get(slug) || 0;
        slugCounts.set(slug, seen + 1);
        if (seen > 0) {
            slug = `${slug}-${seen}`;
        }
        entries.push({ level, text, slug, line: index + 1 });
    }
    return entries;
}
128
+ // ── Document Statistics ──────────────────────────────────────────────
129
/**
 * Compute simple size and structure statistics for a Markdown document.
 *
 * Word and sentence counts are taken from a rough plain-text rendering in
 * which backtick code, HTML tags and Markdown punctuation are blanked out.
 * The `!` of an image marker is blanked but other `!` characters are kept so
 * that exclamation marks still end sentences. Structural counters (headings,
 * list items, blockquote lines, images, tables, links) ignore everything
 * inside fenced code blocks.
 *
 * @param {string} md - Markdown source.
 * @returns {{lines: number, words: number, characters: number,
 *   paragraphs: number, sentences: number, headings: number,
 *   codeBlocks: number, tables: number, links: number, images: number,
 *   lists: number, blockquotes: number, estimatedReadingTimeMinutes: number}}
 *   `lists` counts list-item lines and `blockquotes` counts quoted lines.
 *   Reading time assumes 200 words per minute with a minimum of one minute.
 */
export function analyzeDocument(md) {
    const lines = md.split('\n');
    const plainText = md
        .replace(/```[\s\S]*?```/g, '')
        .replace(/`[^`]+`/g, '')
        .replace(/<[^>]+>/g, '')
        // Only the "!" that introduces an image is markup; keep real exclamation marks.
        .replace(/!(?=\[)/g, ' ')
        .replace(/[#*_~`>\[\]|()]/g, ' ');
    const words = plainText.split(/\s+/).filter(w => w.length > 0).length;
    const sentences = plainText.split(/[.!?]+\s/).filter(s => s.trim().length > 0).length;
    const paragraphs = md.split(/\n\s*\n/).filter(p => p.trim().length > 0).length;
    let headings = 0;
    let codeBlocks = 0;
    let tables = 0;
    let images = 0;
    let lists = 0;
    let blockquotes = 0;
    let inCodeBlock = false;
    let inTable = false;
    // Lines outside fenced code, kept so links are counted on the same text as images.
    const proseLines = [];
    for (const line of lines) {
        const trimmed = line.trim();
        if (/^(`{3,}|~{3,})/.test(trimmed)) {
            if (!inCodeBlock) {
                codeBlocks++;
            }
            inCodeBlock = !inCodeBlock;
            continue;
        }
        if (inCodeBlock) {
            continue;
        }
        proseLines.push(line);
        if (/^#{1,6}\s/.test(trimmed)) {
            headings++;
        }
        if (/^[-*+]\s|^\d+\.\s/.test(trimmed)) {
            lists++;
        }
        if (/^>\s/.test(trimmed)) {
            blockquotes++;
        }
        images += (trimmed.match(/!\[/g) || []).length;
        if (trimmed.startsWith('|') && trimmed.endsWith('|')) {
            // Count a table once, on its first row.
            if (!inTable) {
                tables++;
                inTable = true;
            }
        }
        else {
            inTable = false;
        }
    }
    const linkMatches = proseLines.join('\n').match(/(?<!!)\[([^\]]+)\]\(([^)]+)\)/g);
    const linkCount = linkMatches ? linkMatches.length : 0;
    return {
        lines: lines.length,
        words,
        characters: md.length,
        paragraphs,
        sentences,
        headings,
        codeBlocks,
        tables,
        links: linkCount,
        images,
        lists,
        blockquotes,
        estimatedReadingTimeMinutes: Math.max(1, Math.ceil(words / 200)),
    };
}
193
+ // ── Heading Structure Extraction ─────────────────────────────────────
194
/**
 * Produce a combined structural summary of a Markdown document.
 *
 * @param {string} md - Markdown source.
 * @returns {{stats: object, outline: Array<{level: number, text: string, line: number}>,
 *   codeBlocks: Array<{language: string, lineCount: number, startLine: number, endLine: number}>,
 *   linkSummary: {total: number, byType: {inline: number, image: number, reference: number, autolink: number}, uniqueUrls: number}}}
 *   `stats` is the result of `analyzeDocument`; `outline` lists the headings
 *   without their slugs; `codeBlocks` replaces each block's code with its
 *   line count; `linkSummary` tallies links per type and distinct URLs.
 */
export function extractStructure(md) {
    const headings = extractTocEntries(md);
    const stats = analyzeDocument(md);
    const fencedBlocks = extractCodeBlocks(md);
    const links = extractLinks(md);

    // Tally link types and distinct URLs in a single pass.
    const byType = { inline: 0, image: 0, reference: 0, autolink: 0 };
    const distinctUrls = new Set();
    for (const { type, url } of links) {
        if (Object.prototype.hasOwnProperty.call(byType, type)) {
            byType[type] += 1;
        }
        distinctUrls.add(url);
    }

    const outline = headings.map(({ level, text, line }) => ({ level, text, line }));
    const codeBlocks = fencedBlocks.map(({ language, code, startLine, endLine }) => ({
        language,
        lineCount: code.split('\n').length,
        startLine,
        endLine,
    }));

    return {
        stats,
        outline,
        codeBlocks,
        linkSummary: {
            total: links.length,
            byType,
            uniqueUrls: distinctUrls.size,
        },
    };
}
@@ -0,0 +1,94 @@
1
+ // ── Email-Optimized HTML ─────────────────────────────────────────────
2
+ // Generates HTML with all styles inlined for email client compatibility
3
+ // No external CSS references, no class-based styles
4
+ import { unified } from 'unified';
5
+ import remarkParse from 'remark-parse';
6
+ import remarkGfm from 'remark-gfm';
7
+ import remarkRehype from 'remark-rehype';
8
+ import rehypeStringify from 'rehype-stringify';
9
/**
 * Render Markdown as a complete HTML e-mail document.
 *
 * The Markdown is converted to HTML with the unified pipeline
 * (remark-parse, remark-gfm, remark-rehype, rehype-stringify), every
 * supported element then receives inline styles via `inlineEmailStyles`,
 * and the result is placed inside a centred 600px table-based wrapper.
 *
 * @param {string} md - Markdown source.
 * @returns {Promise<string>} The full HTML document, starting at the doctype.
 */
export async function markdownToEmailHtml(md) {
    const processor = unified()
        .use(remarkParse)
        .use(remarkGfm)
        // @ts-ignore
        .use(remarkRehype)
        // @ts-ignore
        .use(rehypeStringify);
    const rendered = await processor.process(md);
    // Styles are inlined so the markup needs no stylesheet or class rules.
    const body = inlineEmailStyles(String(rendered));
    return `<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--[if mso]>
<noscript>
<xml>
<o:OfficeDocumentSettings>
<o:PixelsPerInch>96</o:PixelsPerInch>
</o:OfficeDocumentSettings>
</xml>
</noscript>
<![endif]-->
</head>
<body style="margin:0;padding:0;background-color:#f6f6f6;">
<table role="presentation" cellpadding="0" cellspacing="0" width="100%" style="background-color:#f6f6f6;">
<tr>
<td align="center" style="padding:20px 0;">
<table role="presentation" cellpadding="0" cellspacing="0" width="600" style="background-color:#ffffff;border-radius:4px;border:1px solid #e0e0e0;">
<tr>
<td style="padding:30px 40px;font-family:Arial,Helvetica,sans-serif;font-size:16px;line-height:1.6;color:#333333;">
${body}
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>`;
}
54
/**
 * Add an inline `style` attribute to every supported opening tag in an HTML
 * fragment.
 *
 * Tags are matched by their full name, so `<pre>` is not treated as `<p>`
 * and `<thead>` is not treated as `<th>`. A `<code>` element inside `<pre>`
 * is left unstyled so the block styling of `<pre>` applies. If a tag already
 * has a `style` attribute the default declarations are placed in front of
 * the existing ones (existing declarations win) instead of emitting a second
 * attribute. A self-closing slash is kept at the end of the tag.
 *
 * @param {string} html - HTML fragment.
 * @returns {string} The fragment with inline styles applied; styled tag names
 *   are written in lower case.
 */
function inlineEmailStyles(html) {
    const styles = new Map([
        // Headings
        ['h1', 'font-family:Arial,Helvetica,sans-serif;font-size:28px;font-weight:bold;color:#1a1a1a;margin:24px 0 12px 0;line-height:1.3;'],
        ['h2', 'font-family:Arial,Helvetica,sans-serif;font-size:24px;font-weight:bold;color:#1a1a1a;margin:20px 0 10px 0;line-height:1.3;'],
        ['h3', 'font-family:Arial,Helvetica,sans-serif;font-size:20px;font-weight:bold;color:#1a1a1a;margin:18px 0 8px 0;line-height:1.3;'],
        ['h4', 'font-family:Arial,Helvetica,sans-serif;font-size:18px;font-weight:bold;color:#333333;margin:16px 0 8px 0;line-height:1.3;'],
        ['h5', 'font-family:Arial,Helvetica,sans-serif;font-size:16px;font-weight:bold;color:#333333;margin:14px 0 6px 0;line-height:1.3;'],
        ['h6', 'font-family:Arial,Helvetica,sans-serif;font-size:14px;font-weight:bold;color:#555555;margin:12px 0 6px 0;line-height:1.3;'],
        // Paragraphs
        ['p', 'margin:0 0 16px 0;font-size:16px;line-height:1.6;color:#333333;'],
        // Links
        ['a', 'color:#0066cc;text-decoration:underline;'],
        // Code blocks (pre)
        ['pre', 'background-color:#f4f4f4;border:1px solid #dddddd;border-radius:4px;padding:12px;overflow-x:auto;font-family:Consolas,Monaco,monospace;font-size:14px;line-height:1.4;margin:16px 0;'],
        // Inline code
        ['code', 'background-color:#f0f0f0;border:1px solid #e0e0e0;border-radius:3px;padding:2px 6px;font-family:Consolas,Monaco,monospace;font-size:14px;'],
        // Tables
        ['table', 'border-collapse:collapse;width:100%;margin:16px 0;border:1px solid #dddddd;'],
        ['th', 'border:1px solid #dddddd;padding:10px 12px;text-align:left;background-color:#f8f8f8;font-weight:bold;font-size:14px;'],
        ['td', 'border:1px solid #dddddd;padding:10px 12px;text-align:left;font-size:14px;'],
        // Blockquotes
        ['blockquote', 'border-left:4px solid #cccccc;margin:16px 0;padding:8px 16px;color:#666666;background-color:#fafafa;'],
        // Lists
        ['ul', 'margin:8px 0 16px 0;padding-left:24px;'],
        ['ol', 'margin:8px 0 16px 0;padding-left:24px;'],
        ['li', 'margin:4px 0;line-height:1.6;'],
        // Horizontal rules
        ['hr', 'border:none;border-top:1px solid #dddddd;margin:24px 0;'],
        // Images
        ['img', 'max-width:100%;height:auto;border:0;'],
        // Strong and em (just ensure they work)
        ['strong', 'font-weight:bold;'],
        ['em', 'font-style:italic;'],
    ]);
    // Number of <pre> elements currently open, used to skip <code> inside them.
    let preDepth = 0;
    return html.replace(/<(\/?)([a-z][a-z0-9-]*)(\s[^>]*|\/)?>/gi, (match, closing, rawName, rawAttrs = '') => {
        const name = rawName.toLowerCase();
        if (closing) {
            if (name === 'pre' && preDepth > 0) {
                preDepth--;
            }
            return match;
        }
        if (name === 'pre') {
            preDepth++;
        }
        if (name === 'code' && preDepth > 0) {
            return match;
        }
        const style = styles.get(name);
        if (style === undefined) {
            return match;
        }
        // Keep a trailing "/" after the style attribute rather than before it.
        const selfClosing = /\/\s*$/.test(rawAttrs);
        const attrs = selfClosing ? rawAttrs.replace(/\s*\/\s*$/, '') : rawAttrs;
        const tail = selfClosing ? ' /' : '';
        const existing = /\sstyle\s*=\s*(["'])([\s\S]*?)\1/i.exec(attrs);
        if (existing) {
            const before = attrs.slice(0, existing.index);
            const after = attrs.slice(existing.index + existing[0].length);
            return `<${name}${before} style=${existing[1]}${style}${existing[2]}${existing[1]}${after}${tail}>`;
        }
        return `<${name}${attrs} style="${style}"${tail}>`;
    });
}
@@ -0,0 +1,158 @@
1
+ // ── HTML to Markdown (round-trip import) ─────────────────────────────
2
+ // Pure TypeScript implementation, no external dependencies
3
/**
 * Convert an HTML document or fragment to Markdown using regular-expression
 * rewrites (no DOM parser).
 *
 * Block elements are converted first (code blocks, headings, blockquotes,
 * tables, lists, rules, paragraphs, line breaks), then inline elements
 * (bold, italic, strikethrough, code, links, images). Any tag left over is
 * removed, entities are decoded once at the very end, and runs of blank
 * lines are collapsed.
 *
 * Tag names are matched as whole words so that, for example, `<img>` is not
 * mistaken for `<i>` and `<span>` is not mistaken for `<s>`. Code block
 * contents stay entity-encoded until the final decode; decoding them earlier
 * would let the tag-removal step delete code such as `&lt;div&gt;`.
 *
 * @param {string} html - HTML source.
 * @returns {string} Markdown text, trimmed and terminated by one newline.
 */
export function htmlToMarkdown(html) {
    let md = html;
    // Remove doctype, html/head/body wrappers
    md = md.replace(/<!DOCTYPE[^>]*>/gi, '');
    md = md.replace(/<html\b[^>]*>/gi, '');
    md = md.replace(/<\/html>/gi, '');
    md = md.replace(/<head\b[\s\S]*?<\/head>/gi, '');
    md = md.replace(/<body\b[^>]*>/gi, '');
    md = md.replace(/<\/body>/gi, '');
    // Remove scripts and styles
    md = md.replace(/<script\b[\s\S]*?<\/script>/gi, '');
    md = md.replace(/<style\b[\s\S]*?<\/style>/gi, '');
    // Convert block elements first (order matters)
    // Pre/code blocks: the language is read from the <code> tag's class list.
    md = md.replace(/<pre\b[^>]*><code\b([^>]*)>([\s\S]*?)<\/code><\/pre>/gi, (_m, codeAttrs, code) => {
        const langMatch = /\bclass="[^"]*\blanguage-([\w+-]+)/i.exec(codeAttrs);
        const lang = langMatch ? langMatch[1] : '';
        return `\n\`\`\`${lang}\n${code.trim()}\n\`\`\`\n`;
    });
    md = md.replace(/<pre\b[^>]*>([\s\S]*?)<\/pre>/gi, (_m, code) => `\n\`\`\`\n${code.trim()}\n\`\`\`\n`);
    // Headings
    md = md.replace(/<h([1-6])\b[^>]*>([\s\S]*?)<\/h\1>/gi, (_m, level, content) => `\n${'#'.repeat(Number(level))} ${content}\n`);
    // Blockquotes
    md = md.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, (_m, content) => {
        return content.trim().split('\n').map((l) => '> ' + l.trim()).join('\n') + '\n';
    });
    // Tables
    md = convertHtmlTables(md);
    // Lists
    md = convertHtmlLists(md);
    // Horizontal rules
    md = md.replace(/<hr\b[^>]*\/?>/gi, '\n---\n');
    // Paragraphs
    md = md.replace(/<p\b[^>]*>([\s\S]*?)<\/p>/gi, '\n$1\n');
    // Line breaks: two trailing spaces make a Markdown hard break
    md = md.replace(/<br\s*\/?>/gi, '  \n');
    // Now convert inline elements
    // Bold
    md = md.replace(/<(strong|b)\b[^>]*>([\s\S]*?)<\/\1>/gi, '**$2**');
    // Italic
    md = md.replace(/<(em|i)\b[^>]*>([\s\S]*?)<\/\1>/gi, '*$2*');
    // Strikethrough
    md = md.replace(/<(del|s|strike)\b[^>]*>([\s\S]*?)<\/\1>/gi, '~~$2~~');
    // Code
    md = md.replace(/<code\b[^>]*>([\s\S]*?)<\/code>/gi, '`$1`');
    // Links
    md = md.replace(/<a\b[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)');
    // Images (src before alt, alt before src, then src only)
    md = md.replace(/<img\b[^>]*src="([^"]*)"[^>]*alt="([^"]*)"[^>]*\/?>/gi, '![$2]($1)');
    md = md.replace(/<img\b[^>]*alt="([^"]*)"[^>]*src="([^"]*)"[^>]*\/?>/gi, '![$1]($2)');
    md = md.replace(/<img\b[^>]*src="([^"]*)"[^>]*\/?>/gi, '![]($1)');
    // Remove remaining HTML tags
    md = md.replace(/<[^>]+>/g, '');
    // Decode HTML entities
    md = decodeHtmlEntities(md);
    // Clean up whitespace
    md = md.replace(/\n{3,}/g, '\n\n');
    md = md.trim() + '\n';
    return md;
}
64
/**
 * Replace every `<table>` element in an HTML string with a Markdown pipe
 * table.
 *
 * The header row comes from the `<th>` cells of `<thead>` (or its `<td>`
 * cells when it has no `<th>`); without a usable `<thead>` the first data
 * row becomes the header. Data rows are taken from `<tbody>` when present,
 * otherwise from every `<tr>` outside `<thead>`. All rows are padded with
 * empty cells to the width of the widest row so no cell falls outside the
 * header's column count. A table with no cells is replaced by an empty
 * string.
 *
 * @param {string} html - HTML source.
 * @returns {string} The input with each table converted.
 */
function convertHtmlTables(html) {
    return html.replace(/<table\b[^>]*>([\s\S]*?)<\/table>/gi, (_m, tableContent) => {
        const rows = [];
        // Extract thead rows
        const theadMatch = tableContent.match(/<thead\b[^>]*>([\s\S]*?)<\/thead>/i);
        if (theadMatch) {
            let headerCells = extractCells(theadMatch[1], 'th');
            if (headerCells.length === 0) {
                headerCells = extractCells(theadMatch[1], 'td');
            }
            if (headerCells.length > 0) {
                rows.push(headerCells);
            }
        }
        // Extract tbody rows
        const tbodyMatch = tableContent.match(/<tbody\b[^>]*>([\s\S]*?)<\/tbody>/i);
        let bodyContent = tableContent;
        if (tbodyMatch) {
            bodyContent = tbodyMatch[1];
        }
        else if (theadMatch) {
            // Cut the header section out by position so that body rows whose
            // markup happens to equal the header row are not dropped.
            bodyContent = tableContent.slice(0, theadMatch.index)
                + tableContent.slice(theadMatch.index + theadMatch[0].length);
        }
        const rowMatches = bodyContent.match(/<tr\b[^>]*>([\s\S]*?)<\/tr>/gi) || [];
        for (const row of rowMatches) {
            let cells = extractCells(row, 'td');
            if (cells.length === 0) {
                cells = extractCells(row, 'th');
            }
            if (cells.length > 0) {
                rows.push(cells);
            }
        }
        if (rows.length === 0) {
            return '';
        }
        // Pad every row (header included) to the widest row.
        const width = rows.reduce((max, cells) => Math.max(max, cells.length), 0);
        const toLine = (cells) => {
            const padded = [...cells];
            while (padded.length < width) {
                padded.push('');
            }
            return '| ' + padded.join(' | ') + ' |\n';
        };
        let result = '\n';
        result += toLine(rows[0]);
        result += '| ' + Array.from({ length: width }, () => '---').join(' | ') + ' |\n';
        for (let i = 1; i < rows.length; i++) {
            result += toLine(rows[i]);
        }
        return result;
    });
}
110
/**
 * Return the text of every `<tag>...</tag>` cell found in a piece of table
 * HTML.
 *
 * The tag name is matched as a whole word (so 'th' does not match
 * `<thead>`). Nested tags are removed, whitespace runs (including newlines)
 * are collapsed to a single space, and `|` is escaped as `\|`, so that each
 * cell is safe to place on one line of a Markdown pipe table.
 *
 * @param {string} rowHtml - HTML containing the cells.
 * @param {string} tag - Cell tag name, e.g. 'td' or 'th'.
 * @returns {string[]} Cell texts in document order.
 */
function extractCells(rowHtml, tag) {
    const cellPattern = new RegExp(`<${tag}\\b[^>]*>([\\s\\S]*?)<\\/${tag}>`, 'gi');
    const cells = [];
    for (const match of rowHtml.matchAll(cellPattern)) {
        const text = match[1]
            .replace(/<[^>]+>/g, '')
            .replace(/\s+/g, ' ')
            .trim()
            .replace(/\|/g, '\\|');
        cells.push(text);
    }
    return cells;
}
119
/**
 * Replace `<ul>` and `<ol>` elements in an HTML string with Markdown lists.
 *
 * Lists are converted innermost first: each pass only rewrites lists that
 * contain no other list opening tag, and passes repeat until nothing
 * changes. Matching this way pairs every opening tag with its own closing
 * tag, which a plain non-greedy match does not do for nested lists.
 *
 * Continuation lines of an item (including an already converted nested
 * list) are indented by the width of the item's marker, two columns for
 * `- ` and three or more for `1. `, so nested content stays attached to its
 * parent item.
 *
 * @param {string} html - HTML source.
 * @returns {string} The input with every list converted.
 */
function convertHtmlLists(html) {
    // A list whose content holds no further <ul>/<ol> opening tag.
    const innermostList = /<(ul|ol)\b[^>]*>((?:(?!<(?:ul|ol)\b)[\s\S])*?)<\/\1>/gi;
    let out = html;
    for (;;) {
        const next = out.replace(innermostList, (_m, kind, content) => {
            const ordered = kind.toLowerCase() === 'ol';
            const lines = extractListItems(content).map((item, idx) => {
                const marker = ordered ? `${idx + 1}. ` : '- ';
                const hangingIndent = '\n' + ' '.repeat(marker.length);
                // Indent every non-empty continuation line under the marker.
                return marker + item.trim().replace(/\n(?=[^\n])/g, hangingIndent);
            });
            return '\n' + lines.join('\n') + '\n';
        });
        // Each pass removes at least one list opening tag, so this terminates.
        if (next === out) {
            break;
        }
        out = next;
    }
    return out;
}
/**
 * Return the trimmed inner HTML of every `<li>` element in a list body.
 *
 * @param {string} listHtml - Content between a list's opening and closing tags.
 * @returns {string[]} Item contents in document order, without indentation.
 */
function extractListItems(listHtml) {
    const items = [];
    for (const match of listHtml.matchAll(/<li\b[^>]*>([\s\S]*?)<\/li>/gi)) {
        items.push(match[1].trim());
    }
    return items;
}
148
/**
 * Decode the HTML entities `&amp;`, `&lt;`, `&gt;`, `&quot;`, `&apos;`,
 * `&nbsp;` and numeric references (`&#NNN;`, `&#xHHH;`).
 *
 * Decoding happens in a single pass, so text produced by one replacement is
 * never decoded again (`&amp;lt;` becomes `&lt;`, not `<`). Numeric
 * references are resolved as full Unicode code points, so characters above
 * U+FFFF decode correctly; a reference beyond U+10FFFF is left unchanged.
 * `&nbsp;` becomes a regular space.
 *
 * @param {string} text - Text containing entities.
 * @returns {string} The decoded text.
 */
function decodeHtmlEntities(text) {
    const named = new Map([
        ['amp', '&'],
        ['lt', '<'],
        ['gt', '>'],
        ['quot', '"'],
        ['apos', "'"],
        ['nbsp', ' '],
    ]);
    return text.replace(/&(?:#(\d+)|#[xX]([0-9a-fA-F]+)|(amp|lt|gt|quot|apos|nbsp));/g, (entity, dec, hex, name) => {
        if (name !== undefined) {
            return named.get(name);
        }
        const codePoint = dec !== undefined ? Number.parseInt(dec, 10) : Number.parseInt(hex, 16);
        // String.fromCodePoint throws above U+10FFFF; keep such references as-is.
        if (codePoint > 0x10FFFF) {
            return entity;
        }
        return String.fromCodePoint(codePoint);
    });
}