@xjtlumedia/markdown-mcp-server 1.0.5 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -0
- package/dist/document-analysis.js +223 -0
- package/dist/email-html.js +94 -0
- package/dist/html-import.js +158 -0
- package/dist/index.js +1067 -113
- package/dist/markdown-repair.js +263 -0
- package/dist/platform-converters.js +419 -0
- package/icon.svg +15 -0
- package/package.json +15 -4
package/README.md
CHANGED
|
@@ -5,6 +5,7 @@
|
|
|
5
5
|
[](https://www.npmjs.com/package/@xjtlumedia/markdown-mcp-server)
|
|
6
6
|
[](https://opensource.org/licenses/MIT)
|
|
7
7
|
[](https://registry.modelcontextprotocol.io)
|
|
8
|
+
[](https://glama.ai/mcp/servers/XJTLUmedia/AI_answer_copier)
|
|
8
9
|
[](https://github.com/XJTLUmedia/AI_answer_copier)
|
|
9
10
|
|
|
10
11
|
**A Model Context Protocol (MCP) server that gives your AI assistant the power to convert Markdown into 14 professional document formats** — PDF, DOCX, HTML, LaTeX, CSV, JSON, XML, XLSX, RTF, PNG, and more. Stop copy-pasting. Let the AI do the exporting.
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
// ── Document Analysis Functions ──────────────────────────────────────
|
|
2
|
+
// ── Code Block Extraction ────────────────────────────────────────────
|
|
3
|
+
/**
 * Extracts fenced code blocks (``` or ~~~ fences) from a Markdown string.
 *
 * A block is closed only by a fence that uses the same character as the
 * opening fence, is at least as long, and carries no language tag. Any other
 * fence-looking line inside an open block is kept as code, so a ``` line
 * nested in a ~~~ block no longer terminates it early.
 *
 * @param {string} md - Markdown source.
 * @returns {Array<{language: string, code: string, startLine: number, endLine: number}>}
 *   One entry per block. `language` defaults to 'text'. `startLine` is the
 *   1-indexed line of the first content line (the line after the opening
 *   fence); `endLine` is the 1-indexed line of the closing fence, or the last
 *   line of the document when the fence is never closed.
 */
export function extractCodeBlocks(md) {
    const blocks = [];
    const lines = md.split('\n');
    // Character and length of the fence that opened the current block, or null.
    let openFence = null;
    let currentLang = '';
    let currentCode = [];
    let startLine = 0;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        const fenceMatch = line.match(/^(`{3,}|~{3,})([\w+-]*)\s*$/);
        if (openFence === null) {
            if (fenceMatch) {
                openFence = { char: fenceMatch[1][0], length: fenceMatch[1].length };
                currentLang = fenceMatch[2] || 'text';
                currentCode = [];
                startLine = i + 1;
            }
            continue;
        }
        const closesBlock = fenceMatch !== null
            && fenceMatch[2] === ''
            && fenceMatch[1][0] === openFence.char
            && fenceMatch[1].length >= openFence.length;
        if (closesBlock) {
            blocks.push({
                language: currentLang,
                code: currentCode.join('\n'),
                startLine: startLine + 1, // 1-indexed
                endLine: i + 1,
            });
            openFence = null;
        }
        else {
            currentCode.push(line);
        }
    }
    // An unclosed fence still yields a block, provided it has any content.
    if (openFence !== null && currentCode.length > 0) {
        blocks.push({
            language: currentLang,
            code: currentCode.join('\n'),
            startLine: startLine + 1,
            endLine: lines.length,
        });
    }
    return blocks;
}
|
|
45
|
+
// ── Link Extraction ──────────────────────────────────────────────────
|
|
46
|
+
/**
 * Collects links found outside fenced code blocks in a Markdown string.
 *
 * Per line, results are emitted in this order: images, inline links,
 * reference links, autolinks. An optional quoted link title
 * (`[text](url "Title")`) is stripped so that `url` holds only the
 * destination.
 *
 * @param {string} md - Markdown source.
 * @returns {Array<{text: string, url: string, line: number, type: string}>}
 *   `line` is 1-indexed; `type` is 'image', 'inline', 'reference' or
 *   'autolink'. For reference links `url` holds the reference label (the link
 *   text when the label is empty), not a resolved destination.
 */
export function extractLinks(md) {
    const links = [];
    const lines = md.split('\n');
    // Images: ![alt](url)
    const imgRegex = /!\[([^\]]*)\]\(([^)]+)\)/g;
    // Inline links: [text](url) — but not images
    const linkRegex = /(?<!!)\[([^\]]+)\]\(([^)]+)\)/g;
    // Reference links: [text][ref]
    const refRegex = /\[([^\]]+)\]\[([^\]]*)\]/g;
    // Autolinks: <url>
    const autoRegex = /<(https?:\/\/[^>]+)>/g;
    // Drops a trailing "title" / 'title' from a link target, if present.
    const stripTitle = (target) => {
        const titled = target.trim().match(/^(\S+)\s+(?:"[^"]*"|'[^']*')$/);
        return titled ? titled[1] : target;
    };
    // Runs a global regex over one line; lastIndex is reset because the regex
    // objects are shared across lines.
    const scan = (regex, line, onMatch) => {
        regex.lastIndex = 0;
        let match;
        while ((match = regex.exec(line)) !== null) {
            onMatch(match);
        }
    };
    let inCodeBlock = false;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (/^(`{3,}|~{3,})/.test(line.trim())) {
            inCodeBlock = !inCodeBlock;
            continue;
        }
        if (inCodeBlock) {
            continue;
        }
        const lineNumber = i + 1;
        scan(imgRegex, line, (m) => {
            links.push({ text: m[1], url: stripTitle(m[2]), line: lineNumber, type: 'image' });
        });
        scan(linkRegex, line, (m) => {
            links.push({ text: m[1], url: stripTitle(m[2]), line: lineNumber, type: 'inline' });
        });
        scan(refRegex, line, (m) => {
            links.push({ text: m[1], url: m[2] || m[1], line: lineNumber, type: 'reference' });
        });
        scan(autoRegex, line, (m) => {
            links.push({ text: m[1], url: m[1], line: lineNumber, type: 'autolink' });
        });
    }
    return links;
}
|
|
82
|
+
// ── Table of Contents Generation ─────────────────────────────────────
|
|
83
|
+
/**
 * Builds a Markdown "Table of Contents" section from a document's headings.
 *
 * Entries are rendered as a nested bullet list. Each level is indented by two
 * spaces relative to the shallowest heading present: a `- ` bullet needs at
 * least two spaces of indentation for the next bullet to nest under it, and a
 * single space would render every entry as a sibling.
 *
 * @param {string} md - Markdown source.
 * @param {number} [maxDepth=6] - Deepest heading level to include.
 * @returns {string} The TOC section (heading, blank line, list, trailing
 *   newline), or '' when the document has no qualifying headings.
 */
export function generateTOC(md, maxDepth = 6) {
    const entries = extractTocEntries(md, maxDepth);
    if (entries.length === 0) {
        return '';
    }
    const minLevel = Math.min(...entries.map((entry) => entry.level));
    const items = entries.map(({ level, text, slug }) => {
        const indent = '  '.repeat(level - minLevel);
        return `${indent}- [${text}](#${slug})`;
    });
    return `## Table of Contents\n\n${items.join('\n')}\n`;
}
|
|
94
|
+
/**
 * Extracts ATX headings (`#` … `######`) outside fenced code blocks and
 * derives an anchor slug for each.
 *
 * Slugs are the lower-cased heading text with punctuation removed and
 * whitespace turned into hyphens. Letters and digits of any script are kept,
 * so non-ASCII headings (e.g. CJK) get a usable slug instead of an empty
 * string. Repeated slugs receive a numeric suffix (`-1`, `-2`, …).
 *
 * @param {string} md - Markdown source; LF and CRLF line endings are accepted.
 * @param {number} [maxDepth=6] - Headings deeper than this level are skipped.
 * @returns {Array<{level: number, text: string, slug: string, line: number}>}
 *   `line` is 1-indexed.
 */
export function extractTocEntries(md, maxDepth = 6) {
    const entries = [];
    // Split on CRLF as well: a trailing '\r' would otherwise make the heading
    // pattern fail, because '.' does not match line terminators.
    const lines = md.split(/\r?\n/);
    const slugCounts = new Map();
    let inCodeBlock = false;
    for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (/^(`{3,}|~{3,})/.test(line.trim())) {
            inCodeBlock = !inCodeBlock;
            continue;
        }
        if (inCodeBlock) {
            continue;
        }
        const headingMatch = line.match(/^(#{1,6})\s+(.+)$/);
        if (!headingMatch) {
            continue;
        }
        const level = headingMatch[1].length;
        if (level > maxDepth) {
            continue;
        }
        // A closing run of '#' only counts when preceded by whitespace (or when
        // it is the whole text), so "C#" keeps its '#'.
        const text = headingMatch[2].replace(/(?:^|\s+)#+\s*$/, '').trim();
        let slug = text.toLowerCase()
            .replace(/[^\p{L}\p{N}\p{M}_\s\-]/gu, '')
            .replace(/\s+/g, '-')
            .replace(/-+/g, '-')
            .replace(/^-|-$/g, '');
        // Disambiguate duplicate slugs.
        const seen = slugCounts.get(slug) || 0;
        slugCounts.set(slug, seen + 1);
        if (seen > 0) {
            slug = `${slug}-${seen}`;
        }
        entries.push({ level, text, slug, line: i + 1 });
    }
    return entries;
}
|
|
128
|
+
// ── Document Statistics ──────────────────────────────────────────────
|
|
129
|
+
/**
 * Computes summary statistics for a Markdown document.
 *
 * Structural counts (headings, lists, blockquotes, images, tables, links)
 * ignore the content of fenced code blocks. Links were previously counted
 * over the raw document, so link-like text inside code fences inflated the
 * total and disagreed with the other counters.
 *
 * @param {string} md - Markdown source.
 * @returns {{lines: number, words: number, characters: number,
 *   paragraphs: number, sentences: number, headings: number,
 *   codeBlocks: number, tables: number, links: number, images: number,
 *   lists: number, blockquotes: number, estimatedReadingTimeMinutes: number}}
 *   `lists` counts list-item lines; `estimatedReadingTimeMinutes` assumes
 *   200 words per minute and is never below 1.
 */
export function analyzeDocument(md) {
    const lines = md.split('\n');
    // Rough plain-text view used only for word and sentence counts.
    const plainText = md
        .replace(/```[\s\S]*?```/g, '')
        .replace(/`[^`]+`/g, '')
        .replace(/<[^>]+>/g, '')
        .replace(/[#*_~`>\[\]|()!]/g, ' ');
    const words = plainText.split(/\s+/).filter((w) => w.length > 0).length;
    const sentences = plainText.split(/[.!?]+\s/).filter((s) => s.trim().length > 0).length;
    const paragraphs = md.split(/\n\s*\n/).filter((p) => p.trim().length > 0).length;
    let headings = 0;
    let codeBlocks = 0;
    let tables = 0;
    let images = 0;
    let lists = 0;
    let blockquotes = 0;
    let inCodeBlock = false;
    let inTable = false;
    // Lines outside fenced code; links are matched against these only.
    const proseLines = [];
    for (const line of lines) {
        const trimmed = line.trim();
        if (/^(`{3,}|~{3,})/.test(trimmed)) {
            if (!inCodeBlock) {
                codeBlocks++;
            }
            inCodeBlock = !inCodeBlock;
            continue;
        }
        if (inCodeBlock) {
            continue;
        }
        proseLines.push(line);
        if (/^#{1,6}\s/.test(trimmed)) {
            headings++;
        }
        if (/^[-*+]\s|^\d+\.\s/.test(trimmed)) {
            lists++;
        }
        if (/^>\s/.test(trimmed)) {
            blockquotes++;
        }
        images += (trimmed.match(/!\[/g) || []).length;
        if (trimmed.startsWith('|') && trimmed.endsWith('|')) {
            // Consecutive pipe-delimited lines form one table.
            if (!inTable) {
                tables++;
                inTable = true;
            }
        }
        else {
            inTable = false;
        }
    }
    // Matched on the joined text so a link whose text wraps across lines still counts.
    const linkMatches = proseLines.join('\n').match(/(?<!!)\[([^\]]+)\]\(([^)]+)\)/g);
    const linkCount = linkMatches ? linkMatches.length : 0;
    return {
        lines: lines.length,
        words,
        characters: md.length,
        paragraphs,
        sentences,
        headings,
        codeBlocks,
        tables,
        links: linkCount,
        images,
        lists,
        blockquotes,
        estimatedReadingTimeMinutes: Math.max(1, Math.ceil(words / 200)),
    };
}
|
|
193
|
+
// ── Heading Structure Extraction ─────────────────────────────────────
|
|
194
|
+
/**
 * Produces a combined structural overview of a Markdown document: overall
 * statistics, heading outline, code-block summary and link summary.
 *
 * @param {string} md - Markdown source.
 * @returns {{stats: object, outline: Array<{level: number, text: string, line: number}>,
 *   codeBlocks: Array<{language: string, lineCount: number, startLine: number, endLine: number}>,
 *   linkSummary: {total: number, byType: {inline: number, image: number, reference: number, autolink: number}, uniqueUrls: number}}}
 */
export function extractStructure(md) {
    const headingEntries = extractTocEntries(md);
    const stats = analyzeDocument(md);
    const fencedBlocks = extractCodeBlocks(md);
    const allLinks = extractLinks(md);
    // Number of collected links of the given type.
    const countOfType = (type) => allLinks.reduce((total, link) => (link.type === type ? total + 1 : total), 0);
    const outline = headingEntries.map(({ level, text, line }) => ({ level, text, line }));
    const codeBlocks = fencedBlocks.map(({ language, code, startLine, endLine }) => ({
        language,
        lineCount: code.split('\n').length,
        startLine,
        endLine,
    }));
    const distinctUrls = new Set(allLinks.map((link) => link.url));
    return {
        stats,
        outline,
        codeBlocks,
        linkSummary: {
            total: allLinks.length,
            byType: {
                inline: countOfType('inline'),
                image: countOfType('image'),
                reference: countOfType('reference'),
                autolink: countOfType('autolink'),
            },
            uniqueUrls: distinctUrls.size,
        },
    };
}
|
|
@@ -0,0 +1,94 @@
|
|
|
1
|
+
// ── Email-Optimized HTML ─────────────────────────────────────────────
|
|
2
|
+
// Generates HTML with all styles inlined for email client compatibility
|
|
3
|
+
// No external CSS references, no class-based styles
|
|
4
|
+
import { unified } from 'unified';
|
|
5
|
+
import remarkParse from 'remark-parse';
|
|
6
|
+
import remarkGfm from 'remark-gfm';
|
|
7
|
+
import remarkRehype from 'remark-rehype';
|
|
8
|
+
import rehypeStringify from 'rehype-stringify';
|
|
9
|
+
/**
 * Renders Markdown as a complete HTML email document.
 *
 * The Markdown is converted to HTML through the unified pipeline
 * (remark-parse, remark-gfm, remark-rehype, rehype-stringify), every element
 * receives inline styles via inlineEmailStyles, and the result is wrapped in
 * a table-based layout with a 600px-wide content table.
 *
 * @param {string} md - Markdown source.
 * @returns {Promise<string>} The full HTML document as a string.
 */
export async function markdownToEmailHtml(md) {
    // Markdown -> HTML
    const pipeline = unified()
        .use(remarkParse)
        .use(remarkGfm)
        // @ts-ignore
        .use(remarkRehype)
        // @ts-ignore
        .use(rehypeStringify);
    const rendered = await pipeline.process(md);
    // The markup carries no stylesheet or class names, so styling goes into style="" attributes.
    const content = inlineEmailStyles(String(rendered));
    return `<!DOCTYPE html>
<html>
<head>
<meta charset="utf-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--[if mso]>
<noscript>
<xml>
<o:OfficeDocumentSettings>
<o:PixelsPerInch>96</o:PixelsPerInch>
</o:OfficeDocumentSettings>
</xml>
</noscript>
<![endif]-->
</head>
<body style="margin:0;padding:0;background-color:#f6f6f6;">
<table role="presentation" cellpadding="0" cellspacing="0" width="100%" style="background-color:#f6f6f6;">
<tr>
<td align="center" style="padding:20px 0;">
<table role="presentation" cellpadding="0" cellspacing="0" width="600" style="background-color:#ffffff;border-radius:4px;border:1px solid #e0e0e0;">
<tr>
<td style="padding:30px 40px;font-family:Arial,Helvetica,sans-serif;font-size:16px;line-height:1.6;color:#333333;">
${content}
</td>
</tr>
</table>
</td>
</tr>
</table>
</body>
</html>`;
}
|
|
54
|
+
/**
 * Adds an inline `style` attribute to known HTML elements.
 *
 * The markup is rewritten in a single pass keyed on the exact tag name. This
 * avoids the prefix collisions of one-regex-per-tag rewriting, where `<p…>`
 * also matched `<pre>` and `<th…>` also matched `<thead>`, producing
 * duplicate or wrong `style` attributes. `<code>` nested inside `<pre>` is
 * left unstyled so it inherits the block styling of the `<pre>`.
 *
 * @param {string} html - HTML fragment whose tags carry no `style` attribute yet.
 * @returns {string} The fragment with a `style` attribute appended to each known tag.
 */
function inlineEmailStyles(html) {
    const styles = new Map([
        // Headings
        ['h1', 'font-family:Arial,Helvetica,sans-serif;font-size:28px;font-weight:bold;color:#1a1a1a;margin:24px 0 12px 0;line-height:1.3;'],
        ['h2', 'font-family:Arial,Helvetica,sans-serif;font-size:24px;font-weight:bold;color:#1a1a1a;margin:20px 0 10px 0;line-height:1.3;'],
        ['h3', 'font-family:Arial,Helvetica,sans-serif;font-size:20px;font-weight:bold;color:#1a1a1a;margin:18px 0 8px 0;line-height:1.3;'],
        ['h4', 'font-family:Arial,Helvetica,sans-serif;font-size:18px;font-weight:bold;color:#333333;margin:16px 0 8px 0;line-height:1.3;'],
        ['h5', 'font-family:Arial,Helvetica,sans-serif;font-size:16px;font-weight:bold;color:#333333;margin:14px 0 6px 0;line-height:1.3;'],
        ['h6', 'font-family:Arial,Helvetica,sans-serif;font-size:14px;font-weight:bold;color:#555555;margin:12px 0 6px 0;line-height:1.3;'],
        // Paragraphs
        ['p', 'margin:0 0 16px 0;font-size:16px;line-height:1.6;color:#333333;'],
        // Links
        ['a', 'color:#0066cc;text-decoration:underline;'],
        // Code blocks (pre)
        ['pre', 'background-color:#f4f4f4;border:1px solid #dddddd;border-radius:4px;padding:12px;overflow-x:auto;font-family:Consolas,Monaco,monospace;font-size:14px;line-height:1.4;margin:16px 0;'],
        // Inline code
        ['code', 'background-color:#f0f0f0;border:1px solid #e0e0e0;border-radius:3px;padding:2px 6px;font-family:Consolas,Monaco,monospace;font-size:14px;'],
        // Tables
        ['table', 'border-collapse:collapse;width:100%;margin:16px 0;border:1px solid #dddddd;'],
        ['th', 'border:1px solid #dddddd;padding:10px 12px;text-align:left;background-color:#f8f8f8;font-weight:bold;font-size:14px;'],
        ['td', 'border:1px solid #dddddd;padding:10px 12px;text-align:left;font-size:14px;'],
        // Blockquotes
        ['blockquote', 'border-left:4px solid #cccccc;margin:16px 0;padding:8px 16px;color:#666666;background-color:#fafafa;'],
        // Lists
        ['ul', 'margin:8px 0 16px 0;padding-left:24px;'],
        ['ol', 'margin:8px 0 16px 0;padding-left:24px;'],
        ['li', 'margin:4px 0;line-height:1.6;'],
        // Horizontal rules
        ['hr', 'border:none;border-top:1px solid #dddddd;margin:24px 0;'],
        // Images
        ['img', 'max-width:100%;height:auto;border:0;'],
        // Strong and em (just ensure they work)
        ['strong', 'font-weight:bold;'],
        ['em', 'font-style:italic;'],
    ]);
    // Tracks whether the scan is currently inside a <pre> element.
    let preDepth = 0;
    // Group 3 must start with whitespace or '/', so "<thead>" is never read as
    // tag "th" followed by attributes "ead".
    return html.replace(/<(\/?)([a-z][a-z0-9]*)((?:[\s\/][^>]*)?)>/gi, (tagText, slash, rawName, attrs) => {
        const name = rawName.toLowerCase();
        const isClosing = slash === '/';
        if (name === 'pre') {
            preDepth = isClosing ? Math.max(0, preDepth - 1) : preDepth + 1;
        }
        if (isClosing) {
            return tagText;
        }
        const style = styles.get(name);
        if (style === undefined) {
            return tagText;
        }
        // Don't re-style code inside pre
        if (name === 'code' && preDepth > 0) {
            return tagText;
        }
        // Keep a self-closing slash at the very end of the tag.
        const selfClosing = /\/\s*$/.test(attrs);
        const keptAttrs = selfClosing ? attrs.replace(/\s*\/\s*$/, '') : attrs;
        return `<${rawName}${keptAttrs} style="${style}"${selfClosing ? ' /' : ''}>`;
    });
}
|
|
@@ -0,0 +1,158 @@
|
|
|
1
|
+
// ── HTML to Markdown (round-trip import) ─────────────────────────────
|
|
2
|
+
// Pure TypeScript implementation, no external dependencies
|
|
3
|
+
/**
 * Converts an HTML document or fragment to Markdown using regex rewriting.
 *
 * Block elements are converted first (code blocks, headings, blockquotes,
 * tables, lists, rules, paragraphs), then inline elements; remaining tags
 * are stripped and entities are decoded once at the end.
 *
 * Notes on behavior:
 * - Tag patterns are anchored with `\b`, so `<i>` does not match `<img>`,
 *   `<s>` does not match `<span>`, `<b>` does not match `<br>`, and so on.
 * - Code block content is not entity-decoded up front: decoding early turned
 *   `&lt;div&gt;` into a real tag that the later tag-stripping step deleted.
 * - The code language is read from the `language-*` class of `<code>`.
 * - Paragraphs inside a blockquote are unwrapped before quoting so their
 *   text stays inside the quote.
 * - `<br>` becomes a Markdown hard break (two trailing spaces).
 *
 * @param {string} html - HTML source.
 * @returns {string} Markdown text ending in exactly one newline.
 */
export function htmlToMarkdown(html) {
    let md = html;
    // Remove doctype, html/head/body wrappers
    md = md.replace(/<!DOCTYPE[^>]*>/gi, '');
    md = md.replace(/<html\b[^>]*>/gi, '');
    md = md.replace(/<\/html>/gi, '');
    md = md.replace(/<head\b[\s\S]*?<\/head>/gi, '');
    md = md.replace(/<body\b[^>]*>/gi, '');
    md = md.replace(/<\/body>/gi, '');
    // Remove scripts and styles
    md = md.replace(/<script\b[\s\S]*?<\/script>/gi, '');
    md = md.replace(/<style\b[\s\S]*?<\/style>/gi, '');
    // Convert block elements first (order matters)
    // Pre/code blocks: drop leading blank lines and trailing whitespace but
    // keep the indentation of the first code line.
    const toFence = (lang, code) => `\n\`\`\`${lang}\n${code.replace(/^\s*\n|\s+$/g, '')}\n\`\`\`\n`;
    md = md.replace(/<pre\b[^>]*>\s*<code\b([^>]*)>([\s\S]*?)<\/code>\s*<\/pre>/gi, (_m, codeAttrs, code) => {
        const langMatch = codeAttrs.match(/\blanguage-([\w+-]+)/i);
        return toFence(langMatch ? langMatch[1] : '', code);
    });
    md = md.replace(/<pre\b[^>]*>([\s\S]*?)<\/pre>/gi, (_m, code) => toFence('', code));
    // Headings: collapse internal whitespace so the heading stays on one line.
    for (let level = 1; level <= 6; level++) {
        const headingRegex = new RegExp(`<h${level}\\b[^>]*>([\\s\\S]*?)<\\/h${level}>`, 'gi');
        const hashes = '#'.repeat(level);
        md = md.replace(headingRegex, (_m, text) => `\n${hashes} ${text.replace(/\s+/g, ' ').trim()}\n`);
    }
    // Blockquotes
    md = md.replace(/<blockquote\b[^>]*>([\s\S]*?)<\/blockquote>/gi, (_m, content) => {
        const inner = content
            .replace(/<p\b[^>]*>([\s\S]*?)<\/p>/gi, '\n$1\n')
            .replace(/\n{3,}/g, '\n\n')
            .trim();
        const quoted = inner.split('\n').map((l) => `> ${l.trim()}`.trimEnd());
        return `\n${quoted.join('\n')}\n`;
    });
    // Tables
    md = convertHtmlTables(md);
    // Lists
    md = convertHtmlLists(md);
    // Horizontal rules
    md = md.replace(/<hr\b[^>]*\/?>/gi, '\n---\n');
    // Paragraphs
    md = md.replace(/<p\b[^>]*>([\s\S]*?)<\/p>/gi, '\n$1\n');
    // Line breaks (two trailing spaces form a Markdown hard break)
    md = md.replace(/<br\s*\/?>/gi, '  \n');
    // Now convert inline elements
    // Bold
    md = md.replace(/<(strong|b)\b[^>]*>([\s\S]*?)<\/\1>/gi, '**$2**');
    // Italic
    md = md.replace(/<(em|i)\b[^>]*>([\s\S]*?)<\/\1>/gi, '*$2*');
    // Strikethrough
    md = md.replace(/<(del|s|strike)\b[^>]*>([\s\S]*?)<\/\1>/gi, '~~$2~~');
    // Code
    md = md.replace(/<code\b[^>]*>([\s\S]*?)<\/code>/gi, '`$1`');
    // Links
    md = md.replace(/<a\b[^>]*href="([^"]*)"[^>]*>([\s\S]*?)<\/a>/gi, '[$2]($1)');
    // Images (src before alt, alt before src, then src only)
    md = md.replace(/<img\b[^>]*src="([^"]*)"[^>]*alt="([^"]*)"[^>]*\/?>/gi, '![$2]($1)');
    md = md.replace(/<img\b[^>]*alt="([^"]*)"[^>]*src="([^"]*)"[^>]*\/?>/gi, '![$1]($2)');
    md = md.replace(/<img\b[^>]*src="([^"]*)"[^>]*\/?>/gi, '![]($1)');
    // Remove remaining HTML tags
    md = md.replace(/<[^>]+>/g, '');
    // Decode HTML entities
    md = decodeHtmlEntities(md);
    // Clean up whitespace
    md = md.replace(/\n{3,}/g, '\n\n');
    md = md.trim() + '\n';
    return md;
}
|
|
64
|
+
/**
 * Replaces each `<table>` element with a GFM pipe table.
 *
 * The header row comes from `<thead>` when present, otherwise the first row
 * is used as the header. Body rows are every `<tr>` outside `<thead>`; the
 * header is excluded by removing the `<thead>` markup rather than by
 * comparing row strings, which also dropped body rows identical to the
 * header. All rows are padded to the widest row so no cell is dropped when
 * rendered. Pipes in cell text are escaped and line breaks are collapsed to
 * spaces, since either would break the table row.
 *
 * @param {string} html - HTML that may contain tables.
 * @returns {string} The input with each table replaced by Markdown; a table
 *   without any cells is replaced by an empty string.
 */
function convertHtmlTables(html) {
    const cleanCell = (cell) => cell.replace(/\s*\n\s*/g, ' ').replace(/\|/g, '\\|');
    const formatRow = (cells) => `| ${cells.map(cleanCell).join(' | ')} |\n`;
    return html.replace(/<table\b[^>]*>([\s\S]*?)<\/table>/gi, (_m, tableContent) => {
        const rows = [];
        // Header cells from <thead>: prefer <th>, fall back to <td>.
        const theadMatch = tableContent.match(/<thead\b[^>]*>([\s\S]*?)<\/thead>/i);
        if (theadMatch) {
            let headerCells = extractCells(theadMatch[1], 'th');
            if (headerCells.length === 0) {
                headerCells = extractCells(theadMatch[1], 'td');
            }
            if (headerCells.length > 0) {
                rows.push(headerCells);
            }
        }
        // Body rows: every <tr> outside the header section.
        const bodyContent = theadMatch ? tableContent.replace(theadMatch[0], '') : tableContent;
        const rowMatches = bodyContent.match(/<tr\b[^>]*>[\s\S]*?<\/tr>/gi) || [];
        for (const row of rowMatches) {
            let cells = extractCells(row, 'td');
            if (cells.length === 0) {
                cells = extractCells(row, 'th');
            }
            if (cells.length > 0) {
                rows.push(cells);
            }
        }
        if (rows.length === 0) {
            return '';
        }
        // Pad every row to the widest row's column count.
        const columnCount = Math.max(...rows.map((r) => r.length));
        const padded = rows.map((r) => r.concat(new Array(columnCount - r.length).fill('')));
        let result = '\n';
        result += formatRow(padded[0]);
        result += `| ${padded[0].map(() => '---').join(' | ')} |\n`;
        for (let i = 1; i < padded.length; i++) {
            result += formatRow(padded[i]);
        }
        return result;
    });
}
|
|
110
|
+
/**
 * Extracts the text of every `<tag>…</tag>` cell in a piece of row HTML.
 *
 * The opening-tag pattern requires whitespace, `/` or `>` right after the tag
 * name, so 'th' matches `<th>` and `<th class="x">` but not `<thead>`.
 *
 * @param {string} rowHtml - HTML containing table cells.
 * @param {string} tag - Cell tag name, e.g. 'td' or 'th'. It is interpolated
 *   into a regular expression unescaped, so pass plain tag names only.
 * @returns {string[]} Cell contents with nested tags removed and surrounding
 *   whitespace trimmed, in document order.
 */
function extractCells(rowHtml, tag) {
    const cellRegex = new RegExp(`<${tag}(?=[\\s/>])[^>]*>([\\s\\S]*?)<\\/${tag}\\s*>`, 'gi');
    const cells = [];
    let match;
    while ((match = cellRegex.exec(rowHtml)) !== null) {
        const text = match[1].replace(/<[^>]+>/g, '').trim();
        cells.push(text);
    }
    return cells;
}
|
|
119
|
+
/**
 * Converts `<ul>` / `<ol>` elements to Markdown lists, including nested ones.
 *
 * Lists are rewritten from the inside out: each pass converts only lists that
 * contain no other list, and passes repeat until nothing changes. A plain
 * lazy match of `<ul>…</ul>` pairs the outer opening tag with the inner
 * closing tag and corrupts nested lists. Continuation lines of an item
 * (including an already converted nested list) are indented by the width of
 * the item's marker so they stay inside that item.
 *
 * @param {string} html - HTML that may contain lists.
 * @returns {string} The input with every list replaced by Markdown list text.
 */
function convertHtmlLists(html) {
    // A list whose content holds no further <ul>/<ol> opening tag.
    const innermostList = /<(ul|ol)\b[^>]*>((?:(?!<(?:ul|ol)\b)[\s\S])*?)<\/\1\s*>/gi;
    let out = html;
    let previous;
    // Each replacement removes one pair of list tags, so this terminates.
    do {
        previous = out;
        out = out.replace(innermostList, (_m, tag, content) => {
            const ordered = tag.toLowerCase() === 'ol';
            const rendered = extractListItems(content).map((item, idx) => {
                const marker = ordered ? `${idx + 1}. ` : '- ';
                const pad = ' '.repeat(marker.length);
                const [first, ...rest] = item.split('\n');
                const continuation = rest.map((l) => (l.trim() === '' ? '' : pad + l));
                return [marker + first, ...continuation].join('\n');
            });
            return `\n${rendered.join('\n')}\n`;
        });
    } while (out !== previous);
    return out;
}
/**
 * Returns the trimmed inner content of each `<li>` in a list's HTML.
 *
 * `<p>` wrappers inside an item are unwrapped, with a blank line after each
 * paragraph, so the item text begins on the marker line.
 *
 * @param {string} listHtml - Inner HTML of a single list with no nested list tags.
 * @returns {string[]} One string per list item, in document order.
 */
function extractListItems(listHtml) {
    const items = [];
    const itemRegex = /<li\b[^>]*>([\s\S]*?)<\/li>/gi;
    let match;
    while ((match = itemRegex.exec(listHtml)) !== null) {
        const item = match[1].replace(/<p\b[^>]*>([\s\S]*?)<\/p>/gi, '$1\n\n').trim();
        items.push(item);
    }
    return items;
}
|
|
148
|
+
/**
 * Decodes the common named HTML entities (amp, lt, gt, quot, apos, nbsp) and
 * all decimal / hexadecimal numeric character references.
 *
 * Decoding happens in one pass over the input, so text produced by one
 * replacement is never decoded again: an escaped ampersand followed by "lt;"
 * yields the literal four-character entity text rather than "<". Numeric
 * references are resolved with String.fromCodePoint so characters above
 * U+FFFF (e.g. emoji) decode correctly. Unknown names and out-of-range code
 * points are left untouched.
 *
 * @param {string} text - Text possibly containing HTML entities.
 * @returns {string} The decoded text.
 */
function decodeHtmlEntities(text) {
    const namedEntities = new Map([
        ['amp', '&'],
        ['lt', '<'],
        ['gt', '>'],
        ['quot', '"'],
        ['apos', "'"],
        ['nbsp', ' '],
    ]);
    return text.replace(/&(#x[0-9a-f]+|#\d+|[a-z]+);/gi, (entity, body) => {
        if (body[0] !== '#') {
            return namedEntities.has(body) ? namedEntities.get(body) : entity;
        }
        const isHex = body[1] === 'x' || body[1] === 'X';
        const codePoint = Number.parseInt(body.slice(isHex ? 2 : 1), isHex ? 16 : 10);
        // String.fromCodePoint throws above U+10FFFF; keep such references as written.
        return codePoint <= 0x10ffff ? String.fromCodePoint(codePoint) : entity;
    });
}
|