@xjtlumedia/markdown-mcp-server 2.1.2 → 2.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,4 +1,4 @@
1
- import { Paragraph, TextRun, HeadingLevel, Table, TableRow, TableCell, WidthType, BorderStyle, AlignmentType } from 'docx';
1
+ import { Paragraph, TextRun, HeadingLevel, Table, TableRow, TableCell, WidthType, BorderStyle, AlignmentType, ExternalHyperlink, FootnoteReferenceRun } from 'docx';
2
2
  import * as XLSX from 'xlsx';
3
3
  // ... (previous code)
4
4
  /**
@@ -92,13 +92,16 @@ export function stripMarkdown(text) {
92
92
  // 4. Block Level: Blockquotes & ATX Headings
93
93
  clean = clean.replace(/^[\s\t]*>+\s?/gm, '');
94
94
  clean = clean.replace(/^#{1,6}\s+/gm, '');
95
- // 5. Inline: Multi-pass Emphasis (Bold, Italic, Strikethrough)
95
+ // 5. Inline: Multi-pass Emphasis (Bold, Italic, Strikethrough, Highlight)
96
96
  for (let i = 0; i < 3; i++) {
97
97
  clean = clean.replace(/[*_]{3}([^*_]+)[*_]{3}/g, '$1');
98
98
  clean = clean.replace(/[*_]{2}([^*_]+)[*_]{2}/g, '$1');
99
99
  clean = clean.replace(/[*_]{1}([^*_]+)[*_]{1}/g, '$1');
100
100
  clean = clean.replace(/~~([^~]+)~~/g, '$1');
101
+ clean = clean.replace(/==([^=]+)==/g, '$1');
101
102
  }
103
+ // 5b. Block Level: Footnote definitions
104
+ clean = clean.replace(/^\[\^[^\]]+\]:\s+.*$/gm, '');
102
105
  // 6. Inline: Math, Links, Images, and Extended Syntax
103
106
  clean = clean.replace(/\$\$(.*?)\$\$/gs, '$1');
104
107
  clean = clean.replace(/\$(.*?)\$/g, '$1');
@@ -145,31 +148,254 @@ export function parseMarkdownTable(tableText) {
145
148
  export function cleanMarkdownText(text) {
146
149
  return stripMarkdown(text);
147
150
  }
151
/**
 * Escape LaTeX special characters in plain text (not in commands).
 *
 * All substitutions happen in a single pass so that the braces emitted by one
 * substitution (e.g. `\textbackslash{}`) are never re-escaped by another.
 *
 * @param {string} text - Plain text that must appear literally in LaTeX output.
 * @returns {string} The text with `\ & % $ # _ { } ~ ^` made safe for LaTeX.
 */
function escapeLatex(text) {
    // Characters whose escape is a named command rather than a simple backslash prefix.
    const namedEscapes = {
        '\\': '\\textbackslash{}',
        '~': '\\textasciitilde{}',
        '^': '\\textasciicircum{}',
    };
    return text.replace(/[\\&%$#_{}~^]/g, (ch) => namedEscapes[ch] || `\\${ch}`);
}
161
/**
 * Collect footnote definitions (`[^label]: text`) from markdown content.
 *
 * A definition must sit on a single line: the colon has to be followed by
 * spaces/tabs and then the text on that same line. When a label is defined
 * more than once, the last definition wins.
 *
 * @param {string} content - Raw markdown.
 * @returns {{footnoteMap: Object<string, string>, cleaned: string}}
 *   `footnoteMap` maps label → text; `cleaned` is the content with each
 *   definition line emptied (the line break itself is kept).
 */
function collectFootnoteDefinitions(content) {
    // Prototype-less map so labels such as "constructor" or "toString" never
    // appear to be defined through inherited Object.prototype members.
    const footnoteMap = Object.create(null);
    // [ \t]+ (not \s+) keeps the match on one line, so an empty definition
    // cannot swallow the following line as its text.
    const cleaned = content.replace(/^\[\^([^\]]+)\]:[ \t]+(.+)$/gm, (_match, label, text) => {
        footnoteMap[label] = text;
        return '';
    });
    return { footnoteMap, cleaned };
}
173
/**
 * Convert inline markdown to LaTeX (with optional footnote resolution).
 *
 * Spans whose content must not be touched by later passes (inline code, math,
 * image commands, link URLs) are swapped for opaque tokens first and restored
 * at the very end. The tokens use control characters, so neither the emphasis
 * rules nor the final `& % #` escaping can corrupt them.
 *
 * @param {string} text - One line / cell of markdown.
 * @param {Object<string, string>} [footnoteMap={}] - Footnote label → text.
 *   A reference with a known label becomes `\footnote{text}`; an unknown
 *   label is rendered as a superscript of the label itself.
 * @returns {string} LaTeX source for the inline content.
 */
function convertInlineLatex(text, footnoteMap = {}) {
    const shielded = [];
    // Distinct open/close delimiters so "token, literal digit, token" can never
    // be mistaken for a token of its own.
    const tokenFor = (idx) => `\u0001${idx}\u0002`;
    const shield = (latex) => {
        shielded.push(latex);
        return tokenFor(shielded.length - 1);
    };
    // Same escaping the final pass applies to ordinary text.
    const escapeUrl = (url) => url.replace(/(?<!\\)([&%#])/g, '\\$1');
    let out = text;
    // Inline code first: code spans take precedence over every other construct.
    out = out.replace(/`([^`]+)`/g, (_m, code) => shield(`\\texttt{${escapeLatex(code)}}`));
    // Math (display before inline) is kept verbatim.
    out = out.replace(/\$\$(.*?)\$\$/gs, (_m, math) => shield(`\\[${math}\\]`));
    out = out.replace(/\$(.*?)\$/g, (_m, math) => shield(`$${math}$`));
    // Images must be handled before links, otherwise the link rule consumes
    // the "[alt](url)" part and leaves a stray "!".
    out = out.replace(/!\[([^\]]*)\]\(([^)]+)\)/g, (_m, _alt, url) => shield(`\\includegraphics[width=\\linewidth]{${escapeUrl(url)}}`));
    // Links: only the URL is shielded so "_" or "*" inside it is not read as
    // emphasis; the link text still receives inline formatting below.
    out = out.replace(/\[([^\]]+)\]\(([^)]+)\)/g, (_m, label, url) => `\\href{${shield(escapeUrl(url))}}{${label}}`);
    // Bold+Italic
    out = out.replace(/\*\*\*([^*]+)\*\*\*/g, '\\textbf{\\textit{$1}}');
    out = out.replace(/___([^_]+)___/g, '\\textbf{\\textit{$1}}');
    // Bold
    out = out.replace(/\*\*([^*]+)\*\*/g, '\\textbf{$1}');
    out = out.replace(/__([^_]+)__/g, '\\textbf{$1}');
    // Italic
    out = out.replace(/\*([^*]+)\*/g, '\\textit{$1}');
    out = out.replace(/_([^_]+)_/g, '\\textit{$1}');
    // Strikethrough
    out = out.replace(/~~([^~]+)~~/g, '\\sout{$1}');
    // Highlight
    out = out.replace(/==([^=]+)==/g, '\\hl{$1}');
    // Raw HTML inline tags
    out = out.replace(/<sup>([^<]+)<\/sup>/gi, '\\textsuperscript{$1}');
    out = out.replace(/<sub>([^<]+)<\/sub>/gi, '\\textsubscript{$1}');
    out = out.replace(/<mark>([^<]+)<\/mark>/gi, '\\hl{$1}');
    out = out.replace(/<kbd>([^<]+)<\/kbd>/gi, '\\texttt{$1}');
    // Footnote references: [^label] → \footnote{text} if a definition exists,
    // else a superscript label. Own-property check avoids inherited members.
    out = out.replace(/\[\^([^\]]+)\]/g, (_m, label) => {
        const fnText = Object.prototype.hasOwnProperty.call(footnoteMap, label) ? footnoteMap[label] : '';
        return fnText ? `\\footnote{${fnText}}` : `\\textsuperscript{${label}}`;
    });
    // Escape remaining special chars that aren't already escaped
    out = out.replace(/(?<!\\)&/g, '\\&');
    out = out.replace(/(?<!\\)%/g, '\\%');
    out = out.replace(/(?<!\\)#/g, '\\#');
    // Restore newest-first: a later span may contain tokens of earlier ones.
    // split/join is used so "$" in math is never read as a replacement pattern.
    for (let idx = shielded.length - 1; idx >= 0; idx--) {
        out = out.split(tokenFor(idx)).join(shielded[idx]);
    }
    return out;
}
227
/**
 * Convert a markdown pipe table to a LaTeX `tabular` environment.
 *
 * Every column is left-aligned and every row is followed by `\hline`.
 * Delimiter rows (`|---|:-:|`) and blank lines are dropped; short rows are
 * padded with empty cells.
 *
 * @param {string[]} tableLines - Raw lines of the table, one per row.
 * @param {Object<string, string>} [footnoteMap={}] - Footnote label → text,
 *   forwarded to the inline converter for each cell.
 * @returns {string} The `tabular` source, or '' when there are no content rows.
 */
function convertTableToLatex(tableLines, footnoteMap = {}) {
    // A delimiter row needs at least one dash in EVERY cell; a data row whose
    // first cell is merely blank must not be mistaken for it.
    const isDelimiterRow = (line) => /^\|?\s*:?-+:?\s*(\|\s*:?-+:?\s*)*\|?$/.test(line.trim());
    // Strip the outer pipes only when present so a row without a trailing
    // pipe keeps its last cell.
    const splitCells = (line) => line.trim().replace(/^\|/, '').replace(/\|$/, '').split('|').map((cell) => cell.trim());
    const rows = tableLines
        .filter((line) => line.trim() !== '' && !isDelimiterRow(line))
        .map(splitCells);
    if (rows.length === 0) {
        return '';
    }
    // Size the table by its widest row: a row with more cells than the column
    // spec allows would make LaTeX fail with "Extra alignment tab".
    const cols = Math.max(...rows.map((row) => row.length));
    const colSpec = Array.from({ length: cols }, () => 'l').join(' | ');
    let latex = `\\begin{tabular}{| ${colSpec} |}\n\\hline\n`;
    for (const row of rows) {
        const cells = row.map((cell) => convertInlineLatex(cell, footnoteMap));
        while (cells.length < cols) {
            cells.push('');
        }
        latex += `${cells.join(' & ')} \\\\\n\\hline\n`;
    }
    latex += '\\end{tabular}';
    return latex;
}
148
247
  /**
149
248
  * Parse markdown content to LaTeX
150
249
  */
151
250
  export function parseMarkdownToLaTeX(content) {
152
- let processed = content
153
- .replace(/^# (.*)$/gm, '\\section{$1}')
154
- .replace(/^## (.*)$/gm, '\\subsection{$1}')
155
- .replace(/^### (.*)$/gm, '\\subsubsection{$1}')
156
- .replace(/\*\*(.*)\*\*/g, '\\textbf{$1}')
157
- .replace(/\*(.*)\*/g, '\\textit{$1}')
158
- .replace(/\$\$(.*?)\$\$/gs, '\\begin{equation}\n$1\n\\end{equation}')
159
- .replace(/\$(.*?)\$/g, '$ $1 $')
160
- .replace(/^-\s(.*)$/gm, '\\begin{itemize}\n\\item $1\n\\end{itemize}')
161
- .replace(/\\end{itemize}\n\\begin{itemize}/g, '');
162
- // Escape LaTeX special chars but try not to break our commands
163
- processed = processed.replace(/([_%$&~^\\{}])/g, (m) => m === '\\' ? m : `\\${m}`);
164
- // Final pass to remove any markdown-only artifacts (hashes, backticks, pipe)
165
- return processed.replace(/[*#`|]/g, '');
251
+ // Pre-process: collect footnote definitions and strip them
252
+ const { footnoteMap, cleaned } = collectFootnoteDefinitions(content);
253
+ const lines = cleaned.split('\n');
254
+ const result = [];
255
+ let i = 0;
256
+ let inCodeBlock = false;
257
+ let codeBlockContent = [];
258
+ let inItemize = false;
259
+ let inEnumerate = false;
260
+ while (i < lines.length) {
261
+ const line = lines[i];
262
+ const trimmed = line.trim();
263
+ // Code blocks
264
+ if (trimmed.startsWith('```')) {
265
+ if (inCodeBlock) {
266
+ result.push(`\\begin{verbatim}`);
267
+ result.push(codeBlockContent.join('\n'));
268
+ result.push(`\\end{verbatim}`);
269
+ codeBlockContent = [];
270
+ inCodeBlock = false;
271
+ }
272
+ else {
273
+ inCodeBlock = true;
274
+ }
275
+ i++;
276
+ continue;
277
+ }
278
+ if (inCodeBlock) {
279
+ codeBlockContent.push(line);
280
+ i++;
281
+ continue;
282
+ }
283
+ // Blank line — close open list environments
284
+ if (!trimmed) {
285
+ if (inItemize) {
286
+ result.push('\\end{itemize}');
287
+ inItemize = false;
288
+ }
289
+ if (inEnumerate) {
290
+ result.push('\\end{enumerate}');
291
+ inEnumerate = false;
292
+ }
293
+ result.push('');
294
+ i++;
295
+ continue;
296
+ }
297
+ // Horizontal rules
298
+ if (/^(\*\*\*|---|__{3,})\s*$/.test(trimmed)) {
299
+ result.push('\\begin{center}\\rule{0.5\\linewidth}{0.5pt}\\end{center}');
300
+ i++;
301
+ continue;
302
+ }
303
+ // Headings
304
+ if (trimmed.startsWith('###### ')) {
305
+ result.push(`\\textbf{${convertInlineLatex(trimmed.slice(7), footnoteMap)}}\\\\`);
306
+ }
307
+ else if (trimmed.startsWith('##### ')) {
308
+ result.push(`\\subparagraph{${convertInlineLatex(trimmed.slice(6), footnoteMap)}}`);
309
+ }
310
+ else if (trimmed.startsWith('#### ')) {
311
+ result.push(`\\paragraph{${convertInlineLatex(trimmed.slice(5), footnoteMap)}}`);
312
+ }
313
+ else if (trimmed.startsWith('### ')) {
314
+ result.push(`\\subsubsection{${convertInlineLatex(trimmed.slice(4), footnoteMap)}}`);
315
+ }
316
+ else if (trimmed.startsWith('## ')) {
317
+ result.push(`\\subsection{${convertInlineLatex(trimmed.slice(3), footnoteMap)}}`);
318
+ }
319
+ else if (trimmed.startsWith('# ')) {
320
+ result.push(`\\section{${convertInlineLatex(trimmed.slice(2), footnoteMap)}}`);
321
+ }
322
+ // Blockquotes
323
+ else if (trimmed.startsWith('>')) {
324
+ const text = trimmed.replace(/^>+\s*/, '');
325
+ result.push(`\\begin{quote}`);
326
+ result.push(convertInlineLatex(text, footnoteMap));
327
+ result.push(`\\end{quote}`);
328
+ }
329
+ // Task lists
330
+ else if (/^\s*[-*+]\s+\[[ xX]\]\s+/.test(line)) {
331
+ if (!inItemize) {
332
+ result.push('\\begin{itemize}');
333
+ inItemize = true;
334
+ }
335
+ const checked = /\[x\]/i.test(line);
336
+ const text = line.replace(/^\s*[-*+]\s+\[[ xX]\]\s+/, '');
337
+ result.push(`\\item[${checked ? '$\\boxtimes$' : '$\\square$'}] ${convertInlineLatex(text, footnoteMap)}`);
338
+ }
339
+ // Unordered lists
340
+ else if (/^\s*[-*+]\s+/.test(line)) {
341
+ if (!inItemize) {
342
+ result.push('\\begin{itemize}');
343
+ inItemize = true;
344
+ }
345
+ const text = line.replace(/^\s*[-*+]\s+/, '');
346
+ result.push(`\\item ${convertInlineLatex(text, footnoteMap)}`);
347
+ }
348
+ // Ordered lists
349
+ else if (/^\s*\d+\.\s+/.test(line)) {
350
+ if (!inEnumerate) {
351
+ result.push('\\begin{enumerate}');
352
+ inEnumerate = true;
353
+ }
354
+ const text = line.replace(/^\s*\d+\.\s+/, '');
355
+ result.push(`\\item ${convertInlineLatex(text, footnoteMap)}`);
356
+ }
357
+ // Tables
358
+ else if (trimmed.startsWith('|') && trimmed.endsWith('|')) {
359
+ const tableLines = [];
360
+ while (i < lines.length && lines[i].trim().startsWith('|')) {
361
+ tableLines.push(lines[i]);
362
+ i++;
363
+ }
364
+ result.push(convertTableToLatex(tableLines, footnoteMap));
365
+ continue;
366
+ }
367
+ // Regular paragraph
368
+ else {
369
+ if (inItemize) {
370
+ result.push('\\end{itemize}');
371
+ inItemize = false;
372
+ }
373
+ if (inEnumerate) {
374
+ result.push('\\end{enumerate}');
375
+ inEnumerate = false;
376
+ }
377
+ result.push(convertInlineLatex(trimmed, footnoteMap));
378
+ }
379
+ i++;
380
+ }
381
+ // Close any open environments
382
+ if (inItemize)
383
+ result.push('\\end{itemize}');
384
+ if (inEnumerate)
385
+ result.push('\\end{enumerate}');
386
+ if (inCodeBlock) {
387
+ result.push('\\begin{verbatim}');
388
+ result.push(codeBlockContent.join('\n'));
389
+ result.push('\\end{verbatim}');
390
+ }
391
+ return result.join('\n');
166
392
  }
167
393
  /**
168
394
  * Parse text with inline formatting to TextRuns
169
395
  */
170
- export function parseInlineFormatting(text) {
396
+ export function parseInlineFormatting(text, footnoteIdMap) {
171
397
  const runs = [];
172
- const regex = /(\$\$.*?\$\$|\$.*?\$|\*\*\*[^*]+\*\*\*|\*\*[^*]+\*\*|\*[^*]+\*|___[^_]+___|__[^_]+__|_[^_]+_|`[^`]+`|<br\s*\/?>)/g;
398
+ const regex = /(\$\$.*?\$\$|\$.*?\$|\*\*\*[^*]+\*\*\*|\*\*[^*]+\*\*|\*[^*]+\*|___[^_]+___|__[^_]+__|_[^_]+_|~~[^~]+~~|==[^=]+=+|`[^`]+`|!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|\[\^[^\]]+\]|<sup>[^<]+<\/sup>|<sub>[^<]+<\/sub>|<mark>[^<]+<\/mark>|<kbd>[^<]+<\/kbd>|<br\s*\/?>)/gi;
173
399
  const parts = text.split(regex);
174
400
  for (const part of parts) {
175
401
  if (!part)
@@ -192,6 +418,12 @@ export function parseInlineFormatting(text) {
192
418
  else if (part.startsWith('__') && part.endsWith('__')) {
193
419
  runs.push(new TextRun({ text: part.slice(2, -2), bold: true }));
194
420
  }
421
+ else if (part.startsWith('~~') && part.endsWith('~~')) {
422
+ runs.push(new TextRun({ text: part.slice(2, -2), strike: true }));
423
+ }
424
+ else if (part.startsWith('==') && part.endsWith('==')) {
425
+ runs.push(new TextRun({ text: part.slice(2, -2), highlight: 'yellow' }));
426
+ }
195
427
  else if (part.startsWith('*') && part.endsWith('*')) {
196
428
  runs.push(new TextRun({ text: part.slice(1, -1), italics: true }));
197
429
  }
@@ -201,6 +433,46 @@ export function parseInlineFormatting(text) {
201
433
  else if (part.startsWith('`') && part.endsWith('`')) {
202
434
  runs.push(new TextRun({ text: part.slice(1, -1), font: 'Consolas', shading: { fill: 'F0F0F0' } }));
203
435
  }
436
+ else if (/^<sup>/i.test(part)) {
437
+ const m = part.match(/<sup>([^<]+)<\/sup>/i);
438
+ if (m)
439
+ runs.push(new TextRun({ text: m[1], superScript: true }));
440
+ }
441
+ else if (/^<sub>/i.test(part)) {
442
+ const m = part.match(/<sub>([^<]+)<\/sub>/i);
443
+ if (m)
444
+ runs.push(new TextRun({ text: m[1], subScript: true }));
445
+ }
446
+ else if (/^<mark>/i.test(part)) {
447
+ const m = part.match(/<mark>([^<]+)<\/mark>/i);
448
+ if (m)
449
+ runs.push(new TextRun({ text: m[1], highlight: 'yellow' }));
450
+ }
451
+ else if (/^<kbd>/i.test(part)) {
452
+ const m = part.match(/<kbd>([^<]+)<\/kbd>/i);
453
+ if (m)
454
+ runs.push(new TextRun({ text: m[1], font: 'Consolas', shading: { fill: 'E5E7EB' } }));
455
+ }
456
+ else if (/^\[\^/.test(part)) {
457
+ const m = part.match(/^\[\^([^\]]+)\]$/);
458
+ if (m && footnoteIdMap && footnoteIdMap[m[1]] !== undefined) {
459
+ runs.push(new FootnoteReferenceRun(footnoteIdMap[m[1]]));
460
+ }
461
+ else if (m) {
462
+ runs.push(new TextRun({ text: m[1], superScript: true }));
463
+ }
464
+ }
465
+ else if (/^!\[/.test(part)) {
466
+ const m = part.match(/^!\[([^\]]*)\]\(([^)]+)\)/);
467
+ if (m)
468
+ runs.push(new TextRun({ text: m[1] ? `[Image: ${m[1]}]` : '[Image]', italics: true, color: '6B7280' }));
469
+ }
470
+ else if (/^\[/.test(part)) {
471
+ const m = part.match(/^\[([^\]]+)\]\(([^)]+)\)/);
472
+ if (m) {
473
+ runs.push(new ExternalHyperlink({ children: [new TextRun({ text: m[1], color: '2563EB', underline: { type: 'single' } })], link: m[2] }));
474
+ }
475
+ }
204
476
  else if (part.match(/<br\s*\/?>/i)) {
205
477
  runs.push(new TextRun({ text: '', break: 1 }));
206
478
  }
@@ -213,7 +485,7 @@ export function parseInlineFormatting(text) {
213
485
  /**
214
486
  * Create a Word table from parsed markdown table data
215
487
  */
216
- export function createDocxTable(headers, rows) {
488
+ export function createDocxTable(headers, rows, footnoteIdMap) {
217
489
  const allRows = [];
218
490
  if (headers.length > 0) {
219
491
  allRows.push(new TableRow({
@@ -225,7 +497,7 @@ export function createDocxTable(headers, rows) {
225
497
  }
226
498
  for (const row of rows) {
227
499
  allRows.push(new TableRow({
228
- children: row.map(cell => new TableCell({ children: [new Paragraph({ children: parseInlineFormatting(cell) })] }))
500
+ children: row.map(cell => new TableCell({ children: [new Paragraph({ children: parseInlineFormatting(cell, footnoteIdMap) })] }))
229
501
  }));
230
502
  }
231
503
  return new Table({
@@ -242,11 +514,24 @@ export function createDocxTable(headers, rows) {
242
514
  });
243
515
  }
244
516
  /**
245
- * Parse markdown content to docx elements
517
+ * Parse markdown content to docx elements with footnote support
246
518
  */
247
519
  export function parseMarkdownToDocx(content) {
520
+ // Pre-process: collect footnote definitions
521
+ const { footnoteMap, cleaned } = collectFootnoteDefinitions(content);
522
+ // Assign numeric IDs to footnotes (docx requires numeric IDs starting from 1)
523
+ const footnoteIdMap = {};
524
+ const docxFootnotes = {};
525
+ let fnId = 1;
526
+ for (const label of Object.keys(footnoteMap)) {
527
+ footnoteIdMap[label] = fnId;
528
+ docxFootnotes[fnId] = {
529
+ children: [new Paragraph({ children: [new TextRun({ text: footnoteMap[label] })] })]
530
+ };
531
+ fnId++;
532
+ }
248
533
  const elements = [];
249
- const lines = content.split('\n');
534
+ const lines = cleaned.split('\n');
250
535
  let i = 0;
251
536
  let inCodeBlock = false;
252
537
  let codeBlockContent = [];
@@ -278,12 +563,12 @@ export function parseMarkdownToDocx(content) {
278
563
  if (i + 1 < lines.length) {
279
564
  const nextLine = lines[i + 1].trim();
280
565
  if (/^={3,}\s*$/.test(nextLine)) {
281
- elements.push(new Paragraph({ heading: HeadingLevel.HEADING_1, children: parseInlineFormatting(trimmed), spacing: { before: 400, after: 200 } }));
566
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_1, children: parseInlineFormatting(trimmed, footnoteIdMap), spacing: { before: 400, after: 200 } }));
282
567
  i += 2;
283
568
  continue;
284
569
  }
285
570
  else if (/^-{3,}\s*$/.test(nextLine)) {
286
- elements.push(new Paragraph({ heading: HeadingLevel.HEADING_2, children: parseInlineFormatting(trimmed), spacing: { before: 300, after: 150 } }));
571
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_2, children: parseInlineFormatting(trimmed, footnoteIdMap), spacing: { before: 300, after: 150 } }));
287
572
  i += 2;
288
573
  continue;
289
574
  }
@@ -297,7 +582,7 @@ export function parseMarkdownToDocx(content) {
297
582
  if (tableLines.length >= 2) {
298
583
  const { headers, rows } = parseMarkdownTable(tableLines.join('\n'));
299
584
  if (headers.length > 0) {
300
- elements.push(createDocxTable(headers, rows));
585
+ elements.push(createDocxTable(headers, rows, footnoteIdMap));
301
586
  elements.push(new Paragraph({ spacing: { after: 200 } }));
302
587
  }
303
588
  }
@@ -308,17 +593,42 @@ export function parseMarkdownToDocx(content) {
308
593
  i++;
309
594
  continue;
310
595
  }
596
+ // Skip blank lines from footnote def removal
597
+ if (/^\[\^[^\]]+\]:\s+/.test(trimmed)) {
598
+ i++;
599
+ continue;
600
+ }
311
601
  if (trimmed.startsWith('# ')) {
312
- elements.push(new Paragraph({ heading: HeadingLevel.HEADING_1, children: parseInlineFormatting(trimmed.slice(2)), spacing: { before: 400, after: 200 } }));
602
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_1, children: parseInlineFormatting(trimmed.slice(2), footnoteIdMap), spacing: { before: 400, after: 200 } }));
313
603
  }
314
604
  else if (trimmed.startsWith('## ')) {
315
- elements.push(new Paragraph({ heading: HeadingLevel.HEADING_2, children: parseInlineFormatting(trimmed.slice(3)), spacing: { before: 300, after: 150 } }));
605
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_2, children: parseInlineFormatting(trimmed.slice(3), footnoteIdMap), spacing: { before: 300, after: 150 } }));
316
606
  }
317
607
  else if (trimmed.startsWith('### ')) {
318
- elements.push(new Paragraph({ heading: HeadingLevel.HEADING_3, children: parseInlineFormatting(trimmed.slice(4)), spacing: { before: 250, after: 100 } }));
608
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_3, children: parseInlineFormatting(trimmed.slice(4), footnoteIdMap), spacing: { before: 250, after: 100 } }));
319
609
  }
320
610
  else if (trimmed.startsWith('#### ')) {
321
- elements.push(new Paragraph({ heading: HeadingLevel.HEADING_4, children: parseInlineFormatting(trimmed.slice(5)), spacing: { before: 200, after: 100 } }));
611
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_4, children: parseInlineFormatting(trimmed.slice(5), footnoteIdMap), spacing: { before: 200, after: 100 } }));
612
+ }
613
+ else if (trimmed.startsWith('##### ')) {
614
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_5, children: parseInlineFormatting(trimmed.slice(6), footnoteIdMap), spacing: { before: 200, after: 80 } }));
615
+ }
616
+ else if (trimmed.startsWith('###### ')) {
617
+ elements.push(new Paragraph({ heading: HeadingLevel.HEADING_6, children: parseInlineFormatting(trimmed.slice(7), footnoteIdMap), spacing: { before: 150, after: 80 } }));
618
+ }
619
+ else if (/^\s*[-*+]\s+\[[ xX]\]\s+/.test(line)) {
620
+ const checked = /\[[xX]\]/.test(line);
621
+ const text = line.replace(/^\s*[-*+]\s+\[[ xX]\]\s+/, '');
622
+ const match = line.match(/^(\s*)/);
623
+ const indent = match ? Math.floor(match[1].length / 4) : 0;
624
+ elements.push(new Paragraph({
625
+ indent: { left: (indent + 1) * 360 },
626
+ children: [
627
+ new TextRun({ text: checked ? '☑ ' : '☐ ', font: 'Segoe UI Symbol' }),
628
+ ...parseInlineFormatting(text, footnoteIdMap)
629
+ ],
630
+ spacing: { after: 80 }
631
+ }));
322
632
  }
323
633
  else if (trimmed.startsWith('>')) {
324
634
  const level = (trimmed.match(/^>+/g) || ['>'])[0].length;
@@ -329,23 +639,23 @@ export function parseMarkdownToDocx(content) {
329
639
  const match = line.match(/^(\s*)([-*+]\s+)/);
330
640
  const indent = match ? Math.floor(match[1].length / 4) : 0;
331
641
  const text = line.replace(/^\s*[-*+]\s+/, '');
332
- elements.push(new Paragraph({ bullet: { level: indent }, children: parseInlineFormatting(text), spacing: { after: 80 } }));
642
+ elements.push(new Paragraph({ bullet: { level: indent }, children: parseInlineFormatting(text, footnoteIdMap), spacing: { after: 80 } }));
333
643
  }
334
644
  else if (/^(\s*)\d+\.\s+/.test(line)) {
335
645
  const match = line.match(/^(\s*)(\d+\.\s+)/);
336
646
  const indent = match ? Math.floor(match[1].length / 4) : 0;
337
647
  const text = line.replace(/^\s*\d+\.\s+/, '');
338
- elements.push(new Paragraph({ numbering: { reference: 'default-numbering', level: indent }, children: parseInlineFormatting(text), spacing: { after: 80 } }));
648
+ elements.push(new Paragraph({ numbering: { reference: 'default-numbering', level: indent }, children: parseInlineFormatting(text, footnoteIdMap), spacing: { after: 80 } }));
339
649
  }
340
650
  else {
341
- const paragraphChildren = parseInlineFormatting(trimmed);
651
+ const paragraphChildren = parseInlineFormatting(trimmed, footnoteIdMap);
342
652
  if (line.endsWith(' '))
343
653
  paragraphChildren.push(new TextRun({ text: '', break: 1 }));
344
654
  elements.push(new Paragraph({ children: paragraphChildren, spacing: { after: 150 } }));
345
655
  }
346
656
  i++;
347
657
  }
348
- return elements;
658
+ return { elements, footnotes: docxFootnotes };
349
659
  }
350
660
  /**
351
661
  * RTF Helper: Encode string with Unicode support and RTF escaping
@@ -369,8 +679,8 @@ export function encodeRTFText(str) {
369
679
  /**
370
680
  * RTF Helper: Parse inline markdown to RTF codes
371
681
  */
372
- export function parseInlineToRTF(text) {
373
- const regex = /(\$\$.*?\$\$|\$.*?\$|\*\*\*[^*]+\*\*\*|\*\*[^*]+\*\*|\*[^*]+\*|___[^_]+___|__[^_]+__|_[^_]+_|`[^`]+`|<br\s*\/?>)/g;
682
+ export function parseInlineToRTF(text, footnoteMap) {
683
+ const regex = /(\$\$.*?\$\$|\$.*?\$|\*\*\*[^*]+\*\*\*|\*\*[^*]+\*\*|\*[^*]+\*|___[^_]+___|__[^_]+__|_[^_]+_|~~[^~]+~~|==[^=]+=+|`[^`]+`|!\[[^\]]*\]\([^)]+\)|\[[^\]]+\]\([^)]+\)|\[\^[^\]]+\]|<sup>[^<]+<\/sup>|<sub>[^<]+<\/sub>|<mark>[^<]+<\/mark>|<kbd>[^<]+<\/kbd>|<br\s*\/?>)/gi;
374
684
  const parts = text.split(regex);
375
685
  let result = "";
376
686
  for (const part of parts) {
@@ -394,6 +704,12 @@ export function parseInlineToRTF(text) {
394
704
  else if (part.startsWith('__') && part.endsWith('__')) {
395
705
  result += `{\\b ${encodeRTFText(part.slice(2, -2))}}`;
396
706
  }
707
+ else if (part.startsWith('~~') && part.endsWith('~~')) {
708
+ result += `{\\strike ${encodeRTFText(part.slice(2, -2))}}`;
709
+ }
710
+ else if (part.startsWith('==') && part.endsWith('==')) {
711
+ result += `{\\highlight7 ${encodeRTFText(part.slice(2, -2))}}`;
712
+ }
397
713
  else if (part.startsWith('*') && part.endsWith('*')) {
398
714
  result += `{\\i ${encodeRTFText(part.slice(1, -1))}}`;
399
715
  }
@@ -403,6 +719,45 @@ export function parseInlineToRTF(text) {
403
719
  else if (part.startsWith('`') && part.endsWith('`')) {
404
720
  result += `{\\f1\\highlight3 ${encodeRTFText(part.slice(1, -1))}}`;
405
721
  }
722
+ else if (/^<sup>/i.test(part)) {
723
+ const m = part.match(/<sup>([^<]+)<\/sup>/i);
724
+ if (m)
725
+ result += `{\\super ${encodeRTFText(m[1])}}`;
726
+ }
727
+ else if (/^<sub>/i.test(part)) {
728
+ const m = part.match(/<sub>([^<]+)<\/sub>/i);
729
+ if (m)
730
+ result += `{\\sub ${encodeRTFText(m[1])}}`;
731
+ }
732
+ else if (/^<mark>/i.test(part)) {
733
+ const m = part.match(/<mark>([^<]+)<\/mark>/i);
734
+ if (m)
735
+ result += `{\\highlight7 ${encodeRTFText(m[1])}}`;
736
+ }
737
+ else if (/^<kbd>/i.test(part)) {
738
+ const m = part.match(/<kbd>([^<]+)<\/kbd>/i);
739
+ if (m)
740
+ result += `{\\f1\\highlight3 ${encodeRTFText(m[1])}}`;
741
+ }
742
+ else if (/^\[\^/.test(part)) {
743
+ const m = part.match(/^\[\^([^\]]+)\]$/);
744
+ if (m && footnoteMap && footnoteMap[m[1]]) {
745
+ result += `{\\super ${encodeRTFText(m[1])}}{\\*\\footnote\\pard\\plain\\s99\\f0\\fs20 {\\super ${encodeRTFText(m[1])}} ${encodeRTFText(footnoteMap[m[1]])}}`;
746
+ }
747
+ else if (m) {
748
+ result += `{\\super ${encodeRTFText(m[1])}}`;
749
+ }
750
+ }
751
+ else if (/^!\[/.test(part)) {
752
+ const m = part.match(/^!\[([^\]]*)\]\(([^)]+)\)/);
753
+ if (m)
754
+ result += `{\\i\\cf2 [Image: ${encodeRTFText(m[1] || 'image')}]}`;
755
+ }
756
+ else if (/^\[/.test(part)) {
757
+ const m = part.match(/^\[([^\]]+)\]\(([^)]+)\)/);
758
+ if (m)
759
+ result += `{\\field{\\*\\fldinst HYPERLINK "${m[2]}"}{\\fldrslt\\ul\\cf1 ${encodeRTFText(m[1])}}}`;
760
+ }
406
761
  else if (part.match(/<br\s*\/?>/i)) {
407
762
  result += "\\line ";
408
763
  }
@@ -416,7 +771,9 @@ export function parseInlineToRTF(text) {
416
771
  * RTF Helper: Main parser for Markdown to RTF conversion
417
772
  */
418
773
  export function parseMarkdownToRTF(content) {
419
- const lines = content.split('\n');
774
+ // Pre-process: collect footnote definitions
775
+ const { footnoteMap, cleaned } = collectFootnoteDefinitions(content);
776
+ const lines = cleaned.split('\n');
420
777
  let rtf = "";
421
778
  let i = 0;
422
779
  let inCodeBlock = false;
@@ -452,12 +809,12 @@ export function parseMarkdownToRTF(content) {
452
809
  if (i + 1 < lines.length) {
453
810
  const nextLine = lines[i + 1].trim();
454
811
  if (/^={3,}\s*$/.test(nextLine)) {
455
- rtf += "{\\pard\\b\\fs40\\sb400\\sa200 " + parseInlineToRTF(trimmed) + "\\par}\n";
812
+ rtf += "{\\pard\\b\\fs40\\sb400\\sa200 " + parseInlineToRTF(trimmed, footnoteMap) + "\\par}\n";
456
813
  i += 2;
457
814
  continue;
458
815
  }
459
816
  else if (/^-{3,}\s*$/.test(nextLine)) {
460
- rtf += "{\\pard\\b\\fs32\\sb300\\sa150 " + parseInlineToRTF(trimmed) + "\\par}\n";
817
+ rtf += "{\\pard\\b\\fs32\\sb300\\sa150 " + parseInlineToRTF(trimmed, footnoteMap) + "\\par}\n";
461
818
  i += 2;
462
819
  continue;
463
820
  }
@@ -480,7 +837,7 @@ export function parseMarkdownToRTF(content) {
480
837
  }
481
838
  rtf += "\\pard\\intbl\\ql ";
482
839
  for (const h of headers) {
483
- rtf += "{\\b " + parseInlineToRTF(h) + "}\\cell ";
840
+ rtf += "{\\b " + parseInlineToRTF(h, footnoteMap) + "}\\cell ";
484
841
  }
485
842
  rtf += "\\row\n";
486
843
  // Data Rows
@@ -491,7 +848,7 @@ export function parseMarkdownToRTF(content) {
491
848
  }
492
849
  rtf += "\\pard\\intbl\\ql ";
493
850
  for (const cell of row) {
494
- rtf += parseInlineToRTF(cell) + "\\cell ";
851
+ rtf += parseInlineToRTF(cell, footnoteMap) + "\\cell ";
495
852
  }
496
853
  rtf += "\\row\n";
497
854
  }
@@ -507,37 +864,50 @@ export function parseMarkdownToRTF(content) {
507
864
  }
508
865
  // Headings
509
866
  if (trimmed.startsWith('# ')) {
510
- rtf += "{\\pard\\b\\fs40\\sb400\\sa200 " + parseInlineToRTF(trimmed.slice(2)) + "\\par}\n";
867
+ rtf += "{\\pard\\b\\fs40\\sb400\\sa200 " + parseInlineToRTF(trimmed.slice(2), footnoteMap) + "\\par}\n";
511
868
  }
512
869
  else if (trimmed.startsWith('## ')) {
513
- rtf += "{\\pard\\b\\fs32\\sb300\\sa150 " + parseInlineToRTF(trimmed.slice(3)) + "\\par}\n";
870
+ rtf += "{\\pard\\b\\fs32\\sb300\\sa150 " + parseInlineToRTF(trimmed.slice(3), footnoteMap) + "\\par}\n";
514
871
  }
515
872
  else if (trimmed.startsWith('### ')) {
516
- rtf += "{\\pard\\b\\fs28\\sb250\\sa100 " + parseInlineToRTF(trimmed.slice(4)) + "\\par}\n";
873
+ rtf += "{\\pard\\b\\fs28\\sb250\\sa100 " + parseInlineToRTF(trimmed.slice(4), footnoteMap) + "\\par}\n";
517
874
  }
518
875
  else if (trimmed.startsWith('#### ')) {
519
- rtf += "{\\pard\\b\\fs26\\sb200\\sa100 " + parseInlineToRTF(trimmed.slice(5)) + "\\par}\n";
876
+ rtf += "{\\pard\\b\\fs26\\sb200\\sa100 " + parseInlineToRTF(trimmed.slice(5), footnoteMap) + "\\par}\n";
877
+ }
878
+ else if (trimmed.startsWith('##### ')) {
879
+ rtf += "{\\pard\\b\\fs24\\sb200\\sa80 " + parseInlineToRTF(trimmed.slice(6), footnoteMap) + "\\par}\n";
880
+ }
881
+ else if (trimmed.startsWith('###### ')) {
882
+ rtf += "{\\pard\\b\\fs22\\sb150\\sa80 " + parseInlineToRTF(trimmed.slice(7), footnoteMap) + "\\par}\n";
883
+ }
884
+ else if (/^\s*[-*+]\s+\[[ xX]\]\s+/.test(line)) {
885
+ const checked = /\[[xX]\]/.test(line);
886
+ const text = line.replace(/^\s*[-*+]\s+\[[ xX]\]\s+/, '');
887
+ const match = line.match(/^(\s*)/);
888
+ const indent = match ? Math.floor(match[1].length / 4) : 0;
889
+ rtf += `{\\pard\\li${(indent + 1) * 360}\\fi-360 ${checked ? '\\u9745?' : '\\u9744?'}\\tab ` + parseInlineToRTF(text, footnoteMap) + "\\par}\n";
520
890
  }
521
891
  else if (trimmed.startsWith('>')) {
522
892
  const level = (trimmed.match(/^>+/g) || ['>'])[0].length;
523
893
  const text = trimmed.replace(/^>+\s*/, '');
524
- rtf += `{\\pard\\li${level * 720}\\cf2\\i\\sa100 ` + parseInlineToRTF(text) + "\\par}\n";
894
+ rtf += `{\\pard\\li${level * 720}\\cf2\\i\\sa100 ` + parseInlineToRTF(text, footnoteMap) + "\\par}\n";
525
895
  }
526
896
  else if (/^(\s*)[-*+]\s+/.test(line)) {
527
897
  const match = line.match(/^(\s*)([-*+]\s+)/);
528
898
  const indent = match ? Math.floor(match[1].length / 4) : 0;
529
899
  const text = line.replace(/^\s*[-*+]\s+/, '');
530
- rtf += `{\\pard\\li${(indent + 1) * 360}\\fi-360\\'b7\\tab ` + parseInlineToRTF(text) + "\\par}\n";
900
+ rtf += `{\\pard\\li${(indent + 1) * 360}\\fi-360\\'b7\\tab ` + parseInlineToRTF(text, footnoteMap) + "\\par}\n";
531
901
  }
532
902
  else if (/^(\s*)\d+\.\s+/.test(line)) {
533
903
  const match = line.match(/^(\s*)(\d+\.\s+)/);
534
904
  const indent = match ? Math.floor(match[1].length / 4) : 0;
535
905
  const number = match ? match[2] : "1. ";
536
906
  const text = line.replace(/^\s*\d+\.\s+/, '');
537
- rtf += `{\\pard\\li${(indent + 1) * 360}\\fi-360 ${number}\\tab ` + parseInlineToRTF(text) + "\\par}\n";
907
+ rtf += `{\\pard\\li${(indent + 1) * 360}\\fi-360 ${number}\\tab ` + parseInlineToRTF(text, footnoteMap) + "\\par}\n";
538
908
  }
539
909
  else {
540
- rtf += "{\\pard\\sa150 " + parseInlineToRTF(trimmed) + "\\par}\n";
910
+ rtf += "{\\pard\\sa150 " + parseInlineToRTF(trimmed, footnoteMap) + "\\par}\n";
541
911
  }
542
912
  i++;
543
913
  }