npm - docrev - Versions diffs - 0.10.0 → 0.10.1 - Mend

docrev 0.10.0 → 0.10.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Files changed (126)

package/.gitattributes +1 -1
package/CHANGELOG.md +173 -164
package/PLAN-tables-and-postprocess.md +850 -850
package/README.md +431 -431
package/bin/rev.js +11 -11
package/bin/rev.ts +145 -145
package/completions/rev.bash +127 -127
package/completions/rev.ps1 +210 -210
package/completions/rev.zsh +207 -207
package/dist/lib/anchor-match.d.ts +1 -1
package/dist/lib/anchor-match.d.ts.map +1 -1
package/dist/lib/anchor-match.js +17 -47
package/dist/lib/anchor-match.js.map +1 -1
package/dist/lib/build.js +4 -4
package/dist/lib/commands/context.d.ts +1 -1
package/dist/lib/commands/context.d.ts.map +1 -1
package/dist/lib/commands/context.js +1 -1
package/dist/lib/commands/context.js.map +1 -1
package/dist/lib/commands/sections.js +7 -7
package/dist/lib/commands/sections.js.map +1 -1
package/dist/lib/commands/sync.d.ts.map +1 -1
package/dist/lib/commands/sync.js +15 -14
package/dist/lib/commands/sync.js.map +1 -1
package/dist/lib/commands/utilities.js +164 -164
package/dist/lib/commands/verify-anchors.js +6 -6
package/dist/lib/commands/verify-anchors.js.map +1 -1
package/dist/lib/commands/word-tools.js +8 -8
package/dist/lib/grammar.js +3 -3
package/dist/lib/macro-filter.lua +201 -201
package/dist/lib/pdf-comments.js +44 -44
package/dist/lib/plugins.js +57 -57
package/dist/lib/pptx-color-filter.lua +37 -37
package/dist/lib/pptx-themes.js +115 -115
package/dist/lib/sections.d.ts +35 -0
package/dist/lib/sections.d.ts.map +1 -1
package/dist/lib/sections.js +81 -0
package/dist/lib/sections.js.map +1 -1
package/dist/lib/spelling.js +2 -2
package/dist/lib/templates.js +387 -387
package/dist/lib/themes.js +51 -51
package/docs-src/build.py +113 -113
package/docs-src/extra.css +208 -208
package/docs-src/md-to-html.lua +6 -6
package/docs-src/template.html +116 -116
package/eslint.config.js +27 -27
package/lib/anchor-match.ts +276 -308
package/lib/annotations.ts +644 -644
package/lib/build.ts +1766 -1766
package/lib/citations.ts +160 -160
package/lib/commands/build.ts +855 -855
package/lib/commands/citations.ts +515 -515
package/lib/commands/comments.ts +1050 -1050
package/lib/commands/context.ts +176 -174
package/lib/commands/core.ts +309 -309
package/lib/commands/doi.ts +435 -435
package/lib/commands/file-ops.ts +372 -372
package/lib/commands/history.ts +320 -320
package/lib/commands/index.ts +87 -87
package/lib/commands/init.ts +259 -259
package/lib/commands/merge-resolve.ts +378 -378
package/lib/commands/preview.ts +178 -178
package/lib/commands/project-info.ts +244 -244
package/lib/commands/quality.ts +517 -517
package/lib/commands/response.ts +454 -454
package/lib/commands/section-boundaries.ts +82 -82
package/lib/commands/sections.ts +451 -451
package/lib/commands/sync.ts +709 -706
package/lib/commands/text-ops.ts +449 -449
package/lib/commands/utilities.ts +448 -448
package/lib/commands/verify-anchors.ts +272 -272
package/lib/commands/word-tools.ts +340 -340
package/lib/comment-realign.ts +517 -517
package/lib/config.ts +84 -84
package/lib/crossref.ts +781 -781
package/lib/csl.ts +191 -191
package/lib/dependencies.ts +98 -98
package/lib/diff-engine.ts +465 -465
package/lib/doi-cache.ts +115 -115
package/lib/doi.ts +897 -897
package/lib/equations.ts +506 -506
package/lib/errors.ts +346 -346
package/lib/format.ts +541 -541
package/lib/git.ts +326 -326
package/lib/grammar.ts +303 -303
package/lib/image-registry.ts +180 -180
package/lib/import.ts +911 -911
package/lib/journals.ts +543 -543
package/lib/macro-filter.lua +201 -201
package/lib/macros.ts +273 -273
package/lib/merge.ts +633 -633
package/lib/orcid.ts +144 -144
package/lib/pdf-comments.ts +263 -263
package/lib/pdf-import.ts +524 -524
package/lib/plugins.ts +362 -362
package/lib/postprocess.ts +188 -188
package/lib/pptx-color-filter.lua +37 -37
package/lib/pptx-template.ts +469 -469
package/lib/pptx-themes.ts +483 -483
package/lib/protect-restore.ts +520 -520
package/lib/rate-limiter.ts +94 -94
package/lib/response.ts +197 -197
package/lib/restore-references.ts +240 -240
package/lib/review.ts +327 -327
package/lib/schema.ts +488 -488
package/lib/scientific-words.ts +73 -73
package/lib/sections.ts +425 -335
package/lib/slides.ts +756 -756
package/lib/spelling.ts +334 -334
package/lib/templates.ts +526 -526
package/lib/themes.ts +742 -742
package/lib/trackchanges.ts +247 -247
package/lib/tui.ts +450 -450
package/lib/types.ts +550 -550
package/lib/undo.ts +250 -250
package/lib/utils.ts +69 -69
package/lib/variables.ts +179 -179
package/lib/word-extraction.ts +806 -806
package/lib/word.ts +643 -643
package/lib/wordcomments.ts +840 -840
package/mkdocs.yml +64 -64
package/package.json +137 -137
package/scripts/postbuild.js +47 -47
package/skill/REFERENCE.md +539 -539
package/skill/SKILL.md +295 -295
package/tsconfig.json +26 -26
package/types/index.d.ts +525 -525

package/lib/protect-restore.ts (changed)

@@ -1,520 +1,520 @@
-/**
- * Protection and restoration utilities for markdown elements during Word import
- *
- * These functions protect special markdown syntax (anchors, cross-refs, math, citations,
- * images, tables) by replacing them with placeholders before diffing, then restore them after.
- */
-// =============================================================================
-// Interfaces
-// =============================================================================
-interface MarkdownPrefix {
-  prefix: string;
-  content: string;
-}
-interface ProtectedItem {
-  original: string;
-  placeholder: string;
-}
-interface ProtectedMath extends ProtectedItem {
-  type: 'inline' | 'display';
-  simplified: string;
-}
-interface ProtectedImage extends ProtectedItem {
-  label: string | null;
-  caption: string;
-  path: string;
-  figureNumber: string | null;
-}
-interface ProtectedTable extends ProtectedItem {
-  cellCount: number;
-}
-interface ProtectAnchorsResult {
-  text: string;
-  anchors: ProtectedItem[];
-}
-interface ProtectCrossrefsResult {
-  text: string;
-  crossrefs: ProtectedItem[];
-}
-interface ProtectMathResult {
-  text: string;
-  mathBlocks: ProtectedMath[];
-}
-interface ProtectCitationsResult {
-  text: string;
-  citations: string[];
-}
-interface ProtectImagesResult {
-  text: string;
-  images: ProtectedImage[];
-}
-interface ProtectTablesResult {
-  text: string;
-  tables: ProtectedTable[];
-}
-interface ImageRegistry {
-  byNumber?: Map<string, { label: string }>;
-}
-// =============================================================================
-// Shared Helpers
-// =============================================================================
-/**
- * Replace regex matches with indexed placeholders and collect originals
- */
-function collectAndReplace(
-  text: string,
-  pattern: RegExp,
-  prefix: string,
-  suffix: string,
-): { text: string; items: ProtectedItem[] } {
-  const items: ProtectedItem[] = [];
-  const result = text.replace(pattern, (match) => {
-    const idx = items.length;
-    const placeholder = `${prefix}${idx}${suffix}`;
-    items.push({ original: match, placeholder });
-    return placeholder;
-  });
-  return { text: result, items };
-}
-/**
- * Restore protected items from placeholders, handling annotation wrappers
- * (deletion {--...--} and insertion {++...++} wrappers are unwrapped)
- */
-function restoreProtectedItems(text: string, items: ProtectedItem[]): string {
-  for (const item of items) {
-    const deletionPattern = new RegExp(`\\{--[^}]*?${item.placeholder}[^}]*?--\\}`, 'g');
-    text = text.replace(deletionPattern, item.original);
-    const insertionPattern = new RegExp(`\\{\\+\\+[^}]*?${item.placeholder}[^}]*?\\+\\+\\}`, 'g');
-    text = text.replace(insertionPattern, item.original);
-    text = text.split(item.placeholder).join(item.original);
-  }
-  return text;
-}
-// =============================================================================
-// Public Functions
-// =============================================================================
-/**
- * Extract markdown prefix (headers, list markers) from a line
- */
-export function extractMarkdownPrefix(line: string): MarkdownPrefix {
-  // Headers
-  const headerMatch = line.match(/^(#{1,6}\s+)/);
-  if (headerMatch && headerMatch[1]) {
-    return { prefix: headerMatch[1], content: line.slice(headerMatch[1].length) };
-  }
-  // List items
-  const listMatch = line.match(/^(\s*[-*+]\s+|\s*\d+\.\s+)/);
-  if (listMatch && listMatch[1]) {
-    return { prefix: listMatch[1], content: line.slice(listMatch[1].length) };
-  }
-  // Blockquotes
-  const quoteMatch = line.match(/^(>\s*)/);
-  if (quoteMatch && quoteMatch[1]) {
-    return { prefix: quoteMatch[1], content: line.slice(quoteMatch[1].length) };
-  }
-  return { prefix: '', content: line };
-}
-/**
- * Protect figure/table anchors before diffing
- * Anchors like {#fig:heatmap} and {#tbl:results} should never be deleted
- */
-export function protectAnchors(md: string): ProtectAnchorsResult {
-  // Match {#fig:label}, {#tbl:label}, {#eq:label}, {#sec:label} etc.
-  // Also match with additional attributes like {#fig:label width=50%}
-  const { text, items: anchors } = collectAndReplace(
-    md, /\{#(fig|tbl|eq|sec|lst):[^}]+\}/g, 'ANCHORBLOCK', 'ENDANCHOR',
-  );
-  return { text, anchors };
-}
-/**
- * Restore anchors from placeholders
- */
-export function restoreAnchors(text: string, anchors: ProtectedItem[]): string {
-  for (const anchor of anchors) {
-    // Handle case where anchor is inside a deletion annotation
-    // {--...ANCHORBLOCK0ENDANCHOR--} should become {--...--}{#fig:label}
-    const deletionPattern = new RegExp(`\\{--([^}]*?)${anchor.placeholder}([^}]*?)--\\}`, 'g');
-    text = text.replace(deletionPattern, (match, before, after) => {
-      const cleanBefore = before.trim();
-      const cleanAfter = after.trim();
-      let result = '';
-      if (cleanBefore) result += `{--${cleanBefore}--}`;
-      result += anchor.original;
-      if (cleanAfter) result += `{--${cleanAfter}--}`;
-      return result;
-    });
-    // Handle case where anchor is inside a substitution
-    // {~~old ANCHORBLOCK0ENDANCHOR~>new~~} -> {~~old~>new~~}{#fig:label}
-    const substitutionPattern = new RegExp(`\\{~~([^~]*?)${anchor.placeholder}([^~]*?)~>([^~]*)~~\\}`, 'g');
-    text = text.replace(substitutionPattern, (match: string, oldBefore: string, oldAfter: string, newText: string) => {
-      const cleanOldBefore = (oldBefore ?? '').trim();
-      const cleanOldAfter = (oldAfter ?? '').trim();
-      const cleanNew = (newText ?? '').trim();
-      const oldText = (cleanOldBefore + ' ' + cleanOldAfter).trim();
-      let result = '';
-      if (oldText !== cleanNew) {
-        result += `{~~${oldText}~>${cleanNew}~~}`;
-      } else {
-        result += cleanNew;
-      }
-      result += anchor.original;
-      return result;
-    });
-    // Normal replacement
-    text = text.split(anchor.placeholder).join(anchor.original);
-  }
-  return text;
-}
-/**
- * Protect cross-references before diffing
- * References like @fig:label, @tbl:label should be preserved
- */
-export function protectCrossrefs(md: string): ProtectCrossrefsResult {
-  // Match @fig:label, @tbl:label, @eq:label, @sec:label
-  // Can appear as @fig:label or (@fig:label) or [@fig:label]
-  const { text, items: crossrefs } = collectAndReplace(
-    md, /@(fig|tbl|eq|sec|lst):[a-zA-Z0-9_-]+/g, 'XREFBLOCK', 'ENDXREF',
-  );
-  return { text, crossrefs };
-}
-/**
- * Restore cross-references from placeholders
- */
-export function restoreCrossrefs(text: string, crossrefs: ProtectedItem[]): string {
-  for (const xref of crossrefs) {
-    // Handle deletions - restore the reference even if marked deleted
-    const deletionPattern = new RegExp(`\\{--([^}]*?)${xref.placeholder}([^}]*?)--\\}`, 'g');
-    text = text.replace(deletionPattern, (match, before, after) => {
-      const cleanBefore = before.trim();
-      const cleanAfter = after.trim();
-      let result = '';
-      if (cleanBefore) result += `{--${cleanBefore}--}`;
-      result += xref.original;
-      if (cleanAfter) result += `{--${cleanAfter}--}`;
-      return result;
-    });
-    // Handle substitutions where rendered form (Figure 1) replaced the reference
-    // {~~XREFBLOCK0ENDXREF~>Figure 1~~} -> @fig:label
-    const substitutionPattern = new RegExp(`\\{~~${xref.placeholder}~>[^~]+~~\\}`, 'g');
-    text = text.replace(substitutionPattern, xref.original);
-    // Normal replacement
-    text = text.split(xref.placeholder).join(xref.original);
-  }
-  return text;
-}
-/**
- * Simplify LaTeX math for fuzzy matching against Word text
- * Word renders math as text, so we need to match the rendered form
- */
-export function simplifyMathForMatching(latex: string): string {
-  return latex
-    // Remove common LaTeX commands
-    .replace(/\\text\{([^}]+)\}/g, '$1')
-    .replace(/\\hat\{([^}]+)\}/g, '$1')
-    .replace(/\\bar\{([^}]+)\}/g, '$1')
-    .replace(/\\frac\{([^}]+)\}\{([^}]+)\}/g, '$1/$2')
-    .replace(/\\sum_([a-z])/g, 'Σ')
-    .replace(/\\sum/g, 'Σ')
-    .replace(/\\cdot/g, '·')
-    .replace(/\\quad/g, ' ')
-    .replace(/\\,/g, ' ')
-    .replace(/\\_/g, '_')
-    .replace(/\\{/g, '{')
-    .replace(/\\}/g, '}')
-    .replace(/\\/g, '')  // Remove remaining backslashes
-    .replace(/[{}]/g, '')  // Remove braces
-    .replace(/\s+/g, ' ')
-    .trim();
-}
-/**
- * Protect mathematical notation before diffing by replacing with placeholders
- * Handles both inline $...$ and display $$...$$ math
- */
-export function protectMath(md: string): ProtectMathResult {
-  const mathBlocks: ProtectedMath[] = [];
-  // First protect display math ($$...$$) - must be done before inline math
-  let text = md.replace(/\$\$([^$]+)\$\$/g, (match, content) => {
-    const idx = mathBlocks.length;
-    const placeholder = `MATHBLOCK${idx}ENDMATH`;
-    // Create simplified version for matching in Word text
-    const simplified = simplifyMathForMatching(content);
-    mathBlocks.push({ original: match, placeholder, type: 'display', simplified });
-    return placeholder;
-  });
-  // Then protect inline math ($...$)
-  text = text.replace(/\$([^$\n]+)\$/g, (match, content) => {
-    const idx = mathBlocks.length;
-    const placeholder = `MATHBLOCK${idx}ENDMATH`;
-    const simplified = simplifyMathForMatching(content);
-    mathBlocks.push({ original: match, placeholder, type: 'inline', simplified });
-    return placeholder;
-  });
-  return { text, mathBlocks };
-}
-/**
- * Restore math from placeholders
- */
-export function restoreMath(text: string, mathBlocks: ProtectedMath[]): string {
-  for (const block of mathBlocks) {
-    text = text.split(block.placeholder).join(block.original);
-  }
-  return text;
-}
-/**
- * Replace rendered math in Word text with matching placeholders
- * This is heuristic-based since Word can render math in various ways
- */
-export function replaceRenderedMath(wordText: string, mathBlocks: ProtectedMath[]): string {
-  let result = wordText;
-  for (const block of mathBlocks) {
-    // For inline math, try to find the simplified form in Word text
-    if (block.simplified.length >= 2) {
-      // Try exact match first
-      if (result.includes(block.simplified)) {
-        result = result.replace(block.simplified, block.placeholder);
-      }
-    }
-  }
-  return result;
-}
-/**
- * Protect citations before diffing by replacing with placeholders
- */
-export function protectCitations(md: string): ProtectCitationsResult {
-  const citations: string[] = [];
-  const text = md.replace(/\[@[^\]]+\]/g, (match) => {
-    const idx = citations.length;
-    citations.push(match);
-    return `CITEREF${idx}ENDCITE`;
-  });
-  return { text, citations };
-}
-/**
- * Restore citations from placeholders
- */
-export function restoreCitations(text: string, citations: string[]): string {
-  for (let i = 0; i < citations.length; i++) {
-    // Handle cases where placeholder might be inside annotations
-    const placeholder = `CITEREF${i}ENDCITE`;
-    text = text.split(placeholder).join(citations[i]);
-  }
-  return text;
-}
-/**
- * Remove rendered citations from Word text (replace with matching placeholders)
- */
-export function replaceRenderedCitations(wordText: string, count: number): string {
-  // Match rendered citation patterns: (Author 2021), (Author et al. 2021), etc.
-  const pattern = /\((?:[A-Z][a-zé]+(?:\s+et\s+al\.?)?(?:\s*[&,;]\s*[A-Z][a-zé]+(?:\s+et\s+al\.?)?)*\s+\d{4}(?:[a-z])?(?:\s*[,;]\s*(?:[A-Z][a-zé]+(?:\s+et\s+al\.?)?\s+)?\d{4}(?:[a-z])?)*)\)/g;
-  let idx = 0;
-  return wordText.replace(pattern, (match) => {
-    if (idx < count) {
-      const placeholder = `CITEREF${idx}ENDCITE`;
-      idx++;
-      return placeholder;
-    }
-    return match;
-  });
-}
-/**
- * Protect markdown images before diffing by replacing with placeholders
- * Images are treated as atomic blocks to prevent corruption during diff
- *
- * Matches: ![caption](path){#fig:label} or ![caption](path)
- * Also matches Word-style: ![Figure N: caption](media/path)
- */
-export function protectImages(md: string, registry: ImageRegistry | null = null): ProtectImagesResult {
-  const images: ProtectedImage[] = [];
-  // Match markdown images: ![caption](path){#anchor} or ![caption](path)
-  // The anchor is optional and can have additional attributes
-  const imagePattern = /!\[([^\]]*)\]\(([^)]+)\)(?:\{([^}]+)\})?/g;
-  const text = md.replace(imagePattern, (match, caption, path, anchor) => {
-    const idx = images.length;
-    const placeholder = `IMAGEBLOCK${idx}ENDIMAGE`;
-    // Extract label from anchor if present (e.g., "#fig:map" -> "map")
-    let label: string | null = null;
-    if (anchor) {
-      const labelMatch = anchor.match(/#(fig|tbl):([a-zA-Z0-9_-]+)/);
-      if (labelMatch) {
-        label = labelMatch[2];
-      }
-    }
-    // Try to extract figure number from Word-style caption "Figure N: ..."
-    let figureNumber: string | null = null;
-    const figNumMatch = caption.match(/^(?:Figure|Fig\.?|Table|Tbl\.?)\s+(\d+|S\d+)[:\.]?\s*/i);
-    if (figNumMatch) {
-      figureNumber = figNumMatch[1];
-    }
-    images.push({
-      original: match,
-      placeholder,
-      label,
-      caption: caption.trim(),
-      path,
-      figureNumber,
-    });
-    return placeholder;
-  });
-  return { text, images };
-}
-/**
- * Restore images from placeholders
- */
-export function restoreImages(text: string, images: ProtectedImage[]): string {
-  return restoreProtectedItems(text, images);
-}
-/**
- * Match Word-extracted images to original images using registry
- * Returns a mapping of Word image placeholders to original image placeholders
- */
-export function matchWordImagesToOriginal(
-  originalImages: ProtectedImage[],
-  wordImages: ProtectedImage[],
-  registry: ImageRegistry | null = null
-): Map<string, string> {
-  const mapping = new Map<string, string>();
-  const usedOriginals = new Set<string>();
-  for (const wordImg of wordImages) {
-    let bestMatch: ProtectedImage | null = null;
-    let bestScore = 0;
-    for (const origImg of originalImages) {
-      if (usedOriginals.has(origImg.placeholder)) continue;
-      let score = 0;
-      // Match by label (most reliable)
-      if (wordImg.label && origImg.label && wordImg.label === origImg.label) {
-        score += 100;
-      }
-      // Match by figure number via registry
-      if (wordImg.figureNumber && registry) {
-        const entry = registry.byNumber?.get(`fig:${wordImg.figureNumber}`);
-        if (entry && entry.label === origImg.label) {
-          score += 90;
-        }
-      }
-      // Match by caption similarity (first 50 chars, normalized)
-      const wordCaption = wordImg.caption.replace(/^(?:Figure|Fig\.?|Table|Tbl\.?)\s+\d+[:\.]?\s*/i, '').toLowerCase().slice(0, 50);
-      const origCaption = origImg.caption.toLowerCase().slice(0, 50);
-      if (wordCaption && origCaption && wordCaption === origCaption) {
-        score += 80;
-      } else if (wordCaption && origCaption && (wordCaption.includes(origCaption.slice(0, 30)) || origCaption.includes(wordCaption.slice(0, 30)))) {
-        score += 40;
-      }
-      // Match by path similarity (filename)
-      const wordFile = wordImg.path.split('/').pop()?.toLowerCase() || '';
-      const origFile = origImg.path.split('/').pop()?.toLowerCase() || '';
-      if (wordFile === origFile) {
-        score += 30;
-      }
-      if (score > bestScore) {
-        bestScore = score;
-        bestMatch = origImg;
-      }
-    }
-    if (bestMatch && bestScore >= 40) {
-      mapping.set(wordImg.placeholder, bestMatch.placeholder);
-      usedOriginals.add(bestMatch.placeholder);
-    }
-  }
-  return mapping;
-}
-/**
- * Protect markdown tables before diffing by replacing with placeholders
- * Tables are treated as atomic blocks to prevent corruption during diff
- */
-export function protectTables(md: string): ProtectTablesResult {
-  const tables: ProtectedTable[] = [];
-  // Match markdown tables: lines starting with | and containing |
-  // A table is: optional caption, header row, separator row (|---|), data rows
-  const tablePattern = /(?:^(?:\*\*)?Table[^\n]*\n\n?)?(?:^\|[^\n]+\|\n)+/gm;
-  const text = md.replace(tablePattern, (match) => {
-    // Verify it's actually a table (has separator row with dashes)
-    if (!match.includes('|---') && !match.includes('| ---') && !match.includes('|:--')) {
-      return match; // Not a real table, just lines with pipes
-    }
-    const idx = tables.length;
-    const placeholder = `\n\nTABLEBLOCK${idx}ENDTABLE\n\n`;
-    // Count cells for matching in Word (approximate)
-    const cellCount = (match.match(/\|/g) || []).length;
-    tables.push({ original: match.trim(), placeholder: placeholder.trim(), cellCount });
-    return placeholder;
-  });
-  return { text, tables };
-}
-/**
- * Restore tables from placeholders
- */
-export function restoreTables(text: string, tables: ProtectedTable[]): string {
-  return restoreProtectedItems(text, tables);
-}
+/**
+ * Protection and restoration utilities for markdown elements during Word import
+ *
+ * These functions protect special markdown syntax (anchors, cross-refs, math, citations,
+ * images, tables) by replacing them with placeholders before diffing, then restore them after.
+ */
+// =============================================================================
+// Interfaces
+// =============================================================================
+interface MarkdownPrefix {
+  prefix: string;
+  content: string;
+}
+interface ProtectedItem {
+  original: string;
+  placeholder: string;
+}
+interface ProtectedMath extends ProtectedItem {
+  type: 'inline' | 'display';
+  simplified: string;
+}
+interface ProtectedImage extends ProtectedItem {
+  label: string | null;
+  caption: string;
+  path: string;
+  figureNumber: string | null;
+}
+interface ProtectedTable extends ProtectedItem {
+  cellCount: number;
+}
+interface ProtectAnchorsResult {
+  text: string;
+  anchors: ProtectedItem[];
+}
+interface ProtectCrossrefsResult {
+  text: string;
+  crossrefs: ProtectedItem[];
+}
+interface ProtectMathResult {
+  text: string;
+  mathBlocks: ProtectedMath[];
+}
+interface ProtectCitationsResult {
+  text: string;
+  citations: string[];
+}
+interface ProtectImagesResult {
+  text: string;
+  images: ProtectedImage[];
+}
+interface ProtectTablesResult {
+  text: string;
+  tables: ProtectedTable[];
+}
+interface ImageRegistry {
+  byNumber?: Map<string, { label: string }>;
+}
+// =============================================================================
+// Shared Helpers
+// =============================================================================
+/**
+ * Replace regex matches with indexed placeholders and collect originals
+ */
+function collectAndReplace(
+  text: string,
+  pattern: RegExp,
+  prefix: string,
+  suffix: string,
+): { text: string; items: ProtectedItem[] } {
+  const items: ProtectedItem[] = [];
+  const result = text.replace(pattern, (match) => {
+    const idx = items.length;
+    const placeholder = `${prefix}${idx}${suffix}`;
+    items.push({ original: match, placeholder });
+    return placeholder;
+  });
+  return { text: result, items };
+}
+/**
+ * Restore protected items from placeholders, handling annotation wrappers
+ * (deletion {--...--} and insertion {++...++} wrappers are unwrapped)
+ */
+function restoreProtectedItems(text: string, items: ProtectedItem[]): string {
+  for (const item of items) {
+    const deletionPattern = new RegExp(`\\{--[^}]*?${item.placeholder}[^}]*?--\\}`, 'g');
+    text = text.replace(deletionPattern, item.original);
+    const insertionPattern = new RegExp(`\\{\\+\\+[^}]*?${item.placeholder}[^}]*?\\+\\+\\}`, 'g');
+    text = text.replace(insertionPattern, item.original);
+    text = text.split(item.placeholder).join(item.original);
+  }
+  return text;
+}
+// =============================================================================
+// Public Functions
+// =============================================================================
+/**
+ * Extract markdown prefix (headers, list markers) from a line
+ */
+export function extractMarkdownPrefix(line: string): MarkdownPrefix {
+  // Headers
+  const headerMatch = line.match(/^(#{1,6}\s+)/);
+  if (headerMatch && headerMatch[1]) {
+    return { prefix: headerMatch[1], content: line.slice(headerMatch[1].length) };
+  }
+  // List items
+  const listMatch = line.match(/^(\s*[-*+]\s+|\s*\d+\.\s+)/);
+  if (listMatch && listMatch[1]) {
+    return { prefix: listMatch[1], content: line.slice(listMatch[1].length) };
+  }
+  // Blockquotes
+  const quoteMatch = line.match(/^(>\s*)/);
+  if (quoteMatch && quoteMatch[1]) {
+    return { prefix: quoteMatch[1], content: line.slice(quoteMatch[1].length) };
+  }
+  return { prefix: '', content: line };
+}
+/**
+ * Protect figure/table anchors before diffing
+ * Anchors like {#fig:heatmap} and {#tbl:results} should never be deleted
+ */
+export function protectAnchors(md: string): ProtectAnchorsResult {
+  // Match {#fig:label}, {#tbl:label}, {#eq:label}, {#sec:label} etc.
+  // Also match with additional attributes like {#fig:label width=50%}
+  const { text, items: anchors } = collectAndReplace(
+    md, /\{#(fig|tbl|eq|sec|lst):[^}]+\}/g, 'ANCHORBLOCK', 'ENDANCHOR',
+  );
+  return { text, anchors };
+}
+/**
+ * Restore anchors from placeholders
+ */
+export function restoreAnchors(text: string, anchors: ProtectedItem[]): string {
+  for (const anchor of anchors) {
+    // Handle case where anchor is inside a deletion annotation
+    // {--...ANCHORBLOCK0ENDANCHOR--} should become {--...--}{#fig:label}
+    const deletionPattern = new RegExp(`\\{--([^}]*?)${anchor.placeholder}([^}]*?)--\\}`, 'g');
+    text = text.replace(deletionPattern, (match, before, after) => {
+      const cleanBefore = before.trim();
+      const cleanAfter = after.trim();
+      let result = '';
+      if (cleanBefore) result += `{--${cleanBefore}--}`;
+      result += anchor.original;
+      if (cleanAfter) result += `{--${cleanAfter}--}`;
+      return result;
+    });
+    // Handle case where anchor is inside a substitution
+    // {~~old ANCHORBLOCK0ENDANCHOR~>new~~} -> {~~old~>new~~}{#fig:label}
+    const substitutionPattern = new RegExp(`\\{~~([^~]*?)${anchor.placeholder}([^~]*?)~>([^~]*)~~\\}`, 'g');
+    text = text.replace(substitutionPattern, (match: string, oldBefore: string, oldAfter: string, newText: string) => {
+      const cleanOldBefore = (oldBefore ?? '').trim();
+      const cleanOldAfter = (oldAfter ?? '').trim();
+      const cleanNew = (newText ?? '').trim();
+      const oldText = (cleanOldBefore + ' ' + cleanOldAfter).trim();
+      let result = '';
+      if (oldText !== cleanNew) {
+        result += `{~~${oldText}~>${cleanNew}~~}`;
+      } else {
+        result += cleanNew;
+      }
+      result += anchor.original;
+      return result;
+    });
+    // Normal replacement
+    text = text.split(anchor.placeholder).join(anchor.original);
+  }
+  return text;
+}
+/**
+ * Protect cross-references before diffing
+ * References like @fig:label, @tbl:label should be preserved
+ */
+export function protectCrossrefs(md: string): ProtectCrossrefsResult {
+  // Match @fig:label, @tbl:label, @eq:label, @sec:label
+  // Can appear as @fig:label or (@fig:label) or [@fig:label]
+  const { text, items: crossrefs } = collectAndReplace(
+    md, /@(fig|tbl|eq|sec|lst):[a-zA-Z0-9_-]+/g, 'XREFBLOCK', 'ENDXREF',
+  );
+  return { text, crossrefs };
+}
+/**
+ * Restore cross-references from placeholders
+ */
+export function restoreCrossrefs(text: string, crossrefs: ProtectedItem[]): string {
+  for (const xref of crossrefs) {
+    // Handle deletions - restore the reference even if marked deleted
+    const deletionPattern = new RegExp(`\\{--([^}]*?)${xref.placeholder}([^}]*?)--\\}`, 'g');
+    text = text.replace(deletionPattern, (match, before, after) => {
+      const cleanBefore = before.trim();
+      const cleanAfter = after.trim();
+      let result = '';
+      if (cleanBefore) result += `{--${cleanBefore}--}`;
+      result += xref.original;
+      if (cleanAfter) result += `{--${cleanAfter}--}`;
+      return result;
+    });
+    // Handle substitutions where rendered form (Figure 1) replaced the reference
+    // {~~XREFBLOCK0ENDXREF~>Figure 1~~} -> @fig:label
+    const substitutionPattern = new RegExp(`\\{~~${xref.placeholder}~>[^~]+~~\\}`, 'g');
+    text = text.replace(substitutionPattern, xref.original);
+    // Normal replacement
+    text = text.split(xref.placeholder).join(xref.original);
+  }
+  return text;
+}
+/**
+ * Simplify LaTeX math for fuzzy matching against Word text
+ * Word renders math as text, so we need to match the rendered form
+ */
+export function simplifyMathForMatching(latex: string): string {
+  return latex
+    // Remove common LaTeX commands
+    .replace(/\\text\{([^}]+)\}/g, '$1')
+    .replace(/\\hat\{([^}]+)\}/g, '$1')
+    .replace(/\\bar\{([^}]+)\}/g, '$1')
+    .replace(/\\frac\{([^}]+)\}\{([^}]+)\}/g, '$1/$2')
+    .replace(/\\sum_([a-z])/g, 'Σ')
+    .replace(/\\sum/g, 'Σ')
+    .replace(/\\cdot/g, '·')
+    .replace(/\\quad/g, ' ')
+    .replace(/\\,/g, ' ')
+    .replace(/\\_/g, '_')
+    .replace(/\\{/g, '{')
+    .replace(/\\}/g, '}')
+    .replace(/\\/g, '')  // Remove remaining backslashes
+    .replace(/[{}]/g, '')  // Remove braces
+    .replace(/\s+/g, ' ')
+    .trim();
+}
+/**
+ * Protect mathematical notation before diffing by replacing with placeholders
+ * Handles both inline $...$ and display $$...$$ math
+ */
+export function protectMath(md: string): ProtectMathResult {
+  const mathBlocks: ProtectedMath[] = [];
+  // First protect display math ($$...$$) - must be done before inline math
+  let text = md.replace(/\$\$([^$]+)\$\$/g, (match, content) => {
+    const idx = mathBlocks.length;
+    const placeholder = `MATHBLOCK${idx}ENDMATH`;
+    // Create simplified version for matching in Word text
+    const simplified = simplifyMathForMatching(content);
+    mathBlocks.push({ original: match, placeholder, type: 'display', simplified });
+    return placeholder;
+  });
+  // Then protect inline math ($...$)
+  text = text.replace(/\$([^$\n]+)\$/g, (match, content) => {
+    const idx = mathBlocks.length;
+    const placeholder = `MATHBLOCK${idx}ENDMATH`;
+    const simplified = simplifyMathForMatching(content);
+    mathBlocks.push({ original: match, placeholder, type: 'inline', simplified });
+    return placeholder;
+  });
+  return { text, mathBlocks };
+}
+/**
+ * Put the original math source back wherever its placeholder occurs.
+ *
+ * @param text - Text containing math placeholders.
+ * @param mathBlocks - Protected math entries; each supplies the `placeholder`
+ *   to look for and the `original` source to insert.
+ * @returns The text with every placeholder occurrence replaced.
+ */
+export function restoreMath(text: string, mathBlocks: ProtectedMath[]): string {
+  let restored = text;
+  for (const { placeholder, original } of mathBlocks) {
+    // split/join swaps every occurrence and inserts the math source literally
+    const pieces = restored.split(placeholder);
+    restored = pieces.join(original);
+  }
+  return restored;
+}
+/**
+ * Swap rendered math in Word text for the matching placeholders.
+ *
+ * Heuristic: each block's simplified form is searched for verbatim, and only
+ * its first occurrence is replaced. Blocks are processed in array order, so a
+ * later block with the same simplified form picks up the next occurrence.
+ *
+ * @param wordText - Text extracted from the Word document.
+ * @param mathBlocks - Protected math entries (display and inline alike).
+ * @returns The Word text with matched math replaced by placeholders.
+ */
+export function replaceRenderedMath(wordText: string, mathBlocks: ProtectedMath[]): string {
+  let output = wordText;
+  for (const { simplified, placeholder } of mathBlocks) {
+    // Forms shorter than two characters are skipped
+    if (simplified.length < 2) continue;
+    // Exact substring match only
+    if (!output.includes(simplified)) continue;
+    output = output.replace(simplified, placeholder);
+  }
+  return output;
+}
+/**
+ * Replace bracketed citations that begin with `[@` by numbered placeholders so
+ * they pass through diffing untouched.
+ *
+ * @param md - Markdown source text.
+ * @returns The text with each citation replaced by `CITEREF<n>ENDCITE`, and
+ *   the original citation strings indexed by `n`.
+ */
+export function protectCitations(md: string): ProtectCitationsResult {
+  const citations: string[] = [];
+  const citationPattern = /\[@[^\]]+\]/g;
+  const text = md.replace(citationPattern, (citation) => {
+    // push() returns the new length, so the stored index is one less
+    const position = citations.push(citation) - 1;
+    return `CITEREF${position}ENDCITE`;
+  });
+  return { text, citations };
+}
+/**
+ * Put the original citations back in place of their placeholders.
+ *
+ * @param text - Text containing `CITEREF<n>ENDCITE` placeholders.
+ * @param citations - Original citation strings; index `n` restores placeholder `n`.
+ * @returns The text with every placeholder occurrence replaced.
+ */
+export function restoreCitations(text: string, citations: string[]): string {
+  let restored = text;
+  for (const [index, citation] of citations.entries()) {
+    // split/join replaces every occurrence, wherever the placeholder sits
+    // (for example inside an annotation)
+    restored = restored.split(`CITEREF${index}ENDCITE`).join(citation);
+  }
+  return restored;
+}
+/**
+ * Replace rendered author-year citations in Word text with citation placeholders.
+ *
+ * Parenthesised citations such as `(Author 2021)` or `(Author et al. 2021)` are
+ * replaced, in order of appearance, by `CITEREF0ENDCITE`, `CITEREF1ENDCITE`, ...
+ * Once `count` placeholders have been issued, further matches are left as they are.
+ *
+ * @param wordText - Text extracted from the Word document.
+ * @param count - Maximum number of citations to replace.
+ * @returns The Word text with up to `count` rendered citations replaced.
+ */
+export function replaceRenderedCitations(wordText: string, count: number): string {
+  const renderedCitation = /\((?:[A-Z][a-zé]+(?:\s+et\s+al\.?)?(?:\s*[&,;]\s*[A-Z][a-zé]+(?:\s+et\s+al\.?)?)*\s+\d{4}(?:[a-z])?(?:\s*[,;]\s*(?:[A-Z][a-zé]+(?:\s+et\s+al\.?)?\s+)?\d{4}(?:[a-z])?)*)\)/g;
+  let issued = 0;
+  return wordText.replace(renderedCitation, (rendered) => {
+    if (issued >= count) {
+      return rendered;
+    }
+    const placeholder = `CITEREF${issued}ENDCITE`;
+    issued += 1;
+    return placeholder;
+  });
+}
+/**
+ * Replace markdown images with placeholders so each image is diffed as one
+ * atomic unit.
+ *
+ * Recognised forms: `![caption](path)` optionally followed by an attribute
+ * block such as `{#fig:label}`; Word-style captions like
+ * `![Figure N: caption](media/path)` are matched by the same pattern.
+ *
+ * @param md - Markdown source text.
+ * @param registry - Accepted for interface compatibility; this function does
+ *   not read it.
+ * @returns The text with images replaced by `IMAGEBLOCK<n>ENDIMAGE`, and one
+ *   record per image holding the original markup, placeholder, label taken
+ *   from a `#fig:`/`#tbl:` anchor (or null), trimmed caption, path, and the
+ *   figure number parsed from a leading "Figure N"-style caption prefix (or null).
+ */
+export function protectImages(md: string, registry: ImageRegistry | null = null): ProtectImagesResult {
+  const images: ProtectedImage[] = [];
+  // Caption and path are required; the trailing {...} attribute block is optional
+  const imagePattern = /!\[([^\]]*)\]\(([^)]+)\)(?:\{([^}]+)\})?/g;
+  const text = md.replace(imagePattern, (match, caption, path, anchor) => {
+    // e.g. "#fig:map" -> "map"; absent or unrelated attributes give no label
+    const labelMatch = anchor ? anchor.match(/#(fig|tbl):([a-zA-Z0-9_-]+)/) : null;
+    // e.g. "Figure 3: ..." -> "3", "Fig. S2 ..." -> "S2"
+    const figNumMatch = caption.match(/^(?:Figure|Fig\.?|Table|Tbl\.?)\s+(\d+|S\d+)[:\.]?\s*/i);
+    const placeholder = `IMAGEBLOCK${images.length}ENDIMAGE`;
+    images.push({
+      original: match,
+      placeholder,
+      label: labelMatch ? labelMatch[2] : null,
+      caption: caption.trim(),
+      path,
+      figureNumber: figNumMatch ? figNumMatch[1] : null,
+    });
+    return placeholder;
+  });
+  return { text, images };
+}
+/**
+ * Restore images from placeholders.
+ *
+ * Thin wrapper: passes `text` and `images` to the shared
+ * `restoreProtectedItems` helper and returns its result unchanged.
+ *
+ * @param text - Text containing image placeholders.
+ * @param images - Protected image records to restore.
+ * @returns Whatever `restoreProtectedItems` returns for these arguments.
+ */
+export function restoreImages(text: string, images: ProtectedImage[]): string {
+  return restoreProtectedItems(text, images);
+}
+/**
+ * Match Word-extracted images to the original images.
+ *
+ * Each Word image is scored against every original not yet claimed by an
+ * earlier Word image, and the highest-scoring original wins (ties keep the
+ * earlier original). Evidence and weights:
+ *   - identical label: +100
+ *   - registry entry for `fig:<figureNumber>` whose label equals the original's: +90
+ *   - identical caption (first 50 chars, lowercased, "Figure N:" prefix removed
+ *     from the Word caption): +80, or a shared 30-char caption prefix: +40
+ *   - identical, non-empty file name: +30
+ * A pair is accepted only when the best score is at least 40.
+ *
+ * The caption prefix that is removed accepts the same numbering as
+ * protectImages (`N` or `SN`), so supplementary figures ("Figure S1: ...") can
+ * reach the exact-caption score.
+ *
+ * @param originalImages - Images protected in the original markdown.
+ * @param wordImages - Images protected in the Word-derived markdown.
+ * @param registry - Optional registry used to resolve figure numbers to labels.
+ * @returns Map from Word image placeholder to original image placeholder.
+ */
+export function matchWordImagesToOriginal(
+  originalImages: ProtectedImage[],
+  wordImages: ProtectedImage[],
+  registry: ImageRegistry | null = null
+): Map<string, string> {
+  const mapping = new Map<string, string>();
+  const usedOriginals = new Set<string>();
+  for (const wordImg of wordImages) {
+    // Word-side values do not depend on the candidate, so compute them once
+    const wordCaption = wordImg.caption
+      .replace(/^(?:Figure|Fig\.?|Table|Tbl\.?)\s+(?:\d+|S\d+)[:\.]?\s*/i, '')
+      .toLowerCase()
+      .slice(0, 50);
+    const wordFile = wordImg.path.split('/').pop()?.toLowerCase() || '';
+    const registryEntry =
+      wordImg.figureNumber && registry
+        ? registry.byNumber?.get(`fig:${wordImg.figureNumber}`)
+        : undefined;
+    let bestMatch: ProtectedImage | null = null;
+    let bestScore = 0;
+    for (const origImg of originalImages) {
+      // Each original may be claimed by one Word image only
+      if (usedOriginals.has(origImg.placeholder)) continue;
+      let score = 0;
+      // Match by label (most reliable)
+      if (wordImg.label && origImg.label && wordImg.label === origImg.label) {
+        score += 100;
+      }
+      // Match by figure number via registry
+      if (registryEntry && registryEntry.label === origImg.label) {
+        score += 90;
+      }
+      // Match by caption similarity (first 50 chars, normalized)
+      const origCaption = origImg.caption.toLowerCase().slice(0, 50);
+      if (wordCaption && origCaption) {
+        if (wordCaption === origCaption) {
+          score += 80;
+        } else if (
+          wordCaption.includes(origCaption.slice(0, 30)) ||
+          origCaption.includes(wordCaption.slice(0, 30))
+        ) {
+          score += 40;
+        }
+      }
+      // Match by file name; two empty names are not evidence of a match
+      const origFile = origImg.path.split('/').pop()?.toLowerCase() || '';
+      if (wordFile && wordFile === origFile) {
+        score += 30;
+      }
+      if (score > bestScore) {
+        bestScore = score;
+        bestMatch = origImg;
+      }
+    }
+    if (bestMatch && bestScore >= 40) {
+      mapping.set(wordImg.placeholder, bestMatch.placeholder);
+      usedOriginals.add(bestMatch.placeholder);
+    }
+  }
+  return mapping;
+}
+/**
+ * Protect markdown tables before diffing by replacing them with placeholders.
+ * Tables are treated as atomic blocks to prevent corruption during diff.
+ *
+ * A candidate is a run of consecutive lines that start and end with `|`,
+ * optionally preceded by a caption line starting with `Table` or `**Table`.
+ * The final row may end at the end of the input without a trailing newline.
+ * A candidate is only replaced when it contains a separator row; besides the
+ * compact forms `|---`, `| ---` and `|:--`, any row made up solely of
+ * dash cells with optional alignment colons (e.g. `| :--- | :---: |`) counts.
+ *
+ * @param md - Markdown source text.
+ * @returns The text with each table replaced by `TABLEBLOCK<n>ENDTABLE`
+ *   surrounded by blank lines, and one record per table holding the trimmed
+ *   original, the trimmed placeholder and the number of `|` characters.
+ */
+export function protectTables(md: string): ProtectTablesResult {
+  const tables: ProtectedTable[] = [];
+  // Optional caption, then one or more pipe-delimited rows. Each row ends at a
+  // newline or at the very end of the input, so an unterminated last row is
+  // still part of the table.
+  const tablePattern = /(?:^(?:\*\*)?Table[^\n]*\n\n?)?(?:^\|[^\n]+\|(?:\n|(?![\s\S])))+/gm;
+  // A row consisting only of dash cells with optional alignment colons
+  const separatorRow = /^\|(?:[ \t]*:?-+:?[ \t]*\|)+[ \t]*$/m;
+  const text = md.replace(tablePattern, (match) => {
+    // Verify it's actually a table (has a separator row with dashes)
+    const hasSeparator =
+      match.includes('|---') ||
+      match.includes('| ---') ||
+      match.includes('|:--') ||
+      separatorRow.test(match);
+    if (!hasSeparator) {
+      return match; // Not a real table, just lines with pipes
+    }
+    const idx = tables.length;
+    const placeholder = `\n\nTABLEBLOCK${idx}ENDTABLE\n\n`;
+    // Count pipes as an approximate cell count for matching in Word
+    const cellCount = (match.match(/\|/g) || []).length;
+    tables.push({ original: match.trim(), placeholder: placeholder.trim(), cellCount });
+    return placeholder;
+  });
+  return { text, tables };
+}
+/**
+ * Restore tables from placeholders.
+ *
+ * Thin wrapper: passes `text` and `tables` to the shared
+ * `restoreProtectedItems` helper and returns its result unchanged.
+ *
+ * @param text - Text containing table placeholders.
+ * @param tables - Protected table records to restore.
+ * @returns Whatever `restoreProtectedItems` returns for these arguments.
+ */
+export function restoreTables(text: string, tables: ProtectedTable[]): string {
+  return restoreProtectedItems(text, tables);
+}