npm - docrev - Versions diffs - 0.10.0 → 0.10.1 - Mend

docrev 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (126) hide show

package/.gitattributes +1 -1
package/CHANGELOG.md +173 -164
package/PLAN-tables-and-postprocess.md +850 -850
package/README.md +431 -431
package/bin/rev.js +11 -11
package/bin/rev.ts +145 -145
package/completions/rev.bash +127 -127
package/completions/rev.ps1 +210 -210
package/completions/rev.zsh +207 -207
package/dist/lib/anchor-match.d.ts +1 -1
package/dist/lib/anchor-match.d.ts.map +1 -1
package/dist/lib/anchor-match.js +17 -47
package/dist/lib/anchor-match.js.map +1 -1
package/dist/lib/build.js +4 -4
package/dist/lib/commands/context.d.ts +1 -1
package/dist/lib/commands/context.d.ts.map +1 -1
package/dist/lib/commands/context.js +1 -1
package/dist/lib/commands/context.js.map +1 -1
package/dist/lib/commands/sections.js +7 -7
package/dist/lib/commands/sections.js.map +1 -1
package/dist/lib/commands/sync.d.ts.map +1 -1
package/dist/lib/commands/sync.js +15 -14
package/dist/lib/commands/sync.js.map +1 -1
package/dist/lib/commands/utilities.js +164 -164
package/dist/lib/commands/verify-anchors.js +6 -6
package/dist/lib/commands/verify-anchors.js.map +1 -1
package/dist/lib/commands/word-tools.js +8 -8
package/dist/lib/grammar.js +3 -3
package/dist/lib/macro-filter.lua +201 -201
package/dist/lib/pdf-comments.js +44 -44
package/dist/lib/plugins.js +57 -57
package/dist/lib/pptx-color-filter.lua +37 -37
package/dist/lib/pptx-themes.js +115 -115
package/dist/lib/sections.d.ts +35 -0
package/dist/lib/sections.d.ts.map +1 -1
package/dist/lib/sections.js +81 -0
package/dist/lib/sections.js.map +1 -1
package/dist/lib/spelling.js +2 -2
package/dist/lib/templates.js +387 -387
package/dist/lib/themes.js +51 -51
package/docs-src/build.py +113 -113
package/docs-src/extra.css +208 -208
package/docs-src/md-to-html.lua +6 -6
package/docs-src/template.html +116 -116
package/eslint.config.js +27 -27
package/lib/anchor-match.ts +276 -308
package/lib/annotations.ts +644 -644
package/lib/build.ts +1766 -1766
package/lib/citations.ts +160 -160
package/lib/commands/build.ts +855 -855
package/lib/commands/citations.ts +515 -515
package/lib/commands/comments.ts +1050 -1050
package/lib/commands/context.ts +176 -174
package/lib/commands/core.ts +309 -309
package/lib/commands/doi.ts +435 -435
package/lib/commands/file-ops.ts +372 -372
package/lib/commands/history.ts +320 -320
package/lib/commands/index.ts +87 -87
package/lib/commands/init.ts +259 -259
package/lib/commands/merge-resolve.ts +378 -378
package/lib/commands/preview.ts +178 -178
package/lib/commands/project-info.ts +244 -244
package/lib/commands/quality.ts +517 -517
package/lib/commands/response.ts +454 -454
package/lib/commands/section-boundaries.ts +82 -82
package/lib/commands/sections.ts +451 -451
package/lib/commands/sync.ts +709 -706
package/lib/commands/text-ops.ts +449 -449
package/lib/commands/utilities.ts +448 -448
package/lib/commands/verify-anchors.ts +272 -272
package/lib/commands/word-tools.ts +340 -340
package/lib/comment-realign.ts +517 -517
package/lib/config.ts +84 -84
package/lib/crossref.ts +781 -781
package/lib/csl.ts +191 -191
package/lib/dependencies.ts +98 -98
package/lib/diff-engine.ts +465 -465
package/lib/doi-cache.ts +115 -115
package/lib/doi.ts +897 -897
package/lib/equations.ts +506 -506
package/lib/errors.ts +346 -346
package/lib/format.ts +541 -541
package/lib/git.ts +326 -326
package/lib/grammar.ts +303 -303
package/lib/image-registry.ts +180 -180
package/lib/import.ts +911 -911
package/lib/journals.ts +543 -543
package/lib/macro-filter.lua +201 -201
package/lib/macros.ts +273 -273
package/lib/merge.ts +633 -633
package/lib/orcid.ts +144 -144
package/lib/pdf-comments.ts +263 -263
package/lib/pdf-import.ts +524 -524
package/lib/plugins.ts +362 -362
package/lib/postprocess.ts +188 -188
package/lib/pptx-color-filter.lua +37 -37
package/lib/pptx-template.ts +469 -469
package/lib/pptx-themes.ts +483 -483
package/lib/protect-restore.ts +520 -520
package/lib/rate-limiter.ts +94 -94
package/lib/response.ts +197 -197
package/lib/restore-references.ts +240 -240
package/lib/review.ts +327 -327
package/lib/schema.ts +488 -488
package/lib/scientific-words.ts +73 -73
package/lib/sections.ts +425 -335
package/lib/slides.ts +756 -756
package/lib/spelling.ts +334 -334
package/lib/templates.ts +526 -526
package/lib/themes.ts +742 -742
package/lib/trackchanges.ts +247 -247
package/lib/tui.ts +450 -450
package/lib/types.ts +550 -550
package/lib/undo.ts +250 -250
package/lib/utils.ts +69 -69
package/lib/variables.ts +179 -179
package/lib/word-extraction.ts +806 -806
package/lib/word.ts +643 -643
package/lib/wordcomments.ts +840 -840
package/mkdocs.yml +64 -64
package/package.json +137 -137
package/scripts/postbuild.js +47 -47
package/skill/REFERENCE.md +539 -539
package/skill/SKILL.md +295 -295
package/tsconfig.json +26 -26
package/types/index.d.ts +525 -525

package/lib/diff-engine.ts CHANGED Viewed

@@ -1,465 +1,465 @@
-/**
- * Diff engine - diffing and annotation processing for Word→Markdown import
- */
-import { diffWords, Change } from 'diff';
-import {
-  extractMarkdownPrefix,
-  protectAnchors,
-  restoreAnchors,
-  protectCrossrefs,
-  restoreCrossrefs,
-  protectMath,
-  restoreMath,
-  replaceRenderedMath,
-  protectCitations,
-  restoreCitations,
-  replaceRenderedCitations,
-  protectImages,
-  restoreImages,
-  matchWordImagesToOriginal,
-  protectTables,
-  restoreTables,
-} from './protect-restore.js';
-import { normalizeWhitespace } from './utils.js';
-import type { WordTable } from './word-extraction.js';
-// ============================================
-// Type Definitions
-// ============================================
-export interface GenerateSmartDiffOptions {
-  wordTables?: WordTable[];
-  imageRegistry?: any;
-}
-// ============================================
-// Functions
-// ============================================
-/**
- * Fix citation and math annotations by preserving original markdown syntax
- */
-export function fixCitationAnnotations(text: string, originalMd: string): string {
-  // Fix math annotations - preserve inline and display math
-  text = text.replace(/\{--(\$[^$]+\$)--\}/g, '$1');
-  text = text.replace(/\{--(\$\$[^$]+\$\$)--\}/g, '$1');
-  text = text.replace(/\{~~(\$[^$]+\$)~>[^~]+~~\}/g, '$1');
-  text = text.replace(/\{~~(\$\$[^$]+\$\$)~>[^~]+~~\}/g, '$1');
-  // Extract all citations from original markdown
-  const citationPattern = /\[@[^\]]+\]/g;
-  const originalCitations = [...originalMd.matchAll(citationPattern)].map(m => m[0]);
-  // Fix substitutions where left side has markdown citation
-  text = text.replace(/\{~~(\[@[^\]]+\])~>[^~]+~~\}/g, '$1');
-  // Fix substitutions where left side STARTS with markdown citation
-  text = text.replace(/\{~~(\[@[^\]]+\])\s*([^~]*)~>([^~]*)~~\}/g, (match, cite, oldText, newText) => {
-    if (oldText.trim() === '' && newText.trim() === '') {
-      return cite;
-    }
-    if (oldText.trim() || newText.trim()) {
-      return cite + (oldText.trim() !== newText.trim() ? ` {~~${oldText.trim()}~>${newText.trim()}~~}` : ` ${newText}`);
-    }
-    return cite;
-  });
-  // Fix deletions of markdown citations
-  text = text.replace(/\{--(\[@[^\]]+\])--\}/g, '$1');
-  // Fix insertions of rendered citations
-  text = text.replace(/\{\+\+\([A-Z][^)]*\d{4}[^)]*\)\+\+\}/g, '');
-  // Clean up broken multi-part substitutions
-  text = text.replace(/\{~~(@[A-Za-z]+\d{4})~>[^~]+~~\}/g, '[$1]');
-  // Fix citations split across substitution boundaries
-  text = text.replace(/\{~~\[@~>[^~]*~~\}([A-Za-z]+\d{4})\]/g, '[@$1]');
-  // Clean up any remaining partial citations
-  text = text.replace(/\{~~;\s*@([A-Za-z]+\d{4})\]~>[^~]*~~\}/g, '; [@$1]');
-  // Remove rendered citation insertions (with Unicode support)
-  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\+\+\}/gu, '');
-  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
-  // Trailing citation fragments
-  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
-  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');
-  // Just year with closing paren
-  text = text.replace(/\{\+\+\d{4}[a-z]?\)\.\s*\+\+\}/g, '');
-  text = text.replace(/\{\+\+\d{4}[a-z]?\)\s*\+\+\}/g, '');
-  // Leading citation fragments
-  text = text.replace(/\{\+\+\(?\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s*\+\+\}/gu, '');
-  // Semicolon-separated fragments
-  text = text.replace(/\{\+\+[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?\+\+\}/gu, '');
-  // Year ranges with authors
-  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');
-  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
-  // Clean up double spaces and orphaned punctuation
-  text = text.replace(/  +/g, ' ');
-  text = text.replace(/\s+\./g, '.');
-  text = text.replace(/\s+,/g, ',');
-  // Final cleanup - remove empty annotations
-  text = text.replace(/\{~~\s*~>\s*~~\}/g, '');
-  text = text.replace(/\{\+\+\s*\+\+\}/g, '');
-  text = text.replace(/\{--\s*--\}/g, '');
-  return text;
-}
-/**
- * Strip markdown syntax to get plain text
- */
-function stripMarkdownSyntax(md: string): string {
-  return md
-    .replace(/^---[\s\S]*?---\n*/m, '')
-    .replace(/^#{1,6}\s+/gm, '')
-    .replace(/(\*\*|__)(.*?)\1/g, '$2')
-    .replace(/(\*|_)(.*?)\1/g, '$2')
-    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
-    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')
-    .replace(/`([^`]+)`/g, '$1')
-    .replace(/```[\s\S]*?```/g, '')
-    .replace(/^>\s*/gm, '')
-    .replace(/^[-*_]{3,}\s*$/gm, '')
-    .replace(/^[\s]*[-*+]\s+/gm, '')
-    .replace(/^[\s]*\d+\.\s+/gm, '')
-    .replace(/\|/g, ' ')
-    .replace(/^[-:]+$/gm, '')
-    .replace(/\n{3,}/g, '\n\n')
-    .trim();
-}
-/**
- * Inject Word tables (extracted from XML) into pandoc text output
- */
-function injectWordTables(pandocText: string, wordTables: WordTable[]): string {
-  if (!wordTables || wordTables.length === 0) {
-    return pandocText;
-  }
-  let result = pandocText;
-  for (const table of wordTables) {
-    const firstLine = table.markdown.split('\n')[0];
-    const headerCells = firstLine
-      .split('|')
-      .map((c) => c.trim())
-      .filter((c) => c.length > 0);
-    if (headerCells.length === 0) continue;
-    const firstCell = headerCells[0];
-    const startIdx = result.indexOf(firstCell);
-    if (startIdx === -1) continue;
-    const lastLine = table.markdown.split('\n').pop();
-    const lastCells = lastLine!
-      .split('|')
-      .map((c) => c.trim())
-      .filter((c) => c.length > 0);
-    const lastCell = lastCells[lastCells.length - 1] || lastCells[0];
-    const endIdx = result.indexOf(lastCell, startIdx);
-    if (endIdx === -1) continue;
-    let regionStart = result.lastIndexOf('\n\n', startIdx);
-    if (regionStart === -1) regionStart = 0;
-    else regionStart += 2;
-    let regionEnd = result.indexOf('\n\n', endIdx + lastCell.length);
-    if (regionEnd === -1) regionEnd = result.length;
-    result = result.slice(0, regionStart) + table.markdown + '\n\n' + result.slice(regionEnd);
-  }
-  return result;
-}
-/**
- * Generate annotated markdown by diffing original MD against Word text
- */
-export function generateAnnotatedDiff(originalMd: string, wordText: string, author: string = 'Reviewer'): string {
-  const normalizedOriginal = normalizeWhitespace(originalMd);
-  const normalizedWord = normalizeWhitespace(wordText);
-  const changes = diffWords(normalizedOriginal, normalizedWord);
-  let result = '';
-  for (const part of changes) {
-    if (part.added) {
-      result += `{++${part.value}++}`;
-    } else if (part.removed) {
-      result += `{--${part.value}--}`;
-    } else {
-      result += part.value;
-    }
-  }
-  return result;
-}
-/**
- * Smart paragraph-level diff that preserves markdown structure
- */
-export function generateSmartDiff(
-  originalMd: string,
-  wordText: string,
-  author: string = 'Reviewer',
-  options: GenerateSmartDiffOptions = {}
-): string {
-  const { wordTables = [], imageRegistry = null } = options;
-  // Inject Word tables into pandoc output
-  let wordTextWithTables = injectWordTables(wordText, wordTables);
-  // Protect markdown tables
-  const { text: mdWithTablesProtected, tables } = protectTables(originalMd);
-  // Also protect tables in Word text
-  const { text: wordWithTablesProtected, tables: wordTableBlocks } = protectTables(wordTextWithTables);
-  // Protect images
-  const { text: mdWithImagesProtected, images: origImages } = protectImages(mdWithTablesProtected, imageRegistry);
-  const { text: wordWithImagesProtected, images: wordImages } = protectImages(wordWithTablesProtected, imageRegistry);
-  // Match Word images to original images
-  const imageMapping = matchWordImagesToOriginal(origImages, wordImages, imageRegistry);
-  // Replace Word image placeholders with matching original placeholders
-  let wordWithMappedImages = wordWithImagesProtected;
-  for (const [wordPlaceholder, origPlaceholder] of imageMapping) {
-    wordWithMappedImages = wordWithMappedImages.split(wordPlaceholder).join(origPlaceholder);
-  }
-  // Protect figure/table anchors
-  const { text: mdWithAnchorsProtected, anchors: figAnchors } = protectAnchors(mdWithImagesProtected);
-  // Protect cross-references
-  const { text: mdWithXrefsProtected, crossrefs } = protectCrossrefs(mdWithAnchorsProtected);
-  // Protect math
-  const { text: mdWithMathProtected, mathBlocks } = protectMath(mdWithXrefsProtected);
-  // Protect citations
-  const { text: mdProtected, citations } = protectCitations(mdWithMathProtected);
-  // Replace rendered elements in Word text
-  let wordProtected = wordWithMappedImages;
-  wordProtected = replaceRenderedMath(wordProtected, mathBlocks);
-  wordProtected = replaceRenderedCitations(wordProtected, citations.length);
-  // Split into paragraphs
-  const originalParas = mdProtected.split(/\n\n+/);
-  const wordParas = wordProtected.split(/\n\n+/);
-  const result: string[] = [];
-  // Try to match paragraphs intelligently
-  let wordIdx = 0;
-  for (let i = 0; i < originalParas.length; i++) {
-    const orig = originalParas[i] || '';
-    const { prefix: mdPrefix, content: origContent } = extractMarkdownPrefix(orig.split('\n')[0]);
-    // Find best matching word paragraph
-    let bestMatch = -1;
-    let bestScore = 0;
-    for (let j = wordIdx; j < Math.min(wordIdx + 3, wordParas.length); j++) {
-      const wordPara = wordParas[j] || '';
-      const origWords = new Set(origContent.toLowerCase().split(/\s+/));
-      const wordWords = wordPara.toLowerCase().split(/\s+/);
-      const common = wordWords.filter((w) => origWords.has(w)).length;
-      const score = common / Math.max(origWords.size, wordWords.length);
-      if (score > bestScore && score > 0.3) {
-        bestScore = score;
-        bestMatch = j;
-      }
-    }
-    if (bestMatch === -1) {
-      if (mdPrefix && wordIdx < wordParas.length) {
-        const wordPara = wordParas[wordIdx];
-        if (wordPara.toLowerCase().includes(origContent.toLowerCase().slice(0, 20))) {
-          bestMatch = wordIdx;
-        }
-      }
-    }
-    if (bestMatch >= 0) {
-      const word = wordParas[bestMatch];
-      const origStripped = stripMarkdownSyntax(orig);
-      const wordNormalized = normalizeWhitespace(word);
-      if (origStripped === wordNormalized) {
-        result.push(orig);
-      } else {
-        const changes = diffWords(origStripped, wordNormalized);
-        let annotated = mdPrefix;
-        for (const part of changes) {
-          if (part.added) {
-            annotated += `{++${part.value}++}`;
-          } else if (part.removed) {
-            annotated += `{--${part.value}--}`;
-          } else {
-            annotated += part.value;
-          }
-        }
-        result.push(annotated);
-      }
-      wordIdx = bestMatch + 1;
-    } else {
-      // Paragraph deleted entirely
-      if (mdPrefix && mdPrefix.match(/^#{1,6}\s+/)) {
-        result.push(orig);
-      } else {
-        result.push(`{--${orig}--}`);
-      }
-    }
-  }
-  // Any remaining word paragraphs are additions
-  for (let j = wordIdx; j < wordParas.length; j++) {
-    const word = wordParas[j];
-    if (word.trim()) {
-      result.push(`{++${word}++}`);
-    }
-  }
-  // Restore protected content
-  let finalResult = result.join('\n\n');
-  finalResult = restoreCitations(finalResult, citations);
-  finalResult = restoreMath(finalResult, mathBlocks);
-  finalResult = restoreCrossrefs(finalResult, crossrefs);
-  finalResult = restoreAnchors(finalResult, figAnchors);
-  finalResult = restoreImages(finalResult, origImages);
-  finalResult = restoreImages(finalResult, wordImages);
-  finalResult = restoreTables(finalResult, tables);
-  finalResult = restoreTables(finalResult, wordTableBlocks);
-  return finalResult;
-}
-/**
- * Clean up redundant adjacent annotations
- */
-export function cleanupAnnotations(text: string): string {
-  // Convert adjacent delete+insert to substitution
-  text = text.replace(/\{--(.+?)--\}\s*\{\+\+(.+?)\+\+\}/g, '{~~$1~>$2~~}');
-  // Also handle insert+delete
-  text = text.replace(/\{\+\+(.+?)\+\+\}\s*\{--(.+?)--\}/g, '{~~$2~>$1~~}');
-  // Fix malformed patterns
-  text = text.replace(/\{--([^}]+?)~>([^}]+?)~~\}/g, '{~~$1~>$2~~}');
-  // Fix malformed substitutions that got split
-  text = text.replace(/\{~~([^~]+)\s*--\}/g, '{--$1--}');
-  text = text.replace(/\{\+\+([^+]+)~~\}/g, '{++$1++}');
-  // Clean up empty annotations
-  text = text.replace(/\{--\s*--\}/g, '');
-  text = text.replace(/\{\+\+\s*\+\+\}/g, '');
-  // Clean up double spaces in prose, but preserve table formatting
-  const lines = text.split('\n');
-  let inTable = false;
-  const processedLines = lines.map((line, idx) => {
-    const isSeparator = /^[-]+(\s+[-]+)+\s*$/.test(line.trim());
-    const looksLikeTableRow = /\S+\s{2,}\S+/.test(line);
-    if (isSeparator) {
-      if (!inTable) {
-        inTable = true;
-      }
-      return line;
-    }
-    if (inTable) {
-      if (line.trim() === '') {
-        let lookAhead = idx + 1;
-        let foundTableContent = false;
-        let foundEndSeparator = false;
-        while (lookAhead < lines.length && lookAhead < idx + 20) {
-          const nextLine = lines[lookAhead].trim();
-          if (nextLine === '') {
-            lookAhead++;
-            continue;
-          }
-          if (/^[-]+(\s+[-]+)+\s*$/.test(nextLine)) {
-            foundEndSeparator = true;
-            break;
-          }
-          if (/\S+\s{2,}\S+/.test(nextLine)) {
-            foundTableContent = true;
-            break;
-          }
-          if (/^\*[^*]+\*\s*$/.test(nextLine)) {
-            foundTableContent = true;
-            break;
-          }
-          if (lines[lookAhead].startsWith('  ')) {
-            lookAhead++;
-            continue;
-          }
-          break;
-        }
-        if (foundTableContent || foundEndSeparator) {
-          return line;
-        }
-        inTable = false;
-        return line;
-      }
-      return line;
-    }
-    if (looksLikeTableRow) {
-      let nextIdx = idx + 1;
-      while (nextIdx < lines.length && lines[nextIdx].trim() === '') {
-        nextIdx++;
-      }
-      if (nextIdx < lines.length && /^[-]+(\s+[-]+)+\s*$/.test(lines[nextIdx].trim())) {
-        return line;
-      }
-    }
-    if (line.trim().startsWith('|')) {
-      return line;
-    }
-    return line.replace(/  +/g, ' ');
-  });
-  text = processedLines.join('\n');
-  return text;
-}
+/**
+ * Diff engine - diffing and annotation processing for Word→Markdown import
+ */
+import { diffWords, Change } from 'diff';
+import {
+  extractMarkdownPrefix,
+  protectAnchors,
+  restoreAnchors,
+  protectCrossrefs,
+  restoreCrossrefs,
+  protectMath,
+  restoreMath,
+  replaceRenderedMath,
+  protectCitations,
+  restoreCitations,
+  replaceRenderedCitations,
+  protectImages,
+  restoreImages,
+  matchWordImagesToOriginal,
+  protectTables,
+  restoreTables,
+} from './protect-restore.js';
+import { normalizeWhitespace } from './utils.js';
+import type { WordTable } from './word-extraction.js';
+// ============================================
+// Type Definitions
+// ============================================
+/**
+ * Optional inputs accepted by generateSmartDiff.
+ */
+export interface GenerateSmartDiffOptions {
+  /** Tables handed to injectWordTables before diffing; generateSmartDiff falls back to [] when omitted. */
+  wordTables?: WordTable[];
+  /**
+   * Forwarded unchanged to protectImages and matchWordImagesToOriginal;
+   * generateSmartDiff falls back to null when omitted.
+   * NOTE(review): the registry's shape is not visible in this file - confirm before tightening `any`.
+   */
+  imageRegistry?: any;
+}
+// ============================================
+// Functions
+// ============================================
+/**
+ * Fix citation and math annotations by preserving original markdown syntax.
+ *
+ * Applies a fixed, order-sensitive sequence of regex rewrites to text that
+ * carries `{++…++}`, `{--…--}` and `{~~…~>…~~}` annotations:
+ *   1. math wrapped in a deletion/substitution is unwrapped back to the math,
+ *   2. `[@key]` citations wrapped in a deletion/substitution are unwrapped,
+ *   3. insertions that look like rendered "(Author 2020)" citations, or
+ *      fragments of them, are dropped,
+ *   4. leftover double spaces, space-before-punctuation and empty annotations
+ *      are removed.
+ *
+ * @param text - Annotated text to clean up.
+ * @param originalMd - Original markdown. None of the current rules consult it;
+ *   the parameter is kept so existing callers stay valid.
+ * @returns The cleaned-up annotated text.
+ */
+export function fixCitationAnnotations(text: string, originalMd: string): string {
+  // Fix math annotations - preserve inline and display math
+  text = text.replace(/\{--(\$[^$]+\$)--\}/g, '$1');
+  text = text.replace(/\{--(\$\$[^$]+\$\$)--\}/g, '$1');
+  text = text.replace(/\{~~(\$[^$]+\$)~>[^~]+~~\}/g, '$1');
+  text = text.replace(/\{~~(\$\$[^$]+\$\$)~>[^~]+~~\}/g, '$1');
+  // Fix substitutions where left side has markdown citation
+  text = text.replace(/\{~~(\[@[^\]]+\])~>[^~]+~~\}/g, '$1');
+  // Fix substitutions where left side STARTS with markdown citation:
+  // keep the citation and re-emit whatever trailed it.
+  text = text.replace(/\{~~(\[@[^\]]+\])\s*([^~]*)~>([^~]*)~~\}/g, (_match, cite, oldText, newText) => {
+    const oldTrimmed = oldText.trim();
+    const newTrimmed = newText.trim();
+    if (oldTrimmed === '' && newTrimmed === '') {
+      return cite;
+    }
+    // A real change after the citation stays a substitution; identical text
+    // on both sides collapses to plain text (new side, untrimmed, as before).
+    return oldTrimmed !== newTrimmed
+      ? `${cite} {~~${oldTrimmed}~>${newTrimmed}~~}`
+      : `${cite} ${newText}`;
+  });
+  // Fix deletions of markdown citations
+  text = text.replace(/\{--(\[@[^\]]+\])--\}/g, '$1');
+  // Fix insertions of rendered citations
+  text = text.replace(/\{\+\+\([A-Z][^)]*\d{4}[^)]*\)\+\+\}/g, '');
+  // Clean up broken multi-part substitutions
+  text = text.replace(/\{~~(@[A-Za-z]+\d{4})~>[^~]+~~\}/g, '[$1]');
+  // Fix citations split across substitution boundaries
+  text = text.replace(/\{~~\[@~>[^~]*~~\}([A-Za-z]+\d{4})\]/g, '[@$1]');
+  // Clean up any remaining partial citations
+  text = text.replace(/\{~~;\s*@([A-Za-z]+\d{4})\]~>[^~]*~~\}/g, '; [@$1]');
+  // Remove rendered citation insertions (with Unicode support)
+  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\+\+\}/gu, '');
+  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
+  // Trailing citation fragments
+  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
+  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');
+  // Just year with closing paren
+  text = text.replace(/\{\+\+\d{4}[a-z]?\)\.\s*\+\+\}/g, '');
+  text = text.replace(/\{\+\+\d{4}[a-z]?\)\s*\+\+\}/g, '');
+  // Leading citation fragments
+  text = text.replace(/\{\+\+\(?\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s*\+\+\}/gu, '');
+  // Semicolon-separated fragments
+  text = text.replace(/\{\+\+[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?\+\+\}/gu, '');
+  // Year ranges with authors
+  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');
+  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
+  // Clean up double spaces and orphaned punctuation
+  text = text.replace(/  +/g, ' ');
+  text = text.replace(/\s+\./g, '.');
+  text = text.replace(/\s+,/g, ',');
+  // Final cleanup - remove empty annotations
+  text = text.replace(/\{~~\s*~>\s*~~\}/g, '');
+  text = text.replace(/\{\+\+\s*\+\+\}/g, '');
+  text = text.replace(/\{--\s*--\}/g, '');
+  return text;
+}
+/**
+ * Strip markdown syntax to get plain text.
+ *
+ * Removes a leading `---`…`---` block, heading markers, emphasis markers,
+ * images, link targets (keeping the link text), fenced code blocks, inline
+ * code backticks, blockquote markers, horizontal rules, list markers and
+ * table pipes/rules, then collapses runs of blank lines and trims.
+ *
+ * Rule order matters: the more specific pattern of each overlapping pair
+ * must run first, otherwise the general one consumes part of the syntax and
+ * the specific one can never match.
+ *
+ * @param md - Markdown text (a whole document or a single paragraph).
+ * @returns The text with markdown markers removed.
+ */
+function stripMarkdownSyntax(md: string): string {
+  return md
+    .replace(/^---[\s\S]*?---\n*/m, '')
+    .replace(/^#{1,6}\s+/gm, '')
+    .replace(/(\*\*|__)(.*?)\1/g, '$2')
+    .replace(/(\*|_)(.*?)\1/g, '$2')
+    // Images before links: the link rule also matches the `[alt](url)` tail
+    // of an image and would leave a stray `!alt` behind.
+    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')
+    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
+    // Fenced blocks before inline code: the inline rule would otherwise eat
+    // one backtick from each fence and the fence would survive.
+    .replace(/```[\s\S]*?```/g, '')
+    .replace(/`([^`]+)`/g, '$1')
+    .replace(/^>\s*/gm, '')
+    .replace(/^[-*_]{3,}\s*$/gm, '')
+    .replace(/^[\s]*[-*+]\s+/gm, '')
+    .replace(/^[\s]*\d+\.\s+/gm, '')
+    .replace(/\|/g, ' ')
+    .replace(/^[-:]+$/gm, '')
+    .replace(/\n{3,}/g, '\n\n')
+    .trim();
+}
+/**
+ * Inject Word tables (extracted from XML) into pandoc text output.
+ *
+ * For each table, the first header cell and the last cell of the last
+ * populated row are located in the text; the blank-line-delimited region
+ * spanning them is replaced by the table's markdown. Tables whose boundary
+ * cells cannot be found are skipped and the text is left as it was.
+ *
+ * @param pandocText - Text to inject into.
+ * @param wordTables - Tables to inject; only their `markdown` field is read.
+ * @returns The text with every locatable table region replaced.
+ */
+function injectWordTables(pandocText: string, wordTables: WordTable[]): string {
+  if (!wordTables || wordTables.length === 0) {
+    return pandocText;
+  }
+  // Split one markdown table row into its non-empty, trimmed cells.
+  const cellsOf = (row: string): string[] =>
+    row
+      .split('|')
+      .map((c) => c.trim())
+      .filter((c) => c.length > 0);
+  let result = pandocText;
+  for (const table of wordTables) {
+    const rows = table.markdown.split('\n');
+    const headerCells = cellsOf(rows[0]);
+    if (headerCells.length === 0) continue;
+    const firstCell = headerCells[0];
+    const startIdx = result.indexOf(firstCell);
+    if (startIdx === -1) continue;
+    // Use the last row that actually has cells. Markdown ending in a newline
+    // has an empty final line, which previously yielded an undefined cell and
+    // made the table silently unmatchable.
+    let lastCells: string[] = [];
+    for (let r = rows.length - 1; r >= 0 && lastCells.length === 0; r--) {
+      lastCells = cellsOf(rows[r]);
+    }
+    const lastCell = lastCells[lastCells.length - 1];
+    if (lastCell === undefined) continue;
+    const endIdx = result.indexOf(lastCell, startIdx);
+    if (endIdx === -1) continue;
+    // Widen the match to the enclosing blank-line-delimited region.
+    let regionStart = result.lastIndexOf('\n\n', startIdx);
+    if (regionStart === -1) regionStart = 0;
+    else regionStart += 2;
+    let regionEnd = result.indexOf('\n\n', endIdx + lastCell.length);
+    if (regionEnd === -1) regionEnd = result.length;
+    result = result.slice(0, regionStart) + table.markdown + '\n\n' + result.slice(regionEnd);
+  }
+  return result;
+}
+/**
+ * Word-level diff of the original markdown against the Word text, rendered
+ * inline: inserted runs become `{++…++}`, removed runs become `{--…--}`,
+ * and unchanged runs are copied through. Both inputs are whitespace-normalized
+ * before diffing.
+ *
+ * @param originalMd - Original markdown.
+ * @param wordText - Text taken from the Word document.
+ * @param author - Accepted for interface compatibility; not used here.
+ * @returns The annotated text.
+ */
+export function generateAnnotatedDiff(originalMd: string, wordText: string, author: string = 'Reviewer'): string {
+  const before = normalizeWhitespace(originalMd);
+  const after = normalizeWhitespace(wordText);
+  return diffWords(before, after)
+    .map((chunk) => {
+      if (chunk.added) return `{++${chunk.value}++}`;
+      if (chunk.removed) return `{--${chunk.value}--}`;
+      return chunk.value;
+    })
+    .join('');
+}
+/**
+ * Smart paragraph-level diff that preserves markdown structure.
+ *
+ * Pipeline, in order:
+ *   1. Word tables are injected into the Word text (injectWordTables).
+ *   2. Tables, images, anchors, cross-references, math and citations in the
+ *      original markdown are passed through the protect* helpers; tables and
+ *      images in the Word text are protected too, and rendered math/citations
+ *      in the Word text are replaced via replaceRendered*.
+ *   3. Both texts are split on blank lines and each original paragraph is
+ *      matched against a small window of upcoming Word paragraphs.
+ *   4. Matched pairs are word-diffed; unmatched originals are marked deleted
+ *      (headings are kept as-is); leftover Word paragraphs are marked inserted.
+ *   5. The restore* helpers are applied in the reverse order of protection.
+ *
+ * @param originalMd - Original markdown source.
+ * @param wordText - Text extracted from the Word document.
+ * @param author - Not referenced anywhere in this function body.
+ * @param options - Optional Word tables and image registry.
+ * @returns Annotated markdown, paragraphs joined by a blank line.
+ */
+export function generateSmartDiff(
+  originalMd: string,
+  wordText: string,
+  author: string = 'Reviewer',
+  options: GenerateSmartDiffOptions = {}
+): string {
+  const { wordTables = [], imageRegistry = null } = options;
+  // Inject Word tables into pandoc output
+  let wordTextWithTables = injectWordTables(wordText, wordTables);
+  // Protect markdown tables
+  const { text: mdWithTablesProtected, tables } = protectTables(originalMd);
+  // Also protect tables in Word text
+  const { text: wordWithTablesProtected, tables: wordTableBlocks } = protectTables(wordTextWithTables);
+  // Protect images
+  const { text: mdWithImagesProtected, images: origImages } = protectImages(mdWithTablesProtected, imageRegistry);
+  const { text: wordWithImagesProtected, images: wordImages } = protectImages(wordWithTablesProtected, imageRegistry);
+  // Match Word images to original images
+  const imageMapping = matchWordImagesToOriginal(origImages, wordImages, imageRegistry);
+  // Replace Word image placeholders with matching original placeholders
+  // (split/join replaces every occurrence without regex-escaping the placeholder)
+  let wordWithMappedImages = wordWithImagesProtected;
+  for (const [wordPlaceholder, origPlaceholder] of imageMapping) {
+    wordWithMappedImages = wordWithMappedImages.split(wordPlaceholder).join(origPlaceholder);
+  }
+  // Protect figure/table anchors
+  const { text: mdWithAnchorsProtected, anchors: figAnchors } = protectAnchors(mdWithImagesProtected);
+  // Protect cross-references
+  const { text: mdWithXrefsProtected, crossrefs } = protectCrossrefs(mdWithAnchorsProtected);
+  // Protect math
+  const { text: mdWithMathProtected, mathBlocks } = protectMath(mdWithXrefsProtected);
+  // Protect citations
+  const { text: mdProtected, citations } = protectCitations(mdWithMathProtected);
+  // Replace rendered elements in Word text
+  // (only the citation count is passed on, not the citations themselves)
+  let wordProtected = wordWithMappedImages;
+  wordProtected = replaceRenderedMath(wordProtected, mathBlocks);
+  wordProtected = replaceRenderedCitations(wordProtected, citations.length);
+  // Split into paragraphs
+  const originalParas = mdProtected.split(/\n\n+/);
+  const wordParas = wordProtected.split(/\n\n+/);
+  const result: string[] = [];
+  // Try to match paragraphs intelligently
+  // wordIdx is the first Word paragraph not yet consumed; it only moves forward.
+  let wordIdx = 0;
+  for (let i = 0; i < originalParas.length; i++) {
+    const orig = originalParas[i] || '';
+    // Only the FIRST line of the original paragraph feeds the matching below.
+    const { prefix: mdPrefix, content: origContent } = extractMarkdownPrefix(orig.split('\n')[0]);
+    // Find best matching word paragraph
+    // The search window is at most the next 3 Word paragraphs from wordIdx.
+    let bestMatch = -1;
+    let bestScore = 0;
+    for (let j = wordIdx; j < Math.min(wordIdx + 3, wordParas.length); j++) {
+      const wordPara = wordParas[j] || '';
+      const origWords = new Set(origContent.toLowerCase().split(/\s+/));
+      const wordWords = wordPara.toLowerCase().split(/\s+/);
+      // Score = Word tokens also present in the original, divided by the larger
+      // of (distinct original tokens, total Word tokens). Must exceed 0.3.
+      const common = wordWords.filter((w) => origWords.has(w)).length;
+      const score = common / Math.max(origWords.size, wordWords.length);
+      if (score > bestScore && score > 0.3) {
+        bestScore = score;
+        bestMatch = j;
+      }
+    }
+    // Fallback for prefixed paragraphs only: accept the very next Word paragraph
+    // if it contains the first 20 characters of the original content.
+    // NOTE(review): when origContent is empty, includes('') is always true - confirm that is intended.
+    if (bestMatch === -1) {
+      if (mdPrefix && wordIdx < wordParas.length) {
+        const wordPara = wordParas[wordIdx];
+        if (wordPara.toLowerCase().includes(origContent.toLowerCase().slice(0, 20))) {
+          bestMatch = wordIdx;
+        }
+      }
+    }
+    if (bestMatch >= 0) {
+      const word = wordParas[bestMatch];
+      const origStripped = stripMarkdownSyntax(orig);
+      const wordNormalized = normalizeWhitespace(word);
+      if (origStripped === wordNormalized) {
+        // Text is unchanged once markup is stripped: keep the original verbatim.
+        result.push(orig);
+      } else {
+        // The diff runs on the stripped original, so inline markup from the
+        // original is not reproduced; only mdPrefix is re-attached.
+        const changes = diffWords(origStripped, wordNormalized);
+        let annotated = mdPrefix;
+        for (const part of changes) {
+          if (part.added) {
+            annotated += `{++${part.value}++}`;
+          } else if (part.removed) {
+            annotated += `{--${part.value}--}`;
+          } else {
+            annotated += part.value;
+          }
+        }
+        result.push(annotated);
+      }
+      // NOTE(review): Word paragraphs between the old wordIdx and bestMatch are
+      // skipped here and never emitted as insertions - confirm that is intended.
+      wordIdx = bestMatch + 1;
+    } else {
+      // Paragraph deleted entirely
+      // Headings are kept unannotated rather than wrapped in a deletion.
+      if (mdPrefix && mdPrefix.match(/^#{1,6}\s+/)) {
+        result.push(orig);
+      } else {
+        result.push(`{--${orig}--}`);
+      }
+    }
+  }
+  // Any remaining word paragraphs are additions
+  for (let j = wordIdx; j < wordParas.length; j++) {
+    const word = wordParas[j];
+    if (word.trim()) {
+      result.push(`{++${word}++}`);
+    }
+  }
+  // Restore protected content
+  // (reverse order of protection; images and tables are restored from both the
+  // original-side and Word-side collections)
+  let finalResult = result.join('\n\n');
+  finalResult = restoreCitations(finalResult, citations);
+  finalResult = restoreMath(finalResult, mathBlocks);
+  finalResult = restoreCrossrefs(finalResult, crossrefs);
+  finalResult = restoreAnchors(finalResult, figAnchors);
+  finalResult = restoreImages(finalResult, origImages);
+  finalResult = restoreImages(finalResult, wordImages);
+  finalResult = restoreTables(finalResult, tables);
+  finalResult = restoreTables(finalResult, wordTableBlocks);
+  return finalResult;
+}
+/**
+ * Tidy up annotations produced by the diff step.
+ *
+ * First applies an ordered list of regex rewrites (adjacent delete/insert
+ * pairs become substitutions, malformed markers are repaired, empty markers
+ * are dropped). Then collapses runs of spaces line by line, leaving alone any
+ * line that belongs to a dash-ruled table or starts with `|`.
+ *
+ * @param text - Annotated text.
+ * @returns The tidied text.
+ */
+export function cleanupAnnotations(text: string): string {
+  // Order matters: later rules repair what earlier ones may leave behind.
+  const rewrites: Array<[RegExp, string]> = [
+    // delete followed by insert -> substitution
+    [/\{--(.+?)--\}\s*\{\+\+(.+?)\+\+\}/g, '{~~$1~>$2~~}'],
+    // insert followed by delete -> substitution
+    [/\{\+\+(.+?)\+\+\}\s*\{--(.+?)--\}/g, '{~~$2~>$1~~}'],
+    // deletion opener with substitution closer
+    [/\{--([^}]+?)~>([^}]+?)~~\}/g, '{~~$1~>$2~~}'],
+    // substitution opener with deletion closer
+    [/\{~~([^~]+)\s*--\}/g, '{--$1--}'],
+    // insertion opener with substitution closer
+    [/\{\+\+([^+]+)~~\}/g, '{++$1++}'],
+    // empty markers
+    [/\{--\s*--\}/g, ''],
+    [/\{\+\+\s*\+\+\}/g, ''],
+  ];
+  let cleaned = text;
+  for (const [pattern, replacement] of rewrites) {
+    cleaned = cleaned.replace(pattern, replacement);
+  }
+
+  const separatorRule = /^[-]+(\s+[-]+)+\s*$/;
+  const wideGap = /\S+\s{2,}\S+/;
+  const captionLine = /^\*[^*]+\*\s*$/;
+  const rows = cleaned.split('\n');
+
+  // From a blank line inside a table, peek at most 19 lines ahead to decide
+  // whether the table carries on (another rule, a column-aligned row, or an
+  // italic caption) or has ended.
+  const tableResumesAfter = (blankIdx: number): boolean => {
+    for (let k = blankIdx + 1; k < rows.length && k < blankIdx + 20; k++) {
+      const candidate = rows[k].trim();
+      if (candidate === '') continue;
+      if (separatorRule.test(candidate) || wideGap.test(candidate) || captionLine.test(candidate)) {
+        return true;
+      }
+      // Indented continuation lines are skipped; anything else ends the search.
+      if (!rows[k].startsWith('  ')) return false;
+    }
+    return false;
+  };
+
+  let insideTable = false;
+  const output: string[] = [];
+  for (let i = 0; i < rows.length; i++) {
+    const current = rows[i];
+    const trimmed = current.trim();
+
+    // A dash rule opens (or continues) a table and is kept verbatim.
+    if (separatorRule.test(trimmed)) {
+      insideTable = true;
+      output.push(current);
+      continue;
+    }
+
+    // Inside a table every line is kept verbatim; a blank line may close it.
+    if (insideTable) {
+      if (trimmed === '' && !tableResumesAfter(i)) {
+        insideTable = false;
+      }
+      output.push(current);
+      continue;
+    }
+
+    // A column-aligned line directly above a dash rule is a header row.
+    if (wideGap.test(current)) {
+      let next = i + 1;
+      while (next < rows.length && rows[next].trim() === '') {
+        next++;
+      }
+      if (next < rows.length && separatorRule.test(rows[next].trim())) {
+        output.push(current);
+        continue;
+      }
+    }
+
+    // Pipe-table rows keep their spacing; prose has space runs collapsed.
+    output.push(trimmed.startsWith('|') ? current : current.replace(/  +/g, ' '));
+  }
+  return output.join('\n');
+}