npm - docrev - Versions diffs - 0.9.18 → 0.10.1 - Mend

docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (134) hide show

package/.gitattributes +1 -1
package/CHANGELOG.md +173 -149
package/PLAN-tables-and-postprocess.md +850 -850
package/README.md +431 -406
package/bin/rev.js +11 -11
package/bin/rev.ts +145 -145
package/completions/rev.bash +127 -127
package/completions/rev.ps1 +210 -210
package/completions/rev.zsh +207 -207
package/dist/lib/build.d.ts +8 -0
package/dist/lib/build.d.ts.map +1 -1
package/dist/lib/build.js +62 -6
package/dist/lib/build.js.map +1 -1
package/dist/lib/commands/context.d.ts +1 -1
package/dist/lib/commands/context.d.ts.map +1 -1
package/dist/lib/commands/context.js +1 -1
package/dist/lib/commands/context.js.map +1 -1
package/dist/lib/commands/sections.js +7 -7
package/dist/lib/commands/sections.js.map +1 -1
package/dist/lib/commands/sync.d.ts.map +1 -1
package/dist/lib/commands/sync.js +15 -14
package/dist/lib/commands/sync.js.map +1 -1
package/dist/lib/commands/utilities.js +164 -164
package/dist/lib/commands/verify-anchors.js +6 -6
package/dist/lib/commands/verify-anchors.js.map +1 -1
package/dist/lib/commands/word-tools.js +8 -8
package/dist/lib/grammar.js +3 -3
package/dist/lib/macro-filter.lua +201 -0
package/dist/lib/macros.d.ts +102 -0
package/dist/lib/macros.d.ts.map +1 -0
package/dist/lib/macros.js +218 -0
package/dist/lib/macros.js.map +1 -0
package/dist/lib/pdf-comments.js +44 -44
package/dist/lib/plugins.js +57 -57
package/dist/lib/pptx-color-filter.lua +37 -0
package/dist/lib/pptx-themes.js +115 -115
package/dist/lib/schema.d.ts.map +1 -1
package/dist/lib/schema.js +34 -0
package/dist/lib/schema.js.map +1 -1
package/dist/lib/sections.d.ts +35 -0
package/dist/lib/sections.d.ts.map +1 -1
package/dist/lib/sections.js +81 -0
package/dist/lib/sections.js.map +1 -1
package/dist/lib/spelling.js +2 -2
package/dist/lib/templates.js +387 -387
package/dist/lib/themes.js +51 -51
package/eslint.config.js +27 -27
package/lib/anchor-match.ts +276 -276
package/lib/annotations.ts +644 -644
package/lib/build.ts +1766 -1694
package/lib/citations.ts +160 -160
package/lib/commands/build.ts +855 -855
package/lib/commands/citations.ts +515 -515
package/lib/commands/comments.ts +1050 -1050
package/lib/commands/context.ts +176 -174
package/lib/commands/core.ts +309 -309
package/lib/commands/doi.ts +435 -435
package/lib/commands/file-ops.ts +372 -372
package/lib/commands/history.ts +320 -320
package/lib/commands/index.ts +87 -87
package/lib/commands/init.ts +259 -259
package/lib/commands/merge-resolve.ts +378 -378
package/lib/commands/preview.ts +178 -178
package/lib/commands/project-info.ts +244 -244
package/lib/commands/quality.ts +517 -517
package/lib/commands/response.ts +454 -454
package/lib/commands/section-boundaries.ts +82 -82
package/lib/commands/sections.ts +451 -451
package/lib/commands/sync.ts +709 -706
package/lib/commands/text-ops.ts +449 -449
package/lib/commands/utilities.ts +448 -448
package/lib/commands/verify-anchors.ts +272 -272
package/lib/commands/word-tools.ts +340 -340
package/lib/comment-realign.ts +517 -517
package/lib/config.ts +84 -84
package/lib/crossref.ts +781 -781
package/lib/csl.ts +191 -191
package/lib/dependencies.ts +98 -98
package/lib/diff-engine.ts +465 -465
package/lib/doi-cache.ts +115 -115
package/lib/doi.ts +897 -897
package/lib/equations.ts +506 -506
package/lib/errors.ts +346 -346
package/lib/format.ts +541 -541
package/lib/git.ts +326 -326
package/lib/grammar.ts +303 -303
package/lib/image-registry.ts +180 -180
package/lib/import.ts +911 -911
package/lib/journals.ts +543 -543
package/lib/macro-filter.lua +201 -0
package/lib/macros.ts +273 -0
package/lib/merge.ts +633 -633
package/lib/orcid.ts +144 -144
package/lib/pdf-comments.ts +263 -263
package/lib/pdf-import.ts +524 -524
package/lib/plugins.ts +362 -362
package/lib/postprocess.ts +188 -188
package/lib/pptx-color-filter.lua +37 -37
package/lib/pptx-template.ts +469 -469
package/lib/pptx-themes.ts +483 -483
package/lib/protect-restore.ts +520 -520
package/lib/rate-limiter.ts +94 -94
package/lib/response.ts +197 -197
package/lib/restore-references.ts +240 -240
package/lib/review.ts +327 -327
package/lib/schema.ts +488 -454
package/lib/scientific-words.ts +73 -73
package/lib/sections.ts +425 -335
package/lib/slides.ts +756 -756
package/lib/spelling.ts +334 -334
package/lib/templates.ts +526 -526
package/lib/themes.ts +742 -742
package/lib/trackchanges.ts +247 -247
package/lib/tui.ts +450 -450
package/lib/types.ts +550 -550
package/lib/undo.ts +250 -250
package/lib/utils.ts +69 -69
package/lib/variables.ts +179 -179
package/lib/word-extraction.ts +806 -806
package/lib/word.ts +643 -643
package/lib/wordcomments.ts +840 -840
package/package.json +137 -137
package/scripts/postbuild.js +47 -28
package/skill/REFERENCE.md +539 -539
package/skill/SKILL.md +295 -295
package/tsconfig.json +26 -26
package/types/index.d.ts +525 -525
package/issues.md +0 -180
package/site/assets/extra.css +0 -208
package/site/commands.html +0 -926
package/site/configuration.html +0 -469
package/site/index.html +0 -288
package/site/troubleshooting.html +0 -461
package/site/workflow.html +0 -518

package/lib/word.ts CHANGED Viewed

@@ -1,643 +1,643 @@
-/**
- * Word document extraction utilities
- * Handle reading text, comments, and anchors from .docx files
- */
-import * as fs from 'fs';
-import * as path from 'path';
-import AdmZip from 'adm-zip';
-import { parseString } from 'xml2js';
-import { promisify } from 'util';
-import type { WordComment, CommentAnchor, WordMetadata, TrackChangesResult } from './types.js';
-const parseXml = promisify(parseString);
-// =============================================================================
-// Constants
-// =============================================================================
-/** Characters of context to extract around comment anchors */
-const ANCHOR_CONTEXT_SIZE = 100;
-/** Characters of context before comment range start */
-const CONTEXT_BEFORE_SIZE = 500;
-// =============================================================================
-// Public API
-// =============================================================================
-/**
- * Extract comments from Word document's comments.xml
- * @param docxPath - Path to .docx file
- * @returns Array of extracted comments
- * @throws {TypeError} If docxPath is not a string
- * @throws {Error} If file not found or invalid docx
- */
-export async function extractWordComments(docxPath: string): Promise<WordComment[]> {
-  if (typeof docxPath !== 'string') {
-    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
-  }
-  if (!fs.existsSync(docxPath)) {
-    throw new Error(`File not found: ${docxPath}`);
-  }
-  const zip = new AdmZip(docxPath);
-  const commentsEntry = zip.getEntry('word/comments.xml');
-  if (!commentsEntry) {
-    return []; // No comments in document
-  }
-  const commentsXml = zip.readAsText(commentsEntry);
-  const parsed = await parseXml(commentsXml) as any;
-  if (!parsed?.['w:comments'] || !parsed['w:comments']['w:comment']) {
-    return [];
-  }
-  const comments: WordComment[] = [];
-  const rawComments = parsed['w:comments']['w:comment'];
-  for (const comment of rawComments) {
-    const id = comment.$?.['w:id'];
-    const author = comment.$?.['w:author'] || 'Unknown';
-    const date = comment.$?.['w:date'];
-    // Extract text from all paragraphs in comment
-    let text = '';
-    const paragraphs = comment['w:p'] || [];
-    for (const para of paragraphs) {
-      const runs = para['w:r'] || [];
-      for (const run of runs) {
-        const texts = run['w:t'] || [];
-        for (const t of texts) {
-          text += typeof t === 'string' ? t : (t._ || '');
-        }
-      }
-    }
-    if (id && text.trim()) {
-      comments.push({
-        id,
-        author,
-        date,
-        text: text.trim(),
-      });
-    }
-  }
-  return comments;
-}
-/**
- * Extract comment anchors (where comments are attached) from document.xml
- * Returns mapping of comment ID to the text they're anchored to
- * @param docxPath - Path to .docx file
- * @returns Map of comment ID to anchor info
- * @throws {TypeError} If docxPath is not a string
- * @throws {Error} If invalid docx structure
- */
-export async function extractCommentAnchors(docxPath: string): Promise<Map<string, CommentAnchor>> {
-  if (typeof docxPath !== 'string') {
-    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
-  }
-  const zip = new AdmZip(docxPath);
-  const documentEntry = zip.getEntry('word/document.xml');
-  if (!documentEntry) {
-    throw new Error('Invalid docx: no document.xml');
-  }
-  const documentXml = zip.readAsText(documentEntry);
-  const anchors = new Map<string, CommentAnchor>();
-  // Find commentRangeStart and commentRangeEnd pairs
-  // The text between them is what the comment is anchored to
-  const startPattern = /<w:commentRangeStart w:id="(\d+)"\/>/g;
-  const endPattern = /<w:commentRangeEnd w:id="(\d+)"\/>/g;
-  let match: RegExpExecArray | null;
-  const starts = new Map<string, number>();
-  const ends = new Map<string, number>();
-  while ((match = startPattern.exec(documentXml)) !== null) {
-    if (match[1]) {
-      starts.set(match[1], match.index);
-    }
-  }
-  while ((match = endPattern.exec(documentXml)) !== null) {
-    if (match[1]) {
-      ends.set(match[1], match.index);
-    }
-  }
-  // For each comment, extract the text between start and end
-  for (const [id, startPos] of starts) {
-    const endPos = ends.get(id);
-    if (!endPos) continue;
-    const segment = documentXml.slice(startPos, endPos);
-    // Extract all text content from the segment
-    const textPattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
-    let text = '';
-    let textMatch: RegExpExecArray | null;
-    while ((textMatch = textPattern.exec(segment)) !== null) {
-      text += textMatch[1] ?? '';
-    }
-    // Get surrounding context (text before the anchor)
-    const contextStart = Math.max(0, startPos - CONTEXT_BEFORE_SIZE);
-    const contextSegment = documentXml.slice(contextStart, startPos);
-    let context = '';
-    while ((textMatch = textPattern.exec(contextSegment)) !== null) {
-      context += textMatch[1] ?? '';
-    }
-    anchors.set(id, {
-      text: text.trim(),
-      context: context.slice(-ANCHOR_CONTEXT_SIZE),
-    });
-  }
-  return anchors;
-}
-/**
- * Extract plain text from Word document (strips track change markup)
- * @param docxPath - Path to .docx file
- * @returns Extracted plain text (accepted changes applied)
- * @throws {TypeError} If docxPath is not a string
- * @throws {Error} If file not found
- */
-export async function extractTextFromWord(docxPath: string): Promise<string> {
-  if (typeof docxPath !== 'string') {
-    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
-  }
-  const result = await extractPlainTextWithTrackChanges(docxPath);
-  // Strip CriticMarkup: accept insertions, remove deletions, apply substitutions
-  let text = result.text;
-  text = text.replace(/\{~~[^~]*~>([^~]*)~~\}/g, '$1');  // substitutions → new
-  text = text.replace(/\{\+\+([^+]*)\+\+\}/g, '$1');      // insertions → keep
-  text = text.replace(/\{--[^}]*--\}/g, '');               // deletions → remove
-  return text;
-}
-/**
- * Get document metadata from Word file
- * @param docxPath - Path to .docx file
- * @returns Document metadata
- * @throws {TypeError} If docxPath is not a string
- */
-export async function getWordMetadata(docxPath: string): Promise<WordMetadata> {
-  if (typeof docxPath !== 'string') {
-    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
-  }
-  const zip = new AdmZip(docxPath);
-  const coreEntry = zip.getEntry('docProps/core.xml');
-  if (!coreEntry) {
-    return {};
-  }
-  const coreXml = zip.readAsText(coreEntry);
-  const metadata: WordMetadata = {};
-  // Extract common metadata fields
-  const patterns: Record<string, RegExp> = {
-    title: /<dc:title>([^<]*)<\/dc:title>/,
-    author: /<dc:creator>([^<]*)<\/dc:creator>/,
-    created: /<dcterms:created[^>]*>([^<]*)<\/dcterms:created>/,
-    modified: /<dcterms:modified[^>]*>([^<]*)<\/dcterms:modified>/,
-  };
-  for (const [key, pattern] of Object.entries(patterns)) {
-    const match = coreXml.match(pattern);
-    if (match) {
-      (metadata as any)[key] = match[1];
-    }
-  }
-  return metadata;
-}
-/**
- * Check if file is a valid Word document
- * @param filePath - Path to file to check
- * @returns True if valid .docx file
- */
-export function isWordDocument(filePath: string): boolean {
-  if (typeof filePath !== 'string') return false;
-  if (!fs.existsSync(filePath)) return false;
-  if (!filePath.toLowerCase().endsWith('.docx')) return false;
-  try {
-    const zip = new AdmZip(filePath);
-    return zip.getEntry('word/document.xml') !== null;
-  } catch {
-    return false;
-  }
-}
-/**
- * Extract text content from XML element, handling nested elements
- * @param xml - XML string
- * @returns Plain text content
- */
-function extractTextFromXml(xml: string): string {
-  let text = '';
-  // Match w:t elements (regular text)
-  const textPattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
-  let match: RegExpExecArray | null;
-  while ((match = textPattern.exec(xml)) !== null) {
-    text += match[1];
-  }
-  // Also match w:delText (deleted text)
-  const delTextPattern = /<w:delText[^>]*>([^<]*)<\/w:delText>/g;
-  while ((match = delTextPattern.exec(xml)) !== null) {
-    text += match[1];
-  }
-  return text;
-}
-/**
- * Extract track changes (insertions and deletions) from Word document
- * Converts Word's w:ins and w:del elements to CriticMarkup format
- *
- * @param docxPath - Path to Word document
- * @returns Track changes result with content and stats
- */
-export async function extractTrackChanges(docxPath: string): Promise<TrackChangesResult> {
-  if (!fs.existsSync(docxPath)) {
-    throw new Error(`File not found: ${docxPath}`);
-  }
-  const zip = new AdmZip(docxPath);
-  const documentEntry = zip.getEntry('word/document.xml');
-  if (!documentEntry) {
-    throw new Error('Invalid docx: no document.xml');
-  }
-  let xml = zip.readAsText(documentEntry);
-  let insertions = 0;
-  let deletions = 0;
-  // Check if there are any track changes
-  const hasInsertions = xml.includes('<w:ins ');
-  const hasDeletions = xml.includes('<w:del ');
-  if (!hasInsertions && !hasDeletions) {
-    return { hasTrackChanges: false, content: null, stats: { insertions: 0, deletions: 0 } };
-  }
-  // Process insertions: <w:ins ...>...</w:ins> -> {++...++}
-  // Match the full w:ins element including nested content
-  xml = xml.replace(/<w:ins\b[^>]*>([\s\S]*?)<\/w:ins>/g, (match, content) => {
-    const text = extractTextFromXml(content);
-    if (text.trim()) {
-      insertions++;
-      return `{++${text}++}`;
-    }
-    return text;
-  });
-  // Process deletions: <w:del ...>...</w:del> -> {--...--}
-  xml = xml.replace(/<w:del\b[^>]*>([\s\S]*?)<\/w:del>/g, (match, content) => {
-    const text = extractTextFromXml(content);
-    if (text.trim()) {
-      deletions++;
-      return `{--${text}--}`;
-    }
-    return '';
-  });
-  return {
-    hasTrackChanges: true,
-    content: xml,
-    stats: { insertions, deletions },
-  };
-}
-/**
- * Extract a single marker's content starting at position i.
- * Returns { content, end } where end is the position after the closing marker,
- * or null if no valid closing marker found.
- */
-function extractMarker(text: string, i: number, open: string, close: string): { content: string; end: number } | null {
-  if (!text.startsWith(open, i)) return null;
-  const start = i + open.length;
-  const closeIdx = text.indexOf(close, start);
-  if (closeIdx === -1) return null;
-  return { content: text.slice(start, closeIdx), end: closeIdx + close.length };
-}
-/**
- * Greedily collect consecutive markers of the same type.
- * E.g. {++a++}{++b++}{++c++} → "abc", advancing past all three.
- */
-function collectConsecutive(text: string, i: number, open: string, close: string): { content: string; end: number } | null {
-  const first = extractMarker(text, i, open, close);
-  if (!first) return null;
-  let content = first.content;
-  let end = first.end;
-  while (end < text.length) {
-    const next = extractMarker(text, end, open, close);
-    if (!next) break;
-    content += next.content;
-    end = next.end;
-  }
-  return { content, end };
-}
-/**
- * Scan text for adjacent CriticMarkup markers and:
- * 1. Merge consecutive same-type markers: {++a++}{++b++} → {++ab++}
- * 2. Merge adjacent del+ins or ins+del into substitutions: {--old--}{++new++} → {~~old~>new~~}
- *
- * Uses a linear scanner — no regex backtracking, no ambiguity.
- */
-function mergeAdjacentMarkers(text: string): string {
-  let result = '';
-  let i = 0;
-  while (i < text.length) {
-    // --- Deletion block ---
-    if (text.startsWith('{--', i)) {
-      const del = collectConsecutive(text, i, '{--', '--}');
-      if (!del) { result += text[i]; i++; continue; }
-      // Skip spaces, then check for adjacent insertion
-      let j = del.end;
-      while (j < text.length && text[j] === ' ') j++;
-      const ins = collectConsecutive(text, j, '{++', '++}');
-      if (ins) {
-        // Merge into substitution
-        const trailing = del.content.endsWith(' ') || ins.content.endsWith(' ');
-        result += `{~~${del.content.trimEnd()}~>${ins.content.trimEnd()}~~}${trailing ? ' ' : ''}`;
-        i = ins.end;
-      } else {
-        // Emit merged deletion
-        result += `{--${del.content}--}`;
-        i = del.end;
-      }
-      continue;
-    }
-    // --- Insertion block ---
-    if (text.startsWith('{++', i)) {
-      const ins = collectConsecutive(text, i, '{++', '++}');
-      if (!ins) { result += text[i]; i++; continue; }
-      // Skip spaces, then check for adjacent deletion
-      let j = ins.end;
-      while (j < text.length && text[j] === ' ') j++;
-      const del = collectConsecutive(text, j, '{--', '--}');
-      if (del) {
-        // Merge into substitution (del → ins order in output)
-        const trailing = del.content.endsWith(' ') || ins.content.endsWith(' ');
-        result += `{~~${del.content.trimEnd()}~>${ins.content.trimEnd()}~~}${trailing ? ' ' : ''}`;
-        i = del.end;
-      } else {
-        // Emit merged insertion
-        result += `{++${ins.content}++}`;
-        i = ins.end;
-      }
-      continue;
-    }
-    result += text[i];
-    i++;
-  }
-  return result;
-}
-/**
- * Extract plain text from Word XML with track changes preserved as CriticMarkup.
- * This is a pandoc-free fallback that reads document.xml directly.
- *
- * Converts:
- *   <w:ins> content </w:ins>  →  {++text++}
- *   <w:del> content </w:del>  →  {--text--}
- *
- * Also detects headings (w:pStyle Heading1-6) and outputs markdown # syntax.
- *
- * @param docxPath - Path to Word document
- * @returns Plain text with CriticMarkup and stats
- */
-export async function extractPlainTextWithTrackChanges(docxPath: string): Promise<{
-  text: string;
-  hasTrackChanges: boolean;
-  stats: { insertions: number; deletions: number };
-}> {
-  if (!fs.existsSync(docxPath)) {
-    throw new Error(`File not found: ${docxPath}`);
-  }
-  const zip = new AdmZip(docxPath);
-  const docEntry = zip.getEntry('word/document.xml');
-  if (!docEntry) {
-    throw new Error('Invalid docx: no document.xml');
-  }
-  let xml = docEntry.getData().toString('utf8');
-  let insertions = 0;
-  let deletions = 0;
-  // Use unique markers (null bytes) that won't appear in normal text
-  const INS_S = '\x00IS\x00';
-  const INS_E = '\x00IE\x00';
-  const DEL_S = '\x00DS\x00';
-  const DEL_E = '\x00DE\x00';
-  // Step 1: Replace <w:ins> with marker-wrapped text injected as <w:t>
-  // Whitespace-only insertions are kept as plain text (not markers) to preserve spacing.
-  xml = xml.replace(/<w:ins\b[^>]*>([\s\S]*?)<\/w:ins>/g, (_match, content: string) => {
-    const texts: string[] = [];
-    const tPat = /<w:t[^>]*>([^<]*)<\/w:t>/g;
-    let m: RegExpExecArray | null;
-    while ((m = tPat.exec(content)) !== null) {
-      texts.push(m[1] || '');
-    }
-    const text = texts.join('');
-    if (text.trim()) {
-      insertions++;
-      return `<w:r><w:t>${INS_S}${text}${INS_E}</w:t></w:r>`;
-    }
-    // Whitespace-only: preserve as plain text for spacing
-    if (text.length > 0) {
-      return `<w:r><w:t>${text}</w:t></w:r>`;
-    }
-    return '';
-  });
-  // Step 2: Replace <w:del> similarly (uses w:delText inside)
-  // Whitespace-only deletions are kept as plain text to preserve spacing.
-  xml = xml.replace(/<w:del\b[^>]*>([\s\S]*?)<\/w:del>/g, (_match, content: string) => {
-    const texts: string[] = [];
-    const tPat = /<w:delText[^>]*>([^<]*)<\/w:delText>|<w:t[^>]*>([^<]*)<\/w:t>/g;
-    let m: RegExpExecArray | null;
-    while ((m = tPat.exec(content)) !== null) {
-      texts.push(m[1] || m[2] || '');
-    }
-    const text = texts.join('');
-    if (text.trim()) {
-      deletions++;
-      return `<w:r><w:t>${DEL_S}${text}${DEL_E}</w:t></w:r>`;
-    }
-    // Whitespace-only: preserve as plain text for spacing
-    if (text.length > 0) {
-      return `<w:r><w:t>${text}</w:t></w:r>`;
-    }
-    return '';
-  });
-  // Step 3: Extract text paragraph by paragraph
-  const paragraphs: string[] = [];
-  const paraPattern = /<w:p\b[^>]*>([\s\S]*?)<\/w:p>/g;
-  let pm: RegExpExecArray | null;
-  while ((pm = paraPattern.exec(xml)) !== null) {
-    const paraXml = pm[1];
-    // Detect heading level from paragraph style
-    let headingLevel = 0;
-    const styleMatch = paraXml.match(/<w:pStyle\s+w:val="Heading(\d)"/i);
-    if (styleMatch && styleMatch[1]) {
-      headingLevel = parseInt(styleMatch[1], 10);
-    }
-    // Extract all <w:t> text in order
-    const texts: string[] = [];
-    const tPat = /<w:t[^>]*>([^<]*)<\/w:t>/g;
-    let tm: RegExpExecArray | null;
-    while ((tm = tPat.exec(paraXml)) !== null) {
-      texts.push(tm[1] || '');
-    }
-    let paraText = texts.join('');
-    // Decode XML entities
-    paraText = paraText
-      .replace(/&amp;/g, '&')
-      .replace(/&lt;/g, '<')
-      .replace(/&gt;/g, '>')
-      .replace(/&quot;/g, '"')
-      .replace(/&apos;/g, "'");
-    // Convert markers to CriticMarkup
-    paraText = paraText
-      .split(INS_S).join('{++')
-      .split(INS_E).join('++}')
-      .split(DEL_S).join('{--')
-      .split(DEL_E).join('--}');
-    // Merge adjacent del+ins (or ins+del) into substitutions.
-    // Uses a scanner instead of regex to avoid backtracking across marker boundaries.
-    paraText = mergeAdjacentMarkers(paraText);
-    // Collapse runs of multiple spaces into single space
-    paraText = paraText.replace(/ {2,}/g, ' ');
-    if (paraText.trim()) {
-      if (headingLevel > 0 && headingLevel <= 6) {
-        paragraphs.push('#'.repeat(headingLevel) + ' ' + paraText.trim());
-      } else {
-        paragraphs.push(paraText);
-      }
-    }
-  }
-  return {
-    text: paragraphs.join('\n\n'),
-    hasTrackChanges: insertions > 0 || deletions > 0,
-    stats: { insertions, deletions },
-  };
-}
-interface ExtractWithTrackChangesOptions {
-  mediaDir?: string;
-}
-/**
- * Extract Word document content with track changes preserved as CriticMarkup
- * Uses pandoc with track-changes=all option to preserve insertions/deletions
- *
- * @param docxPath - Path to Word document
- * @param options - Options
- * @returns Track changes result with text and stats
- */
-export async function extractWithTrackChanges(
-  docxPath: string,
-  options: ExtractWithTrackChangesOptions = {}
-): Promise<{ text: string; hasTrackChanges: boolean; stats: { insertions: number; deletions: number } }> {
-  const { mediaDir } = options;
-  if (!fs.existsSync(docxPath)) {
-    throw new Error(`File not found: ${docxPath}`);
-  }
-  const { execSync } = await import('child_process');
-  // Use pandoc with --track-changes=all to preserve track changes
-  // This outputs insertions as [insertion]{.insertion} and deletions as [deletion]{.deletion}
-  let pandocArgs = `"${docxPath}" -t markdown --wrap=none --track-changes=all`;
-  if (mediaDir) {
-    pandocArgs += ` --extract-media="${mediaDir}"`;
-  }
-  let text: string;
-  try {
-    text = execSync(`pandoc ${pandocArgs}`, {
-      encoding: 'utf-8',
-      maxBuffer: 50 * 1024 * 1024,
-    });
-  } catch (err: any) {
-    throw new Error(`Pandoc extraction failed: ${err.message}`);
-  }
-  // Count track changes from pandoc output
-  let insertions = 0;
-  let deletions = 0;
-  // Pandoc outputs track changes as:
-  // [inserted text]{.insertion author="..."}
-  // [deleted text]{.deletion author="..."}
-  // Convert pandoc's track change format to CriticMarkup
-  // Insertions: [text]{.insertion ...} -> {++text++}
-  text = text.replace(/\[([^\]]*)\]\{\.insertion[^}]*\}/g, (match, content) => {
-    if (content.trim()) {
-      insertions++;
-      return `{++${content}++}`;
-    }
-    return '';
-  });
-  // Deletions: [text]{.deletion ...} -> {--text--}
-  text = text.replace(/\[([^\]]*)\]\{\.deletion[^}]*\}/g, (match, content) => {
-    if (content.trim()) {
-      deletions++;
-      return `{--${content}--}`;
-    }
-    return '';
-  });
-  const hasTrackChanges = insertions > 0 || deletions > 0;
-  return {
-    text,
-    hasTrackChanges,
-    stats: { insertions, deletions },
-  };
-}
+/**
+ * Word document extraction utilities
+ * Handle reading text, comments, and anchors from .docx files
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import AdmZip from 'adm-zip';
+import { parseString } from 'xml2js';
+import { promisify } from 'util';
+import type { WordComment, CommentAnchor, WordMetadata, TrackChangesResult } from './types.js';
+const parseXml = promisify(parseString);
+// =============================================================================
+// Constants
+// =============================================================================
+/** Characters of context to extract around comment anchors */
+const ANCHOR_CONTEXT_SIZE = 100;
+/** Characters of context before comment range start */
+const CONTEXT_BEFORE_SIZE = 500;
+// =============================================================================
+// Public API
+// =============================================================================
+/**
+ * Extract comments from Word document's comments.xml
+ * @param docxPath - Path to .docx file
+ * @returns Array of extracted comments
+ * @throws {TypeError} If docxPath is not a string
+ * @throws {Error} If file not found or invalid docx
+ */
+export async function extractWordComments(docxPath: string): Promise<WordComment[]> {
+  if (typeof docxPath !== 'string') {
+    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
+  }
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const commentsEntry = zip.getEntry('word/comments.xml');
+  if (!commentsEntry) {
+    return []; // No comments in document
+  }
+  const commentsXml = zip.readAsText(commentsEntry);
+  const parsed = await parseXml(commentsXml) as any;
+  if (!parsed?.['w:comments'] || !parsed['w:comments']['w:comment']) {
+    return [];
+  }
+  const comments: WordComment[] = [];
+  const rawComments = parsed['w:comments']['w:comment'];
+  for (const comment of rawComments) {
+    const id = comment.$?.['w:id'];
+    const author = comment.$?.['w:author'] || 'Unknown';
+    const date = comment.$?.['w:date'];
+    // Extract text from all paragraphs in comment
+    let text = '';
+    const paragraphs = comment['w:p'] || [];
+    for (const para of paragraphs) {
+      const runs = para['w:r'] || [];
+      for (const run of runs) {
+        const texts = run['w:t'] || [];
+        for (const t of texts) {
+          text += typeof t === 'string' ? t : (t._ || '');
+        }
+      }
+    }
+    if (id && text.trim()) {
+      comments.push({
+        id,
+        author,
+        date,
+        text: text.trim(),
+      });
+    }
+  }
+  return comments;
+}
+/**
+ * Extract comment anchors (where comments are attached) from document.xml
+ * Returns mapping of comment ID to the text they're anchored to
+ * @param docxPath - Path to .docx file
+ * @returns Map of comment ID to anchor info
+ * @throws {TypeError} If docxPath is not a string
+ * @throws {Error} If invalid docx structure
+ */
+export async function extractCommentAnchors(docxPath: string): Promise<Map<string, CommentAnchor>> {
+  if (typeof docxPath !== 'string') {
+    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const documentEntry = zip.getEntry('word/document.xml');
+  if (!documentEntry) {
+    throw new Error('Invalid docx: no document.xml');
+  }
+  const documentXml = zip.readAsText(documentEntry);
+  const anchors = new Map<string, CommentAnchor>();
+  // Find commentRangeStart and commentRangeEnd pairs
+  // The text between them is what the comment is anchored to
+  const startPattern = /<w:commentRangeStart w:id="(\d+)"\/>/g;
+  const endPattern = /<w:commentRangeEnd w:id="(\d+)"\/>/g;
+  let match: RegExpExecArray | null;
+  const starts = new Map<string, number>();
+  const ends = new Map<string, number>();
+  while ((match = startPattern.exec(documentXml)) !== null) {
+    if (match[1]) {
+      starts.set(match[1], match.index);
+    }
+  }
+  while ((match = endPattern.exec(documentXml)) !== null) {
+    if (match[1]) {
+      ends.set(match[1], match.index);
+    }
+  }
+  // For each comment, extract the text between start and end
+  for (const [id, startPos] of starts) {
+    const endPos = ends.get(id);
+    if (!endPos) continue;
+    const segment = documentXml.slice(startPos, endPos);
+    // Extract all text content from the segment
+    const textPattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
+    let text = '';
+    let textMatch: RegExpExecArray | null;
+    while ((textMatch = textPattern.exec(segment)) !== null) {
+      text += textMatch[1] ?? '';
+    }
+    // Get surrounding context (text before the anchor)
+    const contextStart = Math.max(0, startPos - CONTEXT_BEFORE_SIZE);
+    const contextSegment = documentXml.slice(contextStart, startPos);
+    let context = '';
+    while ((textMatch = textPattern.exec(contextSegment)) !== null) {
+      context += textMatch[1] ?? '';
+    }
+    anchors.set(id, {
+      text: text.trim(),
+      context: context.slice(-ANCHOR_CONTEXT_SIZE),
+    });
+  }
+  return anchors;
+}
+/**
+ * Extract plain text from Word document (strips track change markup)
+ * @param docxPath - Path to .docx file
+ * @returns Extracted plain text (accepted changes applied)
+ * @throws {TypeError} If docxPath is not a string
+ * @throws {Error} If file not found
+ */
+export async function extractTextFromWord(docxPath: string): Promise<string> {
+  if (typeof docxPath !== 'string') {
+    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
+  }
+  const result = await extractPlainTextWithTrackChanges(docxPath);
+  // Strip CriticMarkup: accept insertions, remove deletions, apply substitutions
+  let text = result.text;
+  text = text.replace(/\{~~[^~]*~>([^~]*)~~\}/g, '$1');  // substitutions → new
+  text = text.replace(/\{\+\+([^+]*)\+\+\}/g, '$1');      // insertions → keep
+  text = text.replace(/\{--[^}]*--\}/g, '');               // deletions → remove
+  return text;
+}
+/**
+ * Get document metadata from Word file
+ * @param docxPath - Path to .docx file
+ * @returns Document metadata
+ * @throws {TypeError} If docxPath is not a string
+ */
+export async function getWordMetadata(docxPath: string): Promise<WordMetadata> {
+  if (typeof docxPath !== 'string') {
+    throw new TypeError(`docxPath must be a string, got ${typeof docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const coreEntry = zip.getEntry('docProps/core.xml');
+  if (!coreEntry) {
+    return {};
+  }
+  const coreXml = zip.readAsText(coreEntry);
+  const metadata: WordMetadata = {};
+  // Extract common metadata fields
+  const patterns: Record<string, RegExp> = {
+    title: /<dc:title>([^<]*)<\/dc:title>/,
+    author: /<dc:creator>([^<]*)<\/dc:creator>/,
+    created: /<dcterms:created[^>]*>([^<]*)<\/dcterms:created>/,
+    modified: /<dcterms:modified[^>]*>([^<]*)<\/dcterms:modified>/,
+  };
+  for (const [key, pattern] of Object.entries(patterns)) {
+    const match = coreXml.match(pattern);
+    if (match) {
+      (metadata as any)[key] = match[1];
+    }
+  }
+  return metadata;
+}
+/**
+ * Check if file is a valid Word document
+ * @param filePath - Path to file to check
+ * @returns True if valid .docx file
+ */
+export function isWordDocument(filePath: string): boolean {
+  if (typeof filePath !== 'string') return false;
+  if (!fs.existsSync(filePath)) return false;
+  if (!filePath.toLowerCase().endsWith('.docx')) return false;
+  try {
+    const zip = new AdmZip(filePath);
+    return zip.getEntry('word/document.xml') !== null;
+  } catch {
+    return false;
+  }
+}
+/**
+ * Extract text content from XML element, handling nested elements
+ * @param xml - XML string
+ * @returns Plain text content
+ */
+function extractTextFromXml(xml: string): string {
+  let text = '';
+  // Match w:t elements (regular text)
+  const textPattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
+  let match: RegExpExecArray | null;
+  while ((match = textPattern.exec(xml)) !== null) {
+    text += match[1];
+  }
+  // Also match w:delText (deleted text)
+  const delTextPattern = /<w:delText[^>]*>([^<]*)<\/w:delText>/g;
+  while ((match = delTextPattern.exec(xml)) !== null) {
+    text += match[1];
+  }
+  return text;
+}
+/**
+ * Extract track changes (insertions and deletions) from Word document
+ * Converts Word's w:ins and w:del elements to CriticMarkup format
+ *
+ * @param docxPath - Path to Word document
+ * @returns Track changes result with content and stats
+ */
+export async function extractTrackChanges(docxPath: string): Promise<TrackChangesResult> {
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const documentEntry = zip.getEntry('word/document.xml');
+  if (!documentEntry) {
+    throw new Error('Invalid docx: no document.xml');
+  }
+  let xml = zip.readAsText(documentEntry);
+  let insertions = 0;
+  let deletions = 0;
+  // Check if there are any track changes
+  const hasInsertions = xml.includes('<w:ins ');
+  const hasDeletions = xml.includes('<w:del ');
+  if (!hasInsertions && !hasDeletions) {
+    return { hasTrackChanges: false, content: null, stats: { insertions: 0, deletions: 0 } };
+  }
+  // Process insertions: <w:ins ...>...</w:ins> -> {++...++}
+  // Match the full w:ins element including nested content
+  xml = xml.replace(/<w:ins\b[^>]*>([\s\S]*?)<\/w:ins>/g, (match, content) => {
+    const text = extractTextFromXml(content);
+    if (text.trim()) {
+      insertions++;
+      return `{++${text}++}`;
+    }
+    return text;
+  });
+  // Process deletions: <w:del ...>...</w:del> -> {--...--}
+  xml = xml.replace(/<w:del\b[^>]*>([\s\S]*?)<\/w:del>/g, (match, content) => {
+    const text = extractTextFromXml(content);
+    if (text.trim()) {
+      deletions++;
+      return `{--${text}--}`;
+    }
+    return '';
+  });
+  return {
+    hasTrackChanges: true,
+    content: xml,
+    stats: { insertions, deletions },
+  };
+}
+/**
+ * Extract a single marker's content starting at position i.
+ * Returns { content, end } where end is the position after the closing marker,
+ * or null if no valid closing marker found.
+ */
+function extractMarker(text: string, i: number, open: string, close: string): { content: string; end: number } | null {
+  if (!text.startsWith(open, i)) return null;
+  const start = i + open.length;
+  const closeIdx = text.indexOf(close, start);
+  if (closeIdx === -1) return null;
+  return { content: text.slice(start, closeIdx), end: closeIdx + close.length };
+}
+/**
+ * Greedily collect consecutive markers of the same type.
+ * E.g. {++a++}{++b++}{++c++} → "abc", advancing past all three.
+ */
+function collectConsecutive(text: string, i: number, open: string, close: string): { content: string; end: number } | null {
+  const first = extractMarker(text, i, open, close);
+  if (!first) return null;
+  let content = first.content;
+  let end = first.end;
+  while (end < text.length) {
+    const next = extractMarker(text, end, open, close);
+    if (!next) break;
+    content += next.content;
+    end = next.end;
+  }
+  return { content, end };
+}
+/**
+ * Scan text for adjacent CriticMarkup markers and:
+ * 1. Merge consecutive same-type markers: {++a++}{++b++} → {++ab++}
+ * 2. Merge adjacent del+ins or ins+del into substitutions: {--old--}{++new++} → {~~old~>new~~}
+ *
+ * Uses a linear scanner — no regex backtracking, no ambiguity.
+ */
+function mergeAdjacentMarkers(text: string): string {
+  let result = '';
+  let i = 0;
+  while (i < text.length) {
+    // --- Deletion block ---
+    if (text.startsWith('{--', i)) {
+      const del = collectConsecutive(text, i, '{--', '--}');
+      if (!del) { result += text[i]; i++; continue; }
+      // Skip spaces, then check for adjacent insertion
+      let j = del.end;
+      while (j < text.length && text[j] === ' ') j++;
+      const ins = collectConsecutive(text, j, '{++', '++}');
+      if (ins) {
+        // Merge into substitution
+        const trailing = del.content.endsWith(' ') || ins.content.endsWith(' ');
+        result += `{~~${del.content.trimEnd()}~>${ins.content.trimEnd()}~~}${trailing ? ' ' : ''}`;
+        i = ins.end;
+      } else {
+        // Emit merged deletion
+        result += `{--${del.content}--}`;
+        i = del.end;
+      }
+      continue;
+    }
+    // --- Insertion block ---
+    if (text.startsWith('{++', i)) {
+      const ins = collectConsecutive(text, i, '{++', '++}');
+      if (!ins) { result += text[i]; i++; continue; }
+      // Skip spaces, then check for adjacent deletion
+      let j = ins.end;
+      while (j < text.length && text[j] === ' ') j++;
+      const del = collectConsecutive(text, j, '{--', '--}');
+      if (del) {
+        // Merge into substitution (del → ins order in output)
+        const trailing = del.content.endsWith(' ') || ins.content.endsWith(' ');
+        result += `{~~${del.content.trimEnd()}~>${ins.content.trimEnd()}~~}${trailing ? ' ' : ''}`;
+        i = del.end;
+      } else {
+        // Emit merged insertion
+        result += `{++${ins.content}++}`;
+        i = ins.end;
+      }
+      continue;
+    }
+    result += text[i];
+    i++;
+  }
+  return result;
+}
+/**
+ * Extract plain text from Word XML with track changes preserved as CriticMarkup.
+ * This is a pandoc-free fallback that reads document.xml directly.
+ *
+ * Converts:
+ *   <w:ins> content </w:ins>  →  {++text++}
+ *   <w:del> content </w:del>  →  {--text--}
+ *
+ * Also detects headings (w:pStyle Heading1-6) and outputs markdown # syntax.
+ *
+ * @param docxPath - Path to Word document
+ * @returns Plain text with CriticMarkup and stats
+ */
+export async function extractPlainTextWithTrackChanges(docxPath: string): Promise<{
+  text: string;
+  hasTrackChanges: boolean;
+  stats: { insertions: number; deletions: number };
+}> {
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const docEntry = zip.getEntry('word/document.xml');
+  if (!docEntry) {
+    throw new Error('Invalid docx: no document.xml');
+  }
+  let xml = docEntry.getData().toString('utf8');
+  let insertions = 0;
+  let deletions = 0;
+  // Use unique markers (null bytes) that won't appear in normal text
+  const INS_S = '\x00IS\x00';
+  const INS_E = '\x00IE\x00';
+  const DEL_S = '\x00DS\x00';
+  const DEL_E = '\x00DE\x00';
+  // Step 1: Replace <w:ins> with marker-wrapped text injected as <w:t>
+  // Whitespace-only insertions are kept as plain text (not markers) to preserve spacing.
+  xml = xml.replace(/<w:ins\b[^>]*>([\s\S]*?)<\/w:ins>/g, (_match, content: string) => {
+    const texts: string[] = [];
+    const tPat = /<w:t[^>]*>([^<]*)<\/w:t>/g;
+    let m: RegExpExecArray | null;
+    while ((m = tPat.exec(content)) !== null) {
+      texts.push(m[1] || '');
+    }
+    const text = texts.join('');
+    if (text.trim()) {
+      insertions++;
+      return `<w:r><w:t>${INS_S}${text}${INS_E}</w:t></w:r>`;
+    }
+    // Whitespace-only: preserve as plain text for spacing
+    if (text.length > 0) {
+      return `<w:r><w:t>${text}</w:t></w:r>`;
+    }
+    return '';
+  });
+  // Step 2: Replace <w:del> similarly (uses w:delText inside)
+  // Whitespace-only deletions are kept as plain text to preserve spacing.
+  xml = xml.replace(/<w:del\b[^>]*>([\s\S]*?)<\/w:del>/g, (_match, content: string) => {
+    const texts: string[] = [];
+    const tPat = /<w:delText[^>]*>([^<]*)<\/w:delText>|<w:t[^>]*>([^<]*)<\/w:t>/g;
+    let m: RegExpExecArray | null;
+    while ((m = tPat.exec(content)) !== null) {
+      texts.push(m[1] || m[2] || '');
+    }
+    const text = texts.join('');
+    if (text.trim()) {
+      deletions++;
+      return `<w:r><w:t>${DEL_S}${text}${DEL_E}</w:t></w:r>`;
+    }
+    // Whitespace-only: preserve as plain text for spacing
+    if (text.length > 0) {
+      return `<w:r><w:t>${text}</w:t></w:r>`;
+    }
+    return '';
+  });
+  // Step 3: Extract text paragraph by paragraph
+  const paragraphs: string[] = [];
+  const paraPattern = /<w:p\b[^>]*>([\s\S]*?)<\/w:p>/g;
+  let pm: RegExpExecArray | null;
+  while ((pm = paraPattern.exec(xml)) !== null) {
+    const paraXml = pm[1];
+    // Detect heading level from paragraph style
+    let headingLevel = 0;
+    const styleMatch = paraXml.match(/<w:pStyle\s+w:val="Heading(\d)"/i);
+    if (styleMatch && styleMatch[1]) {
+      headingLevel = parseInt(styleMatch[1], 10);
+    }
+    // Extract all <w:t> text in order
+    const texts: string[] = [];
+    const tPat = /<w:t[^>]*>([^<]*)<\/w:t>/g;
+    let tm: RegExpExecArray | null;
+    while ((tm = tPat.exec(paraXml)) !== null) {
+      texts.push(tm[1] || '');
+    }
+    let paraText = texts.join('');
+    // Decode XML entities
+    paraText = paraText
+      .replace(/&amp;/g, '&')
+      .replace(/&lt;/g, '<')
+      .replace(/&gt;/g, '>')
+      .replace(/&quot;/g, '"')
+      .replace(/&apos;/g, "'");
+    // Convert markers to CriticMarkup
+    paraText = paraText
+      .split(INS_S).join('{++')
+      .split(INS_E).join('++}')
+      .split(DEL_S).join('{--')
+      .split(DEL_E).join('--}');
+    // Merge adjacent del+ins (or ins+del) into substitutions.
+    // Uses a scanner instead of regex to avoid backtracking across marker boundaries.
+    paraText = mergeAdjacentMarkers(paraText);
+    // Collapse runs of multiple spaces into single space
+    paraText = paraText.replace(/ {2,}/g, ' ');
+    if (paraText.trim()) {
+      if (headingLevel > 0 && headingLevel <= 6) {
+        paragraphs.push('#'.repeat(headingLevel) + ' ' + paraText.trim());
+      } else {
+        paragraphs.push(paraText);
+      }
+    }
+  }
+  return {
+    text: paragraphs.join('\n\n'),
+    hasTrackChanges: insertions > 0 || deletions > 0,
+    stats: { insertions, deletions },
+  };
+}
+interface ExtractWithTrackChangesOptions {
+  mediaDir?: string;
+}
+/**
+ * Extract Word document content with track changes preserved as CriticMarkup
+ * Uses pandoc with track-changes=all option to preserve insertions/deletions
+ *
+ * @param docxPath - Path to Word document
+ * @param options - Options
+ * @returns Track changes result with text and stats
+ */
+export async function extractWithTrackChanges(
+  docxPath: string,
+  options: ExtractWithTrackChangesOptions = {}
+): Promise<{ text: string; hasTrackChanges: boolean; stats: { insertions: number; deletions: number } }> {
+  const { mediaDir } = options;
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const { execSync } = await import('child_process');
+  // Use pandoc with --track-changes=all to preserve track changes
+  // This outputs insertions as [insertion]{.insertion} and deletions as [deletion]{.deletion}
+  let pandocArgs = `"${docxPath}" -t markdown --wrap=none --track-changes=all`;
+  if (mediaDir) {
+    pandocArgs += ` --extract-media="${mediaDir}"`;
+  }
+  let text: string;
+  try {
+    text = execSync(`pandoc ${pandocArgs}`, {
+      encoding: 'utf-8',
+      maxBuffer: 50 * 1024 * 1024,
+    });
+  } catch (err: any) {
+    throw new Error(`Pandoc extraction failed: ${err.message}`);
+  }
+  // Count track changes from pandoc output
+  let insertions = 0;
+  let deletions = 0;
+  // Pandoc outputs track changes as:
+  // [inserted text]{.insertion author="..."}
+  // [deleted text]{.deletion author="..."}
+  // Convert pandoc's track change format to CriticMarkup
+  // Insertions: [text]{.insertion ...} -> {++text++}
+  text = text.replace(/\[([^\]]*)\]\{\.insertion[^}]*\}/g, (match, content) => {
+    if (content.trim()) {
+      insertions++;
+      return `{++${content}++}`;
+    }
+    return '';
+  });
+  // Deletions: [text]{.deletion ...} -> {--text--}
+  text = text.replace(/\[([^\]]*)\]\{\.deletion[^}]*\}/g, (match, content) => {
+    if (content.trim()) {
+      deletions++;
+      return `{--${content}--}`;
+    }
+    return '';
+  });
+  const hasTrackChanges = insertions > 0 || deletions > 0;
+  return {
+    text,
+    hasTrackChanges,
+    stats: { insertions, deletions },
+  };
+}