npm - docrev - Versions diffs - 0.9.6 → 0.9.7 - Mend

docrev 0.9.6 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (43) hide show

package/CHANGELOG.md +20 -0
package/dev_notes/bug_repro_comment_parser.md +71 -0
package/dist/lib/anchor-match.d.ts +41 -0
package/dist/lib/anchor-match.d.ts.map +1 -0
package/dist/lib/anchor-match.js +192 -0
package/dist/lib/anchor-match.js.map +1 -0
package/dist/lib/annotations.d.ts.map +1 -1
package/dist/lib/annotations.js +8 -5
package/dist/lib/annotations.js.map +1 -1
package/dist/lib/commands/index.d.ts +2 -1
package/dist/lib/commands/index.d.ts.map +1 -1
package/dist/lib/commands/index.js +3 -1
package/dist/lib/commands/index.js.map +1 -1
package/dist/lib/commands/section-boundaries.d.ts +22 -0
package/dist/lib/commands/section-boundaries.d.ts.map +1 -0
package/dist/lib/commands/section-boundaries.js +53 -0
package/dist/lib/commands/section-boundaries.js.map +1 -0
package/dist/lib/commands/sync.d.ts.map +1 -1
package/dist/lib/commands/sync.js +135 -0
package/dist/lib/commands/sync.js.map +1 -1
package/dist/lib/commands/verify-anchors.d.ts +17 -0
package/dist/lib/commands/verify-anchors.d.ts.map +1 -0
package/dist/lib/commands/verify-anchors.js +215 -0
package/dist/lib/commands/verify-anchors.js.map +1 -0
package/dist/lib/import.d.ts +14 -8
package/dist/lib/import.d.ts.map +1 -1
package/dist/lib/import.js +16 -144
package/dist/lib/import.js.map +1 -1
package/dist/lib/word-extraction.d.ts +23 -0
package/dist/lib/word-extraction.d.ts.map +1 -1
package/dist/lib/word-extraction.js +79 -0
package/dist/lib/word-extraction.js.map +1 -1
package/lib/anchor-match.ts +238 -0
package/lib/annotations.ts +9 -5
package/lib/commands/index.ts +3 -0
package/lib/commands/section-boundaries.ts +72 -0
package/lib/commands/sync.ts +165 -0
package/lib/commands/verify-anchors.ts +261 -0
package/lib/import.ts +29 -165
package/lib/word-extraction.ts +93 -0
package/package.json +1 -1
package/skill/REFERENCE.md +29 -2
package/skill/SKILL.md +12 -2

package/lib/commands/verify-anchors.ts ADDED Viewed

@@ -0,0 +1,261 @@
+/**
+ * VERIFY-ANCHORS command: report drift between Word comment anchors
+ * and the current markdown.
+ *
+ * Useful when prose has been revised between sending the docx out for
+ * review and receiving it back. Each comment is classified by how well
+ * its anchor still matches the current section prose:
+ *
+ *   clean        – exact or whitespace-normalized hit
+ *   drift        – anchor only matches via stripped/partial fallbacks
+ *   context-only – anchor text is gone, only surrounding context survives
+ *   ambiguous    – multiple matches, can't pick one without context
+ *   unmatched    – nothing maps; user must place the comment manually
+ */
+import {
+  chalk,
+  fs,
+  path,
+  fmt,
+  loadConfig,
+  jsonMode,
+  jsonOutput,
+} from './context.js';
+import type { Command } from 'commander';
+import { findAnchorInText, classifyStrategy, type AnchorMatchQuality } from '../anchor-match.js';
+import type { CommentAnchorData } from '../word-extraction.js';
+import { computeSectionBoundaries } from './section-boundaries.js';
+interface VerifyOptions { // Parsed CLI options of the `verify-anchors` command
+  config: string; // --config: sections config file (default 'sections.yaml'), resolved against `dir`
+  dir: string; // --dir: directory containing the section files (default '.')
+  json?: boolean; // --json: emit the report as JSON instead of the formatted table
+}
+interface CommentReport { // One report row: how a single Word comment maps onto the current markdown
+  id: string; // comment id, copied from the extracted comment
+  author: string; // comment author, copied from the extracted comment
+  text: string; // comment body, copied from the extracted comment
+  section: string | null; // section file the anchor falls in; null when there is no anchor or no boundary contains it
+  quality: AnchorMatchQuality | 'ambiguous'; // match classification; the command sets 'ambiguous' when a 'clean' result has more than one occurrence
+  strategy: string; // strategy reported by findAnchorInText, or 'no-anchor' / 'no-section' / 'missing-file' for early exits
+  anchor: string; // anchor text taken from the docx ('' when the comment has no anchor)
+  occurrences: number; // number of candidate positions found (0 for the early-exit cases)
+}
+/**
+ * Register the `verify-anchors` subcommand on the CLI program.
+ *
+ * The action reads comments, comment anchors and headings from the given
+ * .docx, assigns each comment to a section file via the computed section
+ * boundaries, searches that section's markdown for the anchor, and then
+ * prints either a formatted report or (with `--json` / global JSON mode) a
+ * JSON report. The process exits with status 1 when the docx or the
+ * sections config is missing, or when the docx cannot be read.
+ *
+ * @param program Commander program the command is attached to.
+ */
+export function register(program: Command): void {
+  program
+    .command('verify-anchors')
+    .description('Report drift between Word comment anchors and current markdown')
+    .argument('<file>', 'Word document with reviewer comments (.docx)')
+    .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
+    .option('-d, --dir <directory>', 'Directory with section files', '.')
+    .option('--json', 'Output JSON report (for scripting)')
+    .action(async (docxPath: string, options: VerifyOptions) => {
+      if (!fs.existsSync(docxPath)) {
+        console.error(fmt.status('error', `File not found: ${docxPath}`));
+        process.exit(1);
+      }
+      const configPath = path.resolve(options.dir, options.config);
+      if (!fs.existsSync(configPath)) {
+        console.error(fmt.status('error', `Config not found: ${configPath}`));
+        console.error(chalk.dim('  Run "rev init" first to generate sections.yaml'));
+        process.exit(1);
+      }
+      const config = loadConfig(configPath);
+      const { extractWordComments, extractCommentAnchors, extractHeadings } = await import('../import.js');
+      let comments;
+      let anchors;
+      let headings;
+      try {
+        comments = await extractWordComments(docxPath);
+        const result = await extractCommentAnchors(docxPath);
+        anchors = result.anchors;
+        headings = await extractHeadings(docxPath);
+      } catch (err) {
+        const error = err as Error;
+        console.error(fmt.status('error', `Failed to read ${path.basename(docxPath)}: ${error.message}`));
+        if (process.env.DEBUG) console.error(error.stack);
+        process.exit(1);
+      }
+      if (comments.length === 0) {
+        // In JSON mode stdout must stay machine-parseable, so emit an empty
+        // report instead of the human-readable status line.
+        if (options.json || jsonMode) {
+          jsonOutput({
+            file: docxPath,
+            totalComments: 0,
+            summary: tally([]),
+            comments: [],
+          });
+          return;
+        }
+        console.log(fmt.status('info', 'No comments found in document.'));
+        return;
+      }
+      const boundaries = computeSectionBoundaries(config.sections, headings);
+      // Cache section markdown contents on first read
+      const sectionCache = new Map<string, string>();
+      function loadSection(file: string): string | null {
+        if (sectionCache.has(file)) return sectionCache.get(file)!;
+        const sectionPath = path.join(options.dir, file);
+        if (!fs.existsSync(sectionPath)) return null;
+        const content = fs.readFileSync(sectionPath, 'utf-8');
+        sectionCache.set(file, content);
+        return content;
+      }
+      const firstBoundaryStart = boundaries.length > 0 ? boundaries[0].start : 0;
+      const reports: CommentReport[] = [];
+      for (const c of comments) {
+        const anchor: CommentAnchorData | undefined = anchors.get(c.id);
+        const anchorText = anchor?.anchor || '';
+        if (!anchor) {
+          // The docx carries the comment but no anchor range for it
+          reports.push({
+            id: c.id,
+            author: c.author,
+            text: c.text,
+            section: null,
+            quality: 'unmatched',
+            strategy: 'no-anchor',
+            anchor: '',
+            occurrences: 0,
+          });
+          continue;
+        }
+        // Determine which section file this comment lives in
+        let sectionFile: string | null = null;
+        for (const b of boundaries) {
+          if (anchor.docPosition >= b.start && anchor.docPosition < b.end) {
+            sectionFile = b.file;
+            break;
+          }
+        }
+        // Anchors located before the first boundary are attributed to the first section
+        if (!sectionFile && boundaries.length > 0 && anchor.docPosition < firstBoundaryStart) {
+          sectionFile = boundaries[0].file;
+        }
+        if (!sectionFile) {
+          reports.push({
+            id: c.id,
+            author: c.author,
+            text: c.text,
+            section: null,
+            quality: 'unmatched',
+            strategy: 'no-section',
+            anchor: anchorText,
+            occurrences: 0,
+          });
+          continue;
+        }
+        const md = loadSection(sectionFile);
+        if (md === null) {
+          // Section is listed in the boundaries but its file is not on disk
+          reports.push({
+            id: c.id,
+            author: c.author,
+            text: c.text,
+            section: sectionFile,
+            quality: 'unmatched',
+            strategy: 'missing-file',
+            anchor: anchorText,
+            occurrences: 0,
+          });
+          continue;
+        }
+        const search = findAnchorInText(anchor.anchor, md, anchor.before, anchor.after);
+        let quality: AnchorMatchQuality | 'ambiguous' = classifyStrategy(search.strategy, search.occurrences.length);
+        // A clean hit that occurs more than once cannot be placed unambiguously
+        if (quality === 'clean' && search.occurrences.length > 1) {
+          quality = 'ambiguous';
+        }
+        reports.push({
+          id: c.id,
+          author: c.author,
+          text: c.text,
+          section: sectionFile,
+          quality,
+          strategy: search.strategy,
+          anchor: anchorText,
+          occurrences: search.occurrences.length,
+        });
+      }
+      if (options.json || jsonMode) {
+        jsonOutput({
+          file: docxPath,
+          totalComments: comments.length,
+          summary: tally(reports),
+          comments: reports,
+        });
+        return;
+      }
+      printReport(docxPath, reports);
+    });
+}
+/**
+ * Count reports per quality bucket.
+ *
+ * The five known buckets are always present in the result (zero when
+ * empty); any other quality value gets its own key on first sight.
+ *
+ * @param reports Per-comment reports to count.
+ * @returns Map from quality name to number of reports.
+ */
+function tally(reports: CommentReport[]): Record<string, number> {
+  const counts: Record<string, number> = {
+    clean: 0,
+    drift: 0,
+    'context-only': 0,
+    ambiguous: 0,
+    unmatched: 0,
+  };
+  reports.forEach((report) => {
+    const bucket = report.quality;
+    const seenSoFar = counts[bucket] || 0;
+    counts[bucket] = seenSoFar + 1;
+  });
+  return counts;
+}
+/**
+ * Render the human-readable verification report to stdout: a header, a
+ * boxed per-quality summary, and a table of every comment whose anchor is
+ * not a clean hit, followed by a hint when manual placement is required.
+ *
+ * @param docxPath Path of the reviewed docx; only its basename is shown.
+ * @param reports  One entry per comment.
+ */
+function printReport(docxPath: string, reports: CommentReport[]): void {
+  console.log(fmt.header(`Anchor Verification: ${path.basename(docxPath)}`));
+  console.log();
+  const counts = tally(reports);
+  // "clean" is always listed; the remaining buckets only when non-zero.
+  const summaryText = [
+    `${chalk.green(counts.clean)} clean (anchor still matches)`,
+    counts.drift ? `${chalk.cyan(counts.drift)} drifted (matched via fallback strategies)` : null,
+    counts['context-only'] ? `${chalk.yellow(counts['context-only'])} context-only (anchor text gone, neighbors survive)` : null,
+    counts.ambiguous ? `${chalk.magenta(counts.ambiguous)} ambiguous (multiple candidate positions)` : null,
+    counts.unmatched ? `${chalk.red(counts.unmatched)} unmatched (manual placement needed)` : null,
+  ]
+    .filter((line): line is string => line !== null)
+    .join('\n');
+  console.log(fmt.box(summaryText, { title: 'Summary', padding: 0 }));
+  console.log();
+  // Only comments that are not a clean direct hit make it into the table
+  const flagged = reports.filter((report) => report.quality !== 'clean');
+  if (flagged.length === 0) {
+    console.log(fmt.status('success', 'All comment anchors match the current markdown.'));
+    return;
+  }
+  const headers = ['ID', 'Quality', 'Section', 'Strategy', 'Anchor (Word)', 'Comment'];
+  const tableRows: string[][] = [];
+  for (const report of flagged) {
+    const sectionCell = report.section ? chalk.bold(report.section) : chalk.dim('—');
+    tableRows.push([
+      chalk.dim(`#${report.id}`),
+      qualityColor(report.quality),
+      sectionCell,
+      chalk.dim(report.strategy),
+      truncate(report.anchor, 35),
+      truncate(report.text, 35),
+    ]);
+  }
+  console.log(fmt.table(
+    headers,
+    tableRows,
+    { align: ['right', 'left', 'left', 'left', 'left', 'left'] },
+  ));
+  const needsManualPlacement = counts.unmatched > 0 || counts.ambiguous > 0;
+  if (!needsManualPlacement) return;
+  console.log();
+  console.log(chalk.dim('Comments flagged "unmatched" or "ambiguous" need manual placement.'));
+  console.log(chalk.dim('Run "rev sync --no-overwrite" to import the matched ones without touching prose.'));
+}
+/**
+ * Colorize a quality label for the report table.
+ *
+ * Known labels get a fixed color ('context-only' is shortened to
+ * 'context'); any other value is returned unchanged.
+ *
+ * @param q Quality label.
+ * @returns The label as it should appear in the table.
+ */
+function qualityColor(q: string): string {
+  if (q === 'clean') return chalk.green('clean');
+  if (q === 'drift') return chalk.cyan('drift');
+  if (q === 'context-only') return chalk.yellow('context');
+  if (q === 'ambiguous') return chalk.magenta('ambiguous');
+  if (q === 'unmatched') return chalk.red('unmatched');
+  return q;
+}
+/**
+ * Flatten a string to a single line and cap its length for table cells.
+ *
+ * Empty input renders as a dimmed dash. Otherwise whitespace runs are
+ * collapsed to single spaces, the result is trimmed, and anything longer
+ * than `max` is cut to `max - 1` characters plus an ellipsis.
+ *
+ * @param s   Text to display.
+ * @param max Maximum length of the returned text.
+ */
+function truncate(s: string, max: number): string {
+  if (!s) {
+    return chalk.dim('—');
+  }
+  const collapsed = s.replace(/\s+/g, ' ').trim();
+  if (collapsed.length > max) {
+    return collapsed.slice(0, max - 1) + '…';
+  }
+  return collapsed;
+}

package/lib/import.ts CHANGED Viewed

@@ -36,12 +36,14 @@ import {
   parseVisibleComments,
   convertVisibleComments,
 } from './restore-references.js';
+import { findAnchorInText } from './anchor-match.js';
 // Re-export everything so existing imports from './import.js' still work
 export {
   extractFromWord,
   extractWordComments,
   extractCommentAnchors,
+  extractHeadings,
   extractWordTables,
 } from './word-extraction.js';
 export type {
@@ -49,6 +51,7 @@ export type {
   TextNode,
   CommentAnchorData,
   CommentAnchorsResult,
+  DocxHeading,
   WordTable,
   ParsedRow,
   ExtractFromWordOptions,
@@ -86,6 +89,17 @@ const execAsync = promisify(exec);
 export interface InsertCommentsOptions {
   quiet?: boolean;
   sectionBoundary?: { start: number; end: number } | null;
+  /**
+   * When true (default), comments wrap their anchor text in `[anchor]{.mark}`
+   * so the rebuilt docx restores the original Word comment range. When false,
+   * comments are inserted as standalone `{>>...<<}` blocks adjacent to the
+   * anchor — the prose stays byte-identical except for the inserted blocks.
+   *
+   * Set to false from `sync --comments-only` so a draft revised after the
+   * docx was sent for review keeps its prose intact, and so multiple
+   * comments sharing one anchor don't produce nested broken markup.
+   */
+  wrapAnchor?: boolean;
 }
 export interface CommentWithPos {
@@ -100,12 +114,7 @@ export interface CommentWithPos {
   strategy?: string;
 }
-export interface AnchorSearchResult {
-  occurrences: number[];
-  matchedAnchor: string | null;
-  strategy: string;
-  stripped?: boolean;
-}
+export type { AnchorSearchResult } from './anchor-match.js';
 export interface MarkdownPrefixResult {
   prefix: string;
@@ -175,165 +184,14 @@ export function insertCommentsIntoMarkdown(
   anchors: Map<string, CommentAnchorData | string>,
   options: InsertCommentsOptions = {}
 ): string {
-  const { quiet = false, sectionBoundary = null } = options;
+  const { quiet = false, sectionBoundary = null, wrapAnchor = true } = options;
   let result = markdown;
   let unmatchedCount = 0;
   const duplicateWarnings: string[] = [];
   const usedPositions = new Set<number>(); // For tie-breaking: track used positions
-  // Helper: Strip CriticMarkup from text to get "clean" version for matching
-  function stripCriticMarkup(text: string): string {
-    return text
-      .replace(/\{\+\+([^+]*)\+\+\}/g, '$1')  // insertions: keep inserted text
-      .replace(/\{--([^-]*)--\}/g, '')         // deletions: remove deleted text
-      .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2')  // substitutions: keep new text
-      .replace(/\{>>[^<]*<<\}/g, '')           // comments: remove
-      .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
-  }
-  // Helper: Find anchor in text with multiple fallback strategies
-  function findAnchorInText(anchor: string, text: string, before: string = '', after: string = ''): AnchorSearchResult {
-    // If anchor is empty, skip directly to context-based matching
-    if (!anchor || anchor.trim().length === 0) {
-      // Jump to context-based strategies (Strategy 5)
-      if (before || after) {
-        const beforeLower = (before || '').toLowerCase();
-        const afterLower = (after || '').toLowerCase();
-        const textLower = text.toLowerCase();
-        if (before && after) {
-          const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
-          if (beforeIdx !== -1) {
-            const searchStart = beforeIdx + beforeLower.slice(-50).length;
-            const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
-            if (afterIdx !== -1 && afterIdx - searchStart < 500) {
-              return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
-            }
-          }
-        }
-        if (before) {
-          const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
-          if (beforeIdx !== -1) {
-            return { occurrences: [beforeIdx + beforeLower.slice(-30).length], matchedAnchor: null, strategy: 'context-before' };
-          }
-        }
-        if (after) {
-          const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
-          if (afterIdx !== -1) {
-            return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
-          }
-        }
-      }
-      return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
-    }
-    const anchorLower = anchor.toLowerCase();
-    const textLower = text.toLowerCase();
-    // Strategy 1: Direct match
-    let occurrences = findAllOccurrences(textLower, anchorLower);
-    if (occurrences.length > 0) {
-      return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
-    }
-    // Strategy 2: Normalized whitespace
-    const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
-    const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
-    let idx = normalizedText.indexOf(normalizedAnchor);
-    if (idx !== -1) {
-      return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
-    }
-    // Strategy 3: Try matching in stripped CriticMarkup version
-    const strippedText = stripCriticMarkup(text);
-    const strippedLower = strippedText.toLowerCase();
-    occurrences = findAllOccurrences(strippedLower, anchorLower);
-    if (occurrences.length > 0) {
-      return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
-    }
-    // Strategy 4: First N words of anchor (for long anchors)
-    const words = anchor.split(/\s+/);
-    if (words.length > 3) {
-      for (let n = Math.min(6, words.length); n >= 3; n--) {
-        const partialAnchor = words.slice(0, n).join(' ').toLowerCase();
-        if (partialAnchor.length >= 15) {
-          occurrences = findAllOccurrences(textLower, partialAnchor);
-          if (occurrences.length > 0) {
-            return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start' };
-          }
-          occurrences = findAllOccurrences(strippedLower, partialAnchor);
-          if (occurrences.length > 0) {
-            return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start-stripped', stripped: true };
-          }
-        }
-      }
-    }
-    // Strategy 5: Use context (before/after) to find approximate position
-    if (before || after) {
-      const beforeLower = before.toLowerCase();
-      const afterLower = after.toLowerCase();
-      if (before && after) {
-        const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
-        if (beforeIdx !== -1) {
-          const searchStart = beforeIdx + beforeLower.slice(-50).length;
-          const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
-          if (afterIdx !== -1 && afterIdx - searchStart < 500) {
-            return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
-          }
-        }
-      }
-      if (before) {
-        const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
-        if (beforeIdx !== -1) {
-          return { occurrences: [beforeIdx + beforeLower.slice(-30).length], matchedAnchor: null, strategy: 'context-before' };
-        }
-      }
-      if (after) {
-        const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
-        if (afterIdx !== -1) {
-          return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
-        }
-      }
-    }
-    // Strategy 6: Try splitting anchor on common transition words
-    const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
-    for (const sep of splitPatterns) {
-      if (anchor.includes(sep)) {
-        const parts = anchor.split(sep).filter(p => p.length >= 4);
-        for (const part of parts) {
-          const partLower = part.toLowerCase();
-          occurrences = findAllOccurrences(textLower, partLower);
-          if (occurrences.length > 0 && occurrences.length < 5) {
-            return { occurrences, matchedAnchor: part, strategy: 'split-match' };
-          }
-        }
-      }
-    }
-    return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
-  }
-  // Helper: Find all occurrences of needle in haystack
-  function findAllOccurrences(haystack: string, needle: string): number[] {
-    if (!needle || needle.length === 0) {
-      return [];
-    }
-    const occurrences: number[] = [];
-    let idx = 0;
-    while ((idx = haystack.indexOf(needle, idx)) !== -1) {
-      occurrences.push(idx);
-      idx += 1;
-    }
-    return occurrences;
-  }
+  // Anchor matching primitives live in lib/anchor-match.ts so that
+  // `rev verify-anchors` can use the same strategies for drift reporting.
   // Get all positions in order (for sequential tie-breaking)
   const commentsWithPositions = comments.map((c): CommentWithPos => {
@@ -489,18 +347,24 @@ export function insertCommentsIntoMarkdown(
   // Sort by position descending (insert from end to avoid offset issues)
   matched.sort((a, b) => b.pos - a.pos);
-  // Insert each comment with anchor marking
+  // Insert each comment. With `wrapAnchor` (the default), the anchor text
+  // gets wrapped in `[anchor]{.mark}` so the rebuilt docx restores the
+  // original Word comment range. Without it, the comment block is inserted
+  // adjacent to the anchor and prose stays untouched — required for
+  // comments-only sync where multiple comments may share one anchor.
   for (const c of matched) {
     const comment = `{>>${c.author}: ${c.text}<<}`;
-    if (c.anchorText && c.anchorEnd) {
-      // Replace anchor text with: {>>comment<<}[anchor]{.mark}
+    if (wrapAnchor && c.anchorText && c.anchorEnd) {
       const before = result.slice(0, c.pos);
       const anchor = result.slice(c.pos, c.anchorEnd);
       const after = result.slice(c.anchorEnd);
       result = before + comment + `[${anchor}]{.mark}` + after;
     } else {
-      // No anchor - just insert comment at position
-      result = result.slice(0, c.pos) + ` ${comment}` + result.slice(c.pos);
+      // Insert comment at the anchor position with no surrounding whitespace
+      // tweaks; CriticMarkup blocks are invisible to readers, and adding a
+      // leading space would shift prose byte-for-byte (relevant when callers
+      // verify that --comments-only didn't touch the original).
+      result = result.slice(0, c.pos) + comment + result.slice(c.pos);
     }
   }

package/lib/word-extraction.ts CHANGED Viewed

@@ -42,6 +42,17 @@ export interface CommentAnchorsResult {
   fullDocText: string;
 }
+export interface DocxHeading {
+  /** Heading style name from `<w:pStyle>`, e.g. "Heading1"; any style name containing "heading" (case-insensitive) qualifies */
+  style: string;
+  /** Heading depth: 1, 2, 3, ... (first run of digits in the style name; 0 if the name has none) */
+  level: number;
+  /** Concatenated, trimmed text of the heading paragraph's `<w:t>` and `<w:delText>` runs */
+  text: string;
+  /** Position in fullDocText (same coordinate system as CommentAnchorData.docPosition) */
+  docPosition: number;
+}
 export interface WordTable {
   markdown: string;
   rowCount: number;
@@ -331,6 +342,88 @@ export async function extractCommentAnchors(docxPath: string): Promise<CommentAn
   return { anchors, fullDocText };
 }
+/**
+ * Extract heading paragraphs from a docx, with their text positions in the
+ * same coordinate system as `extractCommentAnchors`'s `fullDocText` and
+ * `CommentAnchorData.docPosition`.
+ *
+ * Headings are paragraphs whose `<w:pStyle>` is a Heading style. Reading
+ * styles directly is more reliable than keyword-matching the concatenated
+ * body text — there, paragraph boundaries are gone, so a heading keyword
+ * can appear inside prose (e.g. "Results" in "results across countries")
+ * and the structured-abstract label "Methods:" loses its colon when text
+ * runs are concatenated.
+ *
+ * Offsets count only `<w:t>` text; `<w:delText>` contributes to a heading's
+ * `text` but not to any `docPosition`.
+ *
+ * @param docxPath Path to the .docx file.
+ * @returns Headings in document order; an empty array when the archive has
+ *   no `word/document.xml`. Heading paragraphs without text are skipped.
+ * @throws Error when `docxPath` does not exist.
+ */
+export async function extractHeadings(docxPath: string): Promise<DocxHeading[]> {
+  const AdmZip = (await import('adm-zip')).default;
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const docEntry = zip.getEntry('word/document.xml');
+  if (!docEntry) return [];
+  const xml = docEntry.getData().toString('utf8');
+  // Build the same xml-pos → text-pos mapping that extractCommentAnchors does
+  const textNodePattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
+  const nodes: Array<{ xmlStart: number; xmlEnd: number; textStart: number; textEnd: number }> = [];
+  let textPos = 0;
+  let m;
+  while ((m = textNodePattern.exec(xml)) !== null) {
+    const decoded = decodeXmlEntities(m[1] ?? ''); // lengths are measured on decoded text
+    nodes.push({
+      xmlStart: m.index,
+      xmlEnd: m.index + m[0].length,
+      textStart: textPos,
+      textEnd: textPos + decoded.length,
+    });
+    textPos += decoded.length;
+  }
+  function xmlToTextPos(xmlPos: number): number {
+    for (const n of nodes) {
+      if (xmlPos >= n.xmlStart && xmlPos < n.xmlEnd) return n.textStart; // inside a text node: that node's start
+      if (xmlPos < n.xmlStart) return n.textStart; // between nodes: start of the next text node
+    }
+    return nodes.length ? nodes[nodes.length - 1].textEnd : 0; // past the last node: end of all text (0 if none)
+  }
+  const headings: DocxHeading[] = [];
+  const paraPattern = /<w:p\b[^>]*>([\s\S]*?)<\/w:p>/g;
+  let pm;
+  while ((pm = paraPattern.exec(xml)) !== null) {
+    const inner = pm[1];
+    const styleMatch = inner.match(/<w:pStyle[^>]*w:val="([^"]+)"/);
+    if (!styleMatch) continue; // paragraph has no explicit style
+    const style = styleMatch[1];
+    if (!/heading/i.test(style)) continue; // keep only styles whose name contains "heading"
+    // Concatenate text runs; include w:delText so a heading inside a tracked
+    // deletion is still surfaced (verifying anchors against an original draft)
+    const textInRange = /<w:t[^>]*>([^<]*)<\/w:t>|<w:delText[^>]*>([^<]*)<\/w:delText>/g;
+    let txt = '';
+    let tm;
+    while ((tm = textInRange.exec(inner)) !== null) {
+      txt += decodeXmlEntities(tm[1] || tm[2] || '');
+    }
+    const trimmed = txt.trim();
+    if (!trimmed) continue; // skip heading paragraphs with no text
+    const levelMatch = style.match(/(\d+)/);
+    const level = levelMatch ? parseInt(levelMatch[1], 10) : 0;
+    headings.push({
+      style,
+      level,
+      text: trimmed,
+      docPosition: xmlToTextPos(pm.index), // text offset of the first <w:t> at or after the paragraph's opening tag
+    });
+  }
+  return headings;
+}
 /**
  * Decode XML entities in text
  */

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "docrev",
-  "version": "0.9.6",
+  "version": "0.9.7",
   "description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
   "type": "module",
   "types": "dist/lib/types.d.ts",

package/skill/REFERENCE.md CHANGED Viewed

@@ -21,11 +21,38 @@ rev import manuscript.docx --output ./project
 ### rev sync
 Sync feedback from a reviewed Word document into existing markdown sections.
 ```bash
-rev sync reviewed.docx       # Updates markdown with track changes/comments
-rev sync                     # Auto-detect most recent .docx
+rev sync reviewed.docx          # Updates markdown with track changes/comments
+rev sync                        # Auto-detect most recent .docx
 rev sync reviewed.docx methods  # Sync only methods section
+rev sync reviewed.docx --comments-only  # Insert comments only; never modify prose
 ```
+`--comments-only` skips the Word→Markdown diff entirely. Use it when the
+markdown has been revised between sending the docx out for review and
+receiving it back: applying track changes from a stale draft would clobber
+newer edits, but comments still need to land. Comments are placed at
+fuzzy-matched anchors against the current prose. Pair with
+`rev verify-anchors` to see which ones won't fit before you run sync.
+### rev verify-anchors
+Report drift between Word comment anchors and the current markdown.
+```bash
+rev verify-anchors reviewed.docx       # Print per-comment match quality
+rev verify-anchors reviewed.docx --json  # Machine-readable report
+```
+Each comment is classified by how well its anchor still matches the current
+section prose:
+- `clean` – exact or whitespace-normalized hit
+- `drift` – anchor only matches via stripped-CriticMarkup or partial-prefix fallbacks
+- `context-only` – anchor text is gone, only surrounding context survives
+- `ambiguous` – multiple candidate positions; needs context to disambiguate
+- `unmatched` – nothing maps; user must place the comment manually
+Useful before `rev sync --comments-only` to plan which comments will land
+automatically and which need manual placement.
 ### rev build
 Build output documents from markdown sections.
 ```bash