npm - docrev - Versions diffs - 0.9.6 → 0.9.11 - Mend

docrev 0.9.6 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (80) hide show

package/CHANGELOG.md +41 -0
package/dev_notes/bug_repro_comment_parser.md +71 -0
package/dev_notes/stress2/adversarial.docx +0 -0
package/dev_notes/stress2/build_adversarial.ts +186 -0
package/dev_notes/stress2/drift_matcher.ts +62 -0
package/dev_notes/stress2/probe_anchors.ts +35 -0
package/dev_notes/stress2/project/adversarial.docx +0 -0
package/dev_notes/stress2/project/discussion.before.md +3 -0
package/dev_notes/stress2/project/discussion.md +3 -0
package/dev_notes/stress2/project/methods.before.md +20 -0
package/dev_notes/stress2/project/methods.md +20 -0
package/dev_notes/stress2/project/rev.yaml +5 -0
package/dev_notes/stress2/project/sections.yaml +4 -0
package/dev_notes/stress2/sections.yaml +5 -0
package/dev_notes/stress2/trace_placement.ts +50 -0
package/dev_notes/stresstest_boundaries.ts +27 -0
package/dev_notes/stresstest_drift_apply.ts +43 -0
package/dev_notes/stresstest_drift_compare.ts +43 -0
package/dev_notes/stresstest_drift_v2.ts +54 -0
package/dev_notes/stresstest_inspect.ts +54 -0
package/dev_notes/stresstest_pstyle.ts +55 -0
package/dev_notes/stresstest_section_debug.ts +23 -0
package/dev_notes/stresstest_split.ts +70 -0
package/dev_notes/stresstest_trace.ts +19 -0
package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
package/dist/lib/anchor-match.d.ts +51 -0
package/dist/lib/anchor-match.d.ts.map +1 -0
package/dist/lib/anchor-match.js +227 -0
package/dist/lib/anchor-match.js.map +1 -0
package/dist/lib/annotations.d.ts.map +1 -1
package/dist/lib/annotations.js +24 -11
package/dist/lib/annotations.js.map +1 -1
package/dist/lib/commands/index.d.ts +2 -1
package/dist/lib/commands/index.d.ts.map +1 -1
package/dist/lib/commands/index.js +3 -1
package/dist/lib/commands/index.js.map +1 -1
package/dist/lib/commands/quality.js +1 -1
package/dist/lib/commands/quality.js.map +1 -1
package/dist/lib/commands/section-boundaries.d.ts +22 -0
package/dist/lib/commands/section-boundaries.d.ts.map +1 -0
package/dist/lib/commands/section-boundaries.js +63 -0
package/dist/lib/commands/section-boundaries.js.map +1 -0
package/dist/lib/commands/sync.d.ts.map +1 -1
package/dist/lib/commands/sync.js +141 -0
package/dist/lib/commands/sync.js.map +1 -1
package/dist/lib/commands/verify-anchors.d.ts +17 -0
package/dist/lib/commands/verify-anchors.d.ts.map +1 -0
package/dist/lib/commands/verify-anchors.js +226 -0
package/dist/lib/commands/verify-anchors.js.map +1 -0
package/dist/lib/comment-realign.js +2 -2
package/dist/lib/comment-realign.js.map +1 -1
package/dist/lib/import.d.ts +26 -8
package/dist/lib/import.d.ts.map +1 -1
package/dist/lib/import.js +166 -187
package/dist/lib/import.js.map +1 -1
package/dist/lib/response.js +1 -1
package/dist/lib/response.js.map +1 -1
package/dist/lib/word-extraction.d.ts +23 -0
package/dist/lib/word-extraction.d.ts.map +1 -1
package/dist/lib/word-extraction.js +79 -0
package/dist/lib/word-extraction.js.map +1 -1
package/dist/lib/wordcomments.d.ts.map +1 -1
package/dist/lib/wordcomments.js +165 -73
package/dist/lib/wordcomments.js.map +1 -1
package/lib/anchor-match.ts +276 -0
package/lib/annotations.ts +25 -11
package/lib/commands/index.ts +3 -0
package/lib/commands/quality.ts +1 -1
package/lib/commands/section-boundaries.ts +82 -0
package/lib/commands/sync.ts +170 -0
package/lib/commands/verify-anchors.ts +272 -0
package/lib/comment-realign.ts +2 -2
package/lib/import.ts +197 -209
package/lib/response.ts +1 -1
package/lib/word-extraction.ts +93 -0
package/lib/wordcomments.ts +180 -82
package/package.json +1 -1
package/skill/REFERENCE.md +29 -2
package/skill/SKILL.md +12 -2
package/dist/package.json +0 -137

package/lib/import.ts CHANGED Viewed

@@ -36,12 +36,72 @@ import {
   parseVisibleComments,
   convertVisibleComments,
 } from './restore-references.js';
+import { findAnchorInText, findAllOccurrences } from './anchor-match.js';
+/**
+ * Pick the best position from candidate `occurrences` given the
+ * surrounding `before` / `after` context from the docx, while
+ * respecting `usedPositions` to avoid stacking distinct comments at
+ * the same anchor instance.
+ *
+ * Scoring, per candidate that is not already in `usedPositions` (all
+ * comparisons are made on lowercased text):
+ *   - +2 for every whitespace-separated word of `before` longer than 3
+ *     characters that occurs in the window of `before.length + 20`
+ *     characters ending at the candidate (a repeated word scores each
+ *     time it is listed);
+ *   - +5 if the last 30 characters of `before` (all of it when shorter)
+ *     occur in that same window;
+ *   - the same two rules for `after`, applied to the window of
+ *     `after.length + 20` characters that starts `anchorLen` characters
+ *     past the candidate, using the first 30 characters of `after`.
+ * The highest score wins; on a tie the smaller position wins.
+ *
+ * `usedPositions` is only read here. The call sites visible in this file
+ * add the returned position to the set themselves.
+ *
+ * @param occurrences - Candidate start offsets into `result`.
+ * @param result - Text the offsets index into (the markdown being annotated).
+ * @param before - Docx-side text preceding the anchor; an empty string skips the "before" scoring.
+ * @param after - Docx-side text following the anchor; an empty string skips the "after" scoring.
+ * @param anchorLen - Length of the matched anchor, used to find where the text after it begins.
+ * @param usedPositions - Offsets already claimed by earlier comments; these candidates are never chosen.
+ * @returns The chosen offset, or -1 when `occurrences` is empty or every candidate is already used.
+ *
+ * Returns the chosen position, or -1 if every candidate is already used.
+ */
+function pickBestOccurrence(
+  occurrences: number[],
+  result: string,
+  before: string,
+  after: string,
+  anchorLen: number,
+  usedPositions: Set<number>,
+): number {
+  if (occurrences.length === 0) return -1;
+  if (occurrences.length === 1) {
+    return usedPositions.has(occurrences[0]) ? -1 : occurrences[0]; // single candidate: no scoring needed
+  }
+  let bestIdx = occurrences.find(p => !usedPositions.has(p)) ?? -1; // first unused candidate is the starting pick
+  if (bestIdx < 0) return -1; // every candidate already used
+  let bestScore = -1; // lower than any real score, so the first unused candidate always replaces it
+  for (const pos of occurrences) {
+    if (usedPositions.has(pos)) continue;
+    let score = 0;
+    if (before) {
+      const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
+      const beforeLower = before.toLowerCase();
+      const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
+      for (const word of beforeWords) {
+        if (contextBefore.includes(word)) score += 2;
+      }
+      if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
+    }
+    if (after) {
+      const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
+      const afterLower = after.toLowerCase();
+      const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
+      for (const word of afterWords) {
+        if (contextAfter.includes(word)) score += 2;
+      }
+      if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
+    }
+    if (score > bestScore || (score === bestScore && pos < bestIdx)) { // higher score wins; ties go to the smaller offset
+      bestScore = score;
+      bestIdx = pos;
+    }
+  }
+  return bestIdx;
+}
 // Re-export everything so existing imports from './import.js' still work
 export {
   extractFromWord,
   extractWordComments,
   extractCommentAnchors,
+  extractHeadings,
   extractWordTables,
 } from './word-extraction.js';
 export type {
@@ -49,6 +109,7 @@ export type {
   TextNode,
   CommentAnchorData,
   CommentAnchorsResult,
+  DocxHeading,
   WordTable,
   ParsedRow,
   ExtractFromWordOptions,
@@ -86,6 +147,25 @@ const execAsync = promisify(exec);
 export interface InsertCommentsOptions {
   quiet?: boolean;
   sectionBoundary?: { start: number; end: number } | null;
+  /**
+   * When true (default), comments wrap their anchor text in `[anchor]{.mark}`
+   * so the rebuilt docx restores the original Word comment range. When false,
+   * comments are inserted as standalone `{>>...<<}` blocks adjacent to the
+   * anchor — the prose stays byte-identical except for the inserted blocks.
+   *
+   * Set to false from `sync --comments-only` so a draft revised after the
+   * docx was sent for review keeps its prose intact, and so multiple
+   * comments sharing one anchor don't produce nested broken markup.
+   */
+  wrapAnchor?: boolean;
+  /**
+   * Mutable output: when provided, the function fills in counters so callers
+   * can distinguish placement outcomes in their summary. `placed` counts new
+   * insertions, `deduped` counts comments that were already present at their
+   * anchor (skipped to avoid duplication on re-sync), `unmatched` counts
+   * comments whose anchor couldn't be located.
+   */
+  outStats?: { placed: number; deduped: number; unmatched: number };
 }
 export interface CommentWithPos {
@@ -100,12 +180,7 @@ export interface CommentWithPos {
   strategy?: string;
 }
-export interface AnchorSearchResult {
-  occurrences: number[];
-  matchedAnchor: string | null;
-  strategy: string;
-  stripped?: boolean;
-}
+export type { AnchorSearchResult } from './anchor-match.js';
 export interface MarkdownPrefixResult {
   prefix: string;
@@ -166,6 +241,34 @@ export interface MoveExtractedMediaResult {
 // Functions
 // ============================================
+/**
+ * Keep a comment insertion point from landing on or before a section
+ * file's `# Heading` line, so the comment stays inside the section.
+ * A comment authored at the very
+ * start of a Word section maps to `pos === 0`, but inserting at column 0
+ * of a markdown file that begins with `# Heading` puts the `{>>...<<}`
+ * before the heading marker — Pandoc then treats the line as ordinary
+ * paragraph text and the comment renders in the previous section.
+ *
+ * Behaviour as implemented:
+ *   - The heading is the first line of `text` that starts with 1–6 `#`
+ *     characters followed by whitespace and at least one more character,
+ *     wherever that line occurs in `text`.
+ *   - If `text` has no such line, `pos` is returned unchanged.
+ *   - If `pos > 0` and `pos` is at or past the end of the heading's text
+ *     (the trailing newline is not counted), `pos` is returned unchanged.
+ *   - Otherwise the result is the offset of the first character after the
+ *     heading line that is neither `\n` nor `\r`.
+ *
+ * NOTE(review): an earlier version of this comment said a `pos` on the
+ * blank line right after the heading is advanced as well; the code
+ * returns such a `pos` unchanged — confirm which is intended.
+ * NOTE(review): the heading pattern is not anchored to the start of the
+ * file, so a `pos` in prose that precedes the first heading is also moved
+ * past that heading — confirm section files always begin with a heading.
+ *
+ * @param text - Markdown content the offset refers to.
+ * @param pos - Proposed insertion offset into `text`.
+ * @returns The offset to insert at, adjusted as described above.
+ */
+function pushPastSectionHeading(text: string, pos: number): number {
+  if (pos > 0) {
+    const headingMatch = text.match(/^#{1,6}\s.+$/m);
+    if (!headingMatch || headingMatch.index === undefined) return pos;
+    const headingEnd = headingMatch.index + headingMatch[0].length; // end of the heading text, newline excluded
+    if (pos >= headingEnd) return pos; // already past the heading text: leave untouched
+  }
+  // pos is at-or-before the first heading line. Advance to the first
+  // non-blank position after the heading paragraph.
+  const headingLine = text.match(/^#{1,6}\s.+(?:\n|$)/m);
+  if (!headingLine || headingLine.index === undefined) return pos;
+  let after = headingLine.index + headingLine[0].length;
+  // Skip blank lines so we land at the start of the first body paragraph.
+  while (after < text.length && (text[after] === '\n' || text[after] === '\r')) {
+    after++;
+  }
+  return after;
+}
 /**
  * Insert comments into markdown text based on anchor texts with context
  */
@@ -175,165 +278,15 @@ export function insertCommentsIntoMarkdown(
   anchors: Map<string, CommentAnchorData | string>,
   options: InsertCommentsOptions = {}
 ): string {
-  const { quiet = false, sectionBoundary = null } = options;
+  const { quiet = false, sectionBoundary = null, wrapAnchor = true, outStats } = options;
   let result = markdown;
   let unmatchedCount = 0;
+  let placedCount = 0;
   const duplicateWarnings: string[] = [];
   const usedPositions = new Set<number>(); // For tie-breaking: track used positions
-  // Helper: Strip CriticMarkup from text to get "clean" version for matching
-  function stripCriticMarkup(text: string): string {
-    return text
-      .replace(/\{\+\+([^+]*)\+\+\}/g, '$1')  // insertions: keep inserted text
-      .replace(/\{--([^-]*)--\}/g, '')         // deletions: remove deleted text
-      .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2')  // substitutions: keep new text
-      .replace(/\{>>[^<]*<<\}/g, '')           // comments: remove
-      .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
-  }
-  // Helper: Find anchor in text with multiple fallback strategies
-  function findAnchorInText(anchor: string, text: string, before: string = '', after: string = ''): AnchorSearchResult {
-    // If anchor is empty, skip directly to context-based matching
-    if (!anchor || anchor.trim().length === 0) {
-      // Jump to context-based strategies (Strategy 5)
-      if (before || after) {
-        const beforeLower = (before || '').toLowerCase();
-        const afterLower = (after || '').toLowerCase();
-        const textLower = text.toLowerCase();
-        if (before && after) {
-          const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
-          if (beforeIdx !== -1) {
-            const searchStart = beforeIdx + beforeLower.slice(-50).length;
-            const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
-            if (afterIdx !== -1 && afterIdx - searchStart < 500) {
-              return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
-            }
-          }
-        }
-        if (before) {
-          const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
-          if (beforeIdx !== -1) {
-            return { occurrences: [beforeIdx + beforeLower.slice(-30).length], matchedAnchor: null, strategy: 'context-before' };
-          }
-        }
-        if (after) {
-          const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
-          if (afterIdx !== -1) {
-            return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
-          }
-        }
-      }
-      return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
-    }
-    const anchorLower = anchor.toLowerCase();
-    const textLower = text.toLowerCase();
-    // Strategy 1: Direct match
-    let occurrences = findAllOccurrences(textLower, anchorLower);
-    if (occurrences.length > 0) {
-      return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
-    }
-    // Strategy 2: Normalized whitespace
-    const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
-    const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
-    let idx = normalizedText.indexOf(normalizedAnchor);
-    if (idx !== -1) {
-      return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
-    }
-    // Strategy 3: Try matching in stripped CriticMarkup version
-    const strippedText = stripCriticMarkup(text);
-    const strippedLower = strippedText.toLowerCase();
-    occurrences = findAllOccurrences(strippedLower, anchorLower);
-    if (occurrences.length > 0) {
-      return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
-    }
-    // Strategy 4: First N words of anchor (for long anchors)
-    const words = anchor.split(/\s+/);
-    if (words.length > 3) {
-      for (let n = Math.min(6, words.length); n >= 3; n--) {
-        const partialAnchor = words.slice(0, n).join(' ').toLowerCase();
-        if (partialAnchor.length >= 15) {
-          occurrences = findAllOccurrences(textLower, partialAnchor);
-          if (occurrences.length > 0) {
-            return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start' };
-          }
-          occurrences = findAllOccurrences(strippedLower, partialAnchor);
-          if (occurrences.length > 0) {
-            return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start-stripped', stripped: true };
-          }
-        }
-      }
-    }
-    // Strategy 5: Use context (before/after) to find approximate position
-    if (before || after) {
-      const beforeLower = before.toLowerCase();
-      const afterLower = after.toLowerCase();
-      if (before && after) {
-        const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
-        if (beforeIdx !== -1) {
-          const searchStart = beforeIdx + beforeLower.slice(-50).length;
-          const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
-          if (afterIdx !== -1 && afterIdx - searchStart < 500) {
-            return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
-          }
-        }
-      }
-      if (before) {
-        const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
-        if (beforeIdx !== -1) {
-          return { occurrences: [beforeIdx + beforeLower.slice(-30).length], matchedAnchor: null, strategy: 'context-before' };
-        }
-      }
-      if (after) {
-        const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
-        if (afterIdx !== -1) {
-          return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
-        }
-      }
-    }
-    // Strategy 6: Try splitting anchor on common transition words
-    const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
-    for (const sep of splitPatterns) {
-      if (anchor.includes(sep)) {
-        const parts = anchor.split(sep).filter(p => p.length >= 4);
-        for (const part of parts) {
-          const partLower = part.toLowerCase();
-          occurrences = findAllOccurrences(textLower, partLower);
-          if (occurrences.length > 0 && occurrences.length < 5) {
-            return { occurrences, matchedAnchor: part, strategy: 'split-match' };
-          }
-        }
-      }
-    }
-    return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
-  }
-  // Helper: Find all occurrences of needle in haystack
-  function findAllOccurrences(haystack: string, needle: string): number[] {
-    if (!needle || needle.length === 0) {
-      return [];
-    }
-    const occurrences: number[] = [];
-    let idx = 0;
-    while ((idx = haystack.indexOf(needle, idx)) !== -1) {
-      occurrences.push(idx);
-      idx += 1;
-    }
-    return occurrences;
-  }
+  // Anchor matching primitives live in lib/anchor-match.ts so that
+  // `rev verify-anchors` can use the same strategies for drift reporting.
   // Get all positions in order (for sequential tie-breaking)
   const commentsWithPositions = comments.map((c): CommentWithPos => {
@@ -364,6 +317,18 @@ export function insertCommentsIntoMarkdown(
         const proportion = Math.min(relativePos / sectionLength, 1.0);
         const markdownPos = Math.floor(proportion * result.length);
+        // For empty anchors, before/after context is the only signal that
+        // pinpoints the original split — without it, proportional placement
+        // can land mid-word or split unrelated phrases. Try context match
+        // first; only fall through to proportional when context is gone.
+        if ((!anchor || isEmpty) && (before || after)) {
+          const ctx = findAnchorInText('', result, before, after);
+          if (ctx.occurrences.length > 0) {
+            const pos = pushPastSectionHeading(result, ctx.occurrences[0]);
+            return { ...c, pos, anchorText: null, isEmpty: true, strategy: `ctx:${ctx.strategy}` };
+          }
+        }
         let insertPos = markdownPos;
         // Look for nearby word boundary
@@ -373,26 +338,52 @@ export function insertCommentsIntoMarkdown(
           insertPos = Math.max(0, markdownPos - 25) + spaceIdx;
         }
-        // If we have anchor text, try to find it near this position
+        // If we have anchor text, try to find it near this position.
+        // Collect ALL occurrences in the local window, then disambiguate
+        // via before/after context + usedPositions — otherwise two
+        // comments sharing the same anchor word would both collide at
+        // the leftmost match. The context-scoring helper handles the
+        // "repeated formulaic prose" case using docx-side context, which
+        // is a stronger signal than raw distance to the proportional
+        // insertPos (insertPos is itself an approximation).
         if (anchor && !isEmpty) {
           const searchStart = Math.max(0, insertPos - 200);
           const searchEnd = Math.min(result.length, insertPos + 200);
           const localSearch = result.slice(searchStart, searchEnd).toLowerCase();
           const anchorLower = anchor.toLowerCase();
-          const localIdx = localSearch.indexOf(anchorLower);
-          if (localIdx !== -1) {
-            return { ...c, pos: searchStart + localIdx, anchorText: anchor, anchorEnd: searchStart + localIdx + anchor.length, strategy: 'position+text' };
+          const localHits = findAllOccurrences(localSearch, anchorLower).map(i => searchStart + i);
+          if (localHits.length > 0) {
+            const chosen = pickBestOccurrence(localHits, result, before, after, anchor.length, usedPositions);
+            if (chosen >= 0) {
+              if (localHits.length > 1) {
+                duplicateWarnings.push(`"${anchor.slice(0, 40)}${anchor.length > 40 ? '...' : ''}" appears ${localHits.length} times in section window`);
+              }
+              usedPositions.add(chosen);
+              return { ...c, pos: chosen, anchorText: anchor, anchorEnd: chosen + anchor.length, strategy: 'position+text' };
+            }
           }
           // Try first few words
           const words = anchor.split(/\s+/).slice(0, 4).join(' ').toLowerCase();
           if (words.length >= 10) {
-            const partialIdx = localSearch.indexOf(words);
-            if (partialIdx !== -1) {
-              return { ...c, pos: searchStart + partialIdx, anchorText: words, anchorEnd: searchStart + partialIdx + words.length, strategy: 'position+partial' };
+            const partialHits = findAllOccurrences(localSearch, words).map(i => searchStart + i);
+            if (partialHits.length > 0) {
+              const chosen = pickBestOccurrence(partialHits, result, before, after, words.length, usedPositions);
+              if (chosen >= 0) {
+                usedPositions.add(chosen);
+                return { ...c, pos: chosen, anchorText: words, anchorEnd: chosen + words.length, strategy: 'position+partial' };
+              }
             }
           }
         }
+        // A docPosition at the very start of a section maps to markdownPos=0,
+        // which sits before the file's `# Heading` line and gets rendered in
+        // the previous section. Push past the heading line so the comment
+        // stays inside the section it was authored in.
+        insertPos = pushPastSectionHeading(result, insertPos);
         return { ...c, pos: insertPos, anchorText: null, strategy: 'position-only' };
       }
     }
@@ -432,46 +423,14 @@ export function insertCommentsIntoMarkdown(
       duplicateWarnings.push(`"${matchedAnchor.slice(0, 40)}${matchedAnchor.length > 40 ? '...' : ''}" appears ${occurrences.length} times`);
     }
-    let bestIdx = occurrences.find(p => !usedPositions.has(p)) ?? occurrences[0];
-    let bestScore = -1;
-    for (const pos of occurrences) {
-      if (usedPositions.has(pos)) continue;
-      let score = 0;
-      if (before) {
-        const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
-        const beforeLower = before.toLowerCase();
-        const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
-        for (const word of beforeWords) {
-          if (contextBefore.includes(word)) score += 2;
-        }
-        if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
-      }
-      if (after) {
-        const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
-        const afterLower = after.toLowerCase();
-        const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
-        for (const word of afterWords) {
-          if (contextAfter.includes(word)) score += 2;
-        }
-        if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
-      }
-      if (score > bestScore || (score === bestScore && pos < bestIdx)) {
-        bestScore = score;
-        bestIdx = pos;
-      }
-    }
-    usedPositions.add(bestIdx);
+    const bestIdx = pickBestOccurrence(occurrences, result, before, after, anchorLen, usedPositions);
+    const finalIdx = bestIdx >= 0 ? bestIdx : occurrences[0];
+    usedPositions.add(finalIdx);
     if (matchedAnchor) {
-      return { ...c, pos: bestIdx, anchorText: matchedAnchor, anchorEnd: bestIdx + anchorLen };
+      return { ...c, pos: finalIdx, anchorText: matchedAnchor, anchorEnd: finalIdx + anchorLen };
     } else {
-      return { ...c, pos: bestIdx, anchorText: null };
+      return { ...c, pos: finalIdx, anchorText: null };
     }
   });
@@ -489,19 +448,45 @@ export function insertCommentsIntoMarkdown(
   // Sort by position descending (insert from end to avoid offset issues)
   matched.sort((a, b) => b.pos - a.pos);
-  // Insert each comment with anchor marking
+  // Insert each comment. With `wrapAnchor` (the default), the anchor text
+  // gets wrapped in `[anchor]{.mark}` so the rebuilt docx restores the
+  // original Word comment range. Without it, the comment block is inserted
+  // adjacent to the anchor and prose stays untouched — required for
+  // comments-only sync where multiple comments may share one anchor.
+  // Skip insertion when an identical comment already lives near the target.
+  // Re-running sync against the same docx would otherwise stack duplicate
+  // CriticMarkup blocks (`{>>R1: ...<<}{>>R1: ...<<}...`) on each invocation.
+  // A 200-char window catches both wrapped (`{>>...<<}[anchor]{.mark}`) and
+  // bare (`{>>...<<}anchor`) forms while ignoring incidental matches farther
+  // away.
+  let dedupedCount = 0;
   for (const c of matched) {
     const comment = `{>>${c.author}: ${c.text}<<}`;
-    if (c.anchorText && c.anchorEnd) {
-      // Replace anchor text with: {>>comment<<}[anchor]{.mark}
+    const windowStart = Math.max(0, c.pos - 200);
+    const windowEnd = Math.min(result.length, c.pos + 200);
+    if (result.slice(windowStart, windowEnd).includes(comment)) {
+      dedupedCount++;
+      continue;
+    }
+    if (wrapAnchor && c.anchorText && c.anchorEnd) {
       const before = result.slice(0, c.pos);
       const anchor = result.slice(c.pos, c.anchorEnd);
       const after = result.slice(c.anchorEnd);
       result = before + comment + `[${anchor}]{.mark}` + after;
     } else {
-      // No anchor - just insert comment at position
-      result = result.slice(0, c.pos) + ` ${comment}` + result.slice(c.pos);
+      // Insert comment at the anchor position with no surrounding whitespace
+      // tweaks; CriticMarkup blocks are invisible to readers, and adding a
+      // leading space would shift prose byte-for-byte (relevant when callers
+      // verify that --comments-only didn't touch the original).
+      result = result.slice(0, c.pos) + comment + result.slice(c.pos);
     }
+    placedCount++;
+  }
+  if (outStats) {
+    outStats.placed = placedCount;
+    outStats.deduped = dedupedCount;
+    outStats.unmatched = unmatchedCount;
   }
   // Log warnings unless quiet mode
@@ -509,6 +494,9 @@ export function insertCommentsIntoMarkdown(
     if (unmatchedCount > 0) {
       console.warn(`Warning: ${unmatchedCount} comment(s) could not be matched to anchor text`);
     }
+    if (dedupedCount > 0) {
+      console.warn(`Note: ${dedupedCount} comment(s) already present at anchor — skipped to avoid duplication`);
+    }
     if (duplicateWarnings.length > 0) {
       console.warn(`Warning: Duplicate anchor text found (using context & tie-breaks for placement):`);
       for (const w of duplicateWarnings) {

package/lib/response.ts CHANGED Viewed

@@ -46,7 +46,7 @@ export function parseCommentsWithReplies(text: string, file: string = ''): Comme
     if (matches.length === 0) continue;
     // Get context (surrounding text without comments)
-    const contextLine = line.replace(/\{>>[^<]+<<\}/g, '').trim();
+    const contextLine = line.replace(/\{>>[\s\S]+?<<\}/g, '').trim();
     const context = contextLine.slice(0, 100) + (contextLine.length > 100 ? '...' : '');
     // First match is the original comment, rest are replies

package/lib/word-extraction.ts CHANGED Viewed

@@ -42,6 +42,17 @@ export interface CommentAnchorsResult {
   fullDocText: string;
 }
+export interface DocxHeading {
+  /**
+   * Heading style name from `<w:pStyle>`, e.g. "Heading1".
+   * `extractHeadings` only emits paragraphs whose style name contains
+   * "heading" (case-insensitive).
+   */
+  style: string;
+  /**
+   * Heading depth: 1, 2, 3, ... (parsed from style name; 0 if unknown).
+   * `extractHeadings` uses the first run of digits found in `style`.
+   */
+  level: number;
+  /**
+   * Concatenated text content of the heading paragraph.
+   * As produced by `extractHeadings`: entity-decoded, trimmed, includes
+   * `<w:delText>` runs, and never the empty string.
+   */
+  text: string;
+  /**
+   * Position in fullDocText (same coordinate system as CommentAnchorData.docPosition).
+   * `extractHeadings` derives it from the XML offset of the paragraph's
+   * opening tag, snapped to the start of the text node at or after it.
+   */
+  docPosition: number;
+}
 export interface WordTable {
   markdown: string;
   rowCount: number;
@@ -331,6 +342,88 @@ export async function extractCommentAnchors(docxPath: string): Promise<CommentAn
   return { anchors, fullDocText };
 }
+/**
+ * Extract heading paragraphs from a docx, with their text positions in the
+ * same coordinate system as `extractCommentAnchors`'s `fullDocText` and
+ * `CommentAnchorData.docPosition`.
+ *
+ * Headings are paragraphs whose `<w:pStyle>` is a Heading style. Reading
+ * styles directly is more reliable than keyword-matching the concatenated
+ * body text — there, paragraph boundaries are gone, so the literal string
+ * "Methods" can appear inside prose ("results across countries") and the
+ * structured-abstract label "Methods:" loses its colon when text runs are
+ * concatenated.
+ *
+ * Steps:
+ *   1. Read `word/document.xml` from the archive with `adm-zip`.
+ *   2. Walk every `<w:t>` element, summing decoded text lengths, to build
+ *      an XML-offset → text-offset table.
+ *   3. Scan each `<w:p>…</w:p>` match and keep those whose first
+ *      `<w:pStyle w:val="…">` contains "heading" (case-insensitive) and
+ *      whose combined `<w:t>` / `<w:delText>` text is non-empty after
+ *      trimming.
+ *
+ * `<w:delText>` contributes to a heading's `text` but not to the offset
+ * table, which counts `<w:t>` content only.
+ *
+ * @param docxPath - Filesystem path of the .docx file.
+ * @returns Headings in the order they appear in the XML; an empty array
+ *   when the archive has no `word/document.xml` entry.
+ * @throws Error with message `File not found: <docxPath>` when the path
+ *   does not exist.
+ */
+export async function extractHeadings(docxPath: string): Promise<DocxHeading[]> {
+  const AdmZip = (await import('adm-zip')).default;
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const docEntry = zip.getEntry('word/document.xml');
+  if (!docEntry) return [];
+  const xml = docEntry.getData().toString('utf8');
+  // Build the same xml-pos → text-pos mapping that extractCommentAnchors does
+  const textNodePattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
+  const nodes: Array<{ xmlStart: number; xmlEnd: number; textStart: number; textEnd: number }> = [];
+  let textPos = 0;
+  let m;
+  while ((m = textNodePattern.exec(xml)) !== null) {
+    const decoded = decodeXmlEntities(m[1] ?? '');
+    nodes.push({
+      xmlStart: m.index,
+      xmlEnd: m.index + m[0].length,
+      textStart: textPos,
+      textEnd: textPos + decoded.length,
+    });
+    textPos += decoded.length;
+  }
+  function xmlToTextPos(xmlPos: number): number {
+    for (const n of nodes) {
+      if (xmlPos >= n.xmlStart && xmlPos < n.xmlEnd) return n.textStart; /* inside this text node */
+      if (xmlPos < n.xmlStart) return n.textStart; /* before it: snap forward to the next text node */
+    }
+    return nodes.length ? nodes[nodes.length - 1].textEnd : 0; /* past the last text node, or no text at all */
+  }
+  const headings: DocxHeading[] = [];
+  const paraPattern = /<w:p\b[^>]*>([\s\S]*?)<\/w:p>/g;
+  let pm;
+  while ((pm = paraPattern.exec(xml)) !== null) {
+    const inner = pm[1];
+    const styleMatch = inner.match(/<w:pStyle[^>]*w:val="([^"]+)"/);
+    if (!styleMatch) continue;
+    const style = styleMatch[1];
+    if (!/heading/i.test(style)) continue;
+    // Concatenate text runs; include w:delText so a heading inside a tracked
+    // deletion is still surfaced (verifying anchors against an original draft)
+    const textInRange = /<w:t[^>]*>([^<]*)<\/w:t>|<w:delText[^>]*>([^<]*)<\/w:delText>/g;
+    let txt = '';
+    let tm;
+    while ((tm = textInRange.exec(inner)) !== null) {
+      txt += decodeXmlEntities(tm[1] || tm[2] || '');
+    }
+    const trimmed = txt.trim();
+    if (!trimmed) continue; /* heading paragraph with no text: skip */
+    const levelMatch = style.match(/(\d+)/);
+    const level = levelMatch ? parseInt(levelMatch[1], 10) : 0; /* 0 when the style name has no digits */
+    headings.push({
+      style,
+      level,
+      text: trimmed,
+      docPosition: xmlToTextPos(pm.index),
+    });
+  }
+  return headings;
+}
 /**
  * Decode XML entities in text
  */