npm - docrev - Versions diffs - 0.9.7 → 0.9.13 - Mend

docrev 0.9.7 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (78)

package/CHANGELOG.md +21 -0
package/dev_notes/stress2/adversarial.docx +0 -0
package/dev_notes/stress2/build_adversarial.ts +186 -0
package/dev_notes/stress2/drift_matcher.ts +62 -0
package/dev_notes/stress2/probe_anchors.ts +35 -0
package/dev_notes/stress2/project/adversarial.docx +0 -0
package/dev_notes/stress2/project/discussion.before.md +3 -0
package/dev_notes/stress2/project/discussion.md +3 -0
package/dev_notes/stress2/project/methods.before.md +20 -0
package/dev_notes/stress2/project/methods.md +20 -0
package/dev_notes/stress2/project/rev.yaml +5 -0
package/dev_notes/stress2/project/sections.yaml +4 -0
package/dev_notes/stress2/sections.yaml +5 -0
package/dev_notes/stress2/trace_placement.ts +50 -0
package/dev_notes/stresstest_boundaries.ts +27 -0
package/dev_notes/stresstest_drift_apply.ts +43 -0
package/dev_notes/stresstest_drift_compare.ts +43 -0
package/dev_notes/stresstest_drift_v2.ts +54 -0
package/dev_notes/stresstest_inspect.ts +54 -0
package/dev_notes/stresstest_pstyle.ts +55 -0
package/dev_notes/stresstest_section_debug.ts +23 -0
package/dev_notes/stresstest_split.ts +70 -0
package/dev_notes/stresstest_trace.ts +19 -0
package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
package/dist/lib/anchor-match.d.ts +10 -0
package/dist/lib/anchor-match.d.ts.map +1 -1
package/dist/lib/anchor-match.js +35 -0
package/dist/lib/anchor-match.js.map +1 -1
package/dist/lib/annotations.d.ts.map +1 -1
package/dist/lib/annotations.js +16 -6
package/dist/lib/annotations.js.map +1 -1
package/dist/lib/build.d.ts +12 -0
package/dist/lib/build.d.ts.map +1 -1
package/dist/lib/build.js +12 -0
package/dist/lib/build.js.map +1 -1
package/dist/lib/commands/quality.js +1 -1
package/dist/lib/commands/quality.js.map +1 -1
package/dist/lib/commands/section-boundaries.d.ts +1 -1
package/dist/lib/commands/section-boundaries.d.ts.map +1 -1
package/dist/lib/commands/section-boundaries.js +12 -2
package/dist/lib/commands/section-boundaries.js.map +1 -1
package/dist/lib/commands/sync.js +19 -13
package/dist/lib/commands/sync.js.map +1 -1
package/dist/lib/commands/verify-anchors.d.ts.map +1 -1
package/dist/lib/commands/verify-anchors.js +15 -4
package/dist/lib/commands/verify-anchors.js.map +1 -1
package/dist/lib/comment-realign.js +2 -2
package/dist/lib/comment-realign.js.map +1 -1
package/dist/lib/import.d.ts +12 -0
package/dist/lib/import.d.ts.map +1 -1
package/dist/lib/import.js +289 -60
package/dist/lib/import.js.map +1 -1
package/dist/lib/response.js +1 -1
package/dist/lib/response.js.map +1 -1
package/dist/lib/types.d.ts +20 -0
package/dist/lib/types.d.ts.map +1 -1
package/dist/lib/word-extraction.d.ts +6 -0
package/dist/lib/word-extraction.d.ts.map +1 -1
package/dist/lib/word-extraction.js +46 -3
package/dist/lib/word-extraction.js.map +1 -1
package/dist/lib/wordcomments.d.ts.map +1 -1
package/dist/lib/wordcomments.js +188 -78
package/dist/lib/wordcomments.js.map +1 -1
package/lib/anchor-match.ts +38 -0
package/lib/annotations.ts +16 -6
package/lib/build.ts +24 -0
package/lib/commands/quality.ts +1 -1
package/lib/commands/section-boundaries.ts +11 -1
package/lib/commands/sync.ts +21 -16
package/lib/commands/verify-anchors.ts +15 -4
package/lib/comment-realign.ts +2 -2
package/lib/import.ts +304 -61
package/lib/response.ts +1 -1
package/lib/types.ts +20 -0
package/lib/word-extraction.ts +50 -3
package/lib/wordcomments.ts +205 -88
package/package.json +1 -1
package/dist/package.json +0 -137

package/lib/commands/sync.ts CHANGED Viewed

@@ -571,10 +571,12 @@ async function syncCommentsOnly(
   let comments;
   let anchors;
   let headings;
+  let fullDocText = '';
   try {
     comments = await extractWordComments(docx);
     const result = await extractCommentAnchors(docx);
     anchors = result.anchors;
+    fullDocText = result.fullDocText;
     headings = await extractHeadings(docx);
     spin.stop();
   } catch (err) {
@@ -592,7 +594,7 @@ async function syncCommentsOnly(
     return;
   }
-  const boundaries = computeSectionBoundaries(config.sections, headings);
+  const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
   if (boundaries.length === 0) {
     console.error(fmt.status('warning', 'No section headings detected in Word document.'));
@@ -615,12 +617,12 @@ async function syncCommentsOnly(
   }
   const firstBoundaryStart = boundaries[0].start;
-  const results: Array<{ file: string; placed: number; unmatched: number; skipped: boolean }> = [];
+  const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];
   for (const boundary of activeBoundaries) {
     const sectionPath = path.join(options.dir, boundary.file);
     if (!fs.existsSync(sectionPath)) {
-      results.push({ file: boundary.file, placed: 0, unmatched: 0, skipped: true });
+      results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
       continue;
     }
@@ -635,55 +637,58 @@ async function syncCommentsOnly(
     });
     if (sectionComments.length === 0) {
-      results.push({ file: boundary.file, placed: 0, unmatched: 0, skipped: false });
+      results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
       continue;
     }
     const original = fs.readFileSync(sectionPath, 'utf-8');
-    const commentPattern = /\{>>.*?<<\}/gs;
-    const beforeCount = (original.match(commentPattern) || []).length;
+    const stats = { placed: 0, deduped: 0, unmatched: 0 };
     const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
       quiet: !process.env.DEBUG,
       sectionBoundary: { start: boundary.start, end: boundary.end },
       wrapAnchor: false,
+      outStats: stats,
     });
-    const afterCount = (annotated.match(commentPattern) || []).length;
-    const placed = afterCount - beforeCount;
-    const unmatched = sectionComments.length - placed;
-    if (!options.dryRun && placed > 0) {
+    if (!options.dryRun && stats.placed > 0) {
       fs.writeFileSync(sectionPath, annotated, 'utf-8');
     }
-    results.push({ file: boundary.file, placed, unmatched, skipped: false });
+    results.push({ file: boundary.file, ...stats, skipped: false });
   }
   const tableRows = results.map(r => {
     if (r.skipped) {
-      return [chalk.dim(r.file), chalk.yellow('missing'), '', ''];
+      return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
     }
     return [
       chalk.bold(r.file),
       chalk.green(`${r.placed}`),
+      r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
       r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
       chalk.dim('comments only'),
     ];
   });
   console.log(fmt.table(
-    ['File', 'Placed', 'Unmatched', 'Mode'],
+    ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
     tableRows,
-    { align: ['left', 'right', 'right', 'left'] },
+    { align: ['left', 'right', 'right', 'right', 'left'] },
   ));
   console.log();
   const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
+  const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
   const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);
   const lines: string[] = [];
   lines.push(`${chalk.bold(comments.length)} comments in document`);
-  lines.push(`${chalk.bold(totalPlaced)} placed at fuzzy-matched anchors`);
+  if (totalPlaced > 0) {
+    lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
+  }
+  if (totalDeduped > 0) {
+    lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
+  }
   if (totalUnmatched > 0) {
     lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
   }

package/lib/commands/verify-anchors.ts CHANGED Viewed

@@ -23,7 +23,7 @@ import {
   jsonOutput,
 } from './context.js';
 import type { Command } from 'commander';
-import { findAnchorInText, classifyStrategy, type AnchorMatchQuality } from '../anchor-match.js';
+import { findAnchorInText, classifyStrategy, scoreContextAt, type AnchorMatchQuality } from '../anchor-match.js';
 import type { CommentAnchorData } from '../word-extraction.js';
 import { computeSectionBoundaries } from './section-boundaries.js';
@@ -71,10 +71,12 @@ export function register(program: Command): void {
       let comments;
       let anchors;
       let headings;
+      let fullDocText = '';
       try {
         comments = await extractWordComments(docxPath);
         const result = await extractCommentAnchors(docxPath);
         anchors = result.anchors;
+        fullDocText = result.fullDocText;
         headings = await extractHeadings(docxPath);
       } catch (err) {
         const error = err as Error;
@@ -88,7 +90,7 @@ export function register(program: Command): void {
         return;
       }
-      const boundaries = computeSectionBoundaries(config.sections, headings);
+      const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
       // Cache section markdown contents on first read
       const sectionCache = new Map<string, string>();
@@ -165,7 +167,16 @@ export function register(program: Command): void {
         const search = findAnchorInText(anchor.anchor, md, anchor.before, anchor.after);
         let quality: AnchorMatchQuality | 'ambiguous' = classifyStrategy(search.strategy, search.occurrences.length);
         if (quality === 'clean' && search.occurrences.length > 1) {
-          quality = 'ambiguous';
+          // Multiple direct hits — only flag as ambiguous when before/after
+          // context can't pick a clear winner. If one candidate scores
+          // strictly higher than the others, sync will place it correctly.
+          const anchorLen = anchor.anchor.length;
+          const scores = search.occurrences.map(p => scoreContextAt(p, md, anchor.before, anchor.after, anchorLen));
+          const max = Math.max(...scores);
+          const winners = scores.filter(s => s === max).length;
+          if (max === 0 || winners > 1) {
+            quality = 'ambiguous';
+          }
         }
         reports.push({
@@ -239,7 +250,7 @@ function printReport(docxPath: string, reports: CommentReport[]): void {
   if (totals.unmatched > 0 || totals.ambiguous > 0) {
     console.log();
     console.log(chalk.dim('Comments flagged "unmatched" or "ambiguous" need manual placement.'));
-    console.log(chalk.dim('Run "rev sync --no-overwrite" to import the matched ones without touching prose.'));
+    console.log(chalk.dim('Run "rev sync --comments-only" to import the matched ones without touching prose.'));
   }
 }

package/lib/comment-realign.ts CHANGED Viewed

@@ -370,7 +370,7 @@ export async function realignComments(
   // Strip ALL comments (both authors) from markdown to start fresh
   let markdown = originalMarkdown;
-  markdown = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
+  markdown = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
   console.log(`Stripped all comments from markdown`);
   // Parse markdown paragraphs
@@ -469,7 +469,7 @@ export async function realignMarkdown(
     );
     // Strip ALL comments from markdown
-    let result = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
+    let result = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
     // Parse markdown paragraphs
     const mdParagraphs = parseMdParagraphs(result);

package/lib/import.ts CHANGED Viewed

@@ -36,7 +36,65 @@ import {
   parseVisibleComments,
   convertVisibleComments,
 } from './restore-references.js';
-import { findAnchorInText } from './anchor-match.js';
+import { findAnchorInText, findAllOccurrences } from './anchor-match.js';
+/**
+ * Choose one position out of the candidate `occurrences` of an anchor by
+ * scoring how well the text around each candidate in `result` matches the
+ * `before` / `after` context captured from the docx, while skipping every
+ * position already present in `usedPositions` so distinct comments are not
+ * stacked on the same anchor instance.
+ *
+ * Scoring, per unused candidate (all comparisons are lower-cased):
+ *  - the window before the candidate is `before.length + 20` characters
+ *    ending at the candidate; the window after starts at
+ *    `pos + anchorLen` and spans `after.length + 20` characters;
+ *  - every whitespace-separated context word longer than 3 characters that
+ *    appears in the matching window adds 2;
+ *  - the last 30 characters of `before` found verbatim in the before-window
+ *    add 5, and likewise the first 30 characters of `after` in the
+ *    after-window.
+ * The highest score wins; equal scores resolve to the smallest position.
+ *
+ * This function only reads `usedPositions`; it never adds the chosen
+ * position to the set, so the caller is responsible for recording it.
+ *
+ * NOTE(review): verify-anchors scores candidates through `scoreContextAt`
+ * from anchor-match — presumably the same weights as here; confirm the two
+ * cannot drift apart.
+ *
+ * @param occurrences   Candidate start offsets of the anchor in `result`.
+ * @param result        Text the offsets index into.
+ * @param before        Context preceding the anchor; an empty string
+ *                      disables before-scoring.
+ * @param after         Context following the anchor; an empty string
+ *                      disables after-scoring.
+ * @param anchorLen     Length of the matched anchor, used to locate the
+ *                      start of the after-window.
+ * @param usedPositions Offsets already claimed by earlier comments.
+ * @returns The chosen offset, or -1 when `occurrences` is empty or every
+ *          candidate is already used.
+ */
+function pickBestOccurrence(
+  occurrences: number[],
+  result: string,
+  before: string,
+  after: string,
+  anchorLen: number,
+  usedPositions: Set<number>,
+): number {
+  if (occurrences.length === 0) return -1; // nothing to choose from
+  if (occurrences.length === 1) {
+    return usedPositions.has(occurrences[0]) ? -1 : occurrences[0]; // sole candidate: no scoring needed
+  }
+  let bestIdx = occurrences.find(p => !usedPositions.has(p)) ?? -1; // despite the name, this holds a text offset
+  if (bestIdx < 0) return -1; // every candidate is taken
+  let bestScore = -1; // below any real score, so the first unused candidate always replaces the seed
+  for (const pos of occurrences) {
+    if (usedPositions.has(pos)) continue;
+    let score = 0;
+    if (before) {
+      const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
+      const beforeLower = before.toLowerCase();
+      const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
+      for (const word of beforeWords) {
+        if (contextBefore.includes(word)) score += 2;
+      }
+      if (contextBefore.includes(beforeLower.slice(-30))) score += 5; // verbatim tail of the docx context
+    }
+    if (after) {
+      const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
+      const afterLower = after.toLowerCase();
+      const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
+      for (const word of afterWords) {
+        if (contextAfter.includes(word)) score += 2;
+      }
+      if (contextAfter.includes(afterLower.slice(0, 30))) score += 5; // verbatim head of the docx context
+    }
+    if (score > bestScore || (score === bestScore && pos < bestIdx)) { // ties go to the smaller offset
+      bestScore = score;
+      bestIdx = pos;
+    }
+  }
+  return bestIdx;
+}
 // Re-export everything so existing imports from './import.js' still work
 export {
@@ -100,6 +158,14 @@ export interface InsertCommentsOptions {
    * comments sharing one anchor don't produce nested broken markup.
    */
   wrapAnchor?: boolean;
+  /**
+   * Mutable output: when provided, the function fills in counters so callers
+   * can distinguish placement outcomes in their summary. `placed` counts new
+   * insertions, `deduped` counts comments that were already present at their
+   * anchor (skipped to avoid duplication on re-sync), `unmatched` counts
+   * comments whose anchor couldn't be located.
+   */
+  outStats?: { placed: number; deduped: number; unmatched: number };
 }
 export interface CommentWithPos {
@@ -175,6 +241,71 @@ export interface MoveExtractedMediaResult {
 // Functions
 // ============================================
+/**
+ * Move an insertion offset that falls before or inside the first markdown
+ * heading line of `text` to the start of the first non-blank line after
+ * that heading, so the comment stays inside the section. A comment
+ * authored at the very start of a Word section maps to `pos === 0`, but
+ * inserting at column 0 of a markdown file that begins with `# Heading`
+ * puts the `{>>...<<}` before the heading marker — Pandoc then treats the
+ * line as ordinary paragraph text and the comment renders in the previous
+ * section.
+ *
+ * Behaviour by case:
+ *  - `pos > 0` and no heading line exists: `pos` is returned unchanged.
+ *  - `pos > 0` and `pos` is at or beyond the end of the first heading's
+ *    text (this includes the newline and any blank line right after the
+ *    heading): `pos` is returned unchanged.
+ *  - otherwise (`pos <= 0`, or `pos` before the end of the first heading):
+ *    the offset just past the heading line and any following `\n` / `\r`
+ *    characters is returned; if no heading line exists, `pos` is returned.
+ *
+ * NOTE(review): the heading regex uses the `m` flag and there is no check
+ * that the match sits at the top of the file, so the FIRST heading anywhere
+ * in `text` is used. For a file that opens with body text, any `pos` ahead
+ * of that heading is relocated past it — confirm section files always
+ * begin with their heading line.
+ *
+ * @param text Markdown content of the section.
+ * @param pos  Proposed insertion offset into `text`.
+ * @returns The offset to insert at, possibly advanced past the heading.
+ */
+function pushPastSectionHeading(text: string, pos: number): number {
+  if (pos > 0) {
+    const headingMatch = text.match(/^#{1,6}\s.+$/m);
+    if (!headingMatch || headingMatch.index === undefined) return pos;
+    const headingEnd = headingMatch.index + headingMatch[0].length; // end of heading text, line break excluded
+    if (pos >= headingEnd) return pos;
+  }
+  // pos is before the end of the first heading line (or is <= 0). Advance
+  // to the first non-blank position after the heading paragraph.
+  const headingLine = text.match(/^#{1,6}\s.+(?:\n|$)/m);
+  if (!headingLine || headingLine.index === undefined) return pos;
+  let after = headingLine.index + headingLine[0].length;
+  // Skip line-break characters so we land at the start of the first body paragraph (or at text.length).
+  while (after < text.length && (text[after] === '\n' || text[after] === '\r')) {
+    after++;
+  }
+  return after;
+}
+/**
+ * Snap a position to the nearest whitespace character within ±50 chars so
+ * a proportional fallback insertion avoids landing mid-word.
+ *
+ * Out-of-range input is clamped: `pos <= 0` yields 0 and
+ * `pos >= text.length` yields `text.length`. If the character at `pos` is
+ * already whitespace, `pos` is returned as-is. Otherwise the search widens
+ * one character at a time, testing the forward neighbour before the
+ * backward one at each distance, and returns the index of the first
+ * whitespace character found. When no whitespace exists within 50
+ * characters on either side, `pos` is returned unchanged — so the result
+ * can still be mid-word in that case.
+ *
+ * @param text Text the offset indexes into.
+ * @param pos  Proposed offset.
+ * @returns The snapped (or clamped, or unchanged) offset.
+ */
+function snapToWordBoundary(text: string, pos: number): number {
+  if (pos <= 0) return 0;
+  if (pos >= text.length) return text.length;
+  if (/\s/.test(text[pos] ?? '')) return pos;
+  for (let d = 1; d <= 50; d++) {
+    if (pos + d < text.length && /\s/.test(text[pos + d] ?? '')) return pos + d;
+    if (pos - d >= 0 && /\s/.test(text[pos - d] ?? '')) return pos - d;
+  }
+  return pos;
+}
+/**
+ * Final-resort placement when every text-matching strategy failed. The docx
+ * carries a real `<w:commentRangeStart w:id="N">` marker at a known offset
+ * inside its body text — that's a structural anchor, even if the anchored
+ * span itself is empty and the surrounding context drifted in the target.
+ *
+ * Map docPosition into the target markdown proportionally and snap to a word
+ * boundary. This is approximate when the document was heavily restructured,
+ * but it's strictly better than silently dropping a reviewer's comment: the
+ * comment lands in roughly the right neighborhood and the reviewer can
+ * relocate it during their next pass.
+ *
+ * The ratio `docPosition / docLength` is capped at 1.0, multiplied by
+ * `target.length` and floored; the result is then passed through
+ * `snapToWordBoundary` and `pushPastSectionHeading`, in that order. There
+ * is no explicit lower clamp here: a negative raw offset is mapped to 0 by
+ * `snapToWordBoundary`.
+ *
+ * @param anchorData Anchor record for the comment; only `docPosition` and
+ *                   `docLength` are read.
+ * @param target     Markdown text the comment will be inserted into.
+ * @returns An insertion offset into `target`, or `null` when
+ *          `anchorData.docLength` is zero or negative (no ratio can be
+ *          computed).
+ */
+function proportionalFallback(
+  anchorData: CommentAnchorData,
+  target: string,
+): number | null {
+  if (anchorData.docLength <= 0) return null;
+  const proportion = Math.min(anchorData.docPosition / anchorData.docLength, 1.0);
+  const rawPos = Math.floor(proportion * target.length);
+  return pushPastSectionHeading(target, snapToWordBoundary(target, rawPos));
+}
 /**
  * Insert comments into markdown text based on anchor texts with context
  */
@@ -184,17 +315,48 @@ export function insertCommentsIntoMarkdown(
   anchors: Map<string, CommentAnchorData | string>,
   options: InsertCommentsOptions = {}
 ): string {
-  const { quiet = false, sectionBoundary = null, wrapAnchor = true } = options;
+  const { quiet = false, sectionBoundary = null, wrapAnchor = true, outStats } = options;
   let result = markdown;
   let unmatchedCount = 0;
+  let placedCount = 0;
   const duplicateWarnings: string[] = [];
   const usedPositions = new Set<number>(); // For tie-breaking: track used positions
+  // Resolve threading: replies share their parent's anchor in Word, so they
+  // must inherit the parent's position and ride alongside it as one cluster.
+  // Letting each reply run through anchor scoring scatters the cluster (the
+  // same docPosition forces `usedPositions` to push later replies onto a
+  // different occurrence), which on re-build looks like independent comments
+  // and loses the paraIdParent threading. See gcol33/docrev issue #2.
+  const inputById = new Map<string, WordComment>();
+  for (const c of comments) inputById.set(c.id, c);
+  function rootIdOf(c: WordComment): string {
+    let cur: WordComment = c;
+    const seen = new Set<string>();
+    while (cur.parentId && !seen.has(cur.id)) {
+      seen.add(cur.id);
+      const parent = inputById.get(cur.parentId);
+      if (!parent || parent === cur) break;
+      cur = parent;
+    }
+    return cur.id;
+  }
+  const replyRootId = new Map<string, string>();
+  for (const c of comments) {
+    const root = rootIdOf(c);
+    if (root !== c.id) replyRootId.set(c.id, root);
+  }
   // Anchor matching primitives live in lib/anchor-match.ts so that
   // `rev verify-anchors` can use the same strategies for drift reporting.
-  // Get all positions in order (for sequential tie-breaking)
+  // Get all positions in order (for sequential tie-breaking).
+  // Replies skip scoring entirely — they piggyback on their root's position
+  // in the emit pass below.
   const commentsWithPositions = comments.map((c): CommentWithPos => {
+    if (replyRootId.has(c.id)) {
+      return { ...c, pos: -1, anchorText: null, strategy: 'reply' };
+    }
     const anchorData = anchors.get(c.id);
     if (!anchorData) {
       unmatchedCount++;
@@ -222,6 +384,18 @@ export function insertCommentsIntoMarkdown(
         const proportion = Math.min(relativePos / sectionLength, 1.0);
         const markdownPos = Math.floor(proportion * result.length);
+        // For empty anchors, before/after context is the only signal that
+        // pinpoints the original split — without it, proportional placement
+        // can land mid-word or split unrelated phrases. Try context match
+        // first; only fall through to proportional when context is gone.
+        if ((!anchor || isEmpty) && (before || after)) {
+          const ctx = findAnchorInText('', result, before, after);
+          if (ctx.occurrences.length > 0) {
+            const pos = pushPastSectionHeading(result, ctx.occurrences[0]);
+            return { ...c, pos, anchorText: null, isEmpty: true, strategy: `ctx:${ctx.strategy}` };
+          }
+        }
         let insertPos = markdownPos;
         // Look for nearby word boundary
@@ -231,26 +405,52 @@ export function insertCommentsIntoMarkdown(
           insertPos = Math.max(0, markdownPos - 25) + spaceIdx;
         }
-        // If we have anchor text, try to find it near this position
+        // If we have anchor text, try to find it near this position.
+        // Collect ALL occurrences in the local window, then disambiguate
+        // via before/after context + usedPositions — otherwise two
+        // comments sharing the same anchor word would both collide at
+        // the leftmost match. The context-scoring helper handles the
+        // "repeated formulaic prose" case using docx-side context, which
+        // is a stronger signal than raw distance to the proportional
+        // insertPos (insertPos is itself an approximation).
         if (anchor && !isEmpty) {
           const searchStart = Math.max(0, insertPos - 200);
           const searchEnd = Math.min(result.length, insertPos + 200);
           const localSearch = result.slice(searchStart, searchEnd).toLowerCase();
           const anchorLower = anchor.toLowerCase();
-          const localIdx = localSearch.indexOf(anchorLower);
-          if (localIdx !== -1) {
-            return { ...c, pos: searchStart + localIdx, anchorText: anchor, anchorEnd: searchStart + localIdx + anchor.length, strategy: 'position+text' };
+          const localHits = findAllOccurrences(localSearch, anchorLower).map(i => searchStart + i);
+          if (localHits.length > 0) {
+            const chosen = pickBestOccurrence(localHits, result, before, after, anchor.length, usedPositions);
+            if (chosen >= 0) {
+              if (localHits.length > 1) {
+                duplicateWarnings.push(`"${anchor.slice(0, 40)}${anchor.length > 40 ? '...' : ''}" appears ${localHits.length} times in section window`);
+              }
+              usedPositions.add(chosen);
+              return { ...c, pos: chosen, anchorText: anchor, anchorEnd: chosen + anchor.length, strategy: 'position+text' };
+            }
           }
           // Try first few words
           const words = anchor.split(/\s+/).slice(0, 4).join(' ').toLowerCase();
           if (words.length >= 10) {
-            const partialIdx = localSearch.indexOf(words);
-            if (partialIdx !== -1) {
-              return { ...c, pos: searchStart + partialIdx, anchorText: words, anchorEnd: searchStart + partialIdx + words.length, strategy: 'position+partial' };
+            const partialHits = findAllOccurrences(localSearch, words).map(i => searchStart + i);
+            if (partialHits.length > 0) {
+              const chosen = pickBestOccurrence(partialHits, result, before, after, words.length, usedPositions);
+              if (chosen >= 0) {
+                usedPositions.add(chosen);
+                return { ...c, pos: chosen, anchorText: words, anchorEnd: chosen + words.length, strategy: 'position+partial' };
+              }
             }
           }
         }
+        // A docPosition at the very start of a section maps to markdownPos=0,
+        // which sits before the file's `# Heading` line and gets rendered in
+        // the previous section. Push past the heading line so the comment
+        // stays inside the section it was authored in.
+        insertPos = pushPastSectionHeading(result, insertPos);
         return { ...c, pos: insertPos, anchorText: null, strategy: 'position-only' };
       }
     }
@@ -263,6 +463,14 @@ export function insertCommentsIntoMarkdown(
           return { ...c, pos: occurrences[0], anchorText: null, isEmpty: true };
         }
       }
+      // Last resort: docx carried a structural marker at docPosition; map
+      // it proportionally into the target so the comment isn't dropped.
+      if (typeof anchorData === 'object') {
+        const fallback = proportionalFallback(anchorData, result);
+        if (fallback !== null) {
+          return { ...c, pos: fallback, anchorText: null, isEmpty: true, strategy: 'proportional-fallback' };
+        }
+      }
       unmatchedCount++;
       return { ...c, pos: -1, anchorText: null, isEmpty: true };
     }
@@ -271,6 +479,14 @@ export function insertCommentsIntoMarkdown(
     const { occurrences, matchedAnchor, strategy, stripped } = findAnchorInText(anchor, result, before, after);
     if (occurrences.length === 0) {
+      // Same last-resort as the empty-anchor path: anchor text is gone from
+      // the target, but the marker's text-offset survived extraction.
+      if (typeof anchorData === 'object') {
+        const fallback = proportionalFallback(anchorData, result);
+        if (fallback !== null) {
+          return { ...c, pos: fallback, anchorText: null, strategy: 'proportional-fallback' };
+        }
+      }
       unmatchedCount++;
       return { ...c, pos: -1, anchorText: null };
     }
@@ -290,82 +506,106 @@ export function insertCommentsIntoMarkdown(
       duplicateWarnings.push(`"${matchedAnchor.slice(0, 40)}${matchedAnchor.length > 40 ? '...' : ''}" appears ${occurrences.length} times`);
     }
-    let bestIdx = occurrences.find(p => !usedPositions.has(p)) ?? occurrences[0];
-    let bestScore = -1;
-    for (const pos of occurrences) {
-      if (usedPositions.has(pos)) continue;
-      let score = 0;
-      if (before) {
-        const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
-        const beforeLower = before.toLowerCase();
-        const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
-        for (const word of beforeWords) {
-          if (contextBefore.includes(word)) score += 2;
-        }
-        if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
-      }
-      if (after) {
-        const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
-        const afterLower = after.toLowerCase();
-        const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
-        for (const word of afterWords) {
-          if (contextAfter.includes(word)) score += 2;
-        }
-        if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
-      }
-      if (score > bestScore || (score === bestScore && pos < bestIdx)) {
-        bestScore = score;
-        bestIdx = pos;
-      }
-    }
-    usedPositions.add(bestIdx);
+    const bestIdx = pickBestOccurrence(occurrences, result, before, after, anchorLen, usedPositions);
+    const finalIdx = bestIdx >= 0 ? bestIdx : occurrences[0];
+    usedPositions.add(finalIdx);
     if (matchedAnchor) {
-      return { ...c, pos: bestIdx, anchorText: matchedAnchor, anchorEnd: bestIdx + anchorLen };
+      return { ...c, pos: finalIdx, anchorText: matchedAnchor, anchorEnd: finalIdx + anchorLen };
     } else {
-      return { ...c, pos: bestIdx, anchorText: null };
+      return { ...c, pos: finalIdx, anchorText: null };
     }
   });
-  // Log any unmatched comments for debugging
-  const unmatched = commentsWithPositions.filter((c) => c.pos < 0);
+  // Group comments into clusters (root + ordered replies). The root carries
+  // the resolved position; replies inherit it and ride along in input order
+  // so the rebuilt CriticMarkup looks like `{>>p<<}{>>r1<<}{>>r2<<}[anchor]`
+  // and adjacency-based reply detection picks the cluster up again.
+  const byId = new Map<string, CommentWithPos>();
+  for (const cwp of commentsWithPositions) byId.set(cwp.id, cwp);
+  const repliesByRoot = new Map<string, CommentWithPos[]>();
+  for (const c of comments) {
+    const rootId = replyRootId.get(c.id);
+    if (!rootId) continue;
+    const cwp = byId.get(c.id);
+    if (!cwp) continue;
+    const list = repliesByRoot.get(rootId);
+    if (list) list.push(cwp);
+    else repliesByRoot.set(rootId, [cwp]);
+  }
+  // Replies whose root never resolved (parent missing from the input slice or
+  // parent unmatched) count as unmatched too — there's no position to attach
+  // them to.
+  for (const [rootId, replies] of repliesByRoot) {
+    const root = byId.get(rootId);
+    if (!root || root.pos < 0) {
+      unmatchedCount += replies.length;
+    }
+  }
+  // Roots only — replies attach during emission.
+  const rootsWithPos = commentsWithPositions.filter(
+    c => !replyRootId.has(c.id)
+  );
+  // Log any unmatched roots for debugging
+  const unmatched = rootsWithPos.filter((c) => c.pos < 0);
   if (process.env.DEBUG) {
-    console.log(`[DEBUG] insertComments: ${comments.length} input, ${commentsWithPositions.length} processed, ${unmatched.length} unmatched`);
+    console.log(`[DEBUG] insertComments: ${comments.length} input, ${rootsWithPos.length} roots, ${unmatched.length} unmatched roots, ${replyRootId.size} replies`);
     if (unmatched.length > 0) {
       unmatched.forEach(c => console.log(`[DEBUG]   Unmatched ID=${c.id}: anchor="${(c.anchorText || 'none').slice(0,30)}"`));
     }
   }
-  const matched = commentsWithPositions.filter((c) => c.pos >= 0);
+  const matchedRoots = rootsWithPos.filter((c) => c.pos >= 0);
   // Sort by position descending (insert from end to avoid offset issues)
-  matched.sort((a, b) => b.pos - a.pos);
+  matchedRoots.sort((a, b) => b.pos - a.pos);
-  // Insert each comment. With `wrapAnchor` (the default), the anchor text
+  // Insert each cluster. With `wrapAnchor` (the default), the anchor text
   // gets wrapped in `[anchor]{.mark}` so the rebuilt docx restores the
   // original Word comment range. Without it, the comment block is inserted
   // adjacent to the anchor and prose stays untouched — required for
   // comments-only sync where multiple comments may share one anchor.
-  for (const c of matched) {
-    const comment = `{>>${c.author}: ${c.text}<<}`;
+  // Skip insertion when the parent's CriticMarkup already lives near the
+  // target — re-running sync against the same docx would otherwise stack
+  // duplicates. A 200-char window catches both wrapped
+  // (`{>>...<<}[anchor]{.mark}`) and bare (`{>>...<<}anchor`) forms while
+  // ignoring incidental matches farther away.
+  let dedupedCount = 0;
+  for (const c of matchedRoots) {
+    const parentBlock = `{>>${c.author}: ${c.text}<<}`;
+    const replies = repliesByRoot.get(c.id) ?? [];
+    const windowStart = Math.max(0, c.pos - 200);
+    const windowEnd = Math.min(result.length, c.pos + 200);
+    if (result.slice(windowStart, windowEnd).includes(parentBlock)) {
+      // Cluster already synced; treat all members as deduped.
+      dedupedCount += 1 + replies.length;
+      continue;
+    }
+    // Replies carry an explicit `↪ ` author prefix so the round-trip does not
+    // depend on positional adjacency in the markdown. On dense reviewer docs
+    // distinct clusters frequently land at the same anchor position; without
+    // the prefix the re-parse would misthread them. The injection side strips
+    // `↪ ` back off the author so Word renders the original name.
+    const replyBlocks = replies.map(r => `{>>↪ ${r.author}: ${r.text}<<}`);
+    const combined = parentBlock + replyBlocks.join('');
     if (wrapAnchor && c.anchorText && c.anchorEnd) {
       const before = result.slice(0, c.pos);
       const anchor = result.slice(c.pos, c.anchorEnd);
       const after = result.slice(c.anchorEnd);
-      result = before + comment + `[${anchor}]{.mark}` + after;
+      result = before + combined + `[${anchor}]{.mark}` + after;
     } else {
-      // Insert comment at the anchor position with no surrounding whitespace
-      // tweaks; CriticMarkup blocks are invisible to readers, and adding a
-      // leading space would shift prose byte-for-byte (relevant when callers
-      // verify that --comments-only didn't touch the original).
-      result = result.slice(0, c.pos) + comment + result.slice(c.pos);
+      result = result.slice(0, c.pos) + combined + result.slice(c.pos);
     }
+    placedCount += 1 + replies.length;
+  }
+  if (outStats) {
+    outStats.placed = placedCount;
+    outStats.deduped = dedupedCount;
+    outStats.unmatched = unmatchedCount;
   }
   // Log warnings unless quiet mode
@@ -373,6 +613,9 @@ export function insertCommentsIntoMarkdown(
     if (unmatchedCount > 0) {
       console.warn(`Warning: ${unmatchedCount} comment(s) could not be matched to anchor text`);
     }
+    if (dedupedCount > 0) {
+      console.warn(`Note: ${dedupedCount} comment(s) already present at anchor — skipped to avoid duplication`);
+    }
     if (duplicateWarnings.length > 0) {
       console.warn(`Warning: Duplicate anchor text found (using context & tie-breaks for placement):`);
       for (const w of duplicateWarnings) {

package/lib/response.ts CHANGED Viewed

@@ -46,7 +46,7 @@ export function parseCommentsWithReplies(text: string, file: string = ''): Comme
     if (matches.length === 0) continue;
     // Get context (surrounding text without comments)
-    const contextLine = line.replace(/\{>>[^<]+<<\}/g, '').trim();
+    const contextLine = line.replace(/\{>>[\s\S]+?<<\}/g, '').trim();
     const context = contextLine.slice(0, 100) + (contextLine.length > 100 ? '...' : '');
     // First match is the original comment, rest are replies