docrev 0.9.7 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/CHANGELOG.md +21 -0
  2. package/dev_notes/stress2/adversarial.docx +0 -0
  3. package/dev_notes/stress2/build_adversarial.ts +186 -0
  4. package/dev_notes/stress2/drift_matcher.ts +62 -0
  5. package/dev_notes/stress2/probe_anchors.ts +35 -0
  6. package/dev_notes/stress2/project/adversarial.docx +0 -0
  7. package/dev_notes/stress2/project/discussion.before.md +3 -0
  8. package/dev_notes/stress2/project/discussion.md +3 -0
  9. package/dev_notes/stress2/project/methods.before.md +20 -0
  10. package/dev_notes/stress2/project/methods.md +20 -0
  11. package/dev_notes/stress2/project/rev.yaml +5 -0
  12. package/dev_notes/stress2/project/sections.yaml +4 -0
  13. package/dev_notes/stress2/sections.yaml +5 -0
  14. package/dev_notes/stress2/trace_placement.ts +50 -0
  15. package/dev_notes/stresstest_boundaries.ts +27 -0
  16. package/dev_notes/stresstest_drift_apply.ts +43 -0
  17. package/dev_notes/stresstest_drift_compare.ts +43 -0
  18. package/dev_notes/stresstest_drift_v2.ts +54 -0
  19. package/dev_notes/stresstest_inspect.ts +54 -0
  20. package/dev_notes/stresstest_pstyle.ts +55 -0
  21. package/dev_notes/stresstest_section_debug.ts +23 -0
  22. package/dev_notes/stresstest_split.ts +70 -0
  23. package/dev_notes/stresstest_trace.ts +19 -0
  24. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
  25. package/dist/lib/anchor-match.d.ts +10 -0
  26. package/dist/lib/anchor-match.d.ts.map +1 -1
  27. package/dist/lib/anchor-match.js +35 -0
  28. package/dist/lib/anchor-match.js.map +1 -1
  29. package/dist/lib/annotations.d.ts.map +1 -1
  30. package/dist/lib/annotations.js +16 -6
  31. package/dist/lib/annotations.js.map +1 -1
  32. package/dist/lib/build.d.ts +12 -0
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +12 -0
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/quality.js +1 -1
  37. package/dist/lib/commands/quality.js.map +1 -1
  38. package/dist/lib/commands/section-boundaries.d.ts +1 -1
  39. package/dist/lib/commands/section-boundaries.d.ts.map +1 -1
  40. package/dist/lib/commands/section-boundaries.js +12 -2
  41. package/dist/lib/commands/section-boundaries.js.map +1 -1
  42. package/dist/lib/commands/sync.js +19 -13
  43. package/dist/lib/commands/sync.js.map +1 -1
  44. package/dist/lib/commands/verify-anchors.d.ts.map +1 -1
  45. package/dist/lib/commands/verify-anchors.js +15 -4
  46. package/dist/lib/commands/verify-anchors.js.map +1 -1
  47. package/dist/lib/comment-realign.js +2 -2
  48. package/dist/lib/comment-realign.js.map +1 -1
  49. package/dist/lib/import.d.ts +12 -0
  50. package/dist/lib/import.d.ts.map +1 -1
  51. package/dist/lib/import.js +289 -60
  52. package/dist/lib/import.js.map +1 -1
  53. package/dist/lib/response.js +1 -1
  54. package/dist/lib/response.js.map +1 -1
  55. package/dist/lib/types.d.ts +20 -0
  56. package/dist/lib/types.d.ts.map +1 -1
  57. package/dist/lib/word-extraction.d.ts +6 -0
  58. package/dist/lib/word-extraction.d.ts.map +1 -1
  59. package/dist/lib/word-extraction.js +46 -3
  60. package/dist/lib/word-extraction.js.map +1 -1
  61. package/dist/lib/wordcomments.d.ts.map +1 -1
  62. package/dist/lib/wordcomments.js +188 -78
  63. package/dist/lib/wordcomments.js.map +1 -1
  64. package/lib/anchor-match.ts +38 -0
  65. package/lib/annotations.ts +16 -6
  66. package/lib/build.ts +24 -0
  67. package/lib/commands/quality.ts +1 -1
  68. package/lib/commands/section-boundaries.ts +11 -1
  69. package/lib/commands/sync.ts +21 -16
  70. package/lib/commands/verify-anchors.ts +15 -4
  71. package/lib/comment-realign.ts +2 -2
  72. package/lib/import.ts +304 -61
  73. package/lib/response.ts +1 -1
  74. package/lib/types.ts +20 -0
  75. package/lib/word-extraction.ts +50 -3
  76. package/lib/wordcomments.ts +205 -88
  77. package/package.json +1 -1
  78. package/dist/package.json +0 -137
package/lib/commands/sync.ts CHANGED
@@ -571,10 +571,12 @@ async function syncCommentsOnly(
571
571
  let comments;
572
572
  let anchors;
573
573
  let headings;
574
+ let fullDocText = '';
574
575
  try {
575
576
  comments = await extractWordComments(docx);
576
577
  const result = await extractCommentAnchors(docx);
577
578
  anchors = result.anchors;
579
+ fullDocText = result.fullDocText;
578
580
  headings = await extractHeadings(docx);
579
581
  spin.stop();
580
582
  } catch (err) {
@@ -592,7 +594,7 @@ async function syncCommentsOnly(
592
594
  return;
593
595
  }
594
596
 
595
- const boundaries = computeSectionBoundaries(config.sections, headings);
597
+ const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
596
598
 
597
599
  if (boundaries.length === 0) {
598
600
  console.error(fmt.status('warning', 'No section headings detected in Word document.'));
@@ -615,12 +617,12 @@ async function syncCommentsOnly(
615
617
  }
616
618
 
617
619
  const firstBoundaryStart = boundaries[0].start;
618
- const results: Array<{ file: string; placed: number; unmatched: number; skipped: boolean }> = [];
620
+ const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];
619
621
 
620
622
  for (const boundary of activeBoundaries) {
621
623
  const sectionPath = path.join(options.dir, boundary.file);
622
624
  if (!fs.existsSync(sectionPath)) {
623
- results.push({ file: boundary.file, placed: 0, unmatched: 0, skipped: true });
625
+ results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
624
626
  continue;
625
627
  }
626
628
 
@@ -635,55 +637,58 @@ async function syncCommentsOnly(
635
637
  });
636
638
 
637
639
  if (sectionComments.length === 0) {
638
- results.push({ file: boundary.file, placed: 0, unmatched: 0, skipped: false });
640
+ results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
639
641
  continue;
640
642
  }
641
643
 
642
644
  const original = fs.readFileSync(sectionPath, 'utf-8');
643
- const commentPattern = /\{>>.*?<<\}/gs;
644
- const beforeCount = (original.match(commentPattern) || []).length;
645
645
 
646
+ const stats = { placed: 0, deduped: 0, unmatched: 0 };
646
647
  const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
647
648
  quiet: !process.env.DEBUG,
648
649
  sectionBoundary: { start: boundary.start, end: boundary.end },
649
650
  wrapAnchor: false,
651
+ outStats: stats,
650
652
  });
651
653
 
652
- const afterCount = (annotated.match(commentPattern) || []).length;
653
- const placed = afterCount - beforeCount;
654
- const unmatched = sectionComments.length - placed;
655
-
656
- if (!options.dryRun && placed > 0) {
654
+ if (!options.dryRun && stats.placed > 0) {
657
655
  fs.writeFileSync(sectionPath, annotated, 'utf-8');
658
656
  }
659
- results.push({ file: boundary.file, placed, unmatched, skipped: false });
657
+ results.push({ file: boundary.file, ...stats, skipped: false });
660
658
  }
661
659
 
662
660
  const tableRows = results.map(r => {
663
661
  if (r.skipped) {
664
- return [chalk.dim(r.file), chalk.yellow('missing'), '', ''];
662
+ return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
665
663
  }
666
664
  return [
667
665
  chalk.bold(r.file),
668
666
  chalk.green(`${r.placed}`),
667
+ r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
669
668
  r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
670
669
  chalk.dim('comments only'),
671
670
  ];
672
671
  });
673
672
 
674
673
  console.log(fmt.table(
675
- ['File', 'Placed', 'Unmatched', 'Mode'],
674
+ ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
676
675
  tableRows,
677
- { align: ['left', 'right', 'right', 'left'] },
676
+ { align: ['left', 'right', 'right', 'right', 'left'] },
678
677
  ));
679
678
  console.log();
680
679
 
681
680
  const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
681
+ const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
682
682
  const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);
683
683
 
684
684
  const lines: string[] = [];
685
685
  lines.push(`${chalk.bold(comments.length)} comments in document`);
686
- lines.push(`${chalk.bold(totalPlaced)} placed at fuzzy-matched anchors`);
686
+ if (totalPlaced > 0) {
687
+ lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
688
+ }
689
+ if (totalDeduped > 0) {
690
+ lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
691
+ }
687
692
  if (totalUnmatched > 0) {
688
693
  lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
689
694
  }
package/lib/commands/verify-anchors.ts CHANGED
@@ -23,7 +23,7 @@ import {
23
23
  jsonOutput,
24
24
  } from './context.js';
25
25
  import type { Command } from 'commander';
26
- import { findAnchorInText, classifyStrategy, type AnchorMatchQuality } from '../anchor-match.js';
26
+ import { findAnchorInText, classifyStrategy, scoreContextAt, type AnchorMatchQuality } from '../anchor-match.js';
27
27
  import type { CommentAnchorData } from '../word-extraction.js';
28
28
  import { computeSectionBoundaries } from './section-boundaries.js';
29
29
 
@@ -71,10 +71,12 @@ export function register(program: Command): void {
71
71
  let comments;
72
72
  let anchors;
73
73
  let headings;
74
+ let fullDocText = '';
74
75
  try {
75
76
  comments = await extractWordComments(docxPath);
76
77
  const result = await extractCommentAnchors(docxPath);
77
78
  anchors = result.anchors;
79
+ fullDocText = result.fullDocText;
78
80
  headings = await extractHeadings(docxPath);
79
81
  } catch (err) {
80
82
  const error = err as Error;
@@ -88,7 +90,7 @@ export function register(program: Command): void {
88
90
  return;
89
91
  }
90
92
 
91
- const boundaries = computeSectionBoundaries(config.sections, headings);
93
+ const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
92
94
 
93
95
  // Cache section markdown contents on first read
94
96
  const sectionCache = new Map<string, string>();
@@ -165,7 +167,16 @@ export function register(program: Command): void {
165
167
  const search = findAnchorInText(anchor.anchor, md, anchor.before, anchor.after);
166
168
  let quality: AnchorMatchQuality | 'ambiguous' = classifyStrategy(search.strategy, search.occurrences.length);
167
169
  if (quality === 'clean' && search.occurrences.length > 1) {
168
- quality = 'ambiguous';
170
+ // Multiple direct hits — only flag as ambiguous when before/after
171
+ // context can't pick a clear winner. If one candidate scores
172
+ // strictly higher than the others, sync will place it correctly.
173
+ const anchorLen = anchor.anchor.length;
174
+ const scores = search.occurrences.map(p => scoreContextAt(p, md, anchor.before, anchor.after, anchorLen));
175
+ const max = Math.max(...scores);
176
+ const winners = scores.filter(s => s === max).length;
177
+ if (max === 0 || winners > 1) {
178
+ quality = 'ambiguous';
179
+ }
169
180
  }
170
181
 
171
182
  reports.push({
@@ -239,7 +250,7 @@ function printReport(docxPath: string, reports: CommentReport[]): void {
239
250
  if (totals.unmatched > 0 || totals.ambiguous > 0) {
240
251
  console.log();
241
252
  console.log(chalk.dim('Comments flagged "unmatched" or "ambiguous" need manual placement.'));
242
- console.log(chalk.dim('Run "rev sync --no-overwrite" to import the matched ones without touching prose.'));
253
+ console.log(chalk.dim('Run "rev sync --comments-only" to import the matched ones without touching prose.'));
243
254
  }
244
255
  }
245
256
 
package/lib/comment-realign.ts CHANGED
@@ -370,7 +370,7 @@ export async function realignComments(
370
370
 
371
371
  // Strip ALL comments (both authors) from markdown to start fresh
372
372
  let markdown = originalMarkdown;
373
- markdown = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
373
+ markdown = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
374
374
  console.log(`Stripped all comments from markdown`);
375
375
 
376
376
  // Parse markdown paragraphs
@@ -469,7 +469,7 @@ export async function realignMarkdown(
469
469
  );
470
470
 
471
471
  // Strip ALL comments from markdown
472
- let result = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
472
+ let result = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
473
473
 
474
474
  // Parse markdown paragraphs
475
475
  const mdParagraphs = parseMdParagraphs(result);
package/lib/import.ts CHANGED
@@ -36,7 +36,65 @@ import {
36
36
  parseVisibleComments,
37
37
  convertVisibleComments,
38
38
  } from './restore-references.js';
39
- import { findAnchorInText } from './anchor-match.js';
39
+ import { findAnchorInText, findAllOccurrences } from './anchor-match.js';
40
+
41
+ /**
42
+ * Pick the best position from candidate `occurrences` given the
43
+ * surrounding `before` / `after` context from the docx, while
44
+ * respecting `usedPositions` to avoid stacking distinct comments at
45
+ * the same anchor instance.
46
+ *
47
+ * Returns the chosen position, or -1 if every candidate is already used.
48
+ */
49
+ function pickBestOccurrence(
50
+ occurrences: number[],
51
+ result: string,
52
+ before: string,
53
+ after: string,
54
+ anchorLen: number,
55
+ usedPositions: Set<number>,
56
+ ): number {
57
+ if (occurrences.length === 0) return -1;
58
+ if (occurrences.length === 1) {
59
+ return usedPositions.has(occurrences[0]) ? -1 : occurrences[0];
60
+ }
61
+
62
+ let bestIdx = occurrences.find(p => !usedPositions.has(p)) ?? -1;
63
+ if (bestIdx < 0) return -1;
64
+ let bestScore = -1;
65
+
66
+ for (const pos of occurrences) {
67
+ if (usedPositions.has(pos)) continue;
68
+ let score = 0;
69
+
70
+ if (before) {
71
+ const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
72
+ const beforeLower = before.toLowerCase();
73
+ const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
74
+ for (const word of beforeWords) {
75
+ if (contextBefore.includes(word)) score += 2;
76
+ }
77
+ if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
78
+ }
79
+
80
+ if (after) {
81
+ const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
82
+ const afterLower = after.toLowerCase();
83
+ const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
84
+ for (const word of afterWords) {
85
+ if (contextAfter.includes(word)) score += 2;
86
+ }
87
+ if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
88
+ }
89
+
90
+ if (score > bestScore || (score === bestScore && pos < bestIdx)) {
91
+ bestScore = score;
92
+ bestIdx = pos;
93
+ }
94
+ }
95
+
96
+ return bestIdx;
97
+ }
40
98
 
41
99
  // Re-export everything so existing imports from './import.js' still work
42
100
  export {
@@ -100,6 +158,14 @@ export interface InsertCommentsOptions {
100
158
  * comments sharing one anchor don't produce nested broken markup.
101
159
  */
102
160
  wrapAnchor?: boolean;
161
+ /**
162
+ * Mutable output: when provided, the function fills in counters so callers
163
+ * can distinguish placement outcomes in their summary. `placed` counts new
164
+ * insertions, `deduped` counts comments that were already present at their
165
+ * anchor (skipped to avoid duplication on re-sync), `unmatched` counts
166
+ * comments whose anchor couldn't be located.
167
+ */
168
+ outStats?: { placed: number; deduped: number; unmatched: number };
103
169
  }
104
170
 
105
171
  export interface CommentWithPos {
@@ -175,6 +241,71 @@ export interface MoveExtractedMediaResult {
175
241
  // Functions
176
242
  // ============================================
177
243
 
244
+ /**
245
+ * If `pos` lands inside a section file's leading `# Heading` line (or the
246
+ * blank line right after it), advance past the first paragraph break so
247
+ * the comment stays inside the section. A comment authored at the very
248
+ * start of a Word section maps to `pos === 0`, but inserting at column 0
249
+ * of a markdown file that begins with `# Heading` puts the `{>>...<<}`
250
+ * before the heading marker — Pandoc then treats the line as ordinary
251
+ * paragraph text and the comment renders in the previous section.
252
+ */
253
+ function pushPastSectionHeading(text: string, pos: number): number {
254
+ if (pos > 0) {
255
+ const headingMatch = text.match(/^#{1,6}\s.+$/m);
256
+ if (!headingMatch || headingMatch.index === undefined) return pos;
257
+ const headingEnd = headingMatch.index + headingMatch[0].length;
258
+ if (pos >= headingEnd) return pos;
259
+ }
260
+ // pos is at-or-before the first heading line. Advance to the first
261
+ // non-blank position after the heading paragraph.
262
+ const headingLine = text.match(/^#{1,6}\s.+(?:\n|$)/m);
263
+ if (!headingLine || headingLine.index === undefined) return pos;
264
+ let after = headingLine.index + headingLine[0].length;
265
+ // Skip blank lines so we land at the start of the first body paragraph.
266
+ while (after < text.length && (text[after] === '\n' || text[after] === '\r')) {
267
+ after++;
268
+ }
269
+ return after;
270
+ }
271
+
272
+ /**
273
+ * Snap a position to the nearest whitespace boundary within ±50 chars so a
274
+ * proportional fallback insertion never lands mid-word.
275
+ */
276
+ function snapToWordBoundary(text: string, pos: number): number {
277
+ if (pos <= 0) return 0;
278
+ if (pos >= text.length) return text.length;
279
+ if (/\s/.test(text[pos] ?? '')) return pos;
280
+ for (let d = 1; d <= 50; d++) {
281
+ if (pos + d < text.length && /\s/.test(text[pos + d] ?? '')) return pos + d;
282
+ if (pos - d >= 0 && /\s/.test(text[pos - d] ?? '')) return pos - d;
283
+ }
284
+ return pos;
285
+ }
286
+
287
+ /**
288
+ * Final-resort placement when every text-matching strategy failed. The docx
289
+ * carries a real `<w:commentRangeStart w:id="N">` marker at a known offset
290
+ * inside its body text — that's a structural anchor, even if the anchored
291
+ * span itself is empty and the surrounding context drifted in the target.
292
+ *
293
+ * Map docPosition into the target markdown proportionally and snap to a word
294
+ * boundary. This is approximate when the document was heavily restructured,
295
+ * but it's strictly better than silently dropping a reviewer's comment: the
296
+ * comment lands in roughly the right neighborhood and the reviewer can
297
+ * relocate it during their next pass.
298
+ */
299
+ function proportionalFallback(
300
+ anchorData: CommentAnchorData,
301
+ target: string,
302
+ ): number | null {
303
+ if (anchorData.docLength <= 0) return null;
304
+ const proportion = Math.min(anchorData.docPosition / anchorData.docLength, 1.0);
305
+ const rawPos = Math.floor(proportion * target.length);
306
+ return pushPastSectionHeading(target, snapToWordBoundary(target, rawPos));
307
+ }
308
+
178
309
  /**
179
310
  * Insert comments into markdown text based on anchor texts with context
180
311
  */
@@ -184,17 +315,48 @@ export function insertCommentsIntoMarkdown(
184
315
  anchors: Map<string, CommentAnchorData | string>,
185
316
  options: InsertCommentsOptions = {}
186
317
  ): string {
187
- const { quiet = false, sectionBoundary = null, wrapAnchor = true } = options;
318
+ const { quiet = false, sectionBoundary = null, wrapAnchor = true, outStats } = options;
188
319
  let result = markdown;
189
320
  let unmatchedCount = 0;
321
+ let placedCount = 0;
190
322
  const duplicateWarnings: string[] = [];
191
323
  const usedPositions = new Set<number>(); // For tie-breaking: track used positions
192
324
 
325
+ // Resolve threading: replies share their parent's anchor in Word, so they
326
+ // must inherit the parent's position and ride alongside it as one cluster.
327
+ // Letting each reply run through anchor scoring scatters the cluster (the
328
+ // same docPosition forces `usedPositions` to push later replies onto a
329
+ // different occurrence), which on re-build looks like independent comments
330
+ // and loses the paraIdParent threading. See gcol33/docrev issue #2.
331
+ const inputById = new Map<string, WordComment>();
332
+ for (const c of comments) inputById.set(c.id, c);
333
+ function rootIdOf(c: WordComment): string {
334
+ let cur: WordComment = c;
335
+ const seen = new Set<string>();
336
+ while (cur.parentId && !seen.has(cur.id)) {
337
+ seen.add(cur.id);
338
+ const parent = inputById.get(cur.parentId);
339
+ if (!parent || parent === cur) break;
340
+ cur = parent;
341
+ }
342
+ return cur.id;
343
+ }
344
+ const replyRootId = new Map<string, string>();
345
+ for (const c of comments) {
346
+ const root = rootIdOf(c);
347
+ if (root !== c.id) replyRootId.set(c.id, root);
348
+ }
349
+
193
350
  // Anchor matching primitives live in lib/anchor-match.ts so that
194
351
  // `rev verify-anchors` can use the same strategies for drift reporting.
195
352
 
196
- // Get all positions in order (for sequential tie-breaking)
353
+ // Get all positions in order (for sequential tie-breaking).
354
+ // Replies skip scoring entirely — they piggyback on their root's position
355
+ // in the emit pass below.
197
356
  const commentsWithPositions = comments.map((c): CommentWithPos => {
357
+ if (replyRootId.has(c.id)) {
358
+ return { ...c, pos: -1, anchorText: null, strategy: 'reply' };
359
+ }
198
360
  const anchorData = anchors.get(c.id);
199
361
  if (!anchorData) {
200
362
  unmatchedCount++;
@@ -222,6 +384,18 @@ export function insertCommentsIntoMarkdown(
222
384
  const proportion = Math.min(relativePos / sectionLength, 1.0);
223
385
  const markdownPos = Math.floor(proportion * result.length);
224
386
 
387
+ // For empty anchors, before/after context is the only signal that
388
+ // pinpoints the original split — without it, proportional placement
389
+ // can land mid-word or split unrelated phrases. Try context match
390
+ // first; only fall through to proportional when context is gone.
391
+ if ((!anchor || isEmpty) && (before || after)) {
392
+ const ctx = findAnchorInText('', result, before, after);
393
+ if (ctx.occurrences.length > 0) {
394
+ const pos = pushPastSectionHeading(result, ctx.occurrences[0]);
395
+ return { ...c, pos, anchorText: null, isEmpty: true, strategy: `ctx:${ctx.strategy}` };
396
+ }
397
+ }
398
+
225
399
  let insertPos = markdownPos;
226
400
 
227
401
  // Look for nearby word boundary
@@ -231,26 +405,52 @@ export function insertCommentsIntoMarkdown(
231
405
  insertPos = Math.max(0, markdownPos - 25) + spaceIdx;
232
406
  }
233
407
 
234
- // If we have anchor text, try to find it near this position
408
+ // If we have anchor text, try to find it near this position.
409
+ // Collect ALL occurrences in the local window, then disambiguate
410
+ // via before/after context + usedPositions — otherwise two
411
+ // comments sharing the same anchor word would both collide at
412
+ // the leftmost match. The context-scoring helper handles the
413
+ // "repeated formulaic prose" case using docx-side context, which
414
+ // is a stronger signal than raw distance to the proportional
415
+ // insertPos (insertPos is itself an approximation).
235
416
  if (anchor && !isEmpty) {
236
417
  const searchStart = Math.max(0, insertPos - 200);
237
418
  const searchEnd = Math.min(result.length, insertPos + 200);
238
419
  const localSearch = result.slice(searchStart, searchEnd).toLowerCase();
239
420
  const anchorLower = anchor.toLowerCase();
240
- const localIdx = localSearch.indexOf(anchorLower);
241
- if (localIdx !== -1) {
242
- return { ...c, pos: searchStart + localIdx, anchorText: anchor, anchorEnd: searchStart + localIdx + anchor.length, strategy: 'position+text' };
421
+
422
+ const localHits = findAllOccurrences(localSearch, anchorLower).map(i => searchStart + i);
423
+ if (localHits.length > 0) {
424
+ const chosen = pickBestOccurrence(localHits, result, before, after, anchor.length, usedPositions);
425
+ if (chosen >= 0) {
426
+ if (localHits.length > 1) {
427
+ duplicateWarnings.push(`"${anchor.slice(0, 40)}${anchor.length > 40 ? '...' : ''}" appears ${localHits.length} times in section window`);
428
+ }
429
+ usedPositions.add(chosen);
430
+ return { ...c, pos: chosen, anchorText: anchor, anchorEnd: chosen + anchor.length, strategy: 'position+text' };
431
+ }
243
432
  }
433
+
244
434
  // Try first few words
245
435
  const words = anchor.split(/\s+/).slice(0, 4).join(' ').toLowerCase();
246
436
  if (words.length >= 10) {
247
- const partialIdx = localSearch.indexOf(words);
248
- if (partialIdx !== -1) {
249
- return { ...c, pos: searchStart + partialIdx, anchorText: words, anchorEnd: searchStart + partialIdx + words.length, strategy: 'position+partial' };
437
+ const partialHits = findAllOccurrences(localSearch, words).map(i => searchStart + i);
438
+ if (partialHits.length > 0) {
439
+ const chosen = pickBestOccurrence(partialHits, result, before, after, words.length, usedPositions);
440
+ if (chosen >= 0) {
441
+ usedPositions.add(chosen);
442
+ return { ...c, pos: chosen, anchorText: words, anchorEnd: chosen + words.length, strategy: 'position+partial' };
443
+ }
250
444
  }
251
445
  }
252
446
  }
253
447
 
448
+ // A docPosition at the very start of a section maps to markdownPos=0,
449
+ // which sits before the file's `# Heading` line and gets rendered in
450
+ // the previous section. Push past the heading line so the comment
451
+ // stays inside the section it was authored in.
452
+ insertPos = pushPastSectionHeading(result, insertPos);
453
+
254
454
  return { ...c, pos: insertPos, anchorText: null, strategy: 'position-only' };
255
455
  }
256
456
  }
@@ -263,6 +463,14 @@ export function insertCommentsIntoMarkdown(
263
463
  return { ...c, pos: occurrences[0], anchorText: null, isEmpty: true };
264
464
  }
265
465
  }
466
+ // Last resort: docx carried a structural marker at docPosition; map
467
+ // it proportionally into the target so the comment isn't dropped.
468
+ if (typeof anchorData === 'object') {
469
+ const fallback = proportionalFallback(anchorData, result);
470
+ if (fallback !== null) {
471
+ return { ...c, pos: fallback, anchorText: null, isEmpty: true, strategy: 'proportional-fallback' };
472
+ }
473
+ }
266
474
  unmatchedCount++;
267
475
  return { ...c, pos: -1, anchorText: null, isEmpty: true };
268
476
  }
@@ -271,6 +479,14 @@ export function insertCommentsIntoMarkdown(
271
479
  const { occurrences, matchedAnchor, strategy, stripped } = findAnchorInText(anchor, result, before, after);
272
480
 
273
481
  if (occurrences.length === 0) {
482
+ // Same last-resort as the empty-anchor path: anchor text is gone from
483
+ // the target, but the marker's text-offset survived extraction.
484
+ if (typeof anchorData === 'object') {
485
+ const fallback = proportionalFallback(anchorData, result);
486
+ if (fallback !== null) {
487
+ return { ...c, pos: fallback, anchorText: null, strategy: 'proportional-fallback' };
488
+ }
489
+ }
274
490
  unmatchedCount++;
275
491
  return { ...c, pos: -1, anchorText: null };
276
492
  }
@@ -290,82 +506,106 @@ export function insertCommentsIntoMarkdown(
290
506
  duplicateWarnings.push(`"${matchedAnchor.slice(0, 40)}${matchedAnchor.length > 40 ? '...' : ''}" appears ${occurrences.length} times`);
291
507
  }
292
508
 
293
- let bestIdx = occurrences.find(p => !usedPositions.has(p)) ?? occurrences[0];
294
- let bestScore = -1;
295
-
296
- for (const pos of occurrences) {
297
- if (usedPositions.has(pos)) continue;
298
-
299
- let score = 0;
300
-
301
- if (before) {
302
- const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
303
- const beforeLower = before.toLowerCase();
304
- const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
305
- for (const word of beforeWords) {
306
- if (contextBefore.includes(word)) score += 2;
307
- }
308
- if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
309
- }
310
-
311
- if (after) {
312
- const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
313
- const afterLower = after.toLowerCase();
314
- const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
315
- for (const word of afterWords) {
316
- if (contextAfter.includes(word)) score += 2;
317
- }
318
- if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
319
- }
320
-
321
- if (score > bestScore || (score === bestScore && pos < bestIdx)) {
322
- bestScore = score;
323
- bestIdx = pos;
324
- }
325
- }
326
-
327
- usedPositions.add(bestIdx);
509
+ const bestIdx = pickBestOccurrence(occurrences, result, before, after, anchorLen, usedPositions);
510
+ const finalIdx = bestIdx >= 0 ? bestIdx : occurrences[0];
511
+ usedPositions.add(finalIdx);
328
512
 
329
513
  if (matchedAnchor) {
330
- return { ...c, pos: bestIdx, anchorText: matchedAnchor, anchorEnd: bestIdx + anchorLen };
514
+ return { ...c, pos: finalIdx, anchorText: matchedAnchor, anchorEnd: finalIdx + anchorLen };
331
515
  } else {
332
- return { ...c, pos: bestIdx, anchorText: null };
516
+ return { ...c, pos: finalIdx, anchorText: null };
333
517
  }
334
518
  });
335
519
 
336
- // Log any unmatched comments for debugging
337
- const unmatched = commentsWithPositions.filter((c) => c.pos < 0);
520
+ // Group comments into clusters (root + ordered replies). The root carries
521
+ // the resolved position; replies inherit it and ride along in input order
522
+ // so the rebuilt CriticMarkup looks like `{>>p<<}{>>r1<<}{>>r2<<}[anchor]`
523
+ // and adjacency-based reply detection picks the cluster up again.
524
+ const byId = new Map<string, CommentWithPos>();
525
+ for (const cwp of commentsWithPositions) byId.set(cwp.id, cwp);
526
+ const repliesByRoot = new Map<string, CommentWithPos[]>();
527
+ for (const c of comments) {
528
+ const rootId = replyRootId.get(c.id);
529
+ if (!rootId) continue;
530
+ const cwp = byId.get(c.id);
531
+ if (!cwp) continue;
532
+ const list = repliesByRoot.get(rootId);
533
+ if (list) list.push(cwp);
534
+ else repliesByRoot.set(rootId, [cwp]);
535
+ }
536
+
537
+ // Replies whose root never resolved (parent missing from the input slice or
538
+ // parent unmatched) count as unmatched too — there's no position to attach
539
+ // them to.
540
+ for (const [rootId, replies] of repliesByRoot) {
541
+ const root = byId.get(rootId);
542
+ if (!root || root.pos < 0) {
543
+ unmatchedCount += replies.length;
544
+ }
545
+ }
546
+
547
+ // Roots only — replies attach during emission.
548
+ const rootsWithPos = commentsWithPositions.filter(
549
+ c => !replyRootId.has(c.id)
550
+ );
551
+
552
+ // Log any unmatched roots for debugging
553
+ const unmatched = rootsWithPos.filter((c) => c.pos < 0);
338
554
  if (process.env.DEBUG) {
339
- console.log(`[DEBUG] insertComments: ${comments.length} input, ${commentsWithPositions.length} processed, ${unmatched.length} unmatched`);
555
+ console.log(`[DEBUG] insertComments: ${comments.length} input, ${rootsWithPos.length} roots, ${unmatched.length} unmatched roots, ${replyRootId.size} replies`);
340
556
  if (unmatched.length > 0) {
341
557
  unmatched.forEach(c => console.log(`[DEBUG] Unmatched ID=${c.id}: anchor="${(c.anchorText || 'none').slice(0,30)}"`));
342
558
  }
343
559
  }
344
560
 
345
- const matched = commentsWithPositions.filter((c) => c.pos >= 0);
561
+ const matchedRoots = rootsWithPos.filter((c) => c.pos >= 0);
346
562
 
347
563
  // Sort by position descending (insert from end to avoid offset issues)
348
- matched.sort((a, b) => b.pos - a.pos);
564
+ matchedRoots.sort((a, b) => b.pos - a.pos);
349
565
 
350
- // Insert each comment. With `wrapAnchor` (the default), the anchor text
566
+ // Insert each cluster. With `wrapAnchor` (the default), the anchor text
351
567
  // gets wrapped in `[anchor]{.mark}` so the rebuilt docx restores the
352
568
  // original Word comment range. Without it, the comment block is inserted
353
569
  // adjacent to the anchor and prose stays untouched — required for
354
570
  // comments-only sync where multiple comments may share one anchor.
355
- for (const c of matched) {
356
- const comment = `{>>${c.author}: ${c.text}<<}`;
571
+ // Skip insertion when the parent's CriticMarkup already lives near the
572
+ // target — re-running sync against the same docx would otherwise stack
573
+ // duplicates. A 200-char window catches both wrapped
574
+ // (`{>>...<<}[anchor]{.mark}`) and bare (`{>>...<<}anchor`) forms while
575
+ // ignoring incidental matches farther away.
576
+ let dedupedCount = 0;
577
+ for (const c of matchedRoots) {
578
+ const parentBlock = `{>>${c.author}: ${c.text}<<}`;
579
+ const replies = repliesByRoot.get(c.id) ?? [];
580
+ const windowStart = Math.max(0, c.pos - 200);
581
+ const windowEnd = Math.min(result.length, c.pos + 200);
582
+ if (result.slice(windowStart, windowEnd).includes(parentBlock)) {
583
+ // Cluster already synced; treat all members as deduped.
584
+ dedupedCount += 1 + replies.length;
585
+ continue;
586
+ }
587
+ // Replies carry an explicit `↪ ` author prefix so the round-trip does not
588
+ // depend on positional adjacency in the markdown. On dense reviewer docs
589
+ // distinct clusters frequently land at the same anchor position; without
590
+ // the prefix the re-parse would misthread them. The injection side strips
591
+ // `↪ ` back off the author so Word renders the original name.
592
+ const replyBlocks = replies.map(r => `{>>↪ ${r.author}: ${r.text}<<}`);
593
+ const combined = parentBlock + replyBlocks.join('');
357
594
  if (wrapAnchor && c.anchorText && c.anchorEnd) {
358
595
  const before = result.slice(0, c.pos);
359
596
  const anchor = result.slice(c.pos, c.anchorEnd);
360
597
  const after = result.slice(c.anchorEnd);
361
- result = before + comment + `[${anchor}]{.mark}` + after;
598
+ result = before + combined + `[${anchor}]{.mark}` + after;
362
599
  } else {
363
- // Insert comment at the anchor position with no surrounding whitespace
364
- // tweaks; CriticMarkup blocks are invisible to readers, and adding a
365
- // leading space would shift prose byte-for-byte (relevant when callers
366
- // verify that --comments-only didn't touch the original).
367
- result = result.slice(0, c.pos) + comment + result.slice(c.pos);
600
+ result = result.slice(0, c.pos) + combined + result.slice(c.pos);
368
601
  }
602
+ placedCount += 1 + replies.length;
603
+ }
604
+
605
+ if (outStats) {
606
+ outStats.placed = placedCount;
607
+ outStats.deduped = dedupedCount;
608
+ outStats.unmatched = unmatchedCount;
369
609
  }
370
610
 
371
611
  // Log warnings unless quiet mode
@@ -373,6 +613,9 @@ export function insertCommentsIntoMarkdown(
373
613
  if (unmatchedCount > 0) {
374
614
  console.warn(`Warning: ${unmatchedCount} comment(s) could not be matched to anchor text`);
375
615
  }
616
+ if (dedupedCount > 0) {
617
+ console.warn(`Note: ${dedupedCount} comment(s) already present at anchor — skipped to avoid duplication`);
618
+ }
376
619
  if (duplicateWarnings.length > 0) {
377
620
  console.warn(`Warning: Duplicate anchor text found (using context & tie-breaks for placement):`);
378
621
  for (const w of duplicateWarnings) {
package/lib/response.ts CHANGED
@@ -46,7 +46,7 @@ export function parseCommentsWithReplies(text: string, file: string = ''): Comme
46
46
  if (matches.length === 0) continue;
47
47
 
48
48
  // Get context (surrounding text without comments)
49
- const contextLine = line.replace(/\{>>[^<]+<<\}/g, '').trim();
49
+ const contextLine = line.replace(/\{>>[\s\S]+?<<\}/g, '').trim();
50
50
  const context = contextLine.slice(0, 100) + (contextLine.length > 100 ? '...' : '');
51
51
 
52
52
  // First match is the original comment, rest are replies