docrev 0.9.7 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. package/CHANGELOG.md +21 -0
  2. package/dev_notes/stress2/adversarial.docx +0 -0
  3. package/dev_notes/stress2/build_adversarial.ts +186 -0
  4. package/dev_notes/stress2/drift_matcher.ts +62 -0
  5. package/dev_notes/stress2/probe_anchors.ts +35 -0
  6. package/dev_notes/stress2/project/adversarial.docx +0 -0
  7. package/dev_notes/stress2/project/discussion.before.md +3 -0
  8. package/dev_notes/stress2/project/discussion.md +3 -0
  9. package/dev_notes/stress2/project/methods.before.md +20 -0
  10. package/dev_notes/stress2/project/methods.md +20 -0
  11. package/dev_notes/stress2/project/rev.yaml +5 -0
  12. package/dev_notes/stress2/project/sections.yaml +4 -0
  13. package/dev_notes/stress2/sections.yaml +5 -0
  14. package/dev_notes/stress2/trace_placement.ts +50 -0
  15. package/dev_notes/stresstest_boundaries.ts +27 -0
  16. package/dev_notes/stresstest_drift_apply.ts +43 -0
  17. package/dev_notes/stresstest_drift_compare.ts +43 -0
  18. package/dev_notes/stresstest_drift_v2.ts +54 -0
  19. package/dev_notes/stresstest_inspect.ts +54 -0
  20. package/dev_notes/stresstest_pstyle.ts +55 -0
  21. package/dev_notes/stresstest_section_debug.ts +23 -0
  22. package/dev_notes/stresstest_split.ts +70 -0
  23. package/dev_notes/stresstest_trace.ts +19 -0
  24. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
  25. package/dist/lib/anchor-match.d.ts +10 -0
  26. package/dist/lib/anchor-match.d.ts.map +1 -1
  27. package/dist/lib/anchor-match.js +35 -0
  28. package/dist/lib/anchor-match.js.map +1 -1
  29. package/dist/lib/annotations.d.ts.map +1 -1
  30. package/dist/lib/annotations.js +16 -6
  31. package/dist/lib/annotations.js.map +1 -1
  32. package/dist/lib/commands/quality.js +1 -1
  33. package/dist/lib/commands/quality.js.map +1 -1
  34. package/dist/lib/commands/section-boundaries.d.ts +1 -1
  35. package/dist/lib/commands/section-boundaries.d.ts.map +1 -1
  36. package/dist/lib/commands/section-boundaries.js +12 -2
  37. package/dist/lib/commands/section-boundaries.js.map +1 -1
  38. package/dist/lib/commands/sync.js +19 -13
  39. package/dist/lib/commands/sync.js.map +1 -1
  40. package/dist/lib/commands/verify-anchors.d.ts.map +1 -1
  41. package/dist/lib/commands/verify-anchors.js +15 -4
  42. package/dist/lib/commands/verify-anchors.js.map +1 -1
  43. package/dist/lib/comment-realign.js +2 -2
  44. package/dist/lib/comment-realign.js.map +1 -1
  45. package/dist/lib/import.d.ts +12 -0
  46. package/dist/lib/import.d.ts.map +1 -1
  47. package/dist/lib/import.js +152 -45
  48. package/dist/lib/import.js.map +1 -1
  49. package/dist/lib/response.js +1 -1
  50. package/dist/lib/response.js.map +1 -1
  51. package/dist/lib/wordcomments.d.ts.map +1 -1
  52. package/dist/lib/wordcomments.js +165 -73
  53. package/dist/lib/wordcomments.js.map +1 -1
  54. package/lib/anchor-match.ts +38 -0
  55. package/lib/annotations.ts +16 -6
  56. package/lib/commands/quality.ts +1 -1
  57. package/lib/commands/section-boundaries.ts +11 -1
  58. package/lib/commands/sync.ts +21 -16
  59. package/lib/commands/verify-anchors.ts +15 -4
  60. package/lib/comment-realign.ts +2 -2
  61. package/lib/import.ts +170 -46
  62. package/lib/response.ts +1 -1
  63. package/lib/wordcomments.ts +180 -82
  64. package/package.json +1 -1
  65. package/dist/package.json +0 -137
@@ -23,7 +23,7 @@ import {
23
23
  jsonOutput,
24
24
  } from './context.js';
25
25
  import type { Command } from 'commander';
26
- import { findAnchorInText, classifyStrategy, type AnchorMatchQuality } from '../anchor-match.js';
26
+ import { findAnchorInText, classifyStrategy, scoreContextAt, type AnchorMatchQuality } from '../anchor-match.js';
27
27
  import type { CommentAnchorData } from '../word-extraction.js';
28
28
  import { computeSectionBoundaries } from './section-boundaries.js';
29
29
 
@@ -71,10 +71,12 @@ export function register(program: Command): void {
71
71
  let comments;
72
72
  let anchors;
73
73
  let headings;
74
+ let fullDocText = '';
74
75
  try {
75
76
  comments = await extractWordComments(docxPath);
76
77
  const result = await extractCommentAnchors(docxPath);
77
78
  anchors = result.anchors;
79
+ fullDocText = result.fullDocText;
78
80
  headings = await extractHeadings(docxPath);
79
81
  } catch (err) {
80
82
  const error = err as Error;
@@ -88,7 +90,7 @@ export function register(program: Command): void {
88
90
  return;
89
91
  }
90
92
 
91
- const boundaries = computeSectionBoundaries(config.sections, headings);
93
+ const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
92
94
 
93
95
  // Cache section markdown contents on first read
94
96
  const sectionCache = new Map<string, string>();
@@ -165,7 +167,16 @@ export function register(program: Command): void {
165
167
  const search = findAnchorInText(anchor.anchor, md, anchor.before, anchor.after);
166
168
  let quality: AnchorMatchQuality | 'ambiguous' = classifyStrategy(search.strategy, search.occurrences.length);
167
169
  if (quality === 'clean' && search.occurrences.length > 1) {
168
- quality = 'ambiguous';
170
+ // Multiple direct hits — only flag as ambiguous when before/after
171
+ // context can't pick a clear winner. If one candidate scores
172
+ // strictly higher than the others, sync will place it correctly.
173
+ const anchorLen = anchor.anchor.length;
174
+ const scores = search.occurrences.map(p => scoreContextAt(p, md, anchor.before, anchor.after, anchorLen));
175
+ const max = Math.max(...scores);
176
+ const winners = scores.filter(s => s === max).length;
177
+ if (max === 0 || winners > 1) {
178
+ quality = 'ambiguous';
179
+ }
169
180
  }
170
181
 
171
182
  reports.push({
@@ -239,7 +250,7 @@ function printReport(docxPath: string, reports: CommentReport[]): void {
239
250
  if (totals.unmatched > 0 || totals.ambiguous > 0) {
240
251
  console.log();
241
252
  console.log(chalk.dim('Comments flagged "unmatched" or "ambiguous" need manual placement.'));
242
- console.log(chalk.dim('Run "rev sync --no-overwrite" to import the matched ones without touching prose.'));
253
+ console.log(chalk.dim('Run "rev sync --comments-only" to import the matched ones without touching prose.'));
243
254
  }
244
255
  }
245
256
 
@@ -370,7 +370,7 @@ export async function realignComments(
370
370
 
371
371
  // Strip ALL comments (both authors) from markdown to start fresh
372
372
  let markdown = originalMarkdown;
373
- markdown = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
373
+ markdown = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
374
374
  console.log(`Stripped all comments from markdown`);
375
375
 
376
376
  // Parse markdown paragraphs
@@ -469,7 +469,7 @@ export async function realignMarkdown(
469
469
  );
470
470
 
471
471
  // Strip ALL comments from markdown
472
- let result = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
472
+ let result = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
473
473
 
474
474
  // Parse markdown paragraphs
475
475
  const mdParagraphs = parseMdParagraphs(result);
package/lib/import.ts CHANGED
@@ -36,7 +36,65 @@ import {
36
36
  parseVisibleComments,
37
37
  convertVisibleComments,
38
38
  } from './restore-references.js';
39
- import { findAnchorInText } from './anchor-match.js';
39
+ import { findAnchorInText, findAllOccurrences } from './anchor-match.js';
40
+
41
/**
 * Choose which of several candidate anchor positions a comment belongs to.
 *
 * Each candidate in `occurrences` that is not already in `usedPositions` is
 * scored against the docx-side context:
 *   - +2 for every word longer than 3 characters from `before` / `after`
 *     that appears in the text window next to the candidate, and
 *   - +5 when the 30 characters of `before` closest to the anchor (its tail),
 *     or the first 30 characters of `after`, appear verbatim in that window.
 * The windows are `before.length + 20` characters preceding the candidate and
 * `after.length + 20` characters following the anchor; all comparisons are
 * case-insensitive.
 *
 * The highest score wins; ties go to the smallest position. Positions in
 * `usedPositions` are never returned, so distinct comments do not stack on
 * the same anchor instance. This function does not modify `usedPositions` —
 * the caller records the returned position.
 *
 * @param occurrences   Candidate start offsets of the anchor within `result`.
 * @param result        Text being searched.
 * @param before        Context preceding the anchor (may be empty).
 * @param after         Context following the anchor (may be empty).
 * @param anchorLen     Length of the matched anchor text.
 * @param usedPositions Offsets already claimed by earlier comments.
 * @returns The chosen offset, or -1 when there are no candidates or every
 *          candidate is already used.
 */
function pickBestOccurrence(
  occurrences: number[],
  result: string,
  before: string,
  after: string,
  anchorLen: number,
  usedPositions: Set<number>,
): number {
  // The context strings are the same for every candidate, so normalise them
  // once up front instead of once per loop iteration.
  const beforeLower = before ? before.toLowerCase() : '';
  const beforeWords = before ? beforeLower.split(/\s+/).filter(w => w.length > 3) : [];
  const beforeTail = beforeLower.slice(-30);

  const afterLower = after ? after.toLowerCase() : '';
  const afterWords = after ? afterLower.split(/\s+/).filter(w => w.length > 3) : [];
  const afterHead = afterLower.slice(0, 30);

  // Score one lower-cased text window against a word list and a verbatim snippet.
  const scoreWindow = (windowText: string, words: string[], snippet: string): number => {
    let score = 0;
    for (const word of words) {
      if (windowText.includes(word)) score += 2;
    }
    if (windowText.includes(snippet)) score += 5;
    return score;
  };

  let bestPos = -1;
  // Starts below any reachable score (scores are >= 0) so the first unused
  // candidate is always accepted, even with no context to score against.
  let bestScore = -1;

  for (const pos of occurrences) {
    if (usedPositions.has(pos)) continue;

    let score = 0;
    if (before) {
      const windowBefore = result
        .slice(Math.max(0, pos - before.length - 20), pos)
        .toLowerCase();
      score += scoreWindow(windowBefore, beforeWords, beforeTail);
    }
    if (after) {
      const anchorEnd = pos + anchorLen;
      const windowAfter = result
        .slice(anchorEnd, anchorEnd + after.length + 20)
        .toLowerCase();
      score += scoreWindow(windowAfter, afterWords, afterHead);
    }

    // Prefer the higher score; on a tie prefer the earlier position so the
    // outcome does not depend on the order of `occurrences`.
    if (score > bestScore || (score === bestScore && pos < bestPos)) {
      bestScore = score;
      bestPos = pos;
    }
  }

  return bestPos;
}
40
98
 
41
99
  // Re-export everything so existing imports from './import.js' still work
42
100
  export {
@@ -100,6 +158,14 @@ export interface InsertCommentsOptions {
100
158
  * comments sharing one anchor don't produce nested broken markup.
101
159
  */
102
160
  wrapAnchor?: boolean;
161
+ /**
162
+ * Mutable output: when provided, the function fills in counters so callers
163
+ * can distinguish placement outcomes in their summary. `placed` counts new
164
+ * insertions, `deduped` counts comments that were already present at their
165
+ * anchor (skipped to avoid duplication on re-sync), `unmatched` counts
166
+ * comments whose anchor couldn't be located.
167
+ */
168
+ outStats?: { placed: number; deduped: number; unmatched: number };
103
169
  }
104
170
 
105
171
  export interface CommentWithPos {
@@ -175,6 +241,34 @@ export interface MoveExtractedMediaResult {
175
241
  // Functions
176
242
  // ============================================
177
243
 
244
/**
 * Matches a section file's leading heading, anchored at offset 0: optional
 * blank lines followed by an ATX heading (`#` to `######`, a space or tab,
 * then at least one character). Deliberately not multiline — a heading that
 * appears after body text is a sub-heading, not the section's leading heading.
 */
const LEADING_SECTION_HEADING = /^(?:[ \t]*\r?\n)*#{1,6}[ \t].+/;

/**
 * Keep an insertion point out of a section file's leading `# Heading`.
 *
 * A comment authored at the very start of a Word section maps to `pos === 0`,
 * but inserting `{>>...<<}` at column 0 of a markdown file that begins with
 * `# Heading` puts it before the heading marker, so the line is no longer
 * parsed as a heading. If `pos` falls anywhere before the first body
 * paragraph — before or inside the heading line, at the end of that line, or
 * on the blank lines that follow it — the start of the first body paragraph
 * is returned instead.
 *
 * `pos` is returned unchanged when:
 *   - the text does not begin (ignoring blank lines) with an ATX heading, so a
 *     position in body text that merely precedes a later sub-heading is not
 *     dragged past that sub-heading; or
 *   - `pos` is already at or beyond the first body paragraph.
 *
 * NOTE(review): anything other than blank lines before the heading (e.g. YAML
 * front matter) means no leading heading is recognised — confirm section
 * files never start that way.
 *
 * @param text Markdown content of one section file.
 * @param pos  Proposed insertion offset into `text`.
 * @returns An offset that is not inside the leading heading paragraph; equals
 *          `text.length` when the heading is the only content.
 */
function pushPastSectionHeading(text: string, pos: number): number {
  const leading = LEADING_SECTION_HEADING.exec(text);
  if (!leading) return pos;

  // The pattern is anchored at 0, so the match length is the offset just
  // past the heading text (before its line terminator).
  let bodyStart = leading[0].length;

  // Skip the heading's own line break plus any blank lines (LF or CRLF) so
  // we land on the first character of the first body paragraph.
  while (bodyStart < text.length && (text[bodyStart] === '\n' || text[bodyStart] === '\r')) {
    bodyStart++;
  }

  return pos < bodyStart ? bodyStart : pos;
}
271
+
178
272
  /**
179
273
  * Insert comments into markdown text based on anchor texts with context
180
274
  */
@@ -184,9 +278,10 @@ export function insertCommentsIntoMarkdown(
184
278
  anchors: Map<string, CommentAnchorData | string>,
185
279
  options: InsertCommentsOptions = {}
186
280
  ): string {
187
- const { quiet = false, sectionBoundary = null, wrapAnchor = true } = options;
281
+ const { quiet = false, sectionBoundary = null, wrapAnchor = true, outStats } = options;
188
282
  let result = markdown;
189
283
  let unmatchedCount = 0;
284
+ let placedCount = 0;
190
285
  const duplicateWarnings: string[] = [];
191
286
  const usedPositions = new Set<number>(); // For tie-breaking: track used positions
192
287
 
@@ -222,6 +317,18 @@ export function insertCommentsIntoMarkdown(
222
317
  const proportion = Math.min(relativePos / sectionLength, 1.0);
223
318
  const markdownPos = Math.floor(proportion * result.length);
224
319
 
320
+ // For empty anchors, before/after context is the only signal that
321
+ // pinpoints the original split — without it, proportional placement
322
+ // can land mid-word or split unrelated phrases. Try context match
323
+ // first; only fall through to proportional when context is gone.
324
+ if ((!anchor || isEmpty) && (before || after)) {
325
+ const ctx = findAnchorInText('', result, before, after);
326
+ if (ctx.occurrences.length > 0) {
327
+ const pos = pushPastSectionHeading(result, ctx.occurrences[0]);
328
+ return { ...c, pos, anchorText: null, isEmpty: true, strategy: `ctx:${ctx.strategy}` };
329
+ }
330
+ }
331
+
225
332
  let insertPos = markdownPos;
226
333
 
227
334
  // Look for nearby word boundary
@@ -231,26 +338,52 @@ export function insertCommentsIntoMarkdown(
231
338
  insertPos = Math.max(0, markdownPos - 25) + spaceIdx;
232
339
  }
233
340
 
234
- // If we have anchor text, try to find it near this position
341
+ // If we have anchor text, try to find it near this position.
342
+ // Collect ALL occurrences in the local window, then disambiguate
343
+ // via before/after context + usedPositions — otherwise two
344
+ // comments sharing the same anchor word would both collide at
345
+ // the leftmost match. The context-scoring helper handles the
346
+ // "repeated formulaic prose" case using docx-side context, which
347
+ // is a stronger signal than raw distance to the proportional
348
+ // insertPos (insertPos is itself an approximation).
235
349
  if (anchor && !isEmpty) {
236
350
  const searchStart = Math.max(0, insertPos - 200);
237
351
  const searchEnd = Math.min(result.length, insertPos + 200);
238
352
  const localSearch = result.slice(searchStart, searchEnd).toLowerCase();
239
353
  const anchorLower = anchor.toLowerCase();
240
- const localIdx = localSearch.indexOf(anchorLower);
241
- if (localIdx !== -1) {
242
- return { ...c, pos: searchStart + localIdx, anchorText: anchor, anchorEnd: searchStart + localIdx + anchor.length, strategy: 'position+text' };
354
+
355
+ const localHits = findAllOccurrences(localSearch, anchorLower).map(i => searchStart + i);
356
+ if (localHits.length > 0) {
357
+ const chosen = pickBestOccurrence(localHits, result, before, after, anchor.length, usedPositions);
358
+ if (chosen >= 0) {
359
+ if (localHits.length > 1) {
360
+ duplicateWarnings.push(`"${anchor.slice(0, 40)}${anchor.length > 40 ? '...' : ''}" appears ${localHits.length} times in section window`);
361
+ }
362
+ usedPositions.add(chosen);
363
+ return { ...c, pos: chosen, anchorText: anchor, anchorEnd: chosen + anchor.length, strategy: 'position+text' };
364
+ }
243
365
  }
366
+
244
367
  // Try first few words
245
368
  const words = anchor.split(/\s+/).slice(0, 4).join(' ').toLowerCase();
246
369
  if (words.length >= 10) {
247
- const partialIdx = localSearch.indexOf(words);
248
- if (partialIdx !== -1) {
249
- return { ...c, pos: searchStart + partialIdx, anchorText: words, anchorEnd: searchStart + partialIdx + words.length, strategy: 'position+partial' };
370
+ const partialHits = findAllOccurrences(localSearch, words).map(i => searchStart + i);
371
+ if (partialHits.length > 0) {
372
+ const chosen = pickBestOccurrence(partialHits, result, before, after, words.length, usedPositions);
373
+ if (chosen >= 0) {
374
+ usedPositions.add(chosen);
375
+ return { ...c, pos: chosen, anchorText: words, anchorEnd: chosen + words.length, strategy: 'position+partial' };
376
+ }
250
377
  }
251
378
  }
252
379
  }
253
380
 
381
+ // A docPosition at the very start of a section maps to markdownPos=0,
382
+ // which sits before the file's `# Heading` line and gets rendered in
383
+ // the previous section. Push past the heading line so the comment
384
+ // stays inside the section it was authored in.
385
+ insertPos = pushPastSectionHeading(result, insertPos);
386
+
254
387
  return { ...c, pos: insertPos, anchorText: null, strategy: 'position-only' };
255
388
  }
256
389
  }
@@ -290,46 +423,14 @@ export function insertCommentsIntoMarkdown(
290
423
  duplicateWarnings.push(`"${matchedAnchor.slice(0, 40)}${matchedAnchor.length > 40 ? '...' : ''}" appears ${occurrences.length} times`);
291
424
  }
292
425
 
293
- let bestIdx = occurrences.find(p => !usedPositions.has(p)) ?? occurrences[0];
294
- let bestScore = -1;
295
-
296
- for (const pos of occurrences) {
297
- if (usedPositions.has(pos)) continue;
298
-
299
- let score = 0;
300
-
301
- if (before) {
302
- const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
303
- const beforeLower = before.toLowerCase();
304
- const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
305
- for (const word of beforeWords) {
306
- if (contextBefore.includes(word)) score += 2;
307
- }
308
- if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
309
- }
310
-
311
- if (after) {
312
- const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
313
- const afterLower = after.toLowerCase();
314
- const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
315
- for (const word of afterWords) {
316
- if (contextAfter.includes(word)) score += 2;
317
- }
318
- if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
319
- }
320
-
321
- if (score > bestScore || (score === bestScore && pos < bestIdx)) {
322
- bestScore = score;
323
- bestIdx = pos;
324
- }
325
- }
326
-
327
- usedPositions.add(bestIdx);
426
+ const bestIdx = pickBestOccurrence(occurrences, result, before, after, anchorLen, usedPositions);
427
+ const finalIdx = bestIdx >= 0 ? bestIdx : occurrences[0];
428
+ usedPositions.add(finalIdx);
328
429
 
329
430
  if (matchedAnchor) {
330
- return { ...c, pos: bestIdx, anchorText: matchedAnchor, anchorEnd: bestIdx + anchorLen };
431
+ return { ...c, pos: finalIdx, anchorText: matchedAnchor, anchorEnd: finalIdx + anchorLen };
331
432
  } else {
332
- return { ...c, pos: bestIdx, anchorText: null };
433
+ return { ...c, pos: finalIdx, anchorText: null };
333
434
  }
334
435
  });
335
436
 
@@ -352,8 +453,21 @@ export function insertCommentsIntoMarkdown(
352
453
  // original Word comment range. Without it, the comment block is inserted
353
454
  // adjacent to the anchor and prose stays untouched — required for
354
455
  // comments-only sync where multiple comments may share one anchor.
456
+ // Skip insertion when an identical comment already lives near the target.
457
+ // Re-running sync against the same docx would otherwise stack duplicate
458
+ // CriticMarkup blocks (`{>>R1: ...<<}{>>R1: ...<<}...`) on each invocation.
459
+ // A 200-char window catches both wrapped (`{>>...<<}[anchor]{.mark}`) and
460
+ // bare (`{>>...<<}anchor`) forms while ignoring incidental matches farther
461
+ // away.
462
+ let dedupedCount = 0;
355
463
  for (const c of matched) {
356
464
  const comment = `{>>${c.author}: ${c.text}<<}`;
465
+ const windowStart = Math.max(0, c.pos - 200);
466
+ const windowEnd = Math.min(result.length, c.pos + 200);
467
+ if (result.slice(windowStart, windowEnd).includes(comment)) {
468
+ dedupedCount++;
469
+ continue;
470
+ }
357
471
  if (wrapAnchor && c.anchorText && c.anchorEnd) {
358
472
  const before = result.slice(0, c.pos);
359
473
  const anchor = result.slice(c.pos, c.anchorEnd);
@@ -366,6 +480,13 @@ export function insertCommentsIntoMarkdown(
366
480
  // verify that --comments-only didn't touch the original).
367
481
  result = result.slice(0, c.pos) + comment + result.slice(c.pos);
368
482
  }
483
+ placedCount++;
484
+ }
485
+
486
+ if (outStats) {
487
+ outStats.placed = placedCount;
488
+ outStats.deduped = dedupedCount;
489
+ outStats.unmatched = unmatchedCount;
369
490
  }
370
491
 
371
492
  // Log warnings unless quiet mode
@@ -373,6 +494,9 @@ export function insertCommentsIntoMarkdown(
373
494
  if (unmatchedCount > 0) {
374
495
  console.warn(`Warning: ${unmatchedCount} comment(s) could not be matched to anchor text`);
375
496
  }
497
+ if (dedupedCount > 0) {
498
+ console.warn(`Note: ${dedupedCount} comment(s) already present at anchor — skipped to avoid duplication`);
499
+ }
376
500
  if (duplicateWarnings.length > 0) {
377
501
  console.warn(`Warning: Duplicate anchor text found (using context & tie-breaks for placement):`);
378
502
  for (const w of duplicateWarnings) {
package/lib/response.ts CHANGED
@@ -46,7 +46,7 @@ export function parseCommentsWithReplies(text: string, file: string = ''): Comme
46
46
  if (matches.length === 0) continue;
47
47
 
48
48
  // Get context (surrounding text without comments)
49
- const contextLine = line.replace(/\{>>[^<]+<<\}/g, '').trim();
49
+ const contextLine = line.replace(/\{>>[\s\S]+?<<\}/g, '').trim();
50
50
  const context = contextLine.slice(0, 100) + (contextLine.length > 100 ? '...' : '');
51
51
 
52
52
  // First match is the original comment, rest are replies