docrev 0.9.6 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +41 -0
  2. package/dev_notes/bug_repro_comment_parser.md +71 -0
  3. package/dev_notes/stress2/adversarial.docx +0 -0
  4. package/dev_notes/stress2/build_adversarial.ts +186 -0
  5. package/dev_notes/stress2/drift_matcher.ts +62 -0
  6. package/dev_notes/stress2/probe_anchors.ts +35 -0
  7. package/dev_notes/stress2/project/adversarial.docx +0 -0
  8. package/dev_notes/stress2/project/discussion.before.md +3 -0
  9. package/dev_notes/stress2/project/discussion.md +3 -0
  10. package/dev_notes/stress2/project/methods.before.md +20 -0
  11. package/dev_notes/stress2/project/methods.md +20 -0
  12. package/dev_notes/stress2/project/rev.yaml +5 -0
  13. package/dev_notes/stress2/project/sections.yaml +4 -0
  14. package/dev_notes/stress2/sections.yaml +5 -0
  15. package/dev_notes/stress2/trace_placement.ts +50 -0
  16. package/dev_notes/stresstest_boundaries.ts +27 -0
  17. package/dev_notes/stresstest_drift_apply.ts +43 -0
  18. package/dev_notes/stresstest_drift_compare.ts +43 -0
  19. package/dev_notes/stresstest_drift_v2.ts +54 -0
  20. package/dev_notes/stresstest_inspect.ts +54 -0
  21. package/dev_notes/stresstest_pstyle.ts +55 -0
  22. package/dev_notes/stresstest_section_debug.ts +23 -0
  23. package/dev_notes/stresstest_split.ts +70 -0
  24. package/dev_notes/stresstest_trace.ts +19 -0
  25. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
  26. package/dist/lib/anchor-match.d.ts +51 -0
  27. package/dist/lib/anchor-match.d.ts.map +1 -0
  28. package/dist/lib/anchor-match.js +227 -0
  29. package/dist/lib/anchor-match.js.map +1 -0
  30. package/dist/lib/annotations.d.ts.map +1 -1
  31. package/dist/lib/annotations.js +24 -11
  32. package/dist/lib/annotations.js.map +1 -1
  33. package/dist/lib/commands/index.d.ts +2 -1
  34. package/dist/lib/commands/index.d.ts.map +1 -1
  35. package/dist/lib/commands/index.js +3 -1
  36. package/dist/lib/commands/index.js.map +1 -1
  37. package/dist/lib/commands/quality.js +1 -1
  38. package/dist/lib/commands/quality.js.map +1 -1
  39. package/dist/lib/commands/section-boundaries.d.ts +22 -0
  40. package/dist/lib/commands/section-boundaries.d.ts.map +1 -0
  41. package/dist/lib/commands/section-boundaries.js +63 -0
  42. package/dist/lib/commands/section-boundaries.js.map +1 -0
  43. package/dist/lib/commands/sync.d.ts.map +1 -1
  44. package/dist/lib/commands/sync.js +141 -0
  45. package/dist/lib/commands/sync.js.map +1 -1
  46. package/dist/lib/commands/verify-anchors.d.ts +17 -0
  47. package/dist/lib/commands/verify-anchors.d.ts.map +1 -0
  48. package/dist/lib/commands/verify-anchors.js +226 -0
  49. package/dist/lib/commands/verify-anchors.js.map +1 -0
  50. package/dist/lib/comment-realign.js +2 -2
  51. package/dist/lib/comment-realign.js.map +1 -1
  52. package/dist/lib/import.d.ts +26 -8
  53. package/dist/lib/import.d.ts.map +1 -1
  54. package/dist/lib/import.js +166 -187
  55. package/dist/lib/import.js.map +1 -1
  56. package/dist/lib/response.js +1 -1
  57. package/dist/lib/response.js.map +1 -1
  58. package/dist/lib/word-extraction.d.ts +23 -0
  59. package/dist/lib/word-extraction.d.ts.map +1 -1
  60. package/dist/lib/word-extraction.js +79 -0
  61. package/dist/lib/word-extraction.js.map +1 -1
  62. package/dist/lib/wordcomments.d.ts.map +1 -1
  63. package/dist/lib/wordcomments.js +165 -73
  64. package/dist/lib/wordcomments.js.map +1 -1
  65. package/lib/anchor-match.ts +276 -0
  66. package/lib/annotations.ts +25 -11
  67. package/lib/commands/index.ts +3 -0
  68. package/lib/commands/quality.ts +1 -1
  69. package/lib/commands/section-boundaries.ts +82 -0
  70. package/lib/commands/sync.ts +170 -0
  71. package/lib/commands/verify-anchors.ts +272 -0
  72. package/lib/comment-realign.ts +2 -2
  73. package/lib/import.ts +197 -209
  74. package/lib/response.ts +1 -1
  75. package/lib/word-extraction.ts +93 -0
  76. package/lib/wordcomments.ts +180 -82
  77. package/package.json +1 -1
  78. package/skill/REFERENCE.md +29 -2
  79. package/skill/SKILL.md +12 -2
  80. package/dist/package.json +0 -137
@@ -0,0 +1,276 @@
1
+ /**
2
+ * Anchor matching primitives shared between sync (insertion) and
3
+ * verify-anchors (drift reporting). The functions are pure: given an
4
+ * anchor string and surrounding context, locate candidate positions in
5
+ * a target text using progressively looser strategies.
6
+ */
7
+
8
/**
 * Identifies which matching strategy produced an `AnchorSearchResult`.
 *
 * - `direct`: case-insensitive substring match of the whole anchor.
 * - `normalized`: match after collapsing whitespace runs to single spaces.
 * - `stripped`: match against the text with CriticMarkup removed.
 * - `partial-start`: only the first few words of the anchor matched.
 * - `partial-start-stripped`: as `partial-start`, against stripped text.
 * - `context-both` / `context-before` / `context-after`: the anchor itself
 *   was not found (or was empty); the position comes from surrounding context.
 * - `split-match`: a fragment of the anchor (split on a separator) matched.
 * - `empty-anchor`: the anchor was empty and context located nothing.
 * - `failed`: no strategy produced a candidate.
 */
export type AnchorStrategy =
  | 'direct'
  | 'normalized'
  | 'stripped'
  | 'partial-start'
  | 'partial-start-stripped'
  | 'context-both'
  | 'context-before'
  | 'context-after'
  | 'split-match'
  | 'empty-anchor'
  | 'failed';

/** Outcome of an anchor search: candidate positions plus how they were found. */
export interface AnchorSearchResult {
  /** Candidate start offsets, in ascending order; empty when nothing matched. */
  occurrences: number[];
  /**
   * The text that actually matched: the full anchor, a leading prefix of it,
   * or one fragment of it. `null` for context-only and unmatched results.
   */
  matchedAnchor: string | null;
  /** Strategy that produced `occurrences`. */
  strategy: AnchorStrategy;
  /**
   * Set to `true` when the match was found in the CriticMarkup-stripped text;
   * `occurrences` then index into `stripCriticMarkup(text)`, not `text`.
   */
  stripped?: boolean;
}
27
+
28
/**
 * Strip CriticMarkup so the matcher sees plain prose instead of
 * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
 * underneath previously imported track changes.
 *
 * Every pattern uses a lazy `[\s\S]*?` body rather than a negated
 * character class, so content that contains the delimiter character
 * itself (a hyphenated word inside a deletion, a `+` inside an insertion,
 * a `~` inside a substitution) is still recognised and stripped.
 *
 * @param text Markdown that may contain CriticMarkup annotations.
 * @returns The text as it would read with all changes accepted and
 *   comments/highlight wrappers removed.
 */
export function stripCriticMarkup(text: string): string {
  return text
    .replace(/\{\+\+([\s\S]*?)\+\+\}/g, '$1') // insertions: keep new text
    .replace(/\{--([\s\S]*?)--\}/g, '') // deletions: remove old text
    .replace(/\{~~([\s\S]*?)~>([\s\S]*?)~~\}/g, '$2') // substitutions: keep new text
    .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (comment text may contain '<')
    .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
}
41
+
42
/**
 * Score how well the docx-side `before` / `after` context matches the
 * surroundings of a candidate position in the target text. Used by
 * `verify-anchors` to tell apart "multiple hits but context picks one
 * cleanly" (sync will place it correctly) from "multiple hits, context
 * doesn't help" (truly ambiguous, needs human placement).
 *
 * Scoring, per side: +2 for every context word longer than three
 * characters that appears in the window next to the candidate, and +5
 * when the 30 characters of context nearest the anchor appear verbatim.
 * Comparison is case-insensitive.
 *
 * @param pos Candidate start offset of the anchor in `text`.
 * @param text Target text being searched.
 * @param before Context preceding the anchor in the docx ('' for none).
 * @param after Context following the anchor in the docx ('' for none).
 * @param anchorLen Length of the matched anchor at `pos`.
 * @returns The accumulated score; 0 if no context was provided.
 */
export function scoreContextAt(
  pos: number,
  text: string,
  before: string,
  after: string,
  anchorLen: number,
): number {
  let score = 0;
  if (before) {
    // Window is the context length plus 20 characters of slack.
    const contextBefore = text.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
    const beforeLower = before.toLowerCase();
    const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
    for (const word of beforeWords) {
      if (contextBefore.includes(word)) score += 2;
    }
    if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
  }
  if (after) {
    const contextAfter = text.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
    const afterLower = after.toLowerCase();
    const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
    for (const word of afterWords) {
      if (contextAfter.includes(word)) score += 2;
    }
    if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
  }
  return score;
}

/**
 * Return every starting index where `needle` occurs in `haystack`.
 * Overlapping occurrences are all reported. Empty needles return no
 * occurrences (empty matches are not useful for anchor placement).
 *
 * @param haystack Text to search.
 * @param needle Substring to look for (matched case-sensitively).
 * @returns Ascending list of start offsets.
 */
export function findAllOccurrences(haystack: string, needle: string): number[] {
  if (!needle) return [];
  const occurrences: number[] = [];
  let idx = 0;
  while ((idx = haystack.indexOf(needle, idx)) !== -1) {
    occurrences.push(idx);
    idx += 1; // advance by one so overlapping matches are found too
  }
  return occurrences;
}
95
+
96
/**
 * Translate an offset in the whitespace-collapsed form of `text`
 * (`text.replace(/\s+/g, ' ')`) back to the matching offset in `text`.
 */
function toOriginalOffset(text: string, normalizedOffset: number): number {
  const whitespace = /\s/;
  let original = 0;
  let normalized = 0;
  while (original < text.length && normalized < normalizedOffset) {
    if (whitespace.test(text.charAt(original))) {
      // A whole whitespace run collapses to one normalized character.
      while (original < text.length && whitespace.test(text.charAt(original))) original++;
    } else {
      original++;
    }
    normalized++;
  }
  return original;
}

/**
 * Locate a position purely from surrounding context: first both sides
 * (after-context within 500 chars of the before-context), then the last
 * occurrence of the before-context tail, then the first occurrence of
 * the after-context head. Returns `null` when nothing is found.
 */
function locateByContext(textLower: string, before: string, after: string): AnchorSearchResult | null {
  const beforeLower = (before || '').toLowerCase();
  const afterLower = (after || '').toLowerCase();

  if (before && after) {
    const beforeTail = beforeLower.slice(-50);
    const beforeIdx = textLower.indexOf(beforeTail);
    if (beforeIdx !== -1) {
      const searchStart = beforeIdx + beforeTail.length;
      const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
      if (afterIdx !== -1 && afterIdx - searchStart < 500) {
        return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
      }
    }
  }

  if (before) {
    const beforeTail = beforeLower.slice(-30);
    const beforeIdx = textLower.lastIndexOf(beforeTail);
    if (beforeIdx !== -1) {
      return {
        occurrences: [beforeIdx + beforeTail.length],
        matchedAnchor: null,
        strategy: 'context-before',
      };
    }
  }

  if (after) {
    const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
    if (afterIdx !== -1) {
      return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
    }
  }

  return null;
}

/**
 * Find candidate positions for `anchor` in `text`, falling back through
 * progressively looser strategies (whitespace normalization, stripped
 * CriticMarkup, partial-prefix, surrounding context, word splitting).
 * All comparisons are case-insensitive.
 *
 * The returned `strategy` lets callers distinguish a clean direct hit
 * from a fuzzy approximation — useful for drift reporting.
 *
 * Offsets refer to `text`, except when the result has `stripped: true`,
 * in which case they refer to `stripCriticMarkup(text)`. A `normalized`
 * hit is mapped back from the whitespace-collapsed text to an offset in
 * `text`, so it can be used like any other position.
 *
 * @param anchor Text the comment was attached to (may be empty).
 * @param text Target text to search.
 * @param before Context preceding the anchor in the source document.
 * @param after Context following the anchor in the source document.
 * @returns Candidate offsets, the text that matched, and the strategy used.
 */
export function findAnchorInText(
  anchor: string,
  text: string,
  before: string = '',
  after: string = ''
): AnchorSearchResult {
  const textLower = text.toLowerCase();

  // Empty anchor: skip directly to context-based matching
  if (!anchor || anchor.trim().length === 0) {
    const byContext = locateByContext(textLower, before, after);
    if (byContext) return byContext;
    return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
  }

  const anchorLower = anchor.toLowerCase();

  // Strategy 1: direct match
  let occurrences = findAllOccurrences(textLower, anchorLower);
  if (occurrences.length > 0) {
    return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
  }

  // Strategy 2: normalized whitespace (first hit only)
  const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
  const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
  const normalizedIdx = normalizedText.indexOf(normalizedAnchor);
  if (normalizedIdx !== -1) {
    return {
      occurrences: [toOriginalOffset(text, normalizedIdx)],
      matchedAnchor: anchor,
      strategy: 'normalized',
    };
  }

  // Strategy 3: match in stripped CriticMarkup version
  const strippedLower = stripCriticMarkup(text).toLowerCase();
  occurrences = findAllOccurrences(strippedLower, anchorLower);
  if (occurrences.length > 0) {
    return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
  }

  // Strategy 4: first N words of anchor (long anchors), longest prefix first
  const words = anchor.split(/\s+/);
  if (words.length > 3) {
    for (let n = Math.min(6, words.length); n >= 3; n--) {
      const prefix = words.slice(0, n).join(' ');
      const prefixLower = prefix.toLowerCase();
      // Very short prefixes match too much prose to be meaningful.
      if (prefixLower.length < 15) continue;

      occurrences = findAllOccurrences(textLower, prefixLower);
      if (occurrences.length > 0) {
        return { occurrences, matchedAnchor: prefix, strategy: 'partial-start' };
      }
      occurrences = findAllOccurrences(strippedLower, prefixLower);
      if (occurrences.length > 0) {
        return {
          occurrences,
          matchedAnchor: prefix,
          strategy: 'partial-start-stripped',
          stripped: true,
        };
      }
    }
  }

  // Strategy 5: context (before/after) only
  const byContext = locateByContext(textLower, before, after);
  if (byContext) return byContext;

  // Strategy 6: split anchor on transition characters
  const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
  for (const sep of splitPatterns) {
    if (!anchor.includes(sep)) continue;
    const parts = anchor.split(sep).filter(p => p.length >= 4);
    for (const part of parts) {
      occurrences = findAllOccurrences(textLower, part.toLowerCase());
      // Accept only fragments rare enough to be a useful locator.
      if (occurrences.length > 0 && occurrences.length < 5) {
        return { occurrences, matchedAnchor: part, strategy: 'split-match' };
      }
    }
  }

  return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
}
249
+
250
/** Coarse match quality reported by `verify-anchors` for each comment. */
export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';

/**
 * Map a search strategy onto a coarse quality bucket: a clean hit, a
 * fuzzy/drifted hit, a context-only placement, or no hit at all.
 *
 * @param strategy Strategy reported by the anchor search.
 * @param occurrences Number of candidate positions that search returned.
 * @returns `'unmatched'` whenever there are zero occurrences, otherwise
 *   the bucket for the strategy.
 */
export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
  if (occurrences === 0) {
    return 'unmatched';
  }

  const isClean = strategy === 'direct' || strategy === 'normalized';
  if (isClean) {
    return 'clean';
  }

  const isDrift =
    strategy === 'stripped' ||
    strategy === 'partial-start' ||
    strategy === 'partial-start-stripped' ||
    strategy === 'split-match';
  if (isDrift) {
    return 'drift';
  }

  const isContextOnly =
    strategy === 'context-both' ||
    strategy === 'context-before' ||
    strategy === 'context-after';
  if (isContextOnly) {
    return 'context-only';
  }

  // 'empty-anchor', 'failed', and anything unrecognised
  return 'unmatched';
}
@@ -91,16 +91,20 @@ function isCommentFalsePositive(commentContent: string, fullText: string, positi
91
91
  // Contains markdown figure reference syntax
92
92
  if (/\{#fig:|!\[/.test(commentContent)) return true;
93
93
 
94
- // Contains URL patterns (likely a link, not a comment)
95
- if (/https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;
94
+ // Real comments typically have "Author:" at start. Accept hyphens, apostrophes,
95
+ // periods, and Unicode letters so names like "Jens-Christian Svenning" or
96
+ // "Camilla T Colding-Jørgensen" don't get rejected. See gcol33/docrev#1.
97
+ const hasAuthorPrefix = /^[\p{L}][\p{L}\s\-'.]{0,30}:\s/u.test(commentContent.trim());
98
+ const hasResolvedMark = /^[✓✔]\s/.test(commentContent.trim());
99
+
100
+ // Contains URL patterns (likely a link, not a comment) — only filter when
101
+ // there is no real author prefix, since reviewers legitimately cite URLs/DOIs.
102
+ if (!hasAuthorPrefix && /https?:\/\/|www\./i.test(commentContent) && commentContent.length < 150) return true;
96
103
 
97
104
  // Looks like code (contains programming patterns)
98
105
  if (/function\s*\(|=>|import\s+|export\s+|const\s+|let\s+|var\s+/.test(commentContent)) return true;
99
106
 
100
107
  // Very long without clear author pattern (likely caption, not comment)
101
- // Real comments typically have "Author:" at start and are shorter
102
- const hasAuthorPrefix = /^[A-Za-z][A-Za-z\s]{0,20}:\s/.test(commentContent.trim());
103
- const hasResolvedMark = /^[✓✔]\s/.test(commentContent.trim());
104
108
  if (!hasAuthorPrefix && !hasResolvedMark && commentContent.length > MAX_COMMENT_CONTENT_LENGTH) return true;
105
109
 
106
110
  // Looks like a figure caption (starts with "Fig" or contains typical caption words)
@@ -281,8 +285,16 @@ export function stripAnnotations(text: string, options: StripOptions = {}): stri
281
285
  text = text.replace(PATTERNS.comment, '');
282
286
  }
283
287
 
284
- // Strip pandoc highlight spans: [text]{.mark} → text
285
- text = text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');
288
+ // Strip pandoc highlight spans: [text]{.mark} → text.
289
+ // When `keepComments=true`, preserve `[anchor]{.mark}` that is the
290
+ // anchor of a kept `{>>...<<}` comment. The dual-build flow runs
291
+ // stripAnnotations() before prepareMarkdownWithMarkers(), and stripping
292
+ // the anchor span here would leave the marker generator with no anchor
293
+ // text — collapsing every multi-word anchor to a single fallback word
294
+ // in the rebuilt docx.
295
+ text = keepComments
296
+ ? text.replace(/(?<!<<\}\s{0,3})\[([^\]]*)\]\{\.mark\}/g, '$1')
297
+ : text.replace(/\[([^\]]*)\]\{\.mark\}/g, '$1');
286
298
 
287
299
  // Clean up partial/orphaned markers within the loop
288
300
  // This handles cases where nested annotations leave behind fragments
@@ -323,9 +335,11 @@ export function stripAnnotations(text: string, options: StripOptions = {}): stri
323
335
  text = text.replace(/~>/g, '');
324
336
 
325
337
  // Remove orphan [ from stripped {.mark} spans where the closing ]{.mark}
326
- // was inside a comment. A [ is orphan if no matching ] follows before
327
- // the next [ or end of line.
328
- text = text.replace(/\[(?![^\[\]]*\])/g, '');
338
+ // was inside a comment. A [ is orphan if no `]` follows before end of line.
339
+ // We deliberately allow other `[` between the candidate and the matching `]`
340
+ // otherwise nested forms like `[[0..9]]{.mark}` would have their outer
341
+ // `[` stripped because the lookahead saw the inner `[` as a barrier.
342
+ text = text.replace(/\[(?![^\]\n]*\])/g, '');
329
343
 
330
344
  return text;
331
345
  }
@@ -429,7 +443,7 @@ export function applyDecision(text: string, annotation: Annotation, accept: bool
429
443
 
430
444
  // Extract any comments embedded in the annotation content
431
445
  // These should be preserved when accepting deletions or rejecting insertions
432
- const commentPattern = /\{>>[^<]*<<\}/g;
446
+ const commentPattern = /\{>>[\s\S]*?<<\}/g;
433
447
  const embeddedComments = (annotation.match || '').match(commentPattern) || [];
434
448
 
435
449
  switch (annotation.type) {
@@ -11,6 +11,7 @@ import { register as registerCommentCommands } from './comments.js';
11
11
  import { register as registerInitCommands } from './init.js';
12
12
  import { register as registerSectionCommands } from './sections.js';
13
13
  import { register as registerSyncCommands } from './sync.js';
14
+ import { register as registerVerifyAnchorsCommands } from './verify-anchors.js';
14
15
  import { register as registerMergeResolveCommands } from './merge-resolve.js';
15
16
  import { register as registerBuildCommands } from './build.js';
16
17
  import { register as registerResponseCommands } from './response.js';
@@ -31,6 +32,7 @@ export {
31
32
  registerInitCommands,
32
33
  registerSectionCommands,
33
34
  registerSyncCommands,
35
+ registerVerifyAnchorsCommands,
34
36
  registerMergeResolveCommands,
35
37
  registerBuildCommands,
36
38
  registerResponseCommands,
@@ -68,6 +70,7 @@ export function registerAllCommands(program: Command, pkg?: PackageJson): void {
68
70
  registerInitCommands(program);
69
71
  registerSectionCommands(program);
70
72
  registerSyncCommands(program);
73
+ registerVerifyAnchorsCommands(program);
71
74
  registerMergeResolveCommands(program);
72
75
  registerBuildCommands(program, pkg || {});
73
76
  registerResponseCommands(program);
@@ -427,7 +427,7 @@ export function register(program: Command): void {
427
427
  }
428
428
 
429
429
  // Check for unresolved comments
430
- const unresolvedComments = (content.match(/\{>>[^<]*<<\}/g) || [])
430
+ const unresolvedComments = (content.match(/\{>>[\s\S]*?<<\}/g) || [])
431
431
  .filter(c => !c.includes('[RESOLVED]'));
432
432
  if (unresolvedComments.length > 0) {
433
433
  lintWarnings.push({ file, message: `${unresolvedComments.length} unresolved comment(s)` });
@@ -0,0 +1,82 @@
1
+ /**
2
+ * Compute section boundaries in a DOCX from its real heading paragraphs.
3
+ *
4
+ * Given the configured `sections.yaml` and the headings extracted via
5
+ * `extractHeadings()`, return one boundary per section file with text
6
+ * positions in the same coordinate system as `CommentAnchorData.docPosition`.
7
+ *
8
+ * Matching is by heading text (primary header + aliases, case-insensitive).
9
+ * This replaces the older keyword-search-in-body-text approach which would
10
+ * pick up section names that happen to appear inside prose ("results across
11
+ * countries") or in structured-abstract labels where paragraph boundaries
12
+ * are lost in concatenation.
13
+ */
14
+
15
+ import type { DocxHeading } from '../word-extraction.js';
16
+ import type { SectionConfig } from '../types.js';
17
+
18
/** Half-open text range `[start, end)` of one section file inside the docx. */
export interface SectionBoundary {
  file: string;
  start: number;
  end: number;
}

/**
 * Derive one boundary per configured section from the docx headings.
 *
 * A section starts at the first heading whose text equals its header or
 * one of its aliases (case-insensitive, trimmed). Level-1 headings are
 * preferred when any heading carries level information; otherwise, or
 * when no level-1 heading matches, every heading is considered.
 *
 * @param sections Section configuration keyed by file name.
 * @param headings Headings extracted from the docx.
 * @param docLength Optional document length used to cap the last section.
 * @returns Boundaries sorted by start; sections with no matching heading
 *   are omitted.
 */
export function computeSectionBoundaries(
  sections: Record<string, SectionConfig>,
  headings: DocxHeading[],
  docLength?: number,
): SectionBoundary[] {
  // Level 0 means the heading style could not be parsed.
  const levelsKnown = headings.some(heading => heading.level > 0);
  const preferred = levelsKnown ? headings.filter(heading => heading.level === 1) : headings;

  // Position of the first heading in `pool` whose text is one of `names`, or -1.
  const firstPosition = (pool: DocxHeading[], names: string[]): number => {
    const hit = pool.find(heading => names.includes(heading.text.toLowerCase().trim()));
    return hit ? hit.docPosition : -1;
  };

  const boundaries: SectionBoundary[] = [];
  for (const [file, cfg] of Object.entries(sections)) {
    const names = [cfg.header, ...(cfg.aliases || [])]
      .filter(Boolean)
      .map(label => label.toLowerCase().trim());

    let start = firstPosition(preferred, names);
    if (start < 0 && levelsKnown) {
      // No level-1 hit: accept a match at any heading level.
      start = firstPosition(headings, names);
    }

    if (start >= 0) {
      boundaries.push({ file, start, end: Number.MAX_SAFE_INTEGER });
    }
  }

  // Order by position, then close each section at the start of the next.
  boundaries.sort((left, right) => left.start - right.start);
  boundaries.forEach((boundary, index) => {
    const next = boundaries[index + 1];
    if (next) boundary.end = next.start;
  });

  // Cap the open-ended last section at the document length when known, so
  // its length stays meaningful for proportional-position calculations.
  if (boundaries.length > 0 && docLength !== undefined) {
    const tail = boundaries[boundaries.length - 1];
    if (tail.end > docLength) tail.end = docLength;
  }

  return boundaries;
}
@@ -35,6 +35,10 @@ interface SyncOptions {
35
35
  diff?: boolean;
36
36
  force?: boolean;
37
37
  dryRun?: boolean;
38
+ /** Commander maps `--comments-only` (a positive flag) cleanly. `--no-overwrite`
39
+ * conflicts with the existing `overwrite` semantics in `--force`-style flags
40
+ * and Commander's `--no-X` convention assigns `options.x === false`. */
41
+ commentsOnly?: boolean;
38
42
  }
39
43
 
40
44
  /**
@@ -57,6 +61,7 @@ export function register(program: Command): void {
57
61
  .option('--no-diff', 'Skip showing diff preview')
58
62
  .option('--force', 'Overwrite files without conflict warning')
59
63
  .option('--dry-run', 'Preview without writing files')
64
+ .option('--comments-only', 'Insert comments at fuzzy-matched anchors only; never modify existing prose or apply track changes (use when markdown was revised after the docx was sent for review)')
60
65
  .action(async (docx: string | undefined, sections: string[], options: SyncOptions) => {
61
66
  // Auto-detect most recent docx or pdf if not provided
62
67
  if (!docx) {
@@ -137,6 +142,14 @@ export function register(program: Command): void {
137
142
  process.exit(1);
138
143
  }
139
144
 
145
+ // --comments-only: import comments only, never modify existing prose.
146
+ // Use this when the markdown has been revised since the docx was sent
147
+ // out — track changes from a stale draft would clobber newer edits.
148
+ if (options.commentsOnly) {
149
+ await syncCommentsOnly(docx, sections, options, configPath);
150
+ return;
151
+ }
152
+
140
153
  // Check pandoc availability upfront and warn
141
154
  const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
142
155
  if (!hasPandoc()) {
@@ -534,3 +547,160 @@ export function register(program: Command): void {
534
547
  }
535
548
  });
536
549
  }
550
+
551
/**
 * `sync --comments-only`: import only Word comments at fuzzy-matched anchors.
 *
 * Skips the Word→Markdown diff entirely (no track changes, no pandoc, no
 * prose modifications). Useful when the markdown has been edited after the
 * docx was sent for review — applying track changes from a stale draft
 * would overwrite newer edits.
 *
 * Exits the process with status 1 when the docx cannot be read, when no
 * configured section heading is found in it, or when the section filter
 * matches nothing.
 *
 * @param docx Path to the reviewed Word document.
 * @param sectionFilter Optional section names from the CLI; a section is
 *   selected when its file name (without `.md`) equals or contains a name.
 * @param options Sync options; `dir` and `dryRun` are read here.
 * @param configPath Path to the sections configuration file.
 */
async function syncCommentsOnly(
  docx: string,
  sectionFilter: string[] | undefined,
  options: SyncOptions,
  configPath: string,
): Promise<void> {
  const config = loadConfig(configPath);
  const { extractWordComments, extractCommentAnchors, extractHeadings, insertCommentsIntoMarkdown } = await import('../import.js');
  const { computeSectionBoundaries } = await import('./section-boundaries.js');

  const spin = fmt.spinner(`Reading comments from ${path.basename(docx)}...`).start();

  let comments;
  let anchors;
  let headings;
  let fullDocText = '';
  try {
    comments = await extractWordComments(docx);
    const result = await extractCommentAnchors(docx);
    anchors = result.anchors;
    fullDocText = result.fullDocText;
    headings = await extractHeadings(docx);
    spin.stop();
  } catch (err) {
    spin.stop();
    const message = err instanceof Error ? err.message : String(err);
    console.error(fmt.status('error', message));
    process.exit(1);
  }

  console.log(fmt.header(`Comments from ${path.basename(docx)} (comments-only)`));
  console.log();

  if (comments.length === 0) {
    console.log(fmt.status('info', 'No comments found in document.'));
    return;
  }

  const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);

  if (boundaries.length === 0) {
    console.error(fmt.status('warning', 'No section headings detected in Word document.'));
    console.error(chalk.dim('  Check that headers in sections.yaml match heading paragraphs in the docx.'));
    process.exit(1);
  }

  // Apply optional section filter from CLI
  let activeBoundaries = boundaries;
  if (sectionFilter && sectionFilter.length > 0) {
    const wanted = sectionFilter.map(s => s.trim().toLowerCase());
    activeBoundaries = boundaries.filter(b => {
      const base = b.file.replace(/\.md$/i, '').toLowerCase();
      return wanted.some(name => base === name || base.includes(name));
    });
    if (activeBoundaries.length === 0) {
      console.error(fmt.status('error', `No sections matched: ${sectionFilter.join(', ')}`));
      process.exit(1);
    }
  }

  const firstBoundary = boundaries[0];
  const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];

  for (const boundary of activeBoundaries) {
    const sectionPath = path.join(options.dir, boundary.file);
    if (!fs.existsSync(sectionPath)) {
      results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
      continue;
    }

    // Compare against the document's first section, not the first *filtered*
    // one: with a section filter active, comments that precede the first
    // heading must not be pushed into an unrelated later section.
    const isFirstSection = boundary === firstBoundary;
    const sectionComments = comments.filter((c: { id: string }) => {
      const anchor = anchors.get(c.id);
      if (!anchor || anchor.docPosition === undefined) return false;
      if (anchor.docPosition >= boundary.start && anchor.docPosition < boundary.end) return true;
      // Comments before the first heading land in the first matched section
      if (isFirstSection && anchor.docPosition < firstBoundary.start) return true;
      return false;
    });

    if (sectionComments.length === 0) {
      results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
      continue;
    }

    const original = fs.readFileSync(sectionPath, 'utf-8');

    // Filled in by insertCommentsIntoMarkdown via `outStats`.
    const stats = { placed: 0, deduped: 0, unmatched: 0 };
    const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
      quiet: !process.env.DEBUG,
      sectionBoundary: { start: boundary.start, end: boundary.end },
      wrapAnchor: false,
      outStats: stats,
    });

    // Leave the file untouched on dry runs and when nothing was placed.
    if (!options.dryRun && stats.placed > 0) {
      fs.writeFileSync(sectionPath, annotated, 'utf-8');
    }
    results.push({ file: boundary.file, ...stats, skipped: false });
  }

  const tableRows = results.map(r => {
    if (r.skipped) {
      return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
    }
    return [
      chalk.bold(r.file),
      chalk.green(`${r.placed}`),
      r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
      r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
      chalk.dim('comments only'),
    ];
  });

  console.log(fmt.table(
    ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
    tableRows,
    { align: ['left', 'right', 'right', 'right', 'left'] },
  ));
  console.log();

  const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
  const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
  const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);

  const lines: string[] = [];
  lines.push(`${chalk.bold(comments.length)} comments in document`);
  if (totalPlaced > 0) {
    lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
  }
  if (totalDeduped > 0) {
    lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
  }
  if (totalUnmatched > 0) {
    lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
  }
  if (options.dryRun) {
    lines.push(chalk.yellow('Dry run — no files written'));
  } else if (totalPlaced > 0) {
    lines.push(chalk.dim('Existing prose unchanged.'));
  }
  console.log(fmt.box(lines.join('\n'), { title: 'Summary', padding: 0 }));

  if (totalUnmatched > 0) {
    console.log();
    console.log(chalk.dim('Tip: run "rev verify-anchors" to see which comments drifted.'));
  }
}