docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
@@ -1,706 +1,706 @@
1
- /**
2
- * SYNC command: Import feedback from Word/PDF back to section files
3
- *
4
- * Split from sections.ts for maintainability.
5
- */
6
-
7
- import {
8
- chalk,
9
- fs,
10
- path,
11
- fmt,
12
- findFiles,
13
- loadConfig,
14
- extractSectionsFromText,
15
- countAnnotations,
16
- buildRegistry,
17
- convertHardcodedRefs,
18
- inlineDiffPreview,
19
- } from './context.js';
20
- import type { Command } from 'commander';
21
- import * as readline from 'readline';
22
-
23
- interface ImportStats {
24
- insertions: number;
25
- deletions: number;
26
- substitutions: number;
27
- comments: number;
28
- total: number;
29
- }
30
-
31
- interface SyncOptions {
32
- config: string;
33
- dir: string;
34
- crossref?: boolean;
35
- diff?: boolean;
36
- force?: boolean;
37
- dryRun?: boolean;
38
- /** Commander maps `--comments-only` (a positive flag) cleanly. `--no-overwrite`
39
- * conflicts with the existing `overwrite` semantics in `--force`-style flags
40
- * and Commander's `--no-X` convention assigns `options.x === false`. */
41
- commentsOnly?: boolean;
42
- }
43
-
44
- /**
45
- * Register the sync command with the program
46
- */
47
- export function register(program: Command): void {
48
- // ==========================================================================
49
- // SYNC command - Import with section awareness
50
- // ==========================================================================
51
-
52
- program
53
- .command('sync')
54
- .alias('sections')
55
- .description('Sync feedback from Word/PDF back to section files')
56
- .argument('[file]', 'Word (.docx) or PDF file from reviewer (default: most recent)')
57
- .argument('[sections...]', 'Specific sections to sync (default: all)')
58
- .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
59
- .option('-d, --dir <directory>', 'Directory with section files', '.')
60
- .option('--no-crossref', 'Skip converting hardcoded figure/table refs')
61
- .option('--no-diff', 'Skip showing diff preview')
62
- .option('--force', 'Overwrite files without conflict warning')
63
- .option('--dry-run', 'Preview without writing files')
64
- .option('--comments-only', 'Insert comments at fuzzy-matched anchors only; never modify existing prose or apply track changes (use when markdown was revised after the docx was sent for review)')
65
- .action(async (docx: string | undefined, sections: string[], options: SyncOptions) => {
66
- // Auto-detect most recent docx or pdf if not provided
67
- if (!docx) {
68
- const docxFiles = findFiles('.docx');
69
- const pdfFiles = findFiles('.pdf');
70
- const allFiles = [...docxFiles, ...pdfFiles];
71
-
72
- if (allFiles.length === 0) {
73
- console.error(fmt.status('error', 'No .docx or .pdf files found in current directory.'));
74
- process.exit(1);
75
- }
76
- const sorted = allFiles
77
- .map(f => ({ name: f, mtime: fs.statSync(f).mtime }))
78
- .sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
79
- docx = sorted[0].name;
80
- console.log(fmt.status('info', `Using most recent: ${docx}`));
81
- console.log();
82
- }
83
-
84
- if (!fs.existsSync(docx)) {
85
- console.error(fmt.status('error', `File not found: ${docx}`));
86
- process.exit(1);
87
- }
88
-
89
- // Handle PDF files
90
- if (docx.toLowerCase().endsWith('.pdf')) {
91
- const { extractPdfComments, formatPdfComments, getPdfCommentStats } = await import('../pdf-import.js');
92
-
93
- const spin = fmt.spinner(`Extracting comments from ${path.basename(docx)}...`).start();
94
-
95
- try {
96
- const comments = await extractPdfComments(docx);
97
- spin.stop();
98
-
99
- if (comments.length === 0) {
100
- console.log(fmt.status('info', 'No comments found in PDF.'));
101
- return;
102
- }
103
-
104
- const stats = getPdfCommentStats(comments);
105
- console.log(fmt.header(`PDF Comments from ${path.basename(docx)}`));
106
- console.log();
107
- console.log(formatPdfComments(comments));
108
- console.log();
109
-
110
- const authorList = Object.entries(stats.byAuthor)
111
- .map(([author, count]) => `${author} (${count})`)
112
- .join(', ');
113
- console.log(chalk.dim(`Total: ${stats.total} comments from ${authorList}`));
114
- console.log();
115
-
116
- const configPath = path.resolve(options.dir, options.config);
117
- if (fs.existsSync(configPath) && !options.dryRun) {
118
- const config = loadConfig(configPath);
119
- const mainSection = config.sections?.[0];
120
-
121
- if (mainSection && typeof mainSection === 'string') {
122
- const mainPath = path.join(options.dir, mainSection);
123
- if (fs.existsSync(mainPath)) {
124
- console.log(chalk.dim(`Use 'rev pdf-comments ${docx} --append ${mainSection}' to add comments to markdown.`));
125
- }
126
- }
127
- }
128
- } catch (err) {
129
- spin.stop();
130
- const error = err as Error;
131
- console.error(fmt.status('error', `Failed to extract PDF comments: ${error.message}`));
132
- if (process.env.DEBUG) console.error(error.stack);
133
- process.exit(1);
134
- }
135
- return;
136
- }
137
-
138
- const configPath = path.resolve(options.dir, options.config);
139
- if (!fs.existsSync(configPath)) {
140
- console.error(fmt.status('error', `Config not found: ${configPath}`));
141
- console.error(chalk.dim(' Run "rev init" first to generate sections.yaml'));
142
- process.exit(1);
143
- }
144
-
145
- // --comments-only: import comments only, never modify existing prose.
146
- // Use this when the markdown has been revised since the docx was sent
147
- // out — track changes from a stale draft would clobber newer edits.
148
- if (options.commentsOnly) {
149
- await syncCommentsOnly(docx, sections, options, configPath);
150
- return;
151
- }
152
-
153
- // Check pandoc availability upfront and warn
154
- const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
155
- if (!hasPandoc()) {
156
- console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
157
- console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
158
- console.log();
159
- }
160
-
161
- const spin = fmt.spinner(`Importing ${path.basename(docx)}...`).start();
162
-
163
- try {
164
- const config = loadConfig(configPath);
165
- const { importFromWord, extractWordComments, extractCommentAnchors, insertCommentsIntoMarkdown, extractFromWord } = await import('../import.js');
166
-
167
- let registry = null;
168
- let totalRefConversions = 0;
169
- if (options.crossref !== false) {
170
- registry = buildRegistry(options.dir);
171
- }
172
-
173
- const comments = await extractWordComments(docx);
174
- const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(docx);
175
-
176
- // Extract Word text (uses pandoc if available, falls back to XML extraction)
177
- const wordExtraction = await extractFromWord(docx, { mediaDir: options.dir });
178
- let wordText = wordExtraction.text;
179
- const wordTables = wordExtraction.tables || [];
180
-
181
- // Log extraction messages (warnings about pandoc, track change stats, etc.)
182
- for (const msg of wordExtraction.messages || []) {
183
- if (msg.type === 'warning') {
184
- spin.stop();
185
- console.log(fmt.status('warning', msg.message));
186
- spin.start();
187
- }
188
- }
189
-
190
- // Restore crossref on FULL text BEFORE splitting into sections
191
- // This ensures duplicate labels from track changes are handled correctly
192
- // (the same figure may appear multiple times in old/new versions)
193
- const { restoreCrossrefFromWord, restoreImagesFromRegistry } = await import('../import.js');
194
- const crossrefResult = restoreCrossrefFromWord(wordText, options.dir);
195
- wordText = crossrefResult.text;
196
- if (crossrefResult.restored > 0) {
197
- console.log(`Restored ${crossrefResult.restored} crossref reference(s)`);
198
- }
199
-
200
- // Also restore images from registry using shared restoredLabels
201
- const imageRestoreResult = restoreImagesFromRegistry(wordText, options.dir, crossrefResult.restoredLabels);
202
- wordText = imageRestoreResult.text;
203
- if (imageRestoreResult.restored > 0) {
204
- console.log(`Restored ${imageRestoreResult.restored} image(s) from registry`);
205
- }
206
-
207
- let wordSections = extractSectionsFromText(wordText, config.sections);
208
-
209
- if (wordSections.length === 0) {
210
- spin.stop();
211
- console.error(fmt.status('warning', 'No sections detected in Word document.'));
212
- console.error(chalk.dim(' Check that headings match sections.yaml'));
213
- process.exit(1);
214
- }
215
-
216
- if (sections && sections.length > 0) {
217
- const onlyList = sections.map(s => s.trim().toLowerCase());
218
- wordSections = wordSections.filter(section => {
219
- const fileName = section.file.replace(/\.md$/i, '').toLowerCase();
220
- const header = section.header.toLowerCase();
221
- return onlyList.some(name => fileName === name || fileName.includes(name) || header.includes(name));
222
- });
223
- if (wordSections.length === 0) {
224
- spin.stop();
225
- console.error(fmt.status('error', `No sections matched: ${sections.join(', ')}`));
226
- console.error(chalk.dim(` Available: ${extractSectionsFromText(wordText, config.sections).map(s => s.file.replace(/\.md$/i, '')).join(', ')}`));
227
- process.exit(1);
228
- }
229
- }
230
-
231
- spin.stop();
232
- console.log(fmt.header(`Import from ${path.basename(docx)}`));
233
- console.log();
234
-
235
- // Conflict detection
236
- if (!options.force && !options.dryRun) {
237
- const conflicts: Array<{ file: string; annotations: number }> = [];
238
- for (const section of wordSections) {
239
- const sectionPath = path.join(options.dir, section.file);
240
- if (fs.existsSync(sectionPath)) {
241
- const existing = fs.readFileSync(sectionPath, 'utf-8');
242
- const existingCounts = countAnnotations(existing);
243
- if (existingCounts.total > 0) {
244
- conflicts.push({
245
- file: section.file,
246
- annotations: existingCounts.total,
247
- });
248
- }
249
- }
250
- }
251
-
252
- if (conflicts.length > 0) {
253
- console.log(fmt.status('warning', 'Files with existing annotations will be overwritten:'));
254
- for (const c of conflicts) {
255
- console.log(chalk.yellow(` - ${c.file} (${c.annotations} annotations)`));
256
- }
257
- console.log();
258
-
259
- const rl = readline.createInterface({
260
- input: process.stdin,
261
- output: process.stdout,
262
- });
263
-
264
- const answer = await new Promise<string>((resolve) =>
265
- rl.question(chalk.cyan('Continue and overwrite? [y/N] '), resolve)
266
- );
267
- rl.close();
268
-
269
- if (answer.toLowerCase() !== 'y') {
270
- console.log(chalk.dim('Aborted. Use --force to skip this check.'));
271
- process.exit(0);
272
- }
273
- console.log();
274
- }
275
- }
276
-
277
- const sectionResults: Array<{
278
- file: string;
279
- header: string;
280
- status: string;
281
- stats?: ImportStats;
282
- refs?: number;
283
- }> = [];
284
- let totalChanges = 0;
285
-
286
- // Calculate section boundaries in the XML document text for comment filtering
287
- // Comment positions (docPosition) are relative to xmlDocText, NOT wordText
288
- // So we must find section headers in xmlDocText to get matching boundaries
289
- const sectionBoundaries: Array<{ file: string; start: number; end: number }> = [];
290
- const xmlLower = xmlDocText.toLowerCase();
291
-
292
- // Standard section header keywords to search for in XML
293
- // Map from file name pattern to search terms
294
- const sectionKeywords: Record<string, string[]> = {
295
- 'abstract': ['abstract', 'summary'],
296
- 'introduction': ['introduction', 'background'],
297
- 'methods': ['methods', 'materials and methods', 'methodology'],
298
- 'results': ['results'],
299
- 'discussion': ['discussion'],
300
- 'conclusion': ['conclusion', 'conclusions'],
301
- };
302
-
303
- // Helper: find section header (skip labels like "Methods:" in structured abstracts)
304
- // Real section headers are NOT followed by ":" immediately
305
- function findSectionHeader(text: string, keyword: string, startFrom: number = 0): number {
306
- const lower = text.toLowerCase();
307
- let idx = startFrom;
308
- while ((idx = lower.indexOf(keyword, idx)) !== -1) {
309
- // Check what follows the keyword
310
- const afterKeyword = text.slice(idx + keyword.length, idx + keyword.length + 5);
311
- // Skip if followed by ":" (this is a label, not a section header)
312
- // Real headers are followed by text content, a newline, or a subheading
313
- if (!afterKeyword.startsWith(':') && !afterKeyword.startsWith(' :')) {
314
- return idx;
315
- }
316
- idx++;
317
- }
318
- return -1;
319
- }
320
-
321
- for (const section of wordSections) {
322
- const fileBase = section.file.replace(/\.md$/i, '').toLowerCase();
323
-
324
- // Get keywords for this section
325
- const keywords = sectionKeywords[fileBase] || [fileBase];
326
-
327
- // Find the first valid keyword that exists in XML (not a label)
328
- let headerIdx = -1;
329
- for (const kw of keywords) {
330
- const idx = findSectionHeader(xmlDocText, kw, 0);
331
- if (idx >= 0 && (headerIdx < 0 || idx < headerIdx)) {
332
- headerIdx = idx;
333
- }
334
- }
335
-
336
- if (headerIdx >= 0) {
337
- // Find the next section's start to determine end boundary
338
- let nextHeaderIdx = xmlDocText.length;
339
- const sectionIdx = wordSections.indexOf(section);
340
- if (sectionIdx < wordSections.length - 1) {
341
- const nextFileBase = wordSections[sectionIdx + 1].file.replace(/\.md$/i, '').toLowerCase();
342
- const nextKeywords = sectionKeywords[nextFileBase] || [nextFileBase];
343
- for (const nkw of nextKeywords) {
344
- const foundNext = findSectionHeader(xmlDocText, nkw, headerIdx + 10);
345
- if (foundNext >= 0 && foundNext < nextHeaderIdx) {
346
- nextHeaderIdx = foundNext;
347
- }
348
- }
349
- }
350
-
351
- sectionBoundaries.push({
352
- file: section.file,
353
- start: headerIdx,
354
- end: nextHeaderIdx
355
- });
356
-
357
- }
358
- }
359
-
360
- // Document length is the XML text length (same coordinate system as docPosition)
361
- const docLength = xmlDocText.length;
362
-
363
- for (const section of wordSections) {
364
- const sectionPath = path.join(options.dir, section.file);
365
-
366
- if (!fs.existsSync(sectionPath)) {
367
- sectionResults.push({
368
- file: section.file,
369
- header: section.header,
370
- status: 'skipped',
371
- stats: undefined,
372
- });
373
- continue;
374
- }
375
-
376
- const result = await importFromWord(docx, sectionPath, {
377
- sectionContent: section.content,
378
- author: 'Reviewer',
379
- wordTables: wordTables,
380
- });
381
-
382
- let { annotated, stats } = result;
383
-
384
- let refConversions: Array<{ from: string; to: string }> = [];
385
- if (registry && options.crossref !== false) {
386
- const crossrefResult = convertHardcodedRefs(annotated, registry);
387
- annotated = crossrefResult.converted;
388
- refConversions = crossrefResult.conversions;
389
- totalRefConversions += refConversions.length;
390
- }
391
-
392
- let commentsInserted = 0;
393
- if (comments.length > 0 && anchors.size > 0) {
394
- // Filter comments to only those that belong to this section
395
- // Use exact position matching: docPosition is in xmlDocText coordinates,
396
- // and sectionBoundaries are also in xmlDocText coordinates (same source!)
397
- const boundary = sectionBoundaries.find(b => b.file === section.file);
398
- const isFirstSection = wordSections.indexOf(section) === 0;
399
- const firstBoundaryStart = sectionBoundaries.length > 0 ? Math.min(...sectionBoundaries.map(b => b.start)) : 0;
400
-
401
- const sectionComments = comments.filter((c: any) => {
402
- const anchorData = anchors.get(c.id);
403
- if (!anchorData) return false;
404
-
405
- // Use exact position - no scaling needed since both are in xmlDocText coordinates
406
- if (anchorData.docPosition !== undefined && boundary) {
407
- // Include comments within section boundaries
408
- if (anchorData.docPosition >= boundary.start && anchorData.docPosition < boundary.end) {
409
- return true;
410
- }
411
- // Also include "outside" comments (before first section) in the first section file
412
- if (isFirstSection && anchorData.docPosition < firstBoundaryStart) {
413
- return true;
414
- }
415
- }
416
-
417
- return false;
418
- });
419
-
420
- if (process.env.DEBUG) {
421
- console.log(`[DEBUG] ${section.file}: ${sectionComments.length} comments to place (boundary: ${boundary?.start}-${boundary?.end})`);
422
- }
423
-
424
- if (sectionComments.length > 0) {
425
- // Use a more robust pattern that handles < in comment text
426
- const commentPattern = /\{>>.*?<<\}/gs;
427
- const beforeCount = (annotated.match(commentPattern) || []).length;
428
- annotated = insertCommentsIntoMarkdown(annotated, sectionComments, anchors, {
429
- quiet: !process.env.DEBUG,
430
- sectionBoundary: boundary // Pass section boundary for position-based insertion
431
- });
432
- const afterCount = (annotated.match(commentPattern) || []).length;
433
- commentsInserted = afterCount - beforeCount;
434
-
435
- if (process.env.DEBUG) {
436
- console.log(`[DEBUG] ${section.file}: inserted ${commentsInserted} of ${sectionComments.length} comments`);
437
- }
438
-
439
- if (commentsInserted > 0) {
440
- stats.comments = (stats.comments || 0) + commentsInserted;
441
- }
442
- }
443
- }
444
-
445
- totalChanges += stats.total;
446
-
447
- sectionResults.push({
448
- file: section.file,
449
- header: section.header,
450
- status: 'ok',
451
- stats,
452
- refs: refConversions.length,
453
- });
454
-
455
- if (!options.dryRun) {
456
- // Preserve any preamble content (YAML frontmatter, author blocks, metadata)
457
- // that exists before the first heading in the original file.
458
- // This content is never included in the Word build output, so it won't
459
- // appear in the Word doc and would otherwise be lost during sync.
460
- const originalContent = fs.readFileSync(sectionPath, 'utf-8');
461
- const firstHeadingMatch = originalContent.match(/^(#\s)/m);
462
- if (firstHeadingMatch && firstHeadingMatch.index !== undefined && firstHeadingMatch.index > 0) {
463
- const preamble = originalContent.slice(0, firstHeadingMatch.index);
464
- // Only prepend if preamble has non-whitespace content
465
- if (preamble.trim().length > 0) {
466
- annotated = preamble + annotated;
467
- }
468
- }
469
- fs.writeFileSync(sectionPath, annotated, 'utf-8');
470
- }
471
- }
472
-
473
- const tableRows = sectionResults.map((r) => {
474
- if (r.status === 'skipped') {
475
- return [
476
- chalk.dim(r.file),
477
- chalk.dim(r.header.slice(0, 25)),
478
- chalk.yellow('skipped'),
479
- '',
480
- '',
481
- '',
482
- '',
483
- ];
484
- }
485
- const s = r.stats!;
486
- return [
487
- chalk.bold(r.file),
488
- r.header.length > 25 ? r.header.slice(0, 22) + '...' : r.header,
489
- s.insertions > 0 ? chalk.green(`+${s.insertions}`) : chalk.dim('-'),
490
- s.deletions > 0 ? chalk.red(`-${s.deletions}`) : chalk.dim('-'),
491
- s.substitutions > 0 ? chalk.yellow(`~${s.substitutions}`) : chalk.dim('-'),
492
- s.comments > 0 ? chalk.blue(`#${s.comments}`) : chalk.dim('-'),
493
- r.refs! > 0 ? chalk.magenta(`@${r.refs}`) : chalk.dim('-'),
494
- ];
495
- });
496
-
497
- console.log(fmt.table(
498
- ['File', 'Section', 'Ins', 'Del', 'Sub', 'Cmt', 'Ref'],
499
- tableRows,
500
- { align: ['left', 'left', 'right', 'right', 'right', 'right', 'right'] }
501
- ));
502
- console.log();
503
-
504
- if (options.diff !== false && totalChanges > 0) {
505
- console.log(fmt.header('Changes Preview'));
506
- console.log();
507
- for (const result of sectionResults) {
508
- if (result.status === 'ok' && result.stats && result.stats.total > 0) {
509
- const sectionPath = path.join(options.dir, result.file);
510
- if (fs.existsSync(sectionPath)) {
511
- const content = fs.readFileSync(sectionPath, 'utf-8');
512
- const preview = inlineDiffPreview(content, { maxLines: 3 });
513
- if (preview) {
514
- console.log(chalk.bold(result.file) + ':');
515
- console.log(preview);
516
- console.log();
517
- }
518
- }
519
- }
520
- }
521
- }
522
-
523
- if (options.dryRun) {
524
- console.log(fmt.box(chalk.yellow('Dry run - no files written'), { padding: 0 }));
525
- } else if (totalChanges > 0 || totalRefConversions > 0 || comments.length > 0) {
526
- const summaryLines: string[] = [];
527
- summaryLines.push(`${chalk.bold(wordSections.length)} sections processed`);
528
- if (totalChanges > 0) summaryLines.push(`${chalk.bold(totalChanges)} annotations imported`);
529
- if (comments.length > 0) summaryLines.push(`${chalk.bold(comments.length)} comments placed`);
530
- if (totalRefConversions > 0) summaryLines.push(`${chalk.bold(totalRefConversions)} refs converted to @-syntax`);
531
-
532
- console.log(fmt.box(summaryLines.join('\n'), { title: 'Summary', padding: 0 }));
533
- console.log();
534
- console.log(chalk.dim('Next steps:'));
535
- console.log(chalk.dim(' 1. rev review <section.md> - Accept/reject changes'));
536
- console.log(chalk.dim(' 2. rev comments <section.md> - View/address comments'));
537
- console.log(chalk.dim(' 3. rev build docx - Rebuild Word doc'));
538
- } else {
539
- console.log(fmt.status('success', 'No changes detected.'));
540
- }
541
- } catch (err) {
542
- spin.stop();
543
- const error = err as Error;
544
- console.error(fmt.status('error', error.message));
545
- if (process.env.DEBUG) console.error(error.stack);
546
- process.exit(1);
547
- }
548
- });
549
- }
550
-
551
- /**
552
- * `sync --comments-only`: import only Word comments at fuzzy-matched anchors.
553
- *
554
- * Skips the Word→Markdown diff entirely (no track changes, no pandoc, no
555
- * prose modifications). Useful when the markdown has been edited after the
556
- * docx was sent for review — applying track changes from a stale draft
557
- * would overwrite newer edits.
558
- */
559
- async function syncCommentsOnly(
560
- docx: string,
561
- sectionFilter: string[] | undefined,
562
- options: SyncOptions,
563
- configPath: string,
564
- ): Promise<void> {
565
- const config = loadConfig(configPath);
566
- const { extractWordComments, extractCommentAnchors, extractHeadings, insertCommentsIntoMarkdown } = await import('../import.js');
567
- const { computeSectionBoundaries } = await import('./section-boundaries.js');
568
-
569
- const spin = fmt.spinner(`Reading comments from ${path.basename(docx)}...`).start();
570
-
571
- let comments;
572
- let anchors;
573
- let headings;
574
- let fullDocText = '';
575
- try {
576
- comments = await extractWordComments(docx);
577
- const result = await extractCommentAnchors(docx);
578
- anchors = result.anchors;
579
- fullDocText = result.fullDocText;
580
- headings = await extractHeadings(docx);
581
- spin.stop();
582
- } catch (err) {
583
- spin.stop();
584
- const error = err as Error;
585
- console.error(fmt.status('error', error.message));
586
- process.exit(1);
587
- }
588
-
589
- console.log(fmt.header(`Comments from ${path.basename(docx)} (comments-only)`));
590
- console.log();
591
-
592
- if (comments.length === 0) {
593
- console.log(fmt.status('info', 'No comments found in document.'));
594
- return;
595
- }
596
-
597
- const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
598
-
599
- if (boundaries.length === 0) {
600
- console.error(fmt.status('warning', 'No section headings detected in Word document.'));
601
- console.error(chalk.dim(' Check that headers in sections.yaml match heading paragraphs in the docx.'));
602
- process.exit(1);
603
- }
604
-
605
- // Apply optional section filter from CLI
606
- let activeBoundaries = boundaries;
607
- if (sectionFilter && sectionFilter.length > 0) {
608
- const wanted = sectionFilter.map(s => s.trim().toLowerCase());
609
- activeBoundaries = boundaries.filter(b => {
610
- const base = b.file.replace(/\.md$/i, '').toLowerCase();
611
- return wanted.some(name => base === name || base.includes(name));
612
- });
613
- if (activeBoundaries.length === 0) {
614
- console.error(fmt.status('error', `No sections matched: ${sectionFilter.join(', ')}`));
615
- process.exit(1);
616
- }
617
- }
618
-
619
- const firstBoundaryStart = boundaries[0].start;
620
- const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];
621
-
622
- for (const boundary of activeBoundaries) {
623
- const sectionPath = path.join(options.dir, boundary.file);
624
- if (!fs.existsSync(sectionPath)) {
625
- results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
626
- continue;
627
- }
628
-
629
- const isFirstSection = boundary === activeBoundaries[0];
630
- const sectionComments = comments.filter((c: { id: string }) => {
631
- const anchor = anchors.get(c.id);
632
- if (!anchor || anchor.docPosition === undefined) return false;
633
- if (anchor.docPosition >= boundary.start && anchor.docPosition < boundary.end) return true;
634
- // Comments before the first heading land in the first matched section
635
- if (isFirstSection && anchor.docPosition < firstBoundaryStart) return true;
636
- return false;
637
- });
638
-
639
- if (sectionComments.length === 0) {
640
- results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
641
- continue;
642
- }
643
-
644
- const original = fs.readFileSync(sectionPath, 'utf-8');
645
-
646
- const stats = { placed: 0, deduped: 0, unmatched: 0 };
647
- const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
648
- quiet: !process.env.DEBUG,
649
- sectionBoundary: { start: boundary.start, end: boundary.end },
650
- wrapAnchor: false,
651
- outStats: stats,
652
- });
653
-
654
- if (!options.dryRun && stats.placed > 0) {
655
- fs.writeFileSync(sectionPath, annotated, 'utf-8');
656
- }
657
- results.push({ file: boundary.file, ...stats, skipped: false });
658
- }
659
-
660
- const tableRows = results.map(r => {
661
- if (r.skipped) {
662
- return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
663
- }
664
- return [
665
- chalk.bold(r.file),
666
- chalk.green(`${r.placed}`),
667
- r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
668
- r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
669
- chalk.dim('comments only'),
670
- ];
671
- });
672
-
673
- console.log(fmt.table(
674
- ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
675
- tableRows,
676
- { align: ['left', 'right', 'right', 'right', 'left'] },
677
- ));
678
- console.log();
679
-
680
- const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
681
- const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
682
- const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);
683
-
684
- const lines: string[] = [];
685
- lines.push(`${chalk.bold(comments.length)} comments in document`);
686
- if (totalPlaced > 0) {
687
- lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
688
- }
689
- if (totalDeduped > 0) {
690
- lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
691
- }
692
- if (totalUnmatched > 0) {
693
- lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
694
- }
695
- if (options.dryRun) {
696
- lines.push(chalk.yellow('Dry run — no files written'));
697
- } else if (totalPlaced > 0) {
698
- lines.push(chalk.dim('Existing prose unchanged.'));
699
- }
700
- console.log(fmt.box(lines.join('\n'), { title: 'Summary', padding: 0 }));
701
-
702
- if (totalUnmatched > 0) {
703
- console.log();
704
- console.log(chalk.dim('Tip: run "rev verify-anchors" to see which comments drifted.'));
705
- }
706
- }
1
+ /**
2
+ * SYNC command: Import feedback from Word/PDF back to section files
3
+ *
4
+ * Split from sections.ts for maintainability.
5
+ */
6
+
7
+ import {
8
+ chalk,
9
+ fs,
10
+ path,
11
+ fmt,
12
+ findFiles,
13
+ loadConfig,
14
+ extractSectionsFromText,
15
+ countAnnotations,
16
+ buildRegistry,
17
+ convertHardcodedRefs,
18
+ inlineDiffPreview,
19
+ } from './context.js';
20
+ import type { Command } from 'commander';
21
+ import * as readline from 'readline';
22
+
23
/**
 * Per-section annotation counts shown in the sync result table
 * (columns Ins / Del / Sub / Cmt) and summed into the run summary.
 */
interface ImportStats {
  /** Number of inserted spans (rendered as `+N`). */
  insertions: number;
  /** Number of deleted spans (rendered as `-N`). */
  deletions: number;
  /** Number of substitutions (rendered as `~N`). */
  substitutions: number;
  /** Number of comments (rendered as `#N`). */
  comments: number;
  /**
   * Overall annotation count for the section; this is the figure added to the
   * "annotations imported" total.
   * NOTE(review): whether it already includes `comments` is decided by the
   * importer, not by this file - confirm before relying on it.
   */
  total: number;
}
30
+
31
/** Options Commander passes to the `sync` action. */
interface SyncOptions {
  /** Sections config file (`-c, --config`, default `sections.yaml`). */
  config: string;
  /** Directory containing the section files (`-d, --dir`, default `.`). */
  dir: string;
  /** `false` when `--no-crossref` is given: skip converting hardcoded figure/table refs. */
  crossref?: boolean;
  /** `false` when `--no-diff` is given: skip the diff preview. */
  diff?: boolean;
  /** `--force`: overwrite files without the conflict warning. */
  force?: boolean;
  /** `--dry-run`: preview without writing files. */
  dryRun?: boolean;
  /**
   * `--comments-only`: insert comments only and leave existing prose alone.
   *
   * The flag is deliberately positive. A `--no-overwrite` spelling would clash
   * with the overwrite semantics of `--force`-style flags, and Commander's
   * `--no-X` convention would surface it as `options.x === false` rather than
   * as a plain boolean.
   */
  commentsOnly?: boolean;
}
43
+
44
/**
 * Heading keywords tried for well-known section file names (key = file name
 * without `.md`, lower-cased). Files not listed here fall back to their own
 * base name. A Map is used so that file names such as `constructor.md` cannot
 * collide with Object.prototype members.
 */
const SECTION_KEYWORDS: ReadonlyMap<string, readonly string[]> = new Map([
  ['abstract', ['abstract', 'summary']],
  ['introduction', ['introduction', 'background']],
  ['methods', ['methods', 'materials and methods', 'methodology']],
  ['results', ['results']],
  ['discussion', ['discussion']],
  ['conclusion', ['conclusion', 'conclusions']],
]);

/** Matches one `{>>...<<}` comment annotation; robust to `<` inside the comment text. */
const INLINE_COMMENT_PATTERN = /\{>>.*?<<\}/gs;

/** Character range of one section inside the XML document text. */
interface KeywordBoundary {
  file: string;
  start: number;
  end: number;
}

/** One row of the sync result table. */
interface SyncSectionResult {
  file: string;
  header: string;
  status: 'ok' | 'skipped';
  stats?: ImportStats;
  refs?: number;
  /** Final annotated text for the section (what is, or would be, written). */
  content?: string;
}

/**
 * Pick the most recently modified .docx/.pdf found by `findFiles`.
 * Prints an error and exits with code 1 when there is none.
 */
function autoDetectReviewFile(): string {
  const candidates = [...findFiles('.docx'), ...findFiles('.pdf')];
  const newest = candidates
    .map((name: string) => ({ name, mtime: fs.statSync(name).mtime }))
    .sort((a, b) => b.mtime.getTime() - a.mtime.getTime())[0];

  if (!newest) {
    console.error(fmt.status('error', 'No .docx or .pdf files found in current directory.'));
    process.exit(1);
  }

  console.log(fmt.status('info', `Using most recent: ${newest.name}`));
  console.log();
  return newest.name;
}

/**
 * List the comments found in a reviewer PDF and hint at how to append them to
 * the first configured section. Exits with code 1 if extraction fails.
 */
async function reportPdfComments(pdfPath: string, options: SyncOptions): Promise<void> {
  const { extractPdfComments, formatPdfComments, getPdfCommentStats } = await import('../pdf-import.js');

  const spin = fmt.spinner(`Extracting comments from ${path.basename(pdfPath)}...`).start();

  try {
    const comments = await extractPdfComments(pdfPath);
    spin.stop();

    if (comments.length === 0) {
      console.log(fmt.status('info', 'No comments found in PDF.'));
      return;
    }

    const stats = getPdfCommentStats(comments);
    console.log(fmt.header(`PDF Comments from ${path.basename(pdfPath)}`));
    console.log();
    console.log(formatPdfComments(comments));
    console.log();

    const authorList = Object.entries(stats.byAuthor)
      .map(([author, count]) => `${author} (${count})`)
      .join(', ');
    console.log(chalk.dim(`Total: ${stats.total} comments from ${authorList}`));
    console.log();

    const configPath = path.resolve(options.dir, options.config);
    if (fs.existsSync(configPath) && !options.dryRun) {
      const config = loadConfig(configPath);
      const mainSection = config.sections?.[0];

      if (mainSection && typeof mainSection === 'string') {
        const mainPath = path.join(options.dir, mainSection);
        if (fs.existsSync(mainPath)) {
          console.log(chalk.dim(`Use 'rev pdf-comments ${pdfPath} --append ${mainSection}' to add comments to markdown.`));
        }
      }
    }
  } catch (err) {
    spin.stop();
    const error = err as Error;
    console.error(fmt.status('error', `Failed to extract PDF comments: ${error.message}`));
    if (process.env.DEBUG) console.error(error.stack);
    process.exit(1);
  }
}

/**
 * Find the first occurrence of `keyword` at or after `startFrom` that is not a
 * label. Labels such as "Methods:" in a structured abstract are followed by
 * ":" (optionally after one space); real section headers are not.
 *
 * @param lowerText - Document text, already lower-cased by the caller.
 * @param keyword - Lower-case keyword to look for.
 * @returns Index of the match in `lowerText`, or -1.
 */
function findSectionHeader(lowerText: string, keyword: string, startFrom = 0): number {
  let idx = lowerText.indexOf(keyword, startFrom);
  while (idx !== -1) {
    const after = lowerText.slice(idx + keyword.length, idx + keyword.length + 2);
    if (!after.startsWith(':') && !after.startsWith(' :')) {
      return idx;
    }
    idx = lowerText.indexOf(keyword, idx + 1);
  }
  return -1;
}

/**
 * Locate each section in the XML document text by keyword search.
 *
 * Comment positions (`docPosition`) are relative to the XML text, not to the
 * pandoc-extracted text, so boundaries must be computed against the same
 * string. A section whose keywords are not found gets no boundary.
 *
 * NOTE(review): indices come from the lower-cased copy; this assumes
 * lower-casing does not change the string length - confirm for documents with
 * unusual non-ASCII characters.
 */
function computeKeywordBoundaries(sectionFiles: readonly string[], xmlDocText: string): KeywordBoundary[] {
  const lowerText = xmlDocText.toLowerCase();
  const keywordsFor = (file: string): readonly string[] => {
    const base = file.replace(/\.md$/i, '').toLowerCase();
    return SECTION_KEYWORDS.get(base) ?? [base];
  };

  const boundaries: KeywordBoundary[] = [];
  sectionFiles.forEach((file, position) => {
    // Earliest non-label hit among this section's keywords
    let start = -1;
    for (const kw of keywordsFor(file)) {
      const idx = findSectionHeader(lowerText, kw);
      if (idx >= 0 && (start < 0 || idx < start)) {
        start = idx;
      }
    }
    if (start < 0) return;

    // The section ends where the next listed section begins (or at end of text)
    let end = xmlDocText.length;
    const nextFile = sectionFiles[position + 1];
    if (nextFile !== undefined) {
      for (const kw of keywordsFor(nextFile)) {
        const found = findSectionHeader(lowerText, kw, start + 10);
        if (found >= 0 && found < end) {
          end = found;
        }
      }
    }

    boundaries.push({ file, start, end });
  });
  return boundaries;
}

/**
 * Warn about files whose existing annotations would be overwritten and ask the
 * user to confirm. Resolves to true only for a "y"/"yes" answer.
 */
async function confirmOverwrite(conflicts: Array<{ file: string; annotations: number }>): Promise<boolean> {
  console.log(fmt.status('warning', 'Files with existing annotations will be overwritten:'));
  for (const c of conflicts) {
    console.log(chalk.yellow(` - ${c.file} (${c.annotations} annotations)`));
  }
  console.log();

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  try {
    const answer = await new Promise<string>((resolve) =>
      rl.question(chalk.cyan('Continue and overwrite? [y/N] '), resolve)
    );
    const reply = answer.trim().toLowerCase();
    return reply === 'y' || reply === 'yes';
  } finally {
    rl.close();
  }
}

/**
 * Re-attach whatever precedes the first `# ` heading of the original file
 * (YAML frontmatter, author blocks, metadata). That content never reaches the
 * Word build, so it is absent from the imported text and would otherwise be
 * lost on sync. Whitespace-only preambles are not re-attached.
 */
function withPreservedPreamble(originalContent: string, annotated: string): string {
  const firstHeading = originalContent.match(/^(#\s)/m);
  if (firstHeading && firstHeading.index !== undefined && firstHeading.index > 0) {
    const preamble = originalContent.slice(0, firstHeading.index);
    if (preamble.trim().length > 0) {
      return preamble + annotated;
    }
  }
  return annotated;
}

/**
 * Register the `sync` command (alias `sections`) with the program.
 *
 * The action imports reviewer feedback from a Word or PDF file into the
 * section files:
 *  - no file argument: the most recently modified .docx/.pdf is used;
 *  - PDF: comments are listed only, nothing is written;
 *  - `--comments-only`: delegated to `syncCommentsOnly`;
 *  - otherwise: track changes and comments are imported per section, with an
 *    overwrite confirmation unless `--force` or `--dry-run` is given.
 *
 * Fatal problems are reported on stderr and end the process with exit code 1.
 */
export function register(program: Command): void {
  // ==========================================================================
  // SYNC command - Import with section awareness
  // ==========================================================================

  program
    .command('sync')
    .alias('sections')
    .description('Sync feedback from Word/PDF back to section files')
    .argument('[file]', 'Word (.docx) or PDF file from reviewer (default: most recent)')
    .argument('[sections...]', 'Specific sections to sync (default: all)')
    .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
    .option('-d, --dir <directory>', 'Directory with section files', '.')
    .option('--no-crossref', 'Skip converting hardcoded figure/table refs')
    .option('--no-diff', 'Skip showing diff preview')
    .option('--force', 'Overwrite files without conflict warning')
    .option('--dry-run', 'Preview without writing files')
    .option('--comments-only', 'Insert comments at fuzzy-matched anchors only; never modify existing prose or apply track changes (use when markdown was revised after the docx was sent for review)')
    .action(async (docx: string | undefined, sections: string[], options: SyncOptions) => {
      // Auto-detect most recent docx or pdf if not provided
      const reviewFile = docx || autoDetectReviewFile();

      if (!fs.existsSync(reviewFile)) {
        console.error(fmt.status('error', `File not found: ${reviewFile}`));
        process.exit(1);
      }

      // Handle PDF files
      if (reviewFile.toLowerCase().endsWith('.pdf')) {
        await reportPdfComments(reviewFile, options);
        return;
      }

      const configPath = path.resolve(options.dir, options.config);
      if (!fs.existsSync(configPath)) {
        console.error(fmt.status('error', `Config not found: ${configPath}`));
        console.error(chalk.dim(' Run "rev init" first to generate sections.yaml'));
        process.exit(1);
      }

      // --comments-only: import comments only, never modify existing prose.
      // Use this when the markdown has been revised since the docx was sent
      // out — track changes from a stale draft would clobber newer edits.
      if (options.commentsOnly) {
        await syncCommentsOnly(reviewFile, sections, options, configPath);
        return;
      }

      // Check pandoc availability upfront and warn
      const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
      if (!hasPandoc()) {
        console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
        console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
        console.log();
      }

      const spin = fmt.spinner(`Importing ${path.basename(reviewFile)}...`).start();

      try {
        const config = loadConfig(configPath);
        const {
          importFromWord,
          extractWordComments,
          extractCommentAnchors,
          insertCommentsIntoMarkdown,
          extractFromWord,
          restoreCrossrefFromWord,
          restoreImagesFromRegistry,
        } = await import('../import.js');

        const registry = options.crossref !== false ? buildRegistry(options.dir) : null;
        let totalRefConversions = 0;

        const comments = await extractWordComments(reviewFile);
        const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(reviewFile);

        // Extract Word text (uses pandoc if available, falls back to XML extraction)
        const wordExtraction = await extractFromWord(reviewFile, { mediaDir: options.dir });
        let wordText = wordExtraction.text;
        const wordTables = wordExtraction.tables || [];

        // Log extraction messages (warnings about pandoc, track change stats, etc.)
        for (const msg of wordExtraction.messages || []) {
          if (msg.type === 'warning') {
            spin.stop();
            console.log(fmt.status('warning', msg.message));
            spin.start();
          }
        }

        // Restore crossref on FULL text BEFORE splitting into sections
        // This ensures duplicate labels from track changes are handled correctly
        // (the same figure may appear multiple times in old/new versions)
        const crossrefResult = restoreCrossrefFromWord(wordText, options.dir);
        wordText = crossrefResult.text;
        if (crossrefResult.restored > 0) {
          console.log(`Restored ${crossrefResult.restored} crossref reference(s)`);
        }

        // Also restore images from registry using shared restoredLabels
        const imageRestoreResult = restoreImagesFromRegistry(wordText, options.dir, crossrefResult.restoredLabels);
        wordText = imageRestoreResult.text;
        if (imageRestoreResult.restored > 0) {
          console.log(`Restored ${imageRestoreResult.restored} image(s) from registry`);
        }

        const allSections = extractSectionsFromText(wordText, config.sections);
        let wordSections = allSections;

        if (wordSections.length === 0) {
          spin.stop();
          console.error(fmt.status('warning', 'No sections detected in Word document.'));
          console.error(chalk.dim(' Check that headings match sections.yaml'));
          process.exit(1);
        }

        // Apply optional section filter from CLI
        if (sections && sections.length > 0) {
          const onlyList = sections.map(s => s.trim().toLowerCase());
          wordSections = allSections.filter(section => {
            const fileName = section.file.replace(/\.md$/i, '').toLowerCase();
            const header = section.header.toLowerCase();
            return onlyList.some(name => fileName === name || fileName.includes(name) || header.includes(name));
          });
          if (wordSections.length === 0) {
            spin.stop();
            console.error(fmt.status('error', `No sections matched: ${sections.join(', ')}`));
            console.error(chalk.dim(` Available: ${allSections.map(s => s.file.replace(/\.md$/i, '')).join(', ')}`));
            process.exit(1);
          }
        }

        spin.stop();
        console.log(fmt.header(`Import from ${path.basename(reviewFile)}`));
        console.log();

        // Conflict detection
        if (!options.force && !options.dryRun) {
          const conflicts: Array<{ file: string; annotations: number }> = [];
          for (const section of wordSections) {
            const sectionPath = path.join(options.dir, section.file);
            if (!fs.existsSync(sectionPath)) continue;
            const existingCounts = countAnnotations(fs.readFileSync(sectionPath, 'utf-8'));
            if (existingCounts.total > 0) {
              conflicts.push({ file: section.file, annotations: existingCounts.total });
            }
          }

          if (conflicts.length > 0) {
            if (!(await confirmOverwrite(conflicts))) {
              console.log(chalk.dim('Aborted. Use --force to skip this check.'));
              process.exit(0);
            }
            console.log();
          }
        }

        const sectionResults: SyncSectionResult[] = [];
        let totalChanges = 0;
        let totalCommentsInserted = 0;

        // Section boundaries in the XML document text, used to assign comments to
        // sections: docPosition is relative to xmlDocText, NOT wordText.
        const sectionBoundaries = computeKeywordBoundaries(wordSections.map(s => s.file), xmlDocText);
        const firstBoundaryStart = sectionBoundaries.length > 0
          ? Math.min(...sectionBoundaries.map(b => b.start))
          : 0;

        for (const section of wordSections) {
          const sectionPath = path.join(options.dir, section.file);

          if (!fs.existsSync(sectionPath)) {
            sectionResults.push({
              file: section.file,
              header: section.header,
              status: 'skipped',
              stats: undefined,
            });
            continue;
          }

          const result = await importFromWord(reviewFile, sectionPath, {
            sectionContent: section.content,
            author: 'Reviewer',
            wordTables: wordTables,
          });

          let annotated = result.annotated;
          const stats = result.stats;

          let refConversions: Array<{ from: string; to: string }> = [];
          if (registry) {
            const converted = convertHardcodedRefs(annotated, registry);
            annotated = converted.converted;
            refConversions = converted.conversions;
            totalRefConversions += refConversions.length;
          }

          if (comments.length > 0 && anchors.size > 0) {
            // Keep only the comments that belong to this section. docPosition and
            // sectionBoundaries share the xmlDocText coordinate system, so no
            // scaling is needed.
            const boundary = sectionBoundaries.find(b => b.file === section.file);
            const isFirstSection = section === wordSections[0];

            const sectionComments = comments.filter((c: { id: string }) => {
              const anchorData = anchors.get(c.id);
              if (!anchorData || anchorData.docPosition === undefined || !boundary) return false;

              // Comments within the section boundaries
              if (anchorData.docPosition >= boundary.start && anchorData.docPosition < boundary.end) {
                return true;
              }
              // "Outside" comments (before the first section) go to the first section file
              return isFirstSection && anchorData.docPosition < firstBoundaryStart;
            });

            if (process.env.DEBUG) {
              console.log(`[DEBUG] ${section.file}: ${sectionComments.length} comments to place (boundary: ${boundary?.start}-${boundary?.end})`);
            }

            if (sectionComments.length > 0) {
              const countInlineComments = (text: string): number =>
                (text.match(INLINE_COMMENT_PATTERN) || []).length;

              const beforeCount = countInlineComments(annotated);
              annotated = insertCommentsIntoMarkdown(annotated, sectionComments, anchors, {
                quiet: !process.env.DEBUG,
                sectionBoundary: boundary, // Pass section boundary for position-based insertion
              });
              const commentsInserted = countInlineComments(annotated) - beforeCount;

              if (process.env.DEBUG) {
                console.log(`[DEBUG] ${section.file}: inserted ${commentsInserted} of ${sectionComments.length} comments`);
              }

              if (commentsInserted > 0) {
                stats.comments = (stats.comments || 0) + commentsInserted;
                totalCommentsInserted += commentsInserted;
              }
            }
          }

          totalChanges += stats.total;

          // Computed even for dry runs so the preview below shows the would-be result.
          const finalContent = withPreservedPreamble(fs.readFileSync(sectionPath, 'utf-8'), annotated);

          sectionResults.push({
            file: section.file,
            header: section.header,
            status: 'ok',
            stats,
            refs: refConversions.length,
            content: finalContent,
          });

          if (!options.dryRun) {
            fs.writeFileSync(sectionPath, finalContent, 'utf-8');
          }
        }

        const tableRows = sectionResults.map((r) => {
          if (r.status === 'skipped' || !r.stats) {
            return [
              chalk.dim(r.file),
              chalk.dim(r.header.slice(0, 25)),
              chalk.yellow('skipped'),
              '',
              '',
              '',
              '',
            ];
          }
          const s = r.stats;
          const refs = r.refs ?? 0;
          return [
            chalk.bold(r.file),
            r.header.length > 25 ? r.header.slice(0, 22) + '...' : r.header,
            s.insertions > 0 ? chalk.green(`+${s.insertions}`) : chalk.dim('-'),
            s.deletions > 0 ? chalk.red(`-${s.deletions}`) : chalk.dim('-'),
            s.substitutions > 0 ? chalk.yellow(`~${s.substitutions}`) : chalk.dim('-'),
            s.comments > 0 ? chalk.blue(`#${s.comments}`) : chalk.dim('-'),
            refs > 0 ? chalk.magenta(`@${refs}`) : chalk.dim('-'),
          ];
        });

        console.log(fmt.table(
          ['File', 'Section', 'Ins', 'Del', 'Sub', 'Cmt', 'Ref'],
          tableRows,
          { align: ['left', 'left', 'right', 'right', 'right', 'right', 'right'] }
        ));
        console.log();

        if (options.diff !== false && totalChanges > 0) {
          console.log(fmt.header('Changes Preview'));
          console.log();
          for (const r of sectionResults) {
            if (r.status !== 'ok' || !r.stats || r.stats.total <= 0 || r.content === undefined) continue;
            // Preview the in-memory result rather than re-reading the file, so a
            // dry run shows the pending changes instead of the untouched file.
            const preview = inlineDiffPreview(r.content, { maxLines: 3 });
            if (preview) {
              console.log(chalk.bold(r.file) + ':');
              console.log(preview);
              console.log();
            }
          }
        }

        if (options.dryRun) {
          console.log(fmt.box(chalk.yellow('Dry run - no files written'), { padding: 0 }));
        } else if (totalChanges > 0 || totalRefConversions > 0 || comments.length > 0) {
          const summaryLines: string[] = [];
          summaryLines.push(`${chalk.bold(wordSections.length)} sections processed`);
          if (totalChanges > 0) summaryLines.push(`${chalk.bold(totalChanges)} annotations imported`);
          // Report what was actually inserted, not merely how many comments the document holds
          if (comments.length > 0) summaryLines.push(`${chalk.bold(totalCommentsInserted)} of ${comments.length} comments placed`);
          if (totalRefConversions > 0) summaryLines.push(`${chalk.bold(totalRefConversions)} refs converted to @-syntax`);

          console.log(fmt.box(summaryLines.join('\n'), { title: 'Summary', padding: 0 }));
          console.log();
          console.log(chalk.dim('Next steps:'));
          console.log(chalk.dim(' 1. rev review <section.md> - Accept/reject changes'));
          console.log(chalk.dim(' 2. rev comments <section.md> - View/address comments'));
          console.log(chalk.dim(' 3. rev build docx - Rebuild Word doc'));
        } else {
          console.log(fmt.status('success', 'No changes detected.'));
        }
      } catch (err) {
        spin.stop();
        const error = err as Error;
        console.error(fmt.status('error', error.message));
        if (process.env.DEBUG) console.error(error.stack);
        process.exit(1);
      }
    });
}
550
+
551
/**
 * `sync --comments-only`: import only Word comments at fuzzy-matched anchors.
 *
 * Skips the Word→Markdown diff entirely (no track changes, no pandoc, no
 * prose modifications). Useful when the markdown has been edited after the
 * docx was sent for review — applying track changes from a stale draft
 * would overwrite newer edits.
 *
 * A section file is rewritten only when at least one comment was placed in it
 * and `--dry-run` is not set. Fatal problems (unreadable docx, no section
 * headings detected, section filter matching nothing) are reported on stderr
 * and end the process with exit code 1.
 *
 * @param docx - Path to the reviewed Word document.
 * @param sectionFilter - Optional section names from the CLI; a boundary is
 *   kept when its file base name equals or contains one of them
 *   (case-insensitive).
 * @param options - Parsed `sync` options (`dir` and `dryRun` are used here).
 * @param configPath - Resolved path of the sections config file.
 */
async function syncCommentsOnly(
  docx: string,
  sectionFilter: string[] | undefined,
  options: SyncOptions,
  configPath: string,
): Promise<void> {
  const config = loadConfig(configPath);
  const { extractWordComments, extractCommentAnchors, extractHeadings, insertCommentsIntoMarkdown } = await import('../import.js');
  const { computeSectionBoundaries } = await import('./section-boundaries.js');

  const spin = fmt.spinner(`Reading comments from ${path.basename(docx)}...`).start();

  let comments: Awaited<ReturnType<typeof extractWordComments>>;
  let anchors: Awaited<ReturnType<typeof extractCommentAnchors>>['anchors'];
  let headings: Awaited<ReturnType<typeof extractHeadings>>;
  let fullDocText = '';
  try {
    comments = await extractWordComments(docx);
    const anchorResult = await extractCommentAnchors(docx);
    anchors = anchorResult.anchors;
    fullDocText = anchorResult.fullDocText;
    headings = await extractHeadings(docx);
    spin.stop();
  } catch (err) {
    spin.stop();
    const error = err as Error;
    console.error(fmt.status('error', error.message));
    // Same DEBUG stack trace as the other error handlers in this file
    if (process.env.DEBUG) console.error(error.stack);
    process.exit(1);
    return; // not reached in production; keeps control flow sound if exit is stubbed
  }

  console.log(fmt.header(`Comments from ${path.basename(docx)} (comments-only)`));
  console.log();

  if (comments.length === 0) {
    console.log(fmt.status('info', 'No comments found in document.'));
    return;
  }

  const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);

  if (boundaries.length === 0) {
    console.error(fmt.status('warning', 'No section headings detected in Word document.'));
    console.error(chalk.dim(' Check that headers in sections.yaml match heading paragraphs in the docx.'));
    process.exit(1);
  }

  // Apply optional section filter from CLI
  let activeBoundaries = boundaries;
  if (sectionFilter && sectionFilter.length > 0) {
    const wanted = sectionFilter.map(s => s.trim().toLowerCase());
    activeBoundaries = boundaries.filter(b => {
      const base = b.file.replace(/\.md$/i, '').toLowerCase();
      return wanted.some(name => base === name || base.includes(name));
    });
    if (activeBoundaries.length === 0) {
      console.error(fmt.status('error', `No sections matched: ${sectionFilter.join(', ')}`));
      process.exit(1);
    }
  }

  // Earliest section start across ALL detected boundaries (not just the filtered
  // ones). Taking the minimum avoids depending on the order computeSectionBoundaries
  // returns them in.
  const firstBoundaryStart = Math.min(...boundaries.map(b => b.start));
  const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];

  for (const boundary of activeBoundaries) {
    const sectionPath = path.join(options.dir, boundary.file);
    if (!fs.existsSync(sectionPath)) {
      results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
      continue;
    }

    const isFirstSection = boundary === activeBoundaries[0];
    const sectionComments = comments.filter((c: { id: string }) => {
      const anchor = anchors.get(c.id);
      if (!anchor || anchor.docPosition === undefined) return false;
      if (anchor.docPosition >= boundary.start && anchor.docPosition < boundary.end) return true;
      // Comments before the first heading land in the first matched section
      return isFirstSection && anchor.docPosition < firstBoundaryStart;
    });

    if (sectionComments.length === 0) {
      results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
      continue;
    }

    const original = fs.readFileSync(sectionPath, 'utf-8');

    // Filled in by insertCommentsIntoMarkdown through the outStats option
    const stats = { placed: 0, deduped: 0, unmatched: 0 };
    const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
      quiet: !process.env.DEBUG,
      sectionBoundary: { start: boundary.start, end: boundary.end },
      wrapAnchor: false,
      outStats: stats,
    });

    // Leave the file untouched when nothing was placed
    if (!options.dryRun && stats.placed > 0) {
      fs.writeFileSync(sectionPath, annotated, 'utf-8');
    }
    results.push({ file: boundary.file, ...stats, skipped: false });
  }

  const tableRows = results.map(r => {
    if (r.skipped) {
      return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
    }
    return [
      chalk.bold(r.file),
      chalk.green(`${r.placed}`),
      r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
      r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
      chalk.dim('comments only'),
    ];
  });

  console.log(fmt.table(
    ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
    tableRows,
    { align: ['left', 'right', 'right', 'right', 'left'] },
  ));
  console.log();

  const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
  const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
  const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);

  const lines: string[] = [];
  lines.push(`${chalk.bold(comments.length)} comments in document`);
  if (totalPlaced > 0) {
    lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
  }
  if (totalDeduped > 0) {
    lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
  }
  if (totalUnmatched > 0) {
    lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
  }
  if (options.dryRun) {
    lines.push(chalk.yellow('Dry run — no files written'));
  } else if (totalPlaced > 0) {
    lines.push(chalk.dim('Existing prose unchanged.'));
  }
  console.log(fmt.box(lines.join('\n'), { title: 'Summary', padding: 0 }));

  if (totalUnmatched > 0) {
    console.log();
    console.log(chalk.dim('Tip: run "rev verify-anchors" to see which comments drifted.'));
  }
}