docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
@@ -1,706 +1,709 @@
1
- /**
2
- * SYNC command: Import feedback from Word/PDF back to section files
3
- *
4
- * Split from sections.ts for maintainability.
5
- */
6
-
7
- import {
8
- chalk,
9
- fs,
10
- path,
11
- fmt,
12
- findFiles,
13
- loadConfig,
14
- extractSectionsFromText,
15
- countAnnotations,
16
- buildRegistry,
17
- convertHardcodedRefs,
18
- inlineDiffPreview,
19
- } from './context.js';
20
- import type { Command } from 'commander';
21
- import * as readline from 'readline';
22
-
23
- interface ImportStats {
24
- insertions: number;
25
- deletions: number;
26
- substitutions: number;
27
- comments: number;
28
- total: number;
29
- }
30
-
31
- interface SyncOptions {
32
- config: string;
33
- dir: string;
34
- crossref?: boolean;
35
- diff?: boolean;
36
- force?: boolean;
37
- dryRun?: boolean;
38
- /** Commander maps `--comments-only` (a positive flag) cleanly. `--no-overwrite`
39
- * conflicts with the existing `overwrite` semantics in `--force`-style flags
40
- * and Commander's `--no-X` convention assigns `options.x === false`. */
41
- commentsOnly?: boolean;
42
- }
43
-
44
- /**
45
- * Register the sync command with the program
46
- */
47
- export function register(program: Command): void {
48
- // ==========================================================================
49
- // SYNC command - Import with section awareness
50
- // ==========================================================================
51
-
52
- program
53
- .command('sync')
54
- .alias('sections')
55
- .description('Sync feedback from Word/PDF back to section files')
56
- .argument('[file]', 'Word (.docx) or PDF file from reviewer (default: most recent)')
57
- .argument('[sections...]', 'Specific sections to sync (default: all)')
58
- .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
59
- .option('-d, --dir <directory>', 'Directory with section files', '.')
60
- .option('--no-crossref', 'Skip converting hardcoded figure/table refs')
61
- .option('--no-diff', 'Skip showing diff preview')
62
- .option('--force', 'Overwrite files without conflict warning')
63
- .option('--dry-run', 'Preview without writing files')
64
- .option('--comments-only', 'Insert comments at fuzzy-matched anchors only; never modify existing prose or apply track changes (use when markdown was revised after the docx was sent for review)')
65
- .action(async (docx: string | undefined, sections: string[], options: SyncOptions) => {
66
- // Auto-detect most recent docx or pdf if not provided
67
- if (!docx) {
68
- const docxFiles = findFiles('.docx');
69
- const pdfFiles = findFiles('.pdf');
70
- const allFiles = [...docxFiles, ...pdfFiles];
71
-
72
- if (allFiles.length === 0) {
73
- console.error(fmt.status('error', 'No .docx or .pdf files found in current directory.'));
74
- process.exit(1);
75
- }
76
- const sorted = allFiles
77
- .map(f => ({ name: f, mtime: fs.statSync(f).mtime }))
78
- .sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
79
- docx = sorted[0].name;
80
- console.log(fmt.status('info', `Using most recent: ${docx}`));
81
- console.log();
82
- }
83
-
84
- if (!fs.existsSync(docx)) {
85
- console.error(fmt.status('error', `File not found: ${docx}`));
86
- process.exit(1);
87
- }
88
-
89
- // Handle PDF files
90
- if (docx.toLowerCase().endsWith('.pdf')) {
91
- const { extractPdfComments, formatPdfComments, getPdfCommentStats } = await import('../pdf-import.js');
92
-
93
- const spin = fmt.spinner(`Extracting comments from ${path.basename(docx)}...`).start();
94
-
95
- try {
96
- const comments = await extractPdfComments(docx);
97
- spin.stop();
98
-
99
- if (comments.length === 0) {
100
- console.log(fmt.status('info', 'No comments found in PDF.'));
101
- return;
102
- }
103
-
104
- const stats = getPdfCommentStats(comments);
105
- console.log(fmt.header(`PDF Comments from ${path.basename(docx)}`));
106
- console.log();
107
- console.log(formatPdfComments(comments));
108
- console.log();
109
-
110
- const authorList = Object.entries(stats.byAuthor)
111
- .map(([author, count]) => `${author} (${count})`)
112
- .join(', ');
113
- console.log(chalk.dim(`Total: ${stats.total} comments from ${authorList}`));
114
- console.log();
115
-
116
- const configPath = path.resolve(options.dir, options.config);
117
- if (fs.existsSync(configPath) && !options.dryRun) {
118
- const config = loadConfig(configPath);
119
- const mainSection = config.sections?.[0];
120
-
121
- if (mainSection && typeof mainSection === 'string') {
122
- const mainPath = path.join(options.dir, mainSection);
123
- if (fs.existsSync(mainPath)) {
124
- console.log(chalk.dim(`Use 'rev pdf-comments ${docx} --append ${mainSection}' to add comments to markdown.`));
125
- }
126
- }
127
- }
128
- } catch (err) {
129
- spin.stop();
130
- const error = err as Error;
131
- console.error(fmt.status('error', `Failed to extract PDF comments: ${error.message}`));
132
- if (process.env.DEBUG) console.error(error.stack);
133
- process.exit(1);
134
- }
135
- return;
136
- }
137
-
138
- const configPath = path.resolve(options.dir, options.config);
139
- if (!fs.existsSync(configPath)) {
140
- console.error(fmt.status('error', `Config not found: ${configPath}`));
141
- console.error(chalk.dim(' Run "rev init" first to generate sections.yaml'));
142
- process.exit(1);
143
- }
144
-
145
- // --comments-only: import comments only, never modify existing prose.
146
- // Use this when the markdown has been revised since the docx was sent
147
- // out — track changes from a stale draft would clobber newer edits.
148
- if (options.commentsOnly) {
149
- await syncCommentsOnly(docx, sections, options, configPath);
150
- return;
151
- }
152
-
153
- // Check pandoc availability upfront and warn
154
- const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
155
- if (!hasPandoc()) {
156
- console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
157
- console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
158
- console.log();
159
- }
160
-
161
- const spin = fmt.spinner(`Importing ${path.basename(docx)}...`).start();
162
-
163
- try {
164
- const config = loadConfig(configPath);
165
- const { importFromWord, extractWordComments, extractCommentAnchors, insertCommentsIntoMarkdown, extractFromWord } = await import('../import.js');
166
-
167
- let registry = null;
168
- let totalRefConversions = 0;
169
- if (options.crossref !== false) {
170
- registry = buildRegistry(options.dir);
171
- }
172
-
173
- const comments = await extractWordComments(docx);
174
- const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(docx);
175
-
176
- // Extract Word text (uses pandoc if available, falls back to XML extraction)
177
- const wordExtraction = await extractFromWord(docx, { mediaDir: options.dir });
178
- let wordText = wordExtraction.text;
179
- const wordTables = wordExtraction.tables || [];
180
-
181
- // Log extraction messages (warnings about pandoc, track change stats, etc.)
182
- for (const msg of wordExtraction.messages || []) {
183
- if (msg.type === 'warning') {
184
- spin.stop();
185
- console.log(fmt.status('warning', msg.message));
186
- spin.start();
187
- }
188
- }
189
-
190
- // Restore crossref on FULL text BEFORE splitting into sections
191
- // This ensures duplicate labels from track changes are handled correctly
192
- // (the same figure may appear multiple times in old/new versions)
193
- const { restoreCrossrefFromWord, restoreImagesFromRegistry } = await import('../import.js');
194
- const crossrefResult = restoreCrossrefFromWord(wordText, options.dir);
195
- wordText = crossrefResult.text;
196
- if (crossrefResult.restored > 0) {
197
- console.log(`Restored ${crossrefResult.restored} crossref reference(s)`);
198
- }
199
-
200
- // Also restore images from registry using shared restoredLabels
201
- const imageRestoreResult = restoreImagesFromRegistry(wordText, options.dir, crossrefResult.restoredLabels);
202
- wordText = imageRestoreResult.text;
203
- if (imageRestoreResult.restored > 0) {
204
- console.log(`Restored ${imageRestoreResult.restored} image(s) from registry`);
205
- }
206
-
207
- let wordSections = extractSectionsFromText(wordText, config.sections);
208
-
209
- if (wordSections.length === 0) {
210
- spin.stop();
211
- console.error(fmt.status('warning', 'No sections detected in Word document.'));
212
- console.error(chalk.dim(' Check that headings match sections.yaml'));
213
- process.exit(1);
214
- }
215
-
216
- if (sections && sections.length > 0) {
217
- const onlyList = sections.map(s => s.trim().toLowerCase());
218
- wordSections = wordSections.filter(section => {
219
- const fileName = section.file.replace(/\.md$/i, '').toLowerCase();
220
- const header = section.header.toLowerCase();
221
- return onlyList.some(name => fileName === name || fileName.includes(name) || header.includes(name));
222
- });
223
- if (wordSections.length === 0) {
224
- spin.stop();
225
- console.error(fmt.status('error', `No sections matched: ${sections.join(', ')}`));
226
- console.error(chalk.dim(` Available: ${extractSectionsFromText(wordText, config.sections).map(s => s.file.replace(/\.md$/i, '')).join(', ')}`));
227
- process.exit(1);
228
- }
229
- }
230
-
231
- spin.stop();
232
- console.log(fmt.header(`Import from ${path.basename(docx)}`));
233
- console.log();
234
-
235
- // Conflict detection
236
- if (!options.force && !options.dryRun) {
237
- const conflicts: Array<{ file: string; annotations: number }> = [];
238
- for (const section of wordSections) {
239
- const sectionPath = path.join(options.dir, section.file);
240
- if (fs.existsSync(sectionPath)) {
241
- const existing = fs.readFileSync(sectionPath, 'utf-8');
242
- const existingCounts = countAnnotations(existing);
243
- if (existingCounts.total > 0) {
244
- conflicts.push({
245
- file: section.file,
246
- annotations: existingCounts.total,
247
- });
248
- }
249
- }
250
- }
251
-
252
- if (conflicts.length > 0) {
253
- console.log(fmt.status('warning', 'Files with existing annotations will be overwritten:'));
254
- for (const c of conflicts) {
255
- console.log(chalk.yellow(` - ${c.file} (${c.annotations} annotations)`));
256
- }
257
- console.log();
258
-
259
- const rl = readline.createInterface({
260
- input: process.stdin,
261
- output: process.stdout,
262
- });
263
-
264
- const answer = await new Promise<string>((resolve) =>
265
- rl.question(chalk.cyan('Continue and overwrite? [y/N] '), resolve)
266
- );
267
- rl.close();
268
-
269
- if (answer.toLowerCase() !== 'y') {
270
- console.log(chalk.dim('Aborted. Use --force to skip this check.'));
271
- process.exit(0);
272
- }
273
- console.log();
274
- }
275
- }
276
-
277
- const sectionResults: Array<{
278
- file: string;
279
- header: string;
280
- status: string;
281
- stats?: ImportStats;
282
- refs?: number;
283
- }> = [];
284
- let totalChanges = 0;
285
-
286
- // Calculate section boundaries in the XML document text for comment filtering
287
- // Comment positions (docPosition) are relative to xmlDocText, NOT wordText
288
- // So we must find section headers in xmlDocText to get matching boundaries
289
- const sectionBoundaries: Array<{ file: string; start: number; end: number }> = [];
290
- const xmlLower = xmlDocText.toLowerCase();
291
-
292
- // Standard section header keywords to search for in XML
293
- // Map from file name pattern to search terms
294
- const sectionKeywords: Record<string, string[]> = {
295
- 'abstract': ['abstract', 'summary'],
296
- 'introduction': ['introduction', 'background'],
297
- 'methods': ['methods', 'materials and methods', 'methodology'],
298
- 'results': ['results'],
299
- 'discussion': ['discussion'],
300
- 'conclusion': ['conclusion', 'conclusions'],
301
- };
302
-
303
- // Helper: find section header (skip labels like "Methods:" in structured abstracts)
304
- // Real section headers are NOT followed by ":" immediately
305
- function findSectionHeader(text: string, keyword: string, startFrom: number = 0): number {
306
- const lower = text.toLowerCase();
307
- let idx = startFrom;
308
- while ((idx = lower.indexOf(keyword, idx)) !== -1) {
309
- // Check what follows the keyword
310
- const afterKeyword = text.slice(idx + keyword.length, idx + keyword.length + 5);
311
- // Skip if followed by ":" (this is a label, not a section header)
312
- // Real headers are followed by text content, a newline, or a subheading
313
- if (!afterKeyword.startsWith(':') && !afterKeyword.startsWith(' :')) {
314
- return idx;
315
- }
316
- idx++;
317
- }
318
- return -1;
319
- }
320
-
321
- for (const section of wordSections) {
322
- const fileBase = section.file.replace(/\.md$/i, '').toLowerCase();
323
-
324
- // Get keywords for this section
325
- const keywords = sectionKeywords[fileBase] || [fileBase];
326
-
327
- // Find the first valid keyword that exists in XML (not a label)
328
- let headerIdx = -1;
329
- for (const kw of keywords) {
330
- const idx = findSectionHeader(xmlDocText, kw, 0);
331
- if (idx >= 0 && (headerIdx < 0 || idx < headerIdx)) {
332
- headerIdx = idx;
333
- }
334
- }
335
-
336
- if (headerIdx >= 0) {
337
- // Find the next section's start to determine end boundary
338
- let nextHeaderIdx = xmlDocText.length;
339
- const sectionIdx = wordSections.indexOf(section);
340
- if (sectionIdx < wordSections.length - 1) {
341
- const nextFileBase = wordSections[sectionIdx + 1].file.replace(/\.md$/i, '').toLowerCase();
342
- const nextKeywords = sectionKeywords[nextFileBase] || [nextFileBase];
343
- for (const nkw of nextKeywords) {
344
- const foundNext = findSectionHeader(xmlDocText, nkw, headerIdx + 10);
345
- if (foundNext >= 0 && foundNext < nextHeaderIdx) {
346
- nextHeaderIdx = foundNext;
347
- }
348
- }
349
- }
350
-
351
- sectionBoundaries.push({
352
- file: section.file,
353
- start: headerIdx,
354
- end: nextHeaderIdx
355
- });
356
-
357
- }
358
- }
359
-
360
- // Document length is the XML text length (same coordinate system as docPosition)
361
- const docLength = xmlDocText.length;
362
-
363
- for (const section of wordSections) {
364
- const sectionPath = path.join(options.dir, section.file);
365
-
366
- if (!fs.existsSync(sectionPath)) {
367
- sectionResults.push({
368
- file: section.file,
369
- header: section.header,
370
- status: 'skipped',
371
- stats: undefined,
372
- });
373
- continue;
374
- }
375
-
376
- const result = await importFromWord(docx, sectionPath, {
377
- sectionContent: section.content,
378
- author: 'Reviewer',
379
- wordTables: wordTables,
380
- });
381
-
382
- let { annotated, stats } = result;
383
-
384
- let refConversions: Array<{ from: string; to: string }> = [];
385
- if (registry && options.crossref !== false) {
386
- const crossrefResult = convertHardcodedRefs(annotated, registry);
387
- annotated = crossrefResult.converted;
388
- refConversions = crossrefResult.conversions;
389
- totalRefConversions += refConversions.length;
390
- }
391
-
392
- let commentsInserted = 0;
393
- if (comments.length > 0 && anchors.size > 0) {
394
- // Filter comments to only those that belong to this section
395
- // Use exact position matching: docPosition is in xmlDocText coordinates,
396
- // and sectionBoundaries are also in xmlDocText coordinates (same source!)
397
- const boundary = sectionBoundaries.find(b => b.file === section.file);
398
- const isFirstSection = wordSections.indexOf(section) === 0;
399
- const firstBoundaryStart = sectionBoundaries.length > 0 ? Math.min(...sectionBoundaries.map(b => b.start)) : 0;
400
-
401
- const sectionComments = comments.filter((c: any) => {
402
- const anchorData = anchors.get(c.id);
403
- if (!anchorData) return false;
404
-
405
- // Use exact position - no scaling needed since both are in xmlDocText coordinates
406
- if (anchorData.docPosition !== undefined && boundary) {
407
- // Include comments within section boundaries
408
- if (anchorData.docPosition >= boundary.start && anchorData.docPosition < boundary.end) {
409
- return true;
410
- }
411
- // Also include "outside" comments (before first section) in the first section file
412
- if (isFirstSection && anchorData.docPosition < firstBoundaryStart) {
413
- return true;
414
- }
415
- }
416
-
417
- return false;
418
- });
419
-
420
- if (process.env.DEBUG) {
421
- console.log(`[DEBUG] ${section.file}: ${sectionComments.length} comments to place (boundary: ${boundary?.start}-${boundary?.end})`);
422
- }
423
-
424
- if (sectionComments.length > 0) {
425
- // Use a more robust pattern that handles < in comment text
426
- const commentPattern = /\{>>.*?<<\}/gs;
427
- const beforeCount = (annotated.match(commentPattern) || []).length;
428
- annotated = insertCommentsIntoMarkdown(annotated, sectionComments, anchors, {
429
- quiet: !process.env.DEBUG,
430
- sectionBoundary: boundary // Pass section boundary for position-based insertion
431
- });
432
- const afterCount = (annotated.match(commentPattern) || []).length;
433
- commentsInserted = afterCount - beforeCount;
434
-
435
- if (process.env.DEBUG) {
436
- console.log(`[DEBUG] ${section.file}: inserted ${commentsInserted} of ${sectionComments.length} comments`);
437
- }
438
-
439
- if (commentsInserted > 0) {
440
- stats.comments = (stats.comments || 0) + commentsInserted;
441
- }
442
- }
443
- }
444
-
445
- totalChanges += stats.total;
446
-
447
- sectionResults.push({
448
- file: section.file,
449
- header: section.header,
450
- status: 'ok',
451
- stats,
452
- refs: refConversions.length,
453
- });
454
-
455
- if (!options.dryRun) {
456
- // Preserve any preamble content (YAML frontmatter, author blocks, metadata)
457
- // that exists before the first heading in the original file.
458
- // This content is never included in the Word build output, so it won't
459
- // appear in the Word doc and would otherwise be lost during sync.
460
- const originalContent = fs.readFileSync(sectionPath, 'utf-8');
461
- const firstHeadingMatch = originalContent.match(/^(#\s)/m);
462
- if (firstHeadingMatch && firstHeadingMatch.index !== undefined && firstHeadingMatch.index > 0) {
463
- const preamble = originalContent.slice(0, firstHeadingMatch.index);
464
- // Only prepend if preamble has non-whitespace content
465
- if (preamble.trim().length > 0) {
466
- annotated = preamble + annotated;
467
- }
468
- }
469
- fs.writeFileSync(sectionPath, annotated, 'utf-8');
470
- }
471
- }
472
-
473
- const tableRows = sectionResults.map((r) => {
474
- if (r.status === 'skipped') {
475
- return [
476
- chalk.dim(r.file),
477
- chalk.dim(r.header.slice(0, 25)),
478
- chalk.yellow('skipped'),
479
- '',
480
- '',
481
- '',
482
- '',
483
- ];
484
- }
485
- const s = r.stats!;
486
- return [
487
- chalk.bold(r.file),
488
- r.header.length > 25 ? r.header.slice(0, 22) + '...' : r.header,
489
- s.insertions > 0 ? chalk.green(`+${s.insertions}`) : chalk.dim('-'),
490
- s.deletions > 0 ? chalk.red(`-${s.deletions}`) : chalk.dim('-'),
491
- s.substitutions > 0 ? chalk.yellow(`~${s.substitutions}`) : chalk.dim('-'),
492
- s.comments > 0 ? chalk.blue(`#${s.comments}`) : chalk.dim('-'),
493
- r.refs! > 0 ? chalk.magenta(`@${r.refs}`) : chalk.dim('-'),
494
- ];
495
- });
496
-
497
- console.log(fmt.table(
498
- ['File', 'Section', 'Ins', 'Del', 'Sub', 'Cmt', 'Ref'],
499
- tableRows,
500
- { align: ['left', 'left', 'right', 'right', 'right', 'right', 'right'] }
501
- ));
502
- console.log();
503
-
504
- if (options.diff !== false && totalChanges > 0) {
505
- console.log(fmt.header('Changes Preview'));
506
- console.log();
507
- for (const result of sectionResults) {
508
- if (result.status === 'ok' && result.stats && result.stats.total > 0) {
509
- const sectionPath = path.join(options.dir, result.file);
510
- if (fs.existsSync(sectionPath)) {
511
- const content = fs.readFileSync(sectionPath, 'utf-8');
512
- const preview = inlineDiffPreview(content, { maxLines: 3 });
513
- if (preview) {
514
- console.log(chalk.bold(result.file) + ':');
515
- console.log(preview);
516
- console.log();
517
- }
518
- }
519
- }
520
- }
521
- }
522
-
523
- if (options.dryRun) {
524
- console.log(fmt.box(chalk.yellow('Dry run - no files written'), { padding: 0 }));
525
- } else if (totalChanges > 0 || totalRefConversions > 0 || comments.length > 0) {
526
- const summaryLines: string[] = [];
527
- summaryLines.push(`${chalk.bold(wordSections.length)} sections processed`);
528
- if (totalChanges > 0) summaryLines.push(`${chalk.bold(totalChanges)} annotations imported`);
529
- if (comments.length > 0) summaryLines.push(`${chalk.bold(comments.length)} comments placed`);
530
- if (totalRefConversions > 0) summaryLines.push(`${chalk.bold(totalRefConversions)} refs converted to @-syntax`);
531
-
532
- console.log(fmt.box(summaryLines.join('\n'), { title: 'Summary', padding: 0 }));
533
- console.log();
534
- console.log(chalk.dim('Next steps:'));
535
- console.log(chalk.dim(' 1. rev review <section.md> - Accept/reject changes'));
536
- console.log(chalk.dim(' 2. rev comments <section.md> - View/address comments'));
537
- console.log(chalk.dim(' 3. rev build docx - Rebuild Word doc'));
538
- } else {
539
- console.log(fmt.status('success', 'No changes detected.'));
540
- }
541
- } catch (err) {
542
- spin.stop();
543
- const error = err as Error;
544
- console.error(fmt.status('error', error.message));
545
- if (process.env.DEBUG) console.error(error.stack);
546
- process.exit(1);
547
- }
548
- });
549
- }
550
-
551
- /**
552
- * `sync --comments-only`: import only Word comments at fuzzy-matched anchors.
553
- *
554
- * Skips the Word→Markdown diff entirely (no track changes, no pandoc, no
555
- * prose modifications). Useful when the markdown has been edited after the
556
- * docx was sent for review — applying track changes from a stale draft
557
- * would overwrite newer edits.
558
- */
559
- async function syncCommentsOnly(
560
- docx: string,
561
- sectionFilter: string[] | undefined,
562
- options: SyncOptions,
563
- configPath: string,
564
- ): Promise<void> {
565
- const config = loadConfig(configPath);
566
- const { extractWordComments, extractCommentAnchors, extractHeadings, insertCommentsIntoMarkdown } = await import('../import.js');
567
- const { computeSectionBoundaries } = await import('./section-boundaries.js');
568
-
569
- const spin = fmt.spinner(`Reading comments from ${path.basename(docx)}...`).start();
570
-
571
- let comments;
572
- let anchors;
573
- let headings;
574
- let fullDocText = '';
575
- try {
576
- comments = await extractWordComments(docx);
577
- const result = await extractCommentAnchors(docx);
578
- anchors = result.anchors;
579
- fullDocText = result.fullDocText;
580
- headings = await extractHeadings(docx);
581
- spin.stop();
582
- } catch (err) {
583
- spin.stop();
584
- const error = err as Error;
585
- console.error(fmt.status('error', error.message));
586
- process.exit(1);
587
- }
588
-
589
- console.log(fmt.header(`Comments from ${path.basename(docx)} (comments-only)`));
590
- console.log();
591
-
592
- if (comments.length === 0) {
593
- console.log(fmt.status('info', 'No comments found in document.'));
594
- return;
595
- }
596
-
597
- const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
598
-
599
- if (boundaries.length === 0) {
600
- console.error(fmt.status('warning', 'No section headings detected in Word document.'));
601
- console.error(chalk.dim(' Check that headers in sections.yaml match heading paragraphs in the docx.'));
602
- process.exit(1);
603
- }
604
-
605
- // Apply optional section filter from CLI
606
- let activeBoundaries = boundaries;
607
- if (sectionFilter && sectionFilter.length > 0) {
608
- const wanted = sectionFilter.map(s => s.trim().toLowerCase());
609
- activeBoundaries = boundaries.filter(b => {
610
- const base = b.file.replace(/\.md$/i, '').toLowerCase();
611
- return wanted.some(name => base === name || base.includes(name));
612
- });
613
- if (activeBoundaries.length === 0) {
614
- console.error(fmt.status('error', `No sections matched: ${sectionFilter.join(', ')}`));
615
- process.exit(1);
616
- }
617
- }
618
-
619
- const firstBoundaryStart = boundaries[0].start;
620
- const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];
621
-
622
- for (const boundary of activeBoundaries) {
623
- const sectionPath = path.join(options.dir, boundary.file);
624
- if (!fs.existsSync(sectionPath)) {
625
- results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
626
- continue;
627
- }
628
-
629
- const isFirstSection = boundary === activeBoundaries[0];
630
- const sectionComments = comments.filter((c: { id: string }) => {
631
- const anchor = anchors.get(c.id);
632
- if (!anchor || anchor.docPosition === undefined) return false;
633
- if (anchor.docPosition >= boundary.start && anchor.docPosition < boundary.end) return true;
634
- // Comments before the first heading land in the first matched section
635
- if (isFirstSection && anchor.docPosition < firstBoundaryStart) return true;
636
- return false;
637
- });
638
-
639
- if (sectionComments.length === 0) {
640
- results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
641
- continue;
642
- }
643
-
644
- const original = fs.readFileSync(sectionPath, 'utf-8');
645
-
646
- const stats = { placed: 0, deduped: 0, unmatched: 0 };
647
- const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
648
- quiet: !process.env.DEBUG,
649
- sectionBoundary: { start: boundary.start, end: boundary.end },
650
- wrapAnchor: false,
651
- outStats: stats,
652
- });
653
-
654
- if (!options.dryRun && stats.placed > 0) {
655
- fs.writeFileSync(sectionPath, annotated, 'utf-8');
656
- }
657
- results.push({ file: boundary.file, ...stats, skipped: false });
658
- }
659
-
660
- const tableRows = results.map(r => {
661
- if (r.skipped) {
662
- return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
663
- }
664
- return [
665
- chalk.bold(r.file),
666
- chalk.green(`${r.placed}`),
667
- r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
668
- r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
669
- chalk.dim('comments only'),
670
- ];
671
- });
672
-
673
- console.log(fmt.table(
674
- ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
675
- tableRows,
676
- { align: ['left', 'right', 'right', 'right', 'left'] },
677
- ));
678
- console.log();
679
-
680
- const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
681
- const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
682
- const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);
683
-
684
- const lines: string[] = [];
685
- lines.push(`${chalk.bold(comments.length)} comments in document`);
686
- if (totalPlaced > 0) {
687
- lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
688
- }
689
- if (totalDeduped > 0) {
690
- lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
691
- }
692
- if (totalUnmatched > 0) {
693
- lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
694
- }
695
- if (options.dryRun) {
696
- lines.push(chalk.yellow('Dry run no files written'));
697
- } else if (totalPlaced > 0) {
698
- lines.push(chalk.dim('Existing prose unchanged.'));
699
- }
700
- console.log(fmt.box(lines.join('\n'), { title: 'Summary', padding: 0 }));
701
-
702
- if (totalUnmatched > 0) {
703
- console.log();
704
- console.log(chalk.dim('Tip: run "rev verify-anchors" to see which comments drifted.'));
705
- }
706
- }
1
+ /**
2
+ * SYNC command: Import feedback from Word/PDF back to section files
3
+ *
4
+ * Split from sections.ts for maintainability.
5
+ */
6
+
7
+ import {
8
+ chalk,
9
+ fs,
10
+ path,
11
+ fmt,
12
+ findFiles,
13
+ resolveSectionsConfig,
14
+ getOrderedSections,
15
+ extractSectionsFromText,
16
+ countAnnotations,
17
+ buildRegistry,
18
+ convertHardcodedRefs,
19
+ inlineDiffPreview,
20
+ } from './context.js';
21
+ import type { Command } from 'commander';
22
+ import type { SectionsConfig } from '../types.js';
23
+ import * as readline from 'readline';
24
+
25
/**
 * Per-section annotation counts produced by a Word import.
 *
 * The first four counters feed the `Ins`, `Del`, `Sub` and `Cmt` columns of
 * the sync results table.
 */
interface ImportStats {
  /** Tracked insertions found in the section. */
  insertions: number;
  /** Tracked deletions found in the section. */
  deletions: number;
  /** Tracked substitutions found in the section. */
  substitutions: number;
  /** Comments attached to the section (incremented when comments are inserted). */
  comments: number;
  /** Aggregate count; summed across sections for the "annotations imported" line. */
  total: number;
}
32
+
33
/**
 * Parsed CLI options for the `sync` command, as delivered by Commander to the
 * command's action handler.
 */
interface SyncOptions {
  /** `-c, --config <file>`: sections config file (CLI default `sections.yaml`). */
  config: string;
  /** `-d, --dir <directory>`: directory holding the section files (CLI default `.`). */
  dir: string;
  /** `false` when `--no-crossref` is passed; hardcoded figure/table refs are then left alone. */
  crossref?: boolean;
  /** `false` when `--no-diff` is passed; the changes preview is then skipped. */
  diff?: boolean;
  /** `--force`: overwrite files without the existing-annotations prompt. */
  force?: boolean;
  /** `--dry-run`: report what would happen without writing any file. */
  dryRun?: boolean;
  /**
   * `--comments-only`: insert comments only and never touch existing prose.
   *
   * Deliberately a positive flag. A `--no-overwrite` spelling was avoided
   * because Commander's `--no-X` convention would surface it as
   * `options.overwrite === false`, which clashes with the overwrite semantics
   * already associated with `--force`-style flags.
   */
  commentsOnly?: boolean;
}
45
+
46
/**
 * Register the `sync` command (alias `sections`) with the program.
 *
 * The command imports reviewer feedback from a Word or PDF file back into the
 * per-section markdown files:
 *
 * - With no `[file]` argument, the most recently modified `.docx`/`.pdf` found
 *   by `findFiles` is used.
 * - PDF input only lists the extracted comments; nothing is written.
 * - `--comments-only` delegates to `syncCommentsOnly`.
 * - Otherwise each detected section is imported with track changes, hardcoded
 *   refs are optionally converted, comments are inserted, and (unless
 *   `--dry-run`) the section file is rewritten with its preamble preserved.
 *
 * Fatal conditions are reported on stderr and end the process via
 * `process.exit(1)`; declining the overwrite prompt exits with code 0.
 *
 * @param program Commander program to attach the command to.
 */
export function register(program: Command): void {
  // ==========================================================================
  // SYNC command - Import with section awareness
  // ==========================================================================

  program
    .command('sync')
    .alias('sections')
    .description('Sync feedback from Word/PDF back to section files')
    .argument('[file]', 'Word (.docx) or PDF file from reviewer (default: most recent)')
    .argument('[sections...]', 'Specific sections to sync (default: all)')
    .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
    .option('-d, --dir <directory>', 'Directory with section files', '.')
    .option('--no-crossref', 'Skip converting hardcoded figure/table refs')
    .option('--no-diff', 'Skip showing diff preview')
    .option('--force', 'Overwrite files without conflict warning')
    .option('--dry-run', 'Preview without writing files')
    .option('--comments-only', 'Insert comments at fuzzy-matched anchors only; never modify existing prose or apply track changes (use when markdown was revised after the docx was sent for review)')
    .action(async (docx: string | undefined, sections: string[], options: SyncOptions) => {
      // Auto-detect most recent docx or pdf if not provided
      if (!docx) {
        const candidates = [...findFiles('.docx'), ...findFiles('.pdf')];

        if (candidates.length === 0) {
          console.error(fmt.status('error', 'No .docx or .pdf files found in current directory.'));
          process.exit(1);
        }
        const newestFirst = candidates
          .map(f => ({ name: f, mtime: fs.statSync(f).mtime }))
          .sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
        docx = newestFirst[0].name;
        console.log(fmt.status('info', `Using most recent: ${docx}`));
        console.log();
      }
      const reviewFile: string = docx;

      if (!fs.existsSync(reviewFile)) {
        console.error(fmt.status('error', `File not found: ${reviewFile}`));
        process.exit(1);
      }

      // Handle PDF files
      if (reviewFile.toLowerCase().endsWith('.pdf')) {
        await syncPdfComments(reviewFile, options);
        return;
      }

      // Resolve the section config: an explicit sections.yaml if present,
      // otherwise the `sections:` list in rev.yaml (single source of truth).
      const resolved = resolveSectionsConfig(options.dir, options.config);
      if (!resolved) {
        console.error(fmt.status('error', `No section config found in ${path.resolve(options.dir)}`));
        console.error(chalk.dim(' Add a `sections:` list to rev.yaml, or run "rev init" to generate sections.yaml.'));
        process.exit(1);
      }
      const sectionsConfig = resolved.config;

      // --comments-only: import comments only, never modify existing prose.
      // Use this when the markdown has been revised since the docx was sent
      // out — track changes from a stale draft would clobber newer edits.
      if (options.commentsOnly) {
        await syncCommentsOnly(reviewFile, sections, options, sectionsConfig);
        return;
      }

      // Check pandoc availability upfront and warn
      const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
      if (!hasPandoc()) {
        console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
        console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
        console.log();
      }

      const spin = fmt.spinner(`Importing ${path.basename(reviewFile)}...`).start();

      try {
        const {
          importFromWord,
          extractWordComments,
          extractCommentAnchors,
          insertCommentsIntoMarkdown,
          extractFromWord,
          restoreCrossrefFromWord,
          restoreImagesFromRegistry,
        } = await import('../import.js');

        const registry = options.crossref !== false ? buildRegistry(options.dir) : null;
        let totalRefConversions = 0;

        const comments = await extractWordComments(reviewFile);
        const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(reviewFile);

        // Extract Word text (uses pandoc if available, falls back to XML extraction)
        const wordExtraction = await extractFromWord(reviewFile, { mediaDir: options.dir });
        let wordText = wordExtraction.text;
        const wordTables = wordExtraction.tables || [];

        // Log extraction messages (warnings about pandoc, track change stats, etc.)
        for (const msg of wordExtraction.messages || []) {
          if (msg.type === 'warning') {
            spin.stop();
            console.log(fmt.status('warning', msg.message));
            spin.start();
          }
        }

        // Restore crossref on FULL text BEFORE splitting into sections
        // This ensures duplicate labels from track changes are handled correctly
        // (the same figure may appear multiple times in old/new versions)
        const crossrefResult = restoreCrossrefFromWord(wordText, options.dir);
        wordText = crossrefResult.text;
        if (crossrefResult.restored > 0) {
          console.log(`Restored ${crossrefResult.restored} crossref reference(s)`);
        }

        // Also restore images from registry using shared restoredLabels
        const imageRestoreResult = restoreImagesFromRegistry(wordText, options.dir, crossrefResult.restoredLabels);
        wordText = imageRestoreResult.text;
        if (imageRestoreResult.restored > 0) {
          console.log(`Restored ${imageRestoreResult.restored} image(s) from registry`);
        }

        // Keep the unfiltered list: section boundaries must be derived from
        // every section in the document, not only the ones selected on the CLI.
        const allWordSections = extractSectionsFromText(wordText, sectionsConfig.sections);

        if (allWordSections.length === 0) {
          spin.stop();
          console.error(fmt.status('warning', 'No sections detected in Word document.'));
          console.error(chalk.dim(' Check that headings match sections.yaml'));
          process.exit(1);
        }

        let wordSections = allWordSections;
        if (sections && sections.length > 0) {
          const onlyList = sections.map(s => s.trim().toLowerCase());
          wordSections = allWordSections.filter(section => {
            const fileName = section.file.replace(/\.md$/i, '').toLowerCase();
            const header = section.header.toLowerCase();
            return onlyList.some(name => fileName === name || fileName.includes(name) || header.includes(name));
          });
          if (wordSections.length === 0) {
            spin.stop();
            console.error(fmt.status('error', `No sections matched: ${sections.join(', ')}`));
            console.error(chalk.dim(` Available: ${allWordSections.map(s => s.file.replace(/\.md$/i, '')).join(', ')}`));
            process.exit(1);
          }
        }

        spin.stop();
        console.log(fmt.header(`Import from ${path.basename(reviewFile)}`));
        console.log();

        // Conflict detection
        if (!options.force && !options.dryRun) {
          const conflicts: Array<{ file: string; annotations: number }> = [];
          for (const section of wordSections) {
            const sectionPath = path.join(options.dir, section.file);
            if (!fs.existsSync(sectionPath)) continue;
            const existingCounts = countAnnotations(fs.readFileSync(sectionPath, 'utf-8'));
            if (existingCounts.total > 0) {
              conflicts.push({ file: section.file, annotations: existingCounts.total });
            }
          }

          if (conflicts.length > 0) {
            if (!(await confirmOverwrite(conflicts))) {
              console.log(chalk.dim('Aborted. Use --force to skip this check.'));
              process.exit(0);
            }
            console.log();
          }
        }

        const sectionResults: SectionSyncResult[] = [];
        let totalChanges = 0;
        let totalCommentsInserted = 0;

        // Comment positions (docPosition) are relative to xmlDocText, NOT wordText,
        // so section headers are located in xmlDocText to get matching boundaries.
        const sectionBoundaries = locateSectionBoundaries(xmlDocText, allWordSections);
        const firstBoundaryStart = sectionBoundaries.length > 0
          ? Math.min(...sectionBoundaries.map(b => b.start))
          : 0;

        // A global regex is safe to reuse with String.prototype.match.
        const commentPattern = /\{>>.*?<<\}/gs;
        const countComments = (text: string): number => (text.match(commentPattern) || []).length;

        for (const section of wordSections) {
          const sectionPath = path.join(options.dir, section.file);

          if (!fs.existsSync(sectionPath)) {
            sectionResults.push({
              file: section.file,
              header: section.header,
              status: 'skipped',
              stats: undefined,
            });
            continue;
          }

          const result = await importFromWord(reviewFile, sectionPath, {
            sectionContent: section.content,
            author: 'Reviewer',
            wordTables: wordTables,
          });

          let { annotated } = result;
          const { stats } = result;

          let refConversions: Array<{ from: string; to: string }> = [];
          if (registry) {
            const converted = convertHardcodedRefs(annotated, registry);
            annotated = converted.converted;
            refConversions = converted.conversions;
            totalRefConversions += refConversions.length;
          }

          if (comments.length > 0 && anchors.size > 0) {
            // docPosition and sectionBoundaries share xmlDocText coordinates,
            // so comments are assigned to a section by exact position.
            const boundary = sectionBoundaries.find(b => b.file === section.file);
            const isFirstSection = section === wordSections[0];

            const sectionComments = comments.filter((c: { id: string }) => {
              const position = anchors.get(c.id)?.docPosition;
              if (position === undefined || !boundary) return false;
              // Include comments within section boundaries
              if (position >= boundary.start && position < boundary.end) return true;
              // Comments before the document's first section header go to the
              // first processed section file.
              return isFirstSection && position < firstBoundaryStart;
            });

            if (process.env.DEBUG) {
              console.log(`[DEBUG] ${section.file}: ${sectionComments.length} comments to place (boundary: ${boundary?.start}-${boundary?.end})`);
            }

            if (sectionComments.length > 0) {
              const beforeCount = countComments(annotated);
              annotated = insertCommentsIntoMarkdown(annotated, sectionComments, anchors, {
                quiet: !process.env.DEBUG,
                sectionBoundary: boundary, // Pass section boundary for position-based insertion
              });
              const commentsInserted = countComments(annotated) - beforeCount;

              if (process.env.DEBUG) {
                console.log(`[DEBUG] ${section.file}: inserted ${commentsInserted} of ${sectionComments.length} comments`);
              }

              if (commentsInserted > 0) {
                stats.comments = (stats.comments || 0) + commentsInserted;
                totalCommentsInserted += commentsInserted;
              }
            }
          }

          totalChanges += stats.total;

          // Preserve any preamble content (YAML frontmatter, author blocks, metadata)
          // that exists before the first heading in the original file.
          // This content is never included in the Word build output, so it won't
          // appear in the Word doc and would otherwise be lost during sync.
          const originalContent = fs.readFileSync(sectionPath, 'utf-8');
          const firstHeadingMatch = originalContent.match(/^(#\s)/m);
          if (firstHeadingMatch && firstHeadingMatch.index !== undefined && firstHeadingMatch.index > 0) {
            const preamble = originalContent.slice(0, firstHeadingMatch.index);
            // Only prepend if preamble has non-whitespace content
            if (preamble.trim().length > 0) {
              annotated = preamble + annotated;
            }
          }

          sectionResults.push({
            file: section.file,
            header: section.header,
            status: 'ok',
            stats,
            refs: refConversions.length,
            content: annotated,
          });

          if (!options.dryRun) {
            fs.writeFileSync(sectionPath, annotated, 'utf-8');
          }
        }

        const tableRows = sectionResults.map((r) => {
          const s = r.stats;
          if (r.status === 'skipped' || !s) {
            return [
              chalk.dim(r.file),
              chalk.dim(r.header.slice(0, 25)),
              chalk.yellow('skipped'),
              '',
              '',
              '',
              '',
            ];
          }
          const refs = r.refs ?? 0;
          return [
            chalk.bold(r.file),
            r.header.length > 25 ? r.header.slice(0, 22) + '...' : r.header,
            s.insertions > 0 ? chalk.green(`+${s.insertions}`) : chalk.dim('-'),
            s.deletions > 0 ? chalk.red(`-${s.deletions}`) : chalk.dim('-'),
            s.substitutions > 0 ? chalk.yellow(`~${s.substitutions}`) : chalk.dim('-'),
            s.comments > 0 ? chalk.blue(`#${s.comments}`) : chalk.dim('-'),
            refs > 0 ? chalk.magenta(`@${refs}`) : chalk.dim('-'),
          ];
        });

        console.log(fmt.table(
          ['File', 'Section', 'Ins', 'Del', 'Sub', 'Cmt', 'Ref'],
          tableRows,
          { align: ['left', 'left', 'right', 'right', 'right', 'right', 'right'] }
        ));
        console.log();

        if (options.diff !== false && totalChanges > 0) {
          console.log(fmt.header('Changes Preview'));
          console.log();
          for (const result of sectionResults) {
            if (result.status !== 'ok' || !result.stats || result.stats.total <= 0) continue;
            if (result.content === undefined) continue;
            // Preview the in-memory result so --dry-run shows what WOULD be
            // written rather than the untouched file on disk.
            const preview = inlineDiffPreview(result.content, { maxLines: 3 });
            if (preview) {
              console.log(chalk.bold(result.file) + ':');
              console.log(preview);
              console.log();
            }
          }
        }

        if (options.dryRun) {
          console.log(fmt.box(chalk.yellow('Dry run - no files written'), { padding: 0 }));
        } else if (totalChanges > 0 || totalRefConversions > 0 || comments.length > 0) {
          const summaryLines: string[] = [];
          summaryLines.push(`${chalk.bold(wordSections.length)} sections processed`);
          if (totalChanges > 0) summaryLines.push(`${chalk.bold(totalChanges)} annotations imported`);
          if (comments.length > 0) {
            // Report what was actually inserted, not just what the docx contained.
            summaryLines.push(`${chalk.bold(totalCommentsInserted)} of ${comments.length} comments placed`);
          }
          if (totalRefConversions > 0) summaryLines.push(`${chalk.bold(totalRefConversions)} refs converted to @-syntax`);

          console.log(fmt.box(summaryLines.join('\n'), { title: 'Summary', padding: 0 }));
          console.log();
          console.log(chalk.dim('Next steps:'));
          console.log(chalk.dim(' 1. rev review <section.md> - Accept/reject changes'));
          console.log(chalk.dim(' 2. rev comments <section.md> - View/address comments'));
          console.log(chalk.dim(' 3. rev build docx - Rebuild Word doc'));
        } else {
          console.log(fmt.status('success', 'No changes detected.'));
        }
      } catch (err) {
        spin.stop();
        const error = err instanceof Error ? err : new Error(String(err));
        console.error(fmt.status('error', error.message));
        if (process.env.DEBUG) console.error(error.stack);
        process.exit(1);
      }
    });
}

/** One row of the per-section results collected by a full (non comments-only) sync. */
type SectionSyncResult = {
  file: string;
  header: string;
  status: 'ok' | 'skipped';
  stats?: ImportStats;
  refs?: number;
  /** Final section text (preamble + annotated body); absent for skipped sections. */
  content?: string;
};

/**
 * Search terms used to locate a section's header in the XML document text,
 * keyed by the section file's base name (lowercase, without `.md`).
 * Sections not listed here are searched by their base name.
 */
const SECTION_HEADER_KEYWORDS = new Map<string, string[]>([
  ['abstract', ['abstract', 'summary']],
  ['introduction', ['introduction', 'background']],
  ['methods', ['methods', 'materials and methods', 'methodology']],
  ['results', ['results']],
  ['discussion', ['discussion']],
  ['conclusion', ['conclusion', 'conclusions']],
]);

/** Print the comments found in a PDF and hint at `rev pdf-comments`; writes no files. */
async function syncPdfComments(pdfPath: string, options: SyncOptions): Promise<void> {
  const { extractPdfComments, formatPdfComments, getPdfCommentStats } = await import('../pdf-import.js');

  const spin = fmt.spinner(`Extracting comments from ${path.basename(pdfPath)}...`).start();

  try {
    const comments = await extractPdfComments(pdfPath);
    spin.stop();

    if (comments.length === 0) {
      console.log(fmt.status('info', 'No comments found in PDF.'));
      return;
    }

    const stats = getPdfCommentStats(comments);
    console.log(fmt.header(`PDF Comments from ${path.basename(pdfPath)}`));
    console.log();
    console.log(formatPdfComments(comments));
    console.log();

    const authorList = Object.entries(stats.byAuthor)
      .map(([author, count]) => `${author} (${count})`)
      .join(', ');
    console.log(chalk.dim(`Total: ${stats.total} comments from ${authorList}`));
    console.log();

    const resolved = resolveSectionsConfig(options.dir, options.config);
    if (resolved && !options.dryRun) {
      const mainSection = getOrderedSections(resolved.config)[0];
      if (mainSection && fs.existsSync(path.join(options.dir, mainSection))) {
        console.log(chalk.dim(`Use 'rev pdf-comments ${pdfPath} --append ${mainSection}' to add comments to markdown.`));
      }
    }
  } catch (err) {
    spin.stop();
    const error = err instanceof Error ? err : new Error(String(err));
    console.error(fmt.status('error', `Failed to extract PDF comments: ${error.message}`));
    if (process.env.DEBUG) console.error(error.stack);
    process.exit(1);
  }
}

/** List the files whose annotations would be overwritten and ask for confirmation; resolves true on "y". */
async function confirmOverwrite(
  conflicts: ReadonlyArray<{ file: string; annotations: number }>,
): Promise<boolean> {
  console.log(fmt.status('warning', 'Files with existing annotations will be overwritten:'));
  for (const c of conflicts) {
    console.log(chalk.yellow(` - ${c.file} (${c.annotations} annotations)`));
  }
  console.log();

  const rl = readline.createInterface({
    input: process.stdin,
    output: process.stdout,
  });
  try {
    const answer = await new Promise<string>((resolve) =>
      rl.question(chalk.cyan('Continue and overwrite? [y/N] '), resolve)
    );
    return answer.trim().toLowerCase() === 'y';
  } finally {
    // Always release stdin, even if the prompt rejects.
    rl.close();
  }
}

/**
 * Find the first occurrence of `keyword` at or after `startFrom` that is not a
 * label: occurrences immediately followed by ":" or " :" (e.g. "Methods:" in a
 * structured abstract) are skipped.
 *
 * @param text      Original document text (used to inspect what follows the keyword).
 * @param lowerText Lowercased copy of `text`, computed once by the caller.
 * @returns Index of the match, or -1 when none qualifies.
 */
function findSectionHeader(text: string, lowerText: string, keyword: string, startFrom: number = 0): number {
  let idx = startFrom;
  while ((idx = lowerText.indexOf(keyword, idx)) !== -1) {
    const afterKeyword = text.slice(idx + keyword.length, idx + keyword.length + 5);
    if (!afterKeyword.startsWith(':') && !afterKeyword.startsWith(' :')) {
      return idx;
    }
    idx++;
  }
  return -1;
}

/**
 * Compute `[start, end)` spans for each section inside the XML document text.
 *
 * `start` is the earliest header-keyword match for the section; `end` is the
 * earliest match for the NEXT section in `sections` (searched from
 * `start + 10`), or the document length when there is no next section or its
 * header cannot be found. Sections whose header is not found are omitted.
 *
 * @param xmlDocText Full document text in the same coordinates as comment positions.
 * @param sections   All sections of the document, in document order.
 */
function locateSectionBoundaries(
  xmlDocText: string,
  sections: ReadonlyArray<{ file: string }>,
): Array<{ file: string; start: number; end: number }> {
  const lowerText = xmlDocText.toLowerCase();

  const keywordsFor = (file: string): string[] => {
    const base = file.replace(/\.md$/i, '').toLowerCase();
    return SECTION_HEADER_KEYWORDS.get(base) ?? [base];
  };

  // Earliest qualifying match among several keywords, or -1.
  const earliestMatch = (keywords: string[], startFrom: number): number => {
    let best = -1;
    for (const kw of keywords) {
      const idx = findSectionHeader(xmlDocText, lowerText, kw, startFrom);
      if (idx >= 0 && (best < 0 || idx < best)) best = idx;
    }
    return best;
  };

  const boundaries: Array<{ file: string; start: number; end: number }> = [];
  sections.forEach((section, i) => {
    const start = earliestMatch(keywordsFor(section.file), 0);
    if (start < 0) return;

    let end = xmlDocText.length;
    const next = sections[i + 1];
    if (next) {
      const nextStart = earliestMatch(keywordsFor(next.file), start + 10);
      if (nextStart >= 0) end = nextStart;
    }
    boundaries.push({ file: section.file, start, end });
  });
  return boundaries;
}
554
+
555
/**
 * Implementation of `sync --comments-only`.
 *
 * Reads the comments, their anchors and the headings from the docx, maps each
 * comment to a section by document position, and inserts it into that
 * section's markdown file. The Word→Markdown diff is never run (no track
 * changes, no pandoc), so existing prose is left as is. This is the mode to
 * use when the markdown was edited after the docx went out for review and a
 * stale draft's track changes would overwrite newer edits.
 *
 * @param docx          Path of the reviewed Word document.
 * @param sectionFilter Optional section names from the CLI; matched against
 *                      the section file's base name (exact or substring).
 * @param options       Parsed `sync` options (`dir` and `dryRun` are read here).
 * @param config        Resolved sections configuration.
 */
async function syncCommentsOnly(
  docx: string,
  sectionFilter: string[] | undefined,
  options: SyncOptions,
  config: SectionsConfig,
): Promise<void> {
  const { extractWordComments, extractCommentAnchors, extractHeadings, insertCommentsIntoMarkdown } = await import('../import.js');
  const { computeSectionBoundaries } = await import('./section-boundaries.js');

  const docxName = path.basename(docx);
  const spin = fmt.spinner(`Reading comments from ${docxName}...`).start();

  let comments;
  let anchors;
  let headings;
  let fullDocText = '';
  try {
    comments = await extractWordComments(docx);
    const anchorInfo = await extractCommentAnchors(docx);
    anchors = anchorInfo.anchors;
    fullDocText = anchorInfo.fullDocText;
    headings = await extractHeadings(docx);
    spin.stop();
  } catch (err) {
    spin.stop();
    console.error(fmt.status('error', (err as Error).message));
    process.exit(1);
  }

  console.log(fmt.header(`Comments from ${docxName} (comments-only)`));
  console.log();

  if (comments.length === 0) {
    console.log(fmt.status('info', 'No comments found in document.'));
    return;
  }

  const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);

  if (boundaries.length === 0) {
    console.error(fmt.status('warning', 'No section headings detected in Word document.'));
    console.error(chalk.dim(' Check that headers in sections.yaml match heading paragraphs in the docx.'));
    process.exit(1);
  }

  // Narrow to the sections named on the CLI, if any were given.
  const wanted = (sectionFilter ?? []).map(s => s.trim().toLowerCase());
  let activeBoundaries = boundaries;
  if (wanted.length > 0) {
    activeBoundaries = boundaries.filter(b => {
      const base = b.file.replace(/\.md$/i, '').toLowerCase();
      return wanted.some(name => base === name || base.includes(name));
    });
    if (activeBoundaries.length === 0) {
      console.error(fmt.status('error', `No sections matched: ${sectionFilter!.join(', ')}`));
      process.exit(1);
    }
  }

  const firstBoundaryStart = boundaries[0].start;
  const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];

  for (const boundary of activeBoundaries) {
    const { file, start, end } = boundary;
    const sectionPath = path.join(options.dir, file);

    if (!fs.existsSync(sectionPath)) {
      results.push({ file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
      continue;
    }

    // Comments before the first heading land in the first matched section.
    const takesLeadingComments = boundary === activeBoundaries[0];
    const sectionComments = comments.filter((c: { id: string }) => {
      const anchor = anchors.get(c.id);
      if (!anchor || anchor.docPosition === undefined) return false;
      const position = anchor.docPosition;
      const insideSection = position >= start && position < end;
      return insideSection || (takesLeadingComments && position < firstBoundaryStart);
    });

    if (sectionComments.length === 0) {
      results.push({ file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
      continue;
    }

    const original = fs.readFileSync(sectionPath, 'utf-8');

    // Filled in by insertCommentsIntoMarkdown through the outStats option.
    const stats = { placed: 0, deduped: 0, unmatched: 0 };
    const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
      quiet: !process.env.DEBUG,
      sectionBoundary: { start, end },
      wrapAnchor: false,
      outStats: stats,
    });

    // Touch the file only when something was actually placed.
    if (!options.dryRun && stats.placed > 0) {
      fs.writeFileSync(sectionPath, annotated, 'utf-8');
    }
    results.push({ file, ...stats, skipped: false });
  }

  const tableRows = results.map(r =>
    r.skipped
      ? [chalk.dim(r.file), chalk.yellow('missing'), '', '', '']
      : [
          chalk.bold(r.file),
          chalk.green(`${r.placed}`),
          r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
          r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
          chalk.dim('comments only'),
        ],
  );

  console.log(fmt.table(
    ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
    tableRows,
    { align: ['left', 'right', 'right', 'right', 'left'] },
  ));
  console.log();

  // Totals across every processed section.
  let totalPlaced = 0;
  let totalDeduped = 0;
  let totalUnmatched = 0;
  for (const r of results) {
    totalPlaced += r.placed;
    totalDeduped += r.deduped;
    totalUnmatched += r.unmatched;
  }

  const lines: string[] = [`${chalk.bold(comments.length)} comments in document`];
  if (totalPlaced > 0) {
    lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
  }
  if (totalDeduped > 0) {
    lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
  }
  if (totalUnmatched > 0) {
    lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
  }
  if (options.dryRun) {
    lines.push(chalk.yellow('Dry run — no files written'));
  } else if (totalPlaced > 0) {
    lines.push(chalk.dim('Existing prose unchanged.'));
  }
  console.log(fmt.box(lines.join('\n'), { title: 'Summary', padding: 0 }));

  if (totalUnmatched > 0) {
    console.log();
    console.log(chalk.dim('Tip: run "rev verify-anchors" to see which comments drifted.'));
  }
}