docrev 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -164
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -431
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/anchor-match.d.ts +1 -1
  11. package/dist/lib/anchor-match.d.ts.map +1 -1
  12. package/dist/lib/anchor-match.js +17 -47
  13. package/dist/lib/anchor-match.js.map +1 -1
  14. package/dist/lib/build.js +4 -4
  15. package/dist/lib/commands/context.d.ts +1 -1
  16. package/dist/lib/commands/context.d.ts.map +1 -1
  17. package/dist/lib/commands/context.js +1 -1
  18. package/dist/lib/commands/context.js.map +1 -1
  19. package/dist/lib/commands/sections.js +7 -7
  20. package/dist/lib/commands/sections.js.map +1 -1
  21. package/dist/lib/commands/sync.d.ts.map +1 -1
  22. package/dist/lib/commands/sync.js +15 -14
  23. package/dist/lib/commands/sync.js.map +1 -1
  24. package/dist/lib/commands/utilities.js +164 -164
  25. package/dist/lib/commands/verify-anchors.js +6 -6
  26. package/dist/lib/commands/verify-anchors.js.map +1 -1
  27. package/dist/lib/commands/word-tools.js +8 -8
  28. package/dist/lib/grammar.js +3 -3
  29. package/dist/lib/macro-filter.lua +201 -201
  30. package/dist/lib/pdf-comments.js +44 -44
  31. package/dist/lib/plugins.js +57 -57
  32. package/dist/lib/pptx-color-filter.lua +37 -37
  33. package/dist/lib/pptx-themes.js +115 -115
  34. package/dist/lib/sections.d.ts +35 -0
  35. package/dist/lib/sections.d.ts.map +1 -1
  36. package/dist/lib/sections.js +81 -0
  37. package/dist/lib/sections.js.map +1 -1
  38. package/dist/lib/spelling.js +2 -2
  39. package/dist/lib/templates.js +387 -387
  40. package/dist/lib/themes.js +51 -51
  41. package/docs-src/build.py +113 -113
  42. package/docs-src/extra.css +208 -208
  43. package/docs-src/md-to-html.lua +6 -6
  44. package/docs-src/template.html +116 -116
  45. package/eslint.config.js +27 -27
  46. package/lib/anchor-match.ts +276 -308
  47. package/lib/annotations.ts +644 -644
  48. package/lib/build.ts +1766 -1766
  49. package/lib/citations.ts +160 -160
  50. package/lib/commands/build.ts +855 -855
  51. package/lib/commands/citations.ts +515 -515
  52. package/lib/commands/comments.ts +1050 -1050
  53. package/lib/commands/context.ts +176 -174
  54. package/lib/commands/core.ts +309 -309
  55. package/lib/commands/doi.ts +435 -435
  56. package/lib/commands/file-ops.ts +372 -372
  57. package/lib/commands/history.ts +320 -320
  58. package/lib/commands/index.ts +87 -87
  59. package/lib/commands/init.ts +259 -259
  60. package/lib/commands/merge-resolve.ts +378 -378
  61. package/lib/commands/preview.ts +178 -178
  62. package/lib/commands/project-info.ts +244 -244
  63. package/lib/commands/quality.ts +517 -517
  64. package/lib/commands/response.ts +454 -454
  65. package/lib/commands/section-boundaries.ts +82 -82
  66. package/lib/commands/sections.ts +451 -451
  67. package/lib/commands/sync.ts +709 -706
  68. package/lib/commands/text-ops.ts +449 -449
  69. package/lib/commands/utilities.ts +448 -448
  70. package/lib/commands/verify-anchors.ts +272 -272
  71. package/lib/commands/word-tools.ts +340 -340
  72. package/lib/comment-realign.ts +517 -517
  73. package/lib/config.ts +84 -84
  74. package/lib/crossref.ts +781 -781
  75. package/lib/csl.ts +191 -191
  76. package/lib/dependencies.ts +98 -98
  77. package/lib/diff-engine.ts +465 -465
  78. package/lib/doi-cache.ts +115 -115
  79. package/lib/doi.ts +897 -897
  80. package/lib/equations.ts +506 -506
  81. package/lib/errors.ts +346 -346
  82. package/lib/format.ts +541 -541
  83. package/lib/git.ts +326 -326
  84. package/lib/grammar.ts +303 -303
  85. package/lib/image-registry.ts +180 -180
  86. package/lib/import.ts +911 -911
  87. package/lib/journals.ts +543 -543
  88. package/lib/macro-filter.lua +201 -201
  89. package/lib/macros.ts +273 -273
  90. package/lib/merge.ts +633 -633
  91. package/lib/orcid.ts +144 -144
  92. package/lib/pdf-comments.ts +263 -263
  93. package/lib/pdf-import.ts +524 -524
  94. package/lib/plugins.ts +362 -362
  95. package/lib/postprocess.ts +188 -188
  96. package/lib/pptx-color-filter.lua +37 -37
  97. package/lib/pptx-template.ts +469 -469
  98. package/lib/pptx-themes.ts +483 -483
  99. package/lib/protect-restore.ts +520 -520
  100. package/lib/rate-limiter.ts +94 -94
  101. package/lib/response.ts +197 -197
  102. package/lib/restore-references.ts +240 -240
  103. package/lib/review.ts +327 -327
  104. package/lib/schema.ts +488 -488
  105. package/lib/scientific-words.ts +73 -73
  106. package/lib/sections.ts +425 -335
  107. package/lib/slides.ts +756 -756
  108. package/lib/spelling.ts +334 -334
  109. package/lib/templates.ts +526 -526
  110. package/lib/themes.ts +742 -742
  111. package/lib/trackchanges.ts +247 -247
  112. package/lib/tui.ts +450 -450
  113. package/lib/types.ts +550 -550
  114. package/lib/undo.ts +250 -250
  115. package/lib/utils.ts +69 -69
  116. package/lib/variables.ts +179 -179
  117. package/lib/word-extraction.ts +806 -806
  118. package/lib/word.ts +643 -643
  119. package/lib/wordcomments.ts +840 -840
  120. package/mkdocs.yml +64 -64
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +47 -47
  123. package/skill/REFERENCE.md +539 -539
  124. package/skill/SKILL.md +295 -295
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
@@ -1,706 +1,709 @@
1
- /**
2
- * SYNC command: Import feedback from Word/PDF back to section files
3
- *
4
- * Split from sections.ts for maintainability.
5
- */
6
-
7
- import {
8
- chalk,
9
- fs,
10
- path,
11
- fmt,
12
- findFiles,
13
- loadConfig,
14
- extractSectionsFromText,
15
- countAnnotations,
16
- buildRegistry,
17
- convertHardcodedRefs,
18
- inlineDiffPreview,
19
- } from './context.js';
20
- import type { Command } from 'commander';
21
- import * as readline from 'readline';
22
-
23
- interface ImportStats {
24
- insertions: number;
25
- deletions: number;
26
- substitutions: number;
27
- comments: number;
28
- total: number;
29
- }
30
-
31
- interface SyncOptions {
32
- config: string;
33
- dir: string;
34
- crossref?: boolean;
35
- diff?: boolean;
36
- force?: boolean;
37
- dryRun?: boolean;
38
- /** Commander maps `--comments-only` (a positive flag) cleanly. `--no-overwrite`
39
- * conflicts with the existing `overwrite` semantics in `--force`-style flags
40
- * and Commander's `--no-X` convention assigns `options.x === false`. */
41
- commentsOnly?: boolean;
42
- }
43
-
44
- /**
45
- * Register the sync command with the program
46
- */
47
- export function register(program: Command): void {
48
- // ==========================================================================
49
- // SYNC command - Import with section awareness
50
- // ==========================================================================
51
-
52
- program
53
- .command('sync')
54
- .alias('sections')
55
- .description('Sync feedback from Word/PDF back to section files')
56
- .argument('[file]', 'Word (.docx) or PDF file from reviewer (default: most recent)')
57
- .argument('[sections...]', 'Specific sections to sync (default: all)')
58
- .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
59
- .option('-d, --dir <directory>', 'Directory with section files', '.')
60
- .option('--no-crossref', 'Skip converting hardcoded figure/table refs')
61
- .option('--no-diff', 'Skip showing diff preview')
62
- .option('--force', 'Overwrite files without conflict warning')
63
- .option('--dry-run', 'Preview without writing files')
64
- .option('--comments-only', 'Insert comments at fuzzy-matched anchors only; never modify existing prose or apply track changes (use when markdown was revised after the docx was sent for review)')
65
- .action(async (docx: string | undefined, sections: string[], options: SyncOptions) => {
66
- // Auto-detect most recent docx or pdf if not provided
67
- if (!docx) {
68
- const docxFiles = findFiles('.docx');
69
- const pdfFiles = findFiles('.pdf');
70
- const allFiles = [...docxFiles, ...pdfFiles];
71
-
72
- if (allFiles.length === 0) {
73
- console.error(fmt.status('error', 'No .docx or .pdf files found in current directory.'));
74
- process.exit(1);
75
- }
76
- const sorted = allFiles
77
- .map(f => ({ name: f, mtime: fs.statSync(f).mtime }))
78
- .sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
79
- docx = sorted[0].name;
80
- console.log(fmt.status('info', `Using most recent: ${docx}`));
81
- console.log();
82
- }
83
-
84
- if (!fs.existsSync(docx)) {
85
- console.error(fmt.status('error', `File not found: ${docx}`));
86
- process.exit(1);
87
- }
88
-
89
- // Handle PDF files
90
- if (docx.toLowerCase().endsWith('.pdf')) {
91
- const { extractPdfComments, formatPdfComments, getPdfCommentStats } = await import('../pdf-import.js');
92
-
93
- const spin = fmt.spinner(`Extracting comments from ${path.basename(docx)}...`).start();
94
-
95
- try {
96
- const comments = await extractPdfComments(docx);
97
- spin.stop();
98
-
99
- if (comments.length === 0) {
100
- console.log(fmt.status('info', 'No comments found in PDF.'));
101
- return;
102
- }
103
-
104
- const stats = getPdfCommentStats(comments);
105
- console.log(fmt.header(`PDF Comments from ${path.basename(docx)}`));
106
- console.log();
107
- console.log(formatPdfComments(comments));
108
- console.log();
109
-
110
- const authorList = Object.entries(stats.byAuthor)
111
- .map(([author, count]) => `${author} (${count})`)
112
- .join(', ');
113
- console.log(chalk.dim(`Total: ${stats.total} comments from ${authorList}`));
114
- console.log();
115
-
116
- const configPath = path.resolve(options.dir, options.config);
117
- if (fs.existsSync(configPath) && !options.dryRun) {
118
- const config = loadConfig(configPath);
119
- const mainSection = config.sections?.[0];
120
-
121
- if (mainSection && typeof mainSection === 'string') {
122
- const mainPath = path.join(options.dir, mainSection);
123
- if (fs.existsSync(mainPath)) {
124
- console.log(chalk.dim(`Use 'rev pdf-comments ${docx} --append ${mainSection}' to add comments to markdown.`));
125
- }
126
- }
127
- }
128
- } catch (err) {
129
- spin.stop();
130
- const error = err as Error;
131
- console.error(fmt.status('error', `Failed to extract PDF comments: ${error.message}`));
132
- if (process.env.DEBUG) console.error(error.stack);
133
- process.exit(1);
134
- }
135
- return;
136
- }
137
-
138
- const configPath = path.resolve(options.dir, options.config);
139
- if (!fs.existsSync(configPath)) {
140
- console.error(fmt.status('error', `Config not found: ${configPath}`));
141
- console.error(chalk.dim(' Run "rev init" first to generate sections.yaml'));
142
- process.exit(1);
143
- }
144
-
145
- // --comments-only: import comments only, never modify existing prose.
146
- // Use this when the markdown has been revised since the docx was sent
147
- // out — track changes from a stale draft would clobber newer edits.
148
- if (options.commentsOnly) {
149
- await syncCommentsOnly(docx, sections, options, configPath);
150
- return;
151
- }
152
-
153
- // Check pandoc availability upfront and warn
154
- const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
155
- if (!hasPandoc()) {
156
- console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
157
- console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
158
- console.log();
159
- }
160
-
161
- const spin = fmt.spinner(`Importing ${path.basename(docx)}...`).start();
162
-
163
- try {
164
- const config = loadConfig(configPath);
165
- const { importFromWord, extractWordComments, extractCommentAnchors, insertCommentsIntoMarkdown, extractFromWord } = await import('../import.js');
166
-
167
- let registry = null;
168
- let totalRefConversions = 0;
169
- if (options.crossref !== false) {
170
- registry = buildRegistry(options.dir);
171
- }
172
-
173
- const comments = await extractWordComments(docx);
174
- const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(docx);
175
-
176
- // Extract Word text (uses pandoc if available, falls back to XML extraction)
177
- const wordExtraction = await extractFromWord(docx, { mediaDir: options.dir });
178
- let wordText = wordExtraction.text;
179
- const wordTables = wordExtraction.tables || [];
180
-
181
- // Log extraction messages (warnings about pandoc, track change stats, etc.)
182
- for (const msg of wordExtraction.messages || []) {
183
- if (msg.type === 'warning') {
184
- spin.stop();
185
- console.log(fmt.status('warning', msg.message));
186
- spin.start();
187
- }
188
- }
189
-
190
- // Restore crossref on FULL text BEFORE splitting into sections
191
- // This ensures duplicate labels from track changes are handled correctly
192
- // (the same figure may appear multiple times in old/new versions)
193
- const { restoreCrossrefFromWord, restoreImagesFromRegistry } = await import('../import.js');
194
- const crossrefResult = restoreCrossrefFromWord(wordText, options.dir);
195
- wordText = crossrefResult.text;
196
- if (crossrefResult.restored > 0) {
197
- console.log(`Restored ${crossrefResult.restored} crossref reference(s)`);
198
- }
199
-
200
- // Also restore images from registry using shared restoredLabels
201
- const imageRestoreResult = restoreImagesFromRegistry(wordText, options.dir, crossrefResult.restoredLabels);
202
- wordText = imageRestoreResult.text;
203
- if (imageRestoreResult.restored > 0) {
204
- console.log(`Restored ${imageRestoreResult.restored} image(s) from registry`);
205
- }
206
-
207
- let wordSections = extractSectionsFromText(wordText, config.sections);
208
-
209
- if (wordSections.length === 0) {
210
- spin.stop();
211
- console.error(fmt.status('warning', 'No sections detected in Word document.'));
212
- console.error(chalk.dim(' Check that headings match sections.yaml'));
213
- process.exit(1);
214
- }
215
-
216
- if (sections && sections.length > 0) {
217
- const onlyList = sections.map(s => s.trim().toLowerCase());
218
- wordSections = wordSections.filter(section => {
219
- const fileName = section.file.replace(/\.md$/i, '').toLowerCase();
220
- const header = section.header.toLowerCase();
221
- return onlyList.some(name => fileName === name || fileName.includes(name) || header.includes(name));
222
- });
223
- if (wordSections.length === 0) {
224
- spin.stop();
225
- console.error(fmt.status('error', `No sections matched: ${sections.join(', ')}`));
226
- console.error(chalk.dim(` Available: ${extractSectionsFromText(wordText, config.sections).map(s => s.file.replace(/\.md$/i, '')).join(', ')}`));
227
- process.exit(1);
228
- }
229
- }
230
-
231
- spin.stop();
232
- console.log(fmt.header(`Import from ${path.basename(docx)}`));
233
- console.log();
234
-
235
- // Conflict detection
236
- if (!options.force && !options.dryRun) {
237
- const conflicts: Array<{ file: string; annotations: number }> = [];
238
- for (const section of wordSections) {
239
- const sectionPath = path.join(options.dir, section.file);
240
- if (fs.existsSync(sectionPath)) {
241
- const existing = fs.readFileSync(sectionPath, 'utf-8');
242
- const existingCounts = countAnnotations(existing);
243
- if (existingCounts.total > 0) {
244
- conflicts.push({
245
- file: section.file,
246
- annotations: existingCounts.total,
247
- });
248
- }
249
- }
250
- }
251
-
252
- if (conflicts.length > 0) {
253
- console.log(fmt.status('warning', 'Files with existing annotations will be overwritten:'));
254
- for (const c of conflicts) {
255
- console.log(chalk.yellow(` - ${c.file} (${c.annotations} annotations)`));
256
- }
257
- console.log();
258
-
259
- const rl = readline.createInterface({
260
- input: process.stdin,
261
- output: process.stdout,
262
- });
263
-
264
- const answer = await new Promise<string>((resolve) =>
265
- rl.question(chalk.cyan('Continue and overwrite? [y/N] '), resolve)
266
- );
267
- rl.close();
268
-
269
- if (answer.toLowerCase() !== 'y') {
270
- console.log(chalk.dim('Aborted. Use --force to skip this check.'));
271
- process.exit(0);
272
- }
273
- console.log();
274
- }
275
- }
276
-
277
- const sectionResults: Array<{
278
- file: string;
279
- header: string;
280
- status: string;
281
- stats?: ImportStats;
282
- refs?: number;
283
- }> = [];
284
- let totalChanges = 0;
285
-
286
- // Calculate section boundaries in the XML document text for comment filtering
287
- // Comment positions (docPosition) are relative to xmlDocText, NOT wordText
288
- // So we must find section headers in xmlDocText to get matching boundaries
289
- const sectionBoundaries: Array<{ file: string; start: number; end: number }> = [];
290
- const xmlLower = xmlDocText.toLowerCase();
291
-
292
- // Standard section header keywords to search for in XML
293
- // Map from file name pattern to search terms
294
- const sectionKeywords: Record<string, string[]> = {
295
- 'abstract': ['abstract', 'summary'],
296
- 'introduction': ['introduction', 'background'],
297
- 'methods': ['methods', 'materials and methods', 'methodology'],
298
- 'results': ['results'],
299
- 'discussion': ['discussion'],
300
- 'conclusion': ['conclusion', 'conclusions'],
301
- };
302
-
303
- // Helper: find section header (skip labels like "Methods:" in structured abstracts)
304
- // Real section headers are NOT followed by ":" immediately
305
- function findSectionHeader(text: string, keyword: string, startFrom: number = 0): number {
306
- const lower = text.toLowerCase();
307
- let idx = startFrom;
308
- while ((idx = lower.indexOf(keyword, idx)) !== -1) {
309
- // Check what follows the keyword
310
- const afterKeyword = text.slice(idx + keyword.length, idx + keyword.length + 5);
311
- // Skip if followed by ":" (this is a label, not a section header)
312
- // Real headers are followed by text content, a newline, or a subheading
313
- if (!afterKeyword.startsWith(':') && !afterKeyword.startsWith(' :')) {
314
- return idx;
315
- }
316
- idx++;
317
- }
318
- return -1;
319
- }
320
-
321
- for (const section of wordSections) {
322
- const fileBase = section.file.replace(/\.md$/i, '').toLowerCase();
323
-
324
- // Get keywords for this section
325
- const keywords = sectionKeywords[fileBase] || [fileBase];
326
-
327
- // Find the first valid keyword that exists in XML (not a label)
328
- let headerIdx = -1;
329
- for (const kw of keywords) {
330
- const idx = findSectionHeader(xmlDocText, kw, 0);
331
- if (idx >= 0 && (headerIdx < 0 || idx < headerIdx)) {
332
- headerIdx = idx;
333
- }
334
- }
335
-
336
- if (headerIdx >= 0) {
337
- // Find the next section's start to determine end boundary
338
- let nextHeaderIdx = xmlDocText.length;
339
- const sectionIdx = wordSections.indexOf(section);
340
- if (sectionIdx < wordSections.length - 1) {
341
- const nextFileBase = wordSections[sectionIdx + 1].file.replace(/\.md$/i, '').toLowerCase();
342
- const nextKeywords = sectionKeywords[nextFileBase] || [nextFileBase];
343
- for (const nkw of nextKeywords) {
344
- const foundNext = findSectionHeader(xmlDocText, nkw, headerIdx + 10);
345
- if (foundNext >= 0 && foundNext < nextHeaderIdx) {
346
- nextHeaderIdx = foundNext;
347
- }
348
- }
349
- }
350
-
351
- sectionBoundaries.push({
352
- file: section.file,
353
- start: headerIdx,
354
- end: nextHeaderIdx
355
- });
356
-
357
- }
358
- }
359
-
360
- // Document length is the XML text length (same coordinate system as docPosition)
361
- const docLength = xmlDocText.length;
362
-
363
- for (const section of wordSections) {
364
- const sectionPath = path.join(options.dir, section.file);
365
-
366
- if (!fs.existsSync(sectionPath)) {
367
- sectionResults.push({
368
- file: section.file,
369
- header: section.header,
370
- status: 'skipped',
371
- stats: undefined,
372
- });
373
- continue;
374
- }
375
-
376
- const result = await importFromWord(docx, sectionPath, {
377
- sectionContent: section.content,
378
- author: 'Reviewer',
379
- wordTables: wordTables,
380
- });
381
-
382
- let { annotated, stats } = result;
383
-
384
- let refConversions: Array<{ from: string; to: string }> = [];
385
- if (registry && options.crossref !== false) {
386
- const crossrefResult = convertHardcodedRefs(annotated, registry);
387
- annotated = crossrefResult.converted;
388
- refConversions = crossrefResult.conversions;
389
- totalRefConversions += refConversions.length;
390
- }
391
-
392
- let commentsInserted = 0;
393
- if (comments.length > 0 && anchors.size > 0) {
394
- // Filter comments to only those that belong to this section
395
- // Use exact position matching: docPosition is in xmlDocText coordinates,
396
- // and sectionBoundaries are also in xmlDocText coordinates (same source!)
397
- const boundary = sectionBoundaries.find(b => b.file === section.file);
398
- const isFirstSection = wordSections.indexOf(section) === 0;
399
- const firstBoundaryStart = sectionBoundaries.length > 0 ? Math.min(...sectionBoundaries.map(b => b.start)) : 0;
400
-
401
- const sectionComments = comments.filter((c: any) => {
402
- const anchorData = anchors.get(c.id);
403
- if (!anchorData) return false;
404
-
405
- // Use exact position - no scaling needed since both are in xmlDocText coordinates
406
- if (anchorData.docPosition !== undefined && boundary) {
407
- // Include comments within section boundaries
408
- if (anchorData.docPosition >= boundary.start && anchorData.docPosition < boundary.end) {
409
- return true;
410
- }
411
- // Also include "outside" comments (before first section) in the first section file
412
- if (isFirstSection && anchorData.docPosition < firstBoundaryStart) {
413
- return true;
414
- }
415
- }
416
-
417
- return false;
418
- });
419
-
420
- if (process.env.DEBUG) {
421
- console.log(`[DEBUG] ${section.file}: ${sectionComments.length} comments to place (boundary: ${boundary?.start}-${boundary?.end})`);
422
- }
423
-
424
- if (sectionComments.length > 0) {
425
- // Use a more robust pattern that handles < in comment text
426
- const commentPattern = /\{>>.*?<<\}/gs;
427
- const beforeCount = (annotated.match(commentPattern) || []).length;
428
- annotated = insertCommentsIntoMarkdown(annotated, sectionComments, anchors, {
429
- quiet: !process.env.DEBUG,
430
- sectionBoundary: boundary // Pass section boundary for position-based insertion
431
- });
432
- const afterCount = (annotated.match(commentPattern) || []).length;
433
- commentsInserted = afterCount - beforeCount;
434
-
435
- if (process.env.DEBUG) {
436
- console.log(`[DEBUG] ${section.file}: inserted ${commentsInserted} of ${sectionComments.length} comments`);
437
- }
438
-
439
- if (commentsInserted > 0) {
440
- stats.comments = (stats.comments || 0) + commentsInserted;
441
- }
442
- }
443
- }
444
-
445
- totalChanges += stats.total;
446
-
447
- sectionResults.push({
448
- file: section.file,
449
- header: section.header,
450
- status: 'ok',
451
- stats,
452
- refs: refConversions.length,
453
- });
454
-
455
- if (!options.dryRun) {
456
- // Preserve any preamble content (YAML frontmatter, author blocks, metadata)
457
- // that exists before the first heading in the original file.
458
- // This content is never included in the Word build output, so it won't
459
- // appear in the Word doc and would otherwise be lost during sync.
460
- const originalContent = fs.readFileSync(sectionPath, 'utf-8');
461
- const firstHeadingMatch = originalContent.match(/^(#\s)/m);
462
- if (firstHeadingMatch && firstHeadingMatch.index !== undefined && firstHeadingMatch.index > 0) {
463
- const preamble = originalContent.slice(0, firstHeadingMatch.index);
464
- // Only prepend if preamble has non-whitespace content
465
- if (preamble.trim().length > 0) {
466
- annotated = preamble + annotated;
467
- }
468
- }
469
- fs.writeFileSync(sectionPath, annotated, 'utf-8');
470
- }
471
- }
472
-
473
- const tableRows = sectionResults.map((r) => {
474
- if (r.status === 'skipped') {
475
- return [
476
- chalk.dim(r.file),
477
- chalk.dim(r.header.slice(0, 25)),
478
- chalk.yellow('skipped'),
479
- '',
480
- '',
481
- '',
482
- '',
483
- ];
484
- }
485
- const s = r.stats!;
486
- return [
487
- chalk.bold(r.file),
488
- r.header.length > 25 ? r.header.slice(0, 22) + '...' : r.header,
489
- s.insertions > 0 ? chalk.green(`+${s.insertions}`) : chalk.dim('-'),
490
- s.deletions > 0 ? chalk.red(`-${s.deletions}`) : chalk.dim('-'),
491
- s.substitutions > 0 ? chalk.yellow(`~${s.substitutions}`) : chalk.dim('-'),
492
- s.comments > 0 ? chalk.blue(`#${s.comments}`) : chalk.dim('-'),
493
- r.refs! > 0 ? chalk.magenta(`@${r.refs}`) : chalk.dim('-'),
494
- ];
495
- });
496
-
497
- console.log(fmt.table(
498
- ['File', 'Section', 'Ins', 'Del', 'Sub', 'Cmt', 'Ref'],
499
- tableRows,
500
- { align: ['left', 'left', 'right', 'right', 'right', 'right', 'right'] }
501
- ));
502
- console.log();
503
-
504
- if (options.diff !== false && totalChanges > 0) {
505
- console.log(fmt.header('Changes Preview'));
506
- console.log();
507
- for (const result of sectionResults) {
508
- if (result.status === 'ok' && result.stats && result.stats.total > 0) {
509
- const sectionPath = path.join(options.dir, result.file);
510
- if (fs.existsSync(sectionPath)) {
511
- const content = fs.readFileSync(sectionPath, 'utf-8');
512
- const preview = inlineDiffPreview(content, { maxLines: 3 });
513
- if (preview) {
514
- console.log(chalk.bold(result.file) + ':');
515
- console.log(preview);
516
- console.log();
517
- }
518
- }
519
- }
520
- }
521
- }
522
-
523
- if (options.dryRun) {
524
- console.log(fmt.box(chalk.yellow('Dry run - no files written'), { padding: 0 }));
525
- } else if (totalChanges > 0 || totalRefConversions > 0 || comments.length > 0) {
526
- const summaryLines: string[] = [];
527
- summaryLines.push(`${chalk.bold(wordSections.length)} sections processed`);
528
- if (totalChanges > 0) summaryLines.push(`${chalk.bold(totalChanges)} annotations imported`);
529
- if (comments.length > 0) summaryLines.push(`${chalk.bold(comments.length)} comments placed`);
530
- if (totalRefConversions > 0) summaryLines.push(`${chalk.bold(totalRefConversions)} refs converted to @-syntax`);
531
-
532
- console.log(fmt.box(summaryLines.join('\n'), { title: 'Summary', padding: 0 }));
533
- console.log();
534
- console.log(chalk.dim('Next steps:'));
535
- console.log(chalk.dim(' 1. rev review <section.md> - Accept/reject changes'));
536
- console.log(chalk.dim(' 2. rev comments <section.md> - View/address comments'));
537
- console.log(chalk.dim(' 3. rev build docx - Rebuild Word doc'));
538
- } else {
539
- console.log(fmt.status('success', 'No changes detected.'));
540
- }
541
- } catch (err) {
542
- spin.stop();
543
- const error = err as Error;
544
- console.error(fmt.status('error', error.message));
545
- if (process.env.DEBUG) console.error(error.stack);
546
- process.exit(1);
547
- }
548
- });
549
- }
550
-
551
- /**
552
- * `sync --comments-only`: import only Word comments at fuzzy-matched anchors.
553
- *
554
- * Skips the Word→Markdown diff entirely (no track changes, no pandoc, no
555
- * prose modifications). Useful when the markdown has been edited after the
556
- * docx was sent for review — applying track changes from a stale draft
557
- * would overwrite newer edits.
558
- */
559
- async function syncCommentsOnly(
560
- docx: string,
561
- sectionFilter: string[] | undefined,
562
- options: SyncOptions,
563
- configPath: string,
564
- ): Promise<void> {
565
- const config = loadConfig(configPath);
566
- const { extractWordComments, extractCommentAnchors, extractHeadings, insertCommentsIntoMarkdown } = await import('../import.js');
567
- const { computeSectionBoundaries } = await import('./section-boundaries.js');
568
-
569
- const spin = fmt.spinner(`Reading comments from ${path.basename(docx)}...`).start();
570
-
571
- let comments;
572
- let anchors;
573
- let headings;
574
- let fullDocText = '';
575
- try {
576
- comments = await extractWordComments(docx);
577
- const result = await extractCommentAnchors(docx);
578
- anchors = result.anchors;
579
- fullDocText = result.fullDocText;
580
- headings = await extractHeadings(docx);
581
- spin.stop();
582
- } catch (err) {
583
- spin.stop();
584
- const error = err as Error;
585
- console.error(fmt.status('error', error.message));
586
- process.exit(1);
587
- }
588
-
589
- console.log(fmt.header(`Comments from ${path.basename(docx)} (comments-only)`));
590
- console.log();
591
-
592
- if (comments.length === 0) {
593
- console.log(fmt.status('info', 'No comments found in document.'));
594
- return;
595
- }
596
-
597
- const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
598
-
599
- if (boundaries.length === 0) {
600
- console.error(fmt.status('warning', 'No section headings detected in Word document.'));
601
- console.error(chalk.dim(' Check that headers in sections.yaml match heading paragraphs in the docx.'));
602
- process.exit(1);
603
- }
604
-
605
- // Apply optional section filter from CLI
606
- let activeBoundaries = boundaries;
607
- if (sectionFilter && sectionFilter.length > 0) {
608
- const wanted = sectionFilter.map(s => s.trim().toLowerCase());
609
- activeBoundaries = boundaries.filter(b => {
610
- const base = b.file.replace(/\.md$/i, '').toLowerCase();
611
- return wanted.some(name => base === name || base.includes(name));
612
- });
613
- if (activeBoundaries.length === 0) {
614
- console.error(fmt.status('error', `No sections matched: ${sectionFilter.join(', ')}`));
615
- process.exit(1);
616
- }
617
- }
618
-
619
- const firstBoundaryStart = boundaries[0].start;
620
- const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];
621
-
622
- for (const boundary of activeBoundaries) {
623
- const sectionPath = path.join(options.dir, boundary.file);
624
- if (!fs.existsSync(sectionPath)) {
625
- results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
626
- continue;
627
- }
628
-
629
- const isFirstSection = boundary === activeBoundaries[0];
630
- const sectionComments = comments.filter((c: { id: string }) => {
631
- const anchor = anchors.get(c.id);
632
- if (!anchor || anchor.docPosition === undefined) return false;
633
- if (anchor.docPosition >= boundary.start && anchor.docPosition < boundary.end) return true;
634
- // Comments before the first heading land in the first matched section
635
- if (isFirstSection && anchor.docPosition < firstBoundaryStart) return true;
636
- return false;
637
- });
638
-
639
- if (sectionComments.length === 0) {
640
- results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
641
- continue;
642
- }
643
-
644
- const original = fs.readFileSync(sectionPath, 'utf-8');
645
-
646
- const stats = { placed: 0, deduped: 0, unmatched: 0 };
647
- const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
648
- quiet: !process.env.DEBUG,
649
- sectionBoundary: { start: boundary.start, end: boundary.end },
650
- wrapAnchor: false,
651
- outStats: stats,
652
- });
653
-
654
- if (!options.dryRun && stats.placed > 0) {
655
- fs.writeFileSync(sectionPath, annotated, 'utf-8');
656
- }
657
- results.push({ file: boundary.file, ...stats, skipped: false });
658
- }
659
-
660
- const tableRows = results.map(r => {
661
- if (r.skipped) {
662
- return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
663
- }
664
- return [
665
- chalk.bold(r.file),
666
- chalk.green(`${r.placed}`),
667
- r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
668
- r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
669
- chalk.dim('comments only'),
670
- ];
671
- });
672
-
673
- console.log(fmt.table(
674
- ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
675
- tableRows,
676
- { align: ['left', 'right', 'right', 'right', 'left'] },
677
- ));
678
- console.log();
679
-
680
- const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
681
- const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
682
- const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);
683
-
684
- const lines: string[] = [];
685
- lines.push(`${chalk.bold(comments.length)} comments in document`);
686
- if (totalPlaced > 0) {
687
- lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
688
- }
689
- if (totalDeduped > 0) {
690
- lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
691
- }
692
- if (totalUnmatched > 0) {
693
- lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
694
- }
695
- if (options.dryRun) {
696
- lines.push(chalk.yellow('Dry run no files written'));
697
- } else if (totalPlaced > 0) {
698
- lines.push(chalk.dim('Existing prose unchanged.'));
699
- }
700
- console.log(fmt.box(lines.join('\n'), { title: 'Summary', padding: 0 }));
701
-
702
- if (totalUnmatched > 0) {
703
- console.log();
704
- console.log(chalk.dim('Tip: run "rev verify-anchors" to see which comments drifted.'));
705
- }
706
- }
1
+ /**
2
+ * SYNC command: Import feedback from Word/PDF back to section files
3
+ *
4
+ * Split from sections.ts for maintainability.
5
+ */
6
+
7
+ import {
8
+ chalk,
9
+ fs,
10
+ path,
11
+ fmt,
12
+ findFiles,
13
+ resolveSectionsConfig,
14
+ getOrderedSections,
15
+ extractSectionsFromText,
16
+ countAnnotations,
17
+ buildRegistry,
18
+ convertHardcodedRefs,
19
+ inlineDiffPreview,
20
+ } from './context.js';
21
+ import type { Command } from 'commander';
22
+ import type { SectionsConfig } from '../types.js';
23
+ import * as readline from 'readline';
24
+
25
+ interface ImportStats {
26
+ insertions: number;
27
+ deletions: number;
28
+ substitutions: number;
29
+ comments: number;
30
+ total: number;
31
+ }
32
+
33
+ interface SyncOptions {
34
+ config: string;
35
+ dir: string;
36
+ crossref?: boolean;
37
+ diff?: boolean;
38
+ force?: boolean;
39
+ dryRun?: boolean;
40
+ /** Commander maps `--comments-only` (a positive flag) cleanly. `--no-overwrite`
41
+ * conflicts with the existing `overwrite` semantics in `--force`-style flags
42
+ * and Commander's `--no-X` convention assigns `options.x === false`. */
43
+ commentsOnly?: boolean;
44
+ }
45
+
46
+ /**
47
+ * Register the sync command with the program
48
+ */
49
+ export function register(program: Command): void {
50
+ // ==========================================================================
51
+ // SYNC command - Import with section awareness
52
+ // ==========================================================================
53
+
54
+ program
55
+ .command('sync')
56
+ .alias('sections')
57
+ .description('Sync feedback from Word/PDF back to section files')
58
+ .argument('[file]', 'Word (.docx) or PDF file from reviewer (default: most recent)')
59
+ .argument('[sections...]', 'Specific sections to sync (default: all)')
60
+ .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
61
+ .option('-d, --dir <directory>', 'Directory with section files', '.')
62
+ .option('--no-crossref', 'Skip converting hardcoded figure/table refs')
63
+ .option('--no-diff', 'Skip showing diff preview')
64
+ .option('--force', 'Overwrite files without conflict warning')
65
+ .option('--dry-run', 'Preview without writing files')
66
+ .option('--comments-only', 'Insert comments at fuzzy-matched anchors only; never modify existing prose or apply track changes (use when markdown was revised after the docx was sent for review)')
67
+ .action(async (docx: string | undefined, sections: string[], options: SyncOptions) => {
68
+ // Auto-detect most recent docx or pdf if not provided
69
+ if (!docx) {
70
+ const docxFiles = findFiles('.docx');
71
+ const pdfFiles = findFiles('.pdf');
72
+ const allFiles = [...docxFiles, ...pdfFiles];
73
+
74
+ if (allFiles.length === 0) {
75
+ console.error(fmt.status('error', 'No .docx or .pdf files found in current directory.'));
76
+ process.exit(1);
77
+ }
78
+ const sorted = allFiles
79
+ .map(f => ({ name: f, mtime: fs.statSync(f).mtime }))
80
+ .sort((a, b) => b.mtime.getTime() - a.mtime.getTime());
81
+ docx = sorted[0].name;
82
+ console.log(fmt.status('info', `Using most recent: ${docx}`));
83
+ console.log();
84
+ }
85
+
86
+ if (!fs.existsSync(docx)) {
87
+ console.error(fmt.status('error', `File not found: ${docx}`));
88
+ process.exit(1);
89
+ }
90
+
91
+ // Handle PDF files
92
+ if (docx.toLowerCase().endsWith('.pdf')) {
93
+ const { extractPdfComments, formatPdfComments, getPdfCommentStats } = await import('../pdf-import.js');
94
+
95
+ const spin = fmt.spinner(`Extracting comments from ${path.basename(docx)}...`).start();
96
+
97
+ try {
98
+ const comments = await extractPdfComments(docx);
99
+ spin.stop();
100
+
101
+ if (comments.length === 0) {
102
+ console.log(fmt.status('info', 'No comments found in PDF.'));
103
+ return;
104
+ }
105
+
106
+ const stats = getPdfCommentStats(comments);
107
+ console.log(fmt.header(`PDF Comments from ${path.basename(docx)}`));
108
+ console.log();
109
+ console.log(formatPdfComments(comments));
110
+ console.log();
111
+
112
+ const authorList = Object.entries(stats.byAuthor)
113
+ .map(([author, count]) => `${author} (${count})`)
114
+ .join(', ');
115
+ console.log(chalk.dim(`Total: ${stats.total} comments from ${authorList}`));
116
+ console.log();
117
+
118
+ const resolved = resolveSectionsConfig(options.dir, options.config);
119
+ if (resolved && !options.dryRun) {
120
+ const mainSection = getOrderedSections(resolved.config)[0];
121
+
122
+ if (mainSection) {
123
+ const mainPath = path.join(options.dir, mainSection);
124
+ if (fs.existsSync(mainPath)) {
125
+ console.log(chalk.dim(`Use 'rev pdf-comments ${docx} --append ${mainSection}' to add comments to markdown.`));
126
+ }
127
+ }
128
+ }
129
+ } catch (err) {
130
+ spin.stop();
131
+ const error = err as Error;
132
+ console.error(fmt.status('error', `Failed to extract PDF comments: ${error.message}`));
133
+ if (process.env.DEBUG) console.error(error.stack);
134
+ process.exit(1);
135
+ }
136
+ return;
137
+ }
138
+
139
+ // Resolve the section config: an explicit sections.yaml if present,
140
+ // otherwise the `sections:` list in rev.yaml (single source of truth).
141
+ const resolved = resolveSectionsConfig(options.dir, options.config);
142
+ if (!resolved) {
143
+ console.error(fmt.status('error', `No section config found in ${path.resolve(options.dir)}`));
144
+ console.error(chalk.dim(' Add a `sections:` list to rev.yaml, or run "rev init" to generate sections.yaml.'));
145
+ process.exit(1);
146
+ }
147
+ const sectionsConfig = resolved.config;
148
+
149
+ // --comments-only: import comments only, never modify existing prose.
150
+ // Use this when the markdown has been revised since the docx was sent
151
+ // out — track changes from a stale draft would clobber newer edits.
152
+ if (options.commentsOnly) {
153
+ await syncCommentsOnly(docx, sections, options, sectionsConfig);
154
+ return;
155
+ }
156
+
157
+ // Check pandoc availability upfront and warn
158
+ const { hasPandoc, getInstallInstructions } = await import('../dependencies.js');
159
+ if (!hasPandoc()) {
160
+ console.log(fmt.status('warning', `Pandoc not installed. Track changes will be extracted from XML (formatting may differ).`));
161
+ console.log(chalk.dim(` Install for best results: ${getInstallInstructions('pandoc')}`));
162
+ console.log();
163
+ }
164
+
165
+ const spin = fmt.spinner(`Importing ${path.basename(docx)}...`).start();
166
+
167
+ try {
168
+ const config = sectionsConfig;
169
+ const { importFromWord, extractWordComments, extractCommentAnchors, insertCommentsIntoMarkdown, extractFromWord } = await import('../import.js');
170
+
171
+ let registry = null;
172
+ let totalRefConversions = 0;
173
+ if (options.crossref !== false) {
174
+ registry = buildRegistry(options.dir);
175
+ }
176
+
177
+ const comments = await extractWordComments(docx);
178
+ const { anchors, fullDocText: xmlDocText } = await extractCommentAnchors(docx);
179
+
180
+ // Extract Word text (uses pandoc if available, falls back to XML extraction)
181
+ const wordExtraction = await extractFromWord(docx, { mediaDir: options.dir });
182
+ let wordText = wordExtraction.text;
183
+ const wordTables = wordExtraction.tables || [];
184
+
185
+ // Log extraction messages (warnings about pandoc, track change stats, etc.)
186
+ for (const msg of wordExtraction.messages || []) {
187
+ if (msg.type === 'warning') {
188
+ spin.stop();
189
+ console.log(fmt.status('warning', msg.message));
190
+ spin.start();
191
+ }
192
+ }
193
+
194
+ // Restore crossref on FULL text BEFORE splitting into sections
195
+ // This ensures duplicate labels from track changes are handled correctly
196
+ // (the same figure may appear multiple times in old/new versions)
197
+ const { restoreCrossrefFromWord, restoreImagesFromRegistry } = await import('../import.js');
198
+ const crossrefResult = restoreCrossrefFromWord(wordText, options.dir);
199
+ wordText = crossrefResult.text;
200
+ if (crossrefResult.restored > 0) {
201
+ console.log(`Restored ${crossrefResult.restored} crossref reference(s)`);
202
+ }
203
+
204
+ // Also restore images from registry using shared restoredLabels
205
+ const imageRestoreResult = restoreImagesFromRegistry(wordText, options.dir, crossrefResult.restoredLabels);
206
+ wordText = imageRestoreResult.text;
207
+ if (imageRestoreResult.restored > 0) {
208
+ console.log(`Restored ${imageRestoreResult.restored} image(s) from registry`);
209
+ }
210
+
211
+ let wordSections = extractSectionsFromText(wordText, config.sections);
212
+
213
+ if (wordSections.length === 0) {
214
+ spin.stop();
215
+ console.error(fmt.status('warning', 'No sections detected in Word document.'));
216
+ console.error(chalk.dim(' Check that headings match sections.yaml'));
217
+ process.exit(1);
218
+ }
219
+
220
+ if (sections && sections.length > 0) {
221
+ const onlyList = sections.map(s => s.trim().toLowerCase());
222
+ wordSections = wordSections.filter(section => {
223
+ const fileName = section.file.replace(/\.md$/i, '').toLowerCase();
224
+ const header = section.header.toLowerCase();
225
+ return onlyList.some(name => fileName === name || fileName.includes(name) || header.includes(name));
226
+ });
227
+ if (wordSections.length === 0) {
228
+ spin.stop();
229
+ console.error(fmt.status('error', `No sections matched: ${sections.join(', ')}`));
230
+ console.error(chalk.dim(` Available: ${extractSectionsFromText(wordText, config.sections).map(s => s.file.replace(/\.md$/i, '')).join(', ')}`));
231
+ process.exit(1);
232
+ }
233
+ }
234
+
235
+ spin.stop();
236
+ console.log(fmt.header(`Import from ${path.basename(docx)}`));
237
+ console.log();
238
+
239
+ // Conflict detection
240
+ if (!options.force && !options.dryRun) {
241
+ const conflicts: Array<{ file: string; annotations: number }> = [];
242
+ for (const section of wordSections) {
243
+ const sectionPath = path.join(options.dir, section.file);
244
+ if (fs.existsSync(sectionPath)) {
245
+ const existing = fs.readFileSync(sectionPath, 'utf-8');
246
+ const existingCounts = countAnnotations(existing);
247
+ if (existingCounts.total > 0) {
248
+ conflicts.push({
249
+ file: section.file,
250
+ annotations: existingCounts.total,
251
+ });
252
+ }
253
+ }
254
+ }
255
+
256
+ if (conflicts.length > 0) {
257
+ console.log(fmt.status('warning', 'Files with existing annotations will be overwritten:'));
258
+ for (const c of conflicts) {
259
+ console.log(chalk.yellow(` - ${c.file} (${c.annotations} annotations)`));
260
+ }
261
+ console.log();
262
+
263
+ const rl = readline.createInterface({
264
+ input: process.stdin,
265
+ output: process.stdout,
266
+ });
267
+
268
+ const answer = await new Promise<string>((resolve) =>
269
+ rl.question(chalk.cyan('Continue and overwrite? [y/N] '), resolve)
270
+ );
271
+ rl.close();
272
+
273
+ if (answer.toLowerCase() !== 'y') {
274
+ console.log(chalk.dim('Aborted. Use --force to skip this check.'));
275
+ process.exit(0);
276
+ }
277
+ console.log();
278
+ }
279
+ }
280
+
281
+ const sectionResults: Array<{
282
+ file: string;
283
+ header: string;
284
+ status: string;
285
+ stats?: ImportStats;
286
+ refs?: number;
287
+ }> = [];
288
+ let totalChanges = 0;
289
+
290
+ // Calculate section boundaries in the XML document text for comment filtering
291
+ // Comment positions (docPosition) are relative to xmlDocText, NOT wordText
292
+ // So we must find section headers in xmlDocText to get matching boundaries
293
+ const sectionBoundaries: Array<{ file: string; start: number; end: number }> = [];
294
+ const xmlLower = xmlDocText.toLowerCase();
295
+
296
+ // Standard section header keywords to search for in XML
297
+ // Map from file name pattern to search terms
298
+ const sectionKeywords: Record<string, string[]> = {
299
+ 'abstract': ['abstract', 'summary'],
300
+ 'introduction': ['introduction', 'background'],
301
+ 'methods': ['methods', 'materials and methods', 'methodology'],
302
+ 'results': ['results'],
303
+ 'discussion': ['discussion'],
304
+ 'conclusion': ['conclusion', 'conclusions'],
305
+ };
306
+
307
+ // Helper: find section header (skip labels like "Methods:" in structured abstracts)
308
+ // Real section headers are NOT followed by ":" immediately
309
+ function findSectionHeader(text: string, keyword: string, startFrom: number = 0): number {
310
+ const lower = text.toLowerCase();
311
+ let idx = startFrom;
312
+ while ((idx = lower.indexOf(keyword, idx)) !== -1) {
313
+ // Check what follows the keyword
314
+ const afterKeyword = text.slice(idx + keyword.length, idx + keyword.length + 5);
315
+ // Skip if followed by ":" (this is a label, not a section header)
316
+ // Real headers are followed by text content, a newline, or a subheading
317
+ if (!afterKeyword.startsWith(':') && !afterKeyword.startsWith(' :')) {
318
+ return idx;
319
+ }
320
+ idx++;
321
+ }
322
+ return -1;
323
+ }
324
+
325
+ for (const section of wordSections) {
326
+ const fileBase = section.file.replace(/\.md$/i, '').toLowerCase();
327
+
328
+ // Get keywords for this section
329
+ const keywords = sectionKeywords[fileBase] || [fileBase];
330
+
331
+ // Find the first valid keyword that exists in XML (not a label)
332
+ let headerIdx = -1;
333
+ for (const kw of keywords) {
334
+ const idx = findSectionHeader(xmlDocText, kw, 0);
335
+ if (idx >= 0 && (headerIdx < 0 || idx < headerIdx)) {
336
+ headerIdx = idx;
337
+ }
338
+ }
339
+
340
+ if (headerIdx >= 0) {
341
+ // Find the next section's start to determine end boundary
342
+ let nextHeaderIdx = xmlDocText.length;
343
+ const sectionIdx = wordSections.indexOf(section);
344
+ if (sectionIdx < wordSections.length - 1) {
345
+ const nextFileBase = wordSections[sectionIdx + 1].file.replace(/\.md$/i, '').toLowerCase();
346
+ const nextKeywords = sectionKeywords[nextFileBase] || [nextFileBase];
347
+ for (const nkw of nextKeywords) {
348
+ const foundNext = findSectionHeader(xmlDocText, nkw, headerIdx + 10);
349
+ if (foundNext >= 0 && foundNext < nextHeaderIdx) {
350
+ nextHeaderIdx = foundNext;
351
+ }
352
+ }
353
+ }
354
+
355
+ sectionBoundaries.push({
356
+ file: section.file,
357
+ start: headerIdx,
358
+ end: nextHeaderIdx
359
+ });
360
+
361
+ }
362
+ }
363
+
364
+ // Document length is the XML text length (same coordinate system as docPosition)
365
+ const docLength = xmlDocText.length;
366
+
367
+ for (const section of wordSections) {
368
+ const sectionPath = path.join(options.dir, section.file);
369
+
370
+ if (!fs.existsSync(sectionPath)) {
371
+ sectionResults.push({
372
+ file: section.file,
373
+ header: section.header,
374
+ status: 'skipped',
375
+ stats: undefined,
376
+ });
377
+ continue;
378
+ }
379
+
380
+ const result = await importFromWord(docx, sectionPath, {
381
+ sectionContent: section.content,
382
+ author: 'Reviewer',
383
+ wordTables: wordTables,
384
+ });
385
+
386
+ let { annotated, stats } = result;
387
+
388
+ let refConversions: Array<{ from: string; to: string }> = [];
389
+ if (registry && options.crossref !== false) {
390
+ const crossrefResult = convertHardcodedRefs(annotated, registry);
391
+ annotated = crossrefResult.converted;
392
+ refConversions = crossrefResult.conversions;
393
+ totalRefConversions += refConversions.length;
394
+ }
395
+
396
+ let commentsInserted = 0;
397
+ if (comments.length > 0 && anchors.size > 0) {
398
+ // Filter comments to only those that belong to this section
399
+ // Use exact position matching: docPosition is in xmlDocText coordinates,
400
+ // and sectionBoundaries are also in xmlDocText coordinates (same source!)
401
+ const boundary = sectionBoundaries.find(b => b.file === section.file);
402
+ const isFirstSection = wordSections.indexOf(section) === 0;
403
+ const firstBoundaryStart = sectionBoundaries.length > 0 ? Math.min(...sectionBoundaries.map(b => b.start)) : 0;
404
+
405
+ const sectionComments = comments.filter((c: any) => {
406
+ const anchorData = anchors.get(c.id);
407
+ if (!anchorData) return false;
408
+
409
+ // Use exact position - no scaling needed since both are in xmlDocText coordinates
410
+ if (anchorData.docPosition !== undefined && boundary) {
411
+ // Include comments within section boundaries
412
+ if (anchorData.docPosition >= boundary.start && anchorData.docPosition < boundary.end) {
413
+ return true;
414
+ }
415
+ // Also include "outside" comments (before first section) in the first section file
416
+ if (isFirstSection && anchorData.docPosition < firstBoundaryStart) {
417
+ return true;
418
+ }
419
+ }
420
+
421
+ return false;
422
+ });
423
+
424
+ if (process.env.DEBUG) {
425
+ console.log(`[DEBUG] ${section.file}: ${sectionComments.length} comments to place (boundary: ${boundary?.start}-${boundary?.end})`);
426
+ }
427
+
428
+ if (sectionComments.length > 0) {
429
+ // Use a more robust pattern that handles < in comment text
430
+ const commentPattern = /\{>>.*?<<\}/gs;
431
+ const beforeCount = (annotated.match(commentPattern) || []).length;
432
+ annotated = insertCommentsIntoMarkdown(annotated, sectionComments, anchors, {
433
+ quiet: !process.env.DEBUG,
434
+ sectionBoundary: boundary // Pass section boundary for position-based insertion
435
+ });
436
+ const afterCount = (annotated.match(commentPattern) || []).length;
437
+ commentsInserted = afterCount - beforeCount;
438
+
439
+ if (process.env.DEBUG) {
440
+ console.log(`[DEBUG] ${section.file}: inserted ${commentsInserted} of ${sectionComments.length} comments`);
441
+ }
442
+
443
+ if (commentsInserted > 0) {
444
+ stats.comments = (stats.comments || 0) + commentsInserted;
445
+ }
446
+ }
447
+ }
448
+
449
+ totalChanges += stats.total;
450
+
451
+ sectionResults.push({
452
+ file: section.file,
453
+ header: section.header,
454
+ status: 'ok',
455
+ stats,
456
+ refs: refConversions.length,
457
+ });
458
+
459
+ if (!options.dryRun) {
460
+ // Preserve any preamble content (YAML frontmatter, author blocks, metadata)
461
+ // that exists before the first heading in the original file.
462
+ // This content is never included in the Word build output, so it won't
463
+ // appear in the Word doc and would otherwise be lost during sync.
464
+ const originalContent = fs.readFileSync(sectionPath, 'utf-8');
465
+ const firstHeadingMatch = originalContent.match(/^(#\s)/m);
466
+ if (firstHeadingMatch && firstHeadingMatch.index !== undefined && firstHeadingMatch.index > 0) {
467
+ const preamble = originalContent.slice(0, firstHeadingMatch.index);
468
+ // Only prepend if preamble has non-whitespace content
469
+ if (preamble.trim().length > 0) {
470
+ annotated = preamble + annotated;
471
+ }
472
+ }
473
+ fs.writeFileSync(sectionPath, annotated, 'utf-8');
474
+ }
475
+ }
476
+
477
+ const tableRows = sectionResults.map((r) => {
478
+ if (r.status === 'skipped') {
479
+ return [
480
+ chalk.dim(r.file),
481
+ chalk.dim(r.header.slice(0, 25)),
482
+ chalk.yellow('skipped'),
483
+ '',
484
+ '',
485
+ '',
486
+ '',
487
+ ];
488
+ }
489
+ const s = r.stats!;
490
+ return [
491
+ chalk.bold(r.file),
492
+ r.header.length > 25 ? r.header.slice(0, 22) + '...' : r.header,
493
+ s.insertions > 0 ? chalk.green(`+${s.insertions}`) : chalk.dim('-'),
494
+ s.deletions > 0 ? chalk.red(`-${s.deletions}`) : chalk.dim('-'),
495
+ s.substitutions > 0 ? chalk.yellow(`~${s.substitutions}`) : chalk.dim('-'),
496
+ s.comments > 0 ? chalk.blue(`#${s.comments}`) : chalk.dim('-'),
497
+ r.refs! > 0 ? chalk.magenta(`@${r.refs}`) : chalk.dim('-'),
498
+ ];
499
+ });
500
+
501
+ console.log(fmt.table(
502
+ ['File', 'Section', 'Ins', 'Del', 'Sub', 'Cmt', 'Ref'],
503
+ tableRows,
504
+ { align: ['left', 'left', 'right', 'right', 'right', 'right', 'right'] }
505
+ ));
506
+ console.log();
507
+
508
+ if (options.diff !== false && totalChanges > 0) {
509
+ console.log(fmt.header('Changes Preview'));
510
+ console.log();
511
+ for (const result of sectionResults) {
512
+ if (result.status === 'ok' && result.stats && result.stats.total > 0) {
513
+ const sectionPath = path.join(options.dir, result.file);
514
+ if (fs.existsSync(sectionPath)) {
515
+ const content = fs.readFileSync(sectionPath, 'utf-8');
516
+ const preview = inlineDiffPreview(content, { maxLines: 3 });
517
+ if (preview) {
518
+ console.log(chalk.bold(result.file) + ':');
519
+ console.log(preview);
520
+ console.log();
521
+ }
522
+ }
523
+ }
524
+ }
525
+ }
526
+
527
+ if (options.dryRun) {
528
+ console.log(fmt.box(chalk.yellow('Dry run - no files written'), { padding: 0 }));
529
+ } else if (totalChanges > 0 || totalRefConversions > 0 || comments.length > 0) {
530
+ const summaryLines: string[] = [];
531
+ summaryLines.push(`${chalk.bold(wordSections.length)} sections processed`);
532
+ if (totalChanges > 0) summaryLines.push(`${chalk.bold(totalChanges)} annotations imported`);
533
+ if (comments.length > 0) summaryLines.push(`${chalk.bold(comments.length)} comments placed`);
534
+ if (totalRefConversions > 0) summaryLines.push(`${chalk.bold(totalRefConversions)} refs converted to @-syntax`);
535
+
536
+ console.log(fmt.box(summaryLines.join('\n'), { title: 'Summary', padding: 0 }));
537
+ console.log();
538
+ console.log(chalk.dim('Next steps:'));
539
+ console.log(chalk.dim(' 1. rev review <section.md> - Accept/reject changes'));
540
+ console.log(chalk.dim(' 2. rev comments <section.md> - View/address comments'));
541
+ console.log(chalk.dim(' 3. rev build docx - Rebuild Word doc'));
542
+ } else {
543
+ console.log(fmt.status('success', 'No changes detected.'));
544
+ }
545
+ } catch (err) {
546
+ spin.stop();
547
+ const error = err as Error;
548
+ console.error(fmt.status('error', error.message));
549
+ if (process.env.DEBUG) console.error(error.stack);
550
+ process.exit(1);
551
+ }
552
+ });
553
+ }
554
+
555
+ /**
556
+ * `sync --comments-only`: import only Word comments at fuzzy-matched anchors.
557
+ *
558
+ * Skips the Word→Markdown diff entirely (no track changes, no pandoc, no
559
+ * prose modifications). Useful when the markdown has been edited after the
560
+ * docx was sent for review — applying track changes from a stale draft
561
+ * would overwrite newer edits.
562
+ */
563
+ async function syncCommentsOnly(
564
+ docx: string,
565
+ sectionFilter: string[] | undefined,
566
+ options: SyncOptions,
567
+ config: SectionsConfig,
568
+ ): Promise<void> {
569
+ const { extractWordComments, extractCommentAnchors, extractHeadings, insertCommentsIntoMarkdown } = await import('../import.js');
570
+ const { computeSectionBoundaries } = await import('./section-boundaries.js');
571
+
572
+ const spin = fmt.spinner(`Reading comments from ${path.basename(docx)}...`).start();
573
+
574
+ let comments;
575
+ let anchors;
576
+ let headings;
577
+ let fullDocText = '';
578
+ try {
579
+ comments = await extractWordComments(docx);
580
+ const result = await extractCommentAnchors(docx);
581
+ anchors = result.anchors;
582
+ fullDocText = result.fullDocText;
583
+ headings = await extractHeadings(docx);
584
+ spin.stop();
585
+ } catch (err) {
586
+ spin.stop();
587
+ const error = err as Error;
588
+ console.error(fmt.status('error', error.message));
589
+ process.exit(1);
590
+ }
591
+
592
+ console.log(fmt.header(`Comments from ${path.basename(docx)} (comments-only)`));
593
+ console.log();
594
+
595
+ if (comments.length === 0) {
596
+ console.log(fmt.status('info', 'No comments found in document.'));
597
+ return;
598
+ }
599
+
600
+ const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
601
+
602
+ if (boundaries.length === 0) {
603
+ console.error(fmt.status('warning', 'No section headings detected in Word document.'));
604
+ console.error(chalk.dim(' Check that headers in sections.yaml match heading paragraphs in the docx.'));
605
+ process.exit(1);
606
+ }
607
+
608
+ // Apply optional section filter from CLI
609
+ let activeBoundaries = boundaries;
610
+ if (sectionFilter && sectionFilter.length > 0) {
611
+ const wanted = sectionFilter.map(s => s.trim().toLowerCase());
612
+ activeBoundaries = boundaries.filter(b => {
613
+ const base = b.file.replace(/\.md$/i, '').toLowerCase();
614
+ return wanted.some(name => base === name || base.includes(name));
615
+ });
616
+ if (activeBoundaries.length === 0) {
617
+ console.error(fmt.status('error', `No sections matched: ${sectionFilter.join(', ')}`));
618
+ process.exit(1);
619
+ }
620
+ }
621
+
622
+ const firstBoundaryStart = boundaries[0].start;
623
+ const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];
624
+
625
+ for (const boundary of activeBoundaries) {
626
+ const sectionPath = path.join(options.dir, boundary.file);
627
+ if (!fs.existsSync(sectionPath)) {
628
+ results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
629
+ continue;
630
+ }
631
+
632
+ const isFirstSection = boundary === activeBoundaries[0];
633
+ const sectionComments = comments.filter((c: { id: string }) => {
634
+ const anchor = anchors.get(c.id);
635
+ if (!anchor || anchor.docPosition === undefined) return false;
636
+ if (anchor.docPosition >= boundary.start && anchor.docPosition < boundary.end) return true;
637
+ // Comments before the first heading land in the first matched section
638
+ if (isFirstSection && anchor.docPosition < firstBoundaryStart) return true;
639
+ return false;
640
+ });
641
+
642
+ if (sectionComments.length === 0) {
643
+ results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
644
+ continue;
645
+ }
646
+
647
+ const original = fs.readFileSync(sectionPath, 'utf-8');
648
+
649
+ const stats = { placed: 0, deduped: 0, unmatched: 0 };
650
+ const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
651
+ quiet: !process.env.DEBUG,
652
+ sectionBoundary: { start: boundary.start, end: boundary.end },
653
+ wrapAnchor: false,
654
+ outStats: stats,
655
+ });
656
+
657
+ if (!options.dryRun && stats.placed > 0) {
658
+ fs.writeFileSync(sectionPath, annotated, 'utf-8');
659
+ }
660
+ results.push({ file: boundary.file, ...stats, skipped: false });
661
+ }
662
+
663
+ const tableRows = results.map(r => {
664
+ if (r.skipped) {
665
+ return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
666
+ }
667
+ return [
668
+ chalk.bold(r.file),
669
+ chalk.green(`${r.placed}`),
670
+ r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
671
+ r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
672
+ chalk.dim('comments only'),
673
+ ];
674
+ });
675
+
676
+ console.log(fmt.table(
677
+ ['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
678
+ tableRows,
679
+ { align: ['left', 'right', 'right', 'right', 'left'] },
680
+ ));
681
+ console.log();
682
+
683
+ const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
684
+ const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
685
+ const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);
686
+
687
+ const lines: string[] = [];
688
+ lines.push(`${chalk.bold(comments.length)} comments in document`);
689
+ if (totalPlaced > 0) {
690
+ lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
691
+ }
692
+ if (totalDeduped > 0) {
693
+ lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
694
+ }
695
+ if (totalUnmatched > 0) {
696
+ lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
697
+ }
698
+ if (options.dryRun) {
699
+ lines.push(chalk.yellow('Dry run — no files written'));
700
+ } else if (totalPlaced > 0) {
701
+ lines.push(chalk.dim('Existing prose unchanged.'));
702
+ }
703
+ console.log(fmt.box(lines.join('\n'), { title: 'Summary', padding: 0 }));
704
+
705
+ if (totalUnmatched > 0) {
706
+ console.log();
707
+ console.log(chalk.dim('Tip: run "rev verify-anchors" to see which comments drifted.'));
708
+ }
709
+ }