docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
@@ -1,451 +1,451 @@
1
- /**
2
- * Section commands: import, extract, split
3
- *
4
- * Commands for importing Word documents and splitting section files.
5
- * Sync and merge commands are in sync.ts and merge-resolve.ts respectively.
6
- */
7
-
8
- import {
9
- chalk,
10
- fs,
11
- path,
12
- countAnnotations,
13
- loadConfig,
14
- splitAnnotatedPaper,
15
- } from './context.js';
16
- import type { Command } from 'commander';
17
-
18
- interface DetectedSection {
19
- header: string;
20
- content: string;
21
- file: string;
22
- }
23
-
24
- interface ImportStats {
25
- insertions: number;
26
- deletions: number;
27
- substitutions: number;
28
- comments: number;
29
- total: number;
30
- }
31
-
32
- interface BootstrapOptions {
33
- output: string;
34
- dryRun?: boolean;
35
- }
36
-
37
- interface ImportOptions {
38
- output?: string;
39
- author?: string;
40
- dryRun?: boolean;
41
- }
42
-
43
- interface ExtractOptions {
44
- output?: string;
45
- }
46
-
47
- interface SplitOptions {
48
- config: string;
49
- dir: string;
50
- dryRun?: boolean;
51
- }
52
-
53
- /**
54
- * Detect sections from Word document text
55
- * Looks for common academic paper section headers
56
- */
57
- function detectSectionsFromWord(text: string): DetectedSection[] {
58
- const lines = text.split('\n');
59
- const sections: DetectedSection[] = [];
60
-
61
- const headerPatterns = [
62
- /^(Abstract|Summary)$/i,
63
- /^(Introduction|Background)$/i,
64
- /^(Methods?|Materials?\s*(and|&)\s*Methods?|Methodology|Experimental\s*Methods?)$/i,
65
- /^(Results?)$/i,
66
- /^(Results?\s*(and|&)\s*Discussion)$/i,
67
- /^(Discussion)$/i,
68
- /^(Conclusions?|Summary\s*(and|&)?\s*Conclusions?)$/i,
69
- /^(Acknowledgements?|Acknowledgments?)$/i,
70
- /^(References|Bibliography|Literature\s*Cited|Works\s*Cited)$/i,
71
- /^(Appendix|Appendices|Supplementary\s*(Materials?|Information)?|Supporting\s*Information)$/i,
72
- /^(Literature\s*Review|Related\s*Work|Previous\s*Work)$/i,
73
- /^(Study\s*Area|Study\s*Site|Site\s*Description)$/i,
74
- /^(Data\s*Analysis|Statistical\s*Analysis|Data\s*Collection)$/i,
75
- /^(Theoretical\s*Framework|Conceptual\s*Framework)$/i,
76
- /^(Case\s*Study|Case\s*Studies)$/i,
77
- /^(Limitations?)$/i,
78
- /^(Future\s*Work|Future\s*Directions?)$/i,
79
- /^(Funding|Author\s*Contributions?|Conflict\s*of\s*Interest|Data\s*Availability)$/i,
80
- ];
81
-
82
- const numberedHeaderPattern = /^(\d+\.?\s+)(Abstract|Introduction|Background|Methods?|Materials|Results?|Discussion|Conclusions?|References|Acknowledgements?|Appendix)/i;
83
-
84
- let currentSection: string | null = null;
85
- let currentContent: string[] = [];
86
- let preambleContent: string[] = [];
87
-
88
- for (const line of lines) {
89
- const trimmed = line.trim();
90
- if (!trimmed) {
91
- if (currentSection) {
92
- currentContent.push(line);
93
- } else {
94
- preambleContent.push(line);
95
- }
96
- continue;
97
- }
98
-
99
- let isHeader = false;
100
- let headerText = trimmed;
101
-
102
- for (const pattern of headerPatterns) {
103
- if (pattern.test(trimmed)) {
104
- isHeader = true;
105
- break;
106
- }
107
- }
108
-
109
- if (!isHeader) {
110
- const match = trimmed.match(numberedHeaderPattern);
111
- if (match) {
112
- isHeader = true;
113
- headerText = trimmed.replace(/^\d+\.?\s+/, '');
114
- }
115
- }
116
-
117
- if (isHeader) {
118
- if (currentSection) {
119
- sections.push({
120
- header: currentSection,
121
- content: currentContent.join('\n'),
122
- file: headerToFilename(currentSection),
123
- });
124
- } else if (preambleContent.some(l => l.trim())) {
125
- sections.push({
126
- header: 'Preamble',
127
- content: preambleContent.join('\n'),
128
- file: 'preamble.md',
129
- });
130
- }
131
- currentSection = headerText;
132
- currentContent = [];
133
- } else if (currentSection) {
134
- currentContent.push(line);
135
- } else {
136
- preambleContent.push(line);
137
- }
138
- }
139
-
140
- if (currentSection) {
141
- sections.push({
142
- header: currentSection,
143
- content: currentContent.join('\n'),
144
- file: headerToFilename(currentSection),
145
- });
146
- }
147
-
148
- if (sections.length === 0) {
149
- const allContent = [...preambleContent, ...currentContent].join('\n');
150
- if (allContent.trim()) {
151
- sections.push({
152
- header: 'Content',
153
- content: allContent,
154
- file: 'content.md',
155
- });
156
- }
157
- }
158
-
159
- return sections;
160
- }
161
-
162
- /**
163
- * Convert a section header to a filename
164
- */
165
- function headerToFilename(header: string): string {
166
- return header
167
- .toLowerCase()
168
- .replace(/[^a-z0-9]+/g, '-')
169
- .replace(/^-|-$/g, '')
170
- .slice(0, 30) + '.md';
171
- }
172
-
173
- /**
174
- * Bootstrap a new project from a Word document
175
- */
176
- async function bootstrapFromWord(docx: string, options: BootstrapOptions): Promise<void> {
177
- const outputDir = path.resolve(options.output);
178
-
179
- console.log(chalk.cyan(`Bootstrapping project from ${path.basename(docx)}...\n`));
180
-
181
- try {
182
- const { extractTextFromWord } = await import('../word.js');
183
- const { default: YAML } = await import('yaml');
184
-
185
- const text = await extractTextFromWord(docx);
186
-
187
- const sections = detectSectionsFromWord(text);
188
-
189
- if (sections.length === 0) {
190
- console.error(chalk.yellow('No sections detected. Creating single content.md file.'));
191
- sections.push({ header: 'Content', content: text, file: 'content.md' });
192
- }
193
-
194
- console.log(chalk.green(`Detected ${sections.length} section(s):\n`));
195
-
196
- if (!fs.existsSync(outputDir)) {
197
- fs.mkdirSync(outputDir, { recursive: true });
198
- }
199
-
200
- const sectionFiles: string[] = [];
201
- for (const section of sections) {
202
- const filePath = path.join(outputDir, section.file);
203
- const content = `# ${section.header}\n\n${section.content.trim()}\n`;
204
-
205
- console.log(` ${chalk.bold(section.file)} - "${section.header}" (${section.content.split('\n').length} lines)`);
206
-
207
- if (!options.dryRun) {
208
- fs.writeFileSync(filePath, content, 'utf-8');
209
- }
210
- sectionFiles.push(section.file);
211
- }
212
-
213
- const docxName = path.basename(docx, '.docx');
214
- const title = docxName.replace(/[-_]/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());
215
-
216
- const config = {
217
- title: title,
218
- authors: [],
219
- sections: sectionFiles,
220
- bibliography: null,
221
- crossref: {
222
- figureTitle: 'Figure',
223
- tableTitle: 'Table',
224
- figPrefix: ['Fig.', 'Figs.'],
225
- tblPrefix: ['Table', 'Tables'],
226
- },
227
- pdf: {
228
- documentclass: 'article',
229
- fontsize: '12pt',
230
- geometry: 'margin=1in',
231
- linestretch: 1.5,
232
- },
233
- docx: {
234
- keepComments: true,
235
- },
236
- };
237
-
238
- const configPath = path.join(outputDir, 'rev.yaml');
239
- console.log(`\n ${chalk.bold('rev.yaml')} - project configuration`);
240
-
241
- if (!options.dryRun) {
242
- fs.writeFileSync(configPath, YAML.stringify(config), 'utf-8');
243
- }
244
-
245
- const figuresDir = path.join(outputDir, 'figures');
246
- if (!fs.existsSync(figuresDir) && !options.dryRun) {
247
- fs.mkdirSync(figuresDir, { recursive: true });
248
- console.log(` ${chalk.dim('figures/')} - image directory`);
249
- }
250
-
251
- if (options.dryRun) {
252
- console.log(chalk.yellow('\n(Dry run - no files written)'));
253
- } else {
254
- console.log(chalk.green('\nProject created!'));
255
- console.log(chalk.cyan('\nNext steps:'));
256
- if (outputDir !== process.cwd()) {
257
- console.log(chalk.dim(` cd ${path.relative(process.cwd(), outputDir) || '.'}`));
258
- }
259
- console.log(chalk.dim(' # Edit rev.yaml to add authors and adjust settings'));
260
- console.log(chalk.dim(' # Review and clean up section files'));
261
- console.log(chalk.dim(' rev build # Build PDF and DOCX'));
262
- }
263
- } catch (err) {
264
- const error = err as Error;
265
- console.error(chalk.red(`Error: ${error.message}`));
266
- if (process.env.DEBUG) console.error(error.stack);
267
- process.exit(1);
268
- }
269
- }
270
-
271
- /**
272
- * Register section commands with the program
273
- */
274
- export function register(program: Command): void {
275
- // ==========================================================================
276
- // IMPORT command - Import from Word (bootstrap or diff mode)
277
- // ==========================================================================
278
-
279
- program
280
- .command('import')
281
- .description('Import from Word: creates sections from scratch, or diffs against existing MD')
282
- .argument('<docx>', 'Word document')
283
- .argument('[original]', 'Optional: original Markdown file to compare against')
284
- .option('-o, --output <dir>', 'Output directory for bootstrap mode', '.')
285
- .option('-a, --author <name>', 'Author name for changes (diff mode)', 'Reviewer')
286
- .option('--dry-run', 'Preview without saving')
287
- .action(async (docx: string, original: string | undefined, options: ImportOptions) => {
288
- if (!fs.existsSync(docx)) {
289
- console.error(chalk.red(`Error: Word file not found: ${docx}`));
290
- process.exit(1);
291
- }
292
-
293
- if (!original) {
294
- await bootstrapFromWord(docx, options as BootstrapOptions);
295
- return;
296
- }
297
-
298
- if (!fs.existsSync(original)) {
299
- console.error(chalk.red(`Error: Original MD not found: ${original}`));
300
- process.exit(1);
301
- }
302
-
303
- console.log(chalk.cyan(`Comparing ${path.basename(docx)} against ${path.basename(original)}...`));
304
-
305
- // Warn if pandoc is missing
306
- const { hasPandoc: hasPandocImport, getInstallInstructions: getInstallImport } = await import('../dependencies.js');
307
- if (!hasPandocImport()) {
308
- console.log(chalk.yellow(`\n Warning: Pandoc not installed. Track changes extracted from XML (formatting may differ).`));
309
- console.log(chalk.dim(` Install for best results: ${getInstallImport('pandoc')}\n`));
310
- }
311
-
312
- try {
313
- const { importFromWord } = await import('../import.js');
314
- const { annotated, stats } = await importFromWord(docx, original, {
315
- author: options.author,
316
- });
317
-
318
- console.log(chalk.cyan('\nChanges detected:'));
319
- if (stats.insertions > 0) console.log(chalk.green(` + Insertions: ${stats.insertions}`));
320
- if (stats.deletions > 0) console.log(chalk.red(` - Deletions: ${stats.deletions}`));
321
- if (stats.substitutions > 0) console.log(chalk.yellow(` ~ Substitutions: ${stats.substitutions}`));
322
- if (stats.comments > 0) console.log(chalk.blue(` # Comments: ${stats.comments}`));
323
-
324
- if (stats.total === 0) {
325
- console.log(chalk.green('\nNo changes detected.'));
326
- return;
327
- }
328
-
329
- console.log(chalk.dim(`\n Total: ${stats.total}`));
330
-
331
- if (options.dryRun) {
332
- console.log(chalk.cyan('\n--- Preview (first 1000 chars) ---\n'));
333
- console.log(annotated.slice(0, 1000));
334
- if (annotated.length > 1000) console.log(chalk.dim('\n... (truncated)'));
335
- return;
336
- }
337
-
338
- const outputPath = options.output || original;
339
- fs.writeFileSync(outputPath, annotated, 'utf-8');
340
- console.log(chalk.green(`\nSaved annotated version to ${outputPath}`));
341
- console.log(chalk.cyan('\nNext steps:'));
342
- console.log(` 1. ${chalk.bold('rev review ' + outputPath)} - Accept/reject track changes`);
343
- console.log(` 2. Work with Claude to address comments`);
344
- console.log(` 3. ${chalk.bold('rev build docx')} - Rebuild Word doc`);
345
-
346
- } catch (err) {
347
- const error = err as Error;
348
- console.error(chalk.red(`Error: ${error.message}`));
349
- if (process.env.DEBUG) console.error(error.stack);
350
- process.exit(1);
351
- }
352
- });
353
-
354
- // ==========================================================================
355
- // EXTRACT command - Just extract text from Word
356
- // ==========================================================================
357
-
358
- program
359
- .command('extract')
360
- .description('Extract plain text from Word document (no diff)')
361
- .argument('<docx>', 'Word document')
362
- .option('-o, --output <file>', 'Output file (default: stdout)')
363
- .action(async (docx: string, options: ExtractOptions) => {
364
- if (!fs.existsSync(docx)) {
365
- console.error(chalk.red(`Error: File not found: ${docx}`));
366
- process.exit(1);
367
- }
368
-
369
- try {
370
- const { extractTextFromWord } = await import('../word.js');
371
- const text = await extractTextFromWord(docx);
372
-
373
- if (options.output) {
374
- fs.writeFileSync(options.output, text, 'utf-8');
375
- console.error(chalk.green(`Extracted to ${options.output}`));
376
- } else {
377
- process.stdout.write(text);
378
- }
379
- } catch (err) {
380
- const error = err as Error;
381
- console.error(chalk.red(`Error: ${error.message}`));
382
- process.exit(1);
383
- }
384
- });
385
-
386
- // ==========================================================================
387
- // SPLIT command - Split annotated paper.md back to section files
388
- // ==========================================================================
389
-
390
- program
391
- .command('split')
392
- .description('Split annotated paper.md back to section files')
393
- .argument('<file>', 'Annotated paper.md file')
394
- .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
395
- .option('-d, --dir <directory>', 'Output directory for section files', '.')
396
- .option('--dry-run', 'Preview without writing files')
397
- .action((file: string, options: SplitOptions) => {
398
- if (!fs.existsSync(file)) {
399
- console.error(chalk.red(`File not found: ${file}`));
400
- process.exit(1);
401
- }
402
-
403
- const configPath = path.resolve(options.dir, options.config);
404
- if (!fs.existsSync(configPath)) {
405
- console.error(chalk.red(`Config not found: ${configPath}`));
406
- console.error(chalk.dim('Run "rev init" first to generate sections.yaml'));
407
- process.exit(1);
408
- }
409
-
410
- console.log(chalk.cyan(`Splitting ${file} using ${options.config}...`));
411
-
412
- const config = loadConfig(configPath);
413
- const paperContent = fs.readFileSync(file, 'utf-8');
414
- const sections = splitAnnotatedPaper(paperContent, config.sections);
415
-
416
- if (sections.size === 0) {
417
- console.error(chalk.yellow('No sections detected.'));
418
- console.error(chalk.dim('Check that headers match sections.yaml'));
419
- process.exit(1);
420
- }
421
-
422
- console.log(chalk.green(`\nFound ${sections.size} sections:\n`));
423
-
424
- for (const [sectionFile, content] of sections) {
425
- const outputPath = path.join(options.dir, sectionFile);
426
- const lines = content.split('\n').length;
427
- const annotations = countAnnotations(content);
428
-
429
- console.log(` ${chalk.bold(sectionFile)} (${lines} lines)`);
430
- if (annotations.total > 0) {
431
- const parts: string[] = [];
432
- if (annotations.inserts > 0) parts.push(chalk.green(`+${annotations.inserts}`));
433
- if (annotations.deletes > 0) parts.push(chalk.red(`-${annotations.deletes}`));
434
- if (annotations.substitutes > 0) parts.push(chalk.yellow(`~${annotations.substitutes}`));
435
- if (annotations.comments > 0) parts.push(chalk.blue(`#${annotations.comments}`));
436
- console.log(chalk.dim(` Annotations: ${parts.join(' ')}`));
437
- }
438
-
439
- if (!options.dryRun) {
440
- fs.writeFileSync(outputPath, content, 'utf-8');
441
- }
442
- }
443
-
444
- if (options.dryRun) {
445
- console.log(chalk.yellow('\n(Dry run - no files written)'));
446
- } else {
447
- console.log(chalk.green('\nSection files updated.'));
448
- console.log(chalk.cyan('\nNext: rev review <section.md> for each section'));
449
- }
450
- });
451
- }
1
+ /**
2
+ * Section commands: import, extract, split
3
+ *
4
+ * Commands for importing Word documents and splitting section files.
5
+ * Sync and merge commands are in sync.ts and merge-resolve.ts respectively.
6
+ */
7
+
8
+ import {
9
+ chalk,
10
+ fs,
11
+ path,
12
+ countAnnotations,
13
+ loadConfig,
14
+ splitAnnotatedPaper,
15
+ } from './context.js';
16
+ import type { Command } from 'commander';
17
+
18
+ interface DetectedSection {
19
+ header: string;
20
+ content: string;
21
+ file: string;
22
+ }
23
+
24
+ interface ImportStats {
25
+ insertions: number;
26
+ deletions: number;
27
+ substitutions: number;
28
+ comments: number;
29
+ total: number;
30
+ }
31
+
32
+ interface BootstrapOptions {
33
+ output: string;
34
+ dryRun?: boolean;
35
+ }
36
+
37
+ interface ImportOptions {
38
+ output?: string;
39
+ author?: string;
40
+ dryRun?: boolean;
41
+ }
42
+
43
+ interface ExtractOptions {
44
+ output?: string;
45
+ }
46
+
47
+ interface SplitOptions {
48
+ config: string;
49
+ dir: string;
50
+ dryRun?: boolean;
51
+ }
52
+
53
+ /**
54
+ * Detect sections from Word document text
55
+ * Looks for common academic paper section headers
56
+ */
57
+ function detectSectionsFromWord(text: string): DetectedSection[] {
58
+ const lines = text.split('\n');
59
+ const sections: DetectedSection[] = [];
60
+
61
+ const headerPatterns = [
62
+ /^(Abstract|Summary)$/i,
63
+ /^(Introduction|Background)$/i,
64
+ /^(Methods?|Materials?\s*(and|&)\s*Methods?|Methodology|Experimental\s*Methods?)$/i,
65
+ /^(Results?)$/i,
66
+ /^(Results?\s*(and|&)\s*Discussion)$/i,
67
+ /^(Discussion)$/i,
68
+ /^(Conclusions?|Summary\s*(and|&)?\s*Conclusions?)$/i,
69
+ /^(Acknowledgements?|Acknowledgments?)$/i,
70
+ /^(References|Bibliography|Literature\s*Cited|Works\s*Cited)$/i,
71
+ /^(Appendix|Appendices|Supplementary\s*(Materials?|Information)?|Supporting\s*Information)$/i,
72
+ /^(Literature\s*Review|Related\s*Work|Previous\s*Work)$/i,
73
+ /^(Study\s*Area|Study\s*Site|Site\s*Description)$/i,
74
+ /^(Data\s*Analysis|Statistical\s*Analysis|Data\s*Collection)$/i,
75
+ /^(Theoretical\s*Framework|Conceptual\s*Framework)$/i,
76
+ /^(Case\s*Study|Case\s*Studies)$/i,
77
+ /^(Limitations?)$/i,
78
+ /^(Future\s*Work|Future\s*Directions?)$/i,
79
+ /^(Funding|Author\s*Contributions?|Conflict\s*of\s*Interest|Data\s*Availability)$/i,
80
+ ];
81
+
82
+ const numberedHeaderPattern = /^(\d+\.?\s+)(Abstract|Introduction|Background|Methods?|Materials|Results?|Discussion|Conclusions?|References|Acknowledgements?|Appendix)/i;
83
+
84
+ let currentSection: string | null = null;
85
+ let currentContent: string[] = [];
86
+ let preambleContent: string[] = [];
87
+
88
+ for (const line of lines) {
89
+ const trimmed = line.trim();
90
+ if (!trimmed) {
91
+ if (currentSection) {
92
+ currentContent.push(line);
93
+ } else {
94
+ preambleContent.push(line);
95
+ }
96
+ continue;
97
+ }
98
+
99
+ let isHeader = false;
100
+ let headerText = trimmed;
101
+
102
+ for (const pattern of headerPatterns) {
103
+ if (pattern.test(trimmed)) {
104
+ isHeader = true;
105
+ break;
106
+ }
107
+ }
108
+
109
+ if (!isHeader) {
110
+ const match = trimmed.match(numberedHeaderPattern);
111
+ if (match) {
112
+ isHeader = true;
113
+ headerText = trimmed.replace(/^\d+\.?\s+/, '');
114
+ }
115
+ }
116
+
117
+ if (isHeader) {
118
+ if (currentSection) {
119
+ sections.push({
120
+ header: currentSection,
121
+ content: currentContent.join('\n'),
122
+ file: headerToFilename(currentSection),
123
+ });
124
+ } else if (preambleContent.some(l => l.trim())) {
125
+ sections.push({
126
+ header: 'Preamble',
127
+ content: preambleContent.join('\n'),
128
+ file: 'preamble.md',
129
+ });
130
+ }
131
+ currentSection = headerText;
132
+ currentContent = [];
133
+ } else if (currentSection) {
134
+ currentContent.push(line);
135
+ } else {
136
+ preambleContent.push(line);
137
+ }
138
+ }
139
+
140
+ if (currentSection) {
141
+ sections.push({
142
+ header: currentSection,
143
+ content: currentContent.join('\n'),
144
+ file: headerToFilename(currentSection),
145
+ });
146
+ }
147
+
148
+ if (sections.length === 0) {
149
+ const allContent = [...preambleContent, ...currentContent].join('\n');
150
+ if (allContent.trim()) {
151
+ sections.push({
152
+ header: 'Content',
153
+ content: allContent,
154
+ file: 'content.md',
155
+ });
156
+ }
157
+ }
158
+
159
+ return sections;
160
+ }
161
+
162
+ /**
163
+ * Convert a section header to a filename
164
+ */
165
+ function headerToFilename(header: string): string {
166
+ return header
167
+ .toLowerCase()
168
+ .replace(/[^a-z0-9]+/g, '-')
169
+ .replace(/^-|-$/g, '')
170
+ .slice(0, 30) + '.md';
171
+ }
172
+
173
+ /**
174
+ * Bootstrap a new project from a Word document
175
+ */
176
+ async function bootstrapFromWord(docx: string, options: BootstrapOptions): Promise<void> {
177
+ const outputDir = path.resolve(options.output);
178
+
179
+ console.log(chalk.cyan(`Bootstrapping project from ${path.basename(docx)}...\n`));
180
+
181
+ try {
182
+ const { extractTextFromWord } = await import('../word.js');
183
+ const { default: YAML } = await import('yaml');
184
+
185
+ const text = await extractTextFromWord(docx);
186
+
187
+ const sections = detectSectionsFromWord(text);
188
+
189
+ if (sections.length === 0) {
190
+ console.error(chalk.yellow('No sections detected. Creating single content.md file.'));
191
+ sections.push({ header: 'Content', content: text, file: 'content.md' });
192
+ }
193
+
194
+ console.log(chalk.green(`Detected ${sections.length} section(s):\n`));
195
+
196
+ if (!fs.existsSync(outputDir)) {
197
+ fs.mkdirSync(outputDir, { recursive: true });
198
+ }
199
+
200
+ const sectionFiles: string[] = [];
201
+ for (const section of sections) {
202
+ const filePath = path.join(outputDir, section.file);
203
+ const content = `# ${section.header}\n\n${section.content.trim()}\n`;
204
+
205
+ console.log(` ${chalk.bold(section.file)} - "${section.header}" (${section.content.split('\n').length} lines)`);
206
+
207
+ if (!options.dryRun) {
208
+ fs.writeFileSync(filePath, content, 'utf-8');
209
+ }
210
+ sectionFiles.push(section.file);
211
+ }
212
+
213
+ const docxName = path.basename(docx, '.docx');
214
+ const title = docxName.replace(/[-_]/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());
215
+
216
+ const config = {
217
+ title: title,
218
+ authors: [],
219
+ sections: sectionFiles,
220
+ bibliography: null,
221
+ crossref: {
222
+ figureTitle: 'Figure',
223
+ tableTitle: 'Table',
224
+ figPrefix: ['Fig.', 'Figs.'],
225
+ tblPrefix: ['Table', 'Tables'],
226
+ },
227
+ pdf: {
228
+ documentclass: 'article',
229
+ fontsize: '12pt',
230
+ geometry: 'margin=1in',
231
+ linestretch: 1.5,
232
+ },
233
+ docx: {
234
+ keepComments: true,
235
+ },
236
+ };
237
+
238
+ const configPath = path.join(outputDir, 'rev.yaml');
239
+ console.log(`\n ${chalk.bold('rev.yaml')} - project configuration`);
240
+
241
+ if (!options.dryRun) {
242
+ fs.writeFileSync(configPath, YAML.stringify(config), 'utf-8');
243
+ }
244
+
245
+ const figuresDir = path.join(outputDir, 'figures');
246
+ if (!fs.existsSync(figuresDir) && !options.dryRun) {
247
+ fs.mkdirSync(figuresDir, { recursive: true });
248
+ console.log(` ${chalk.dim('figures/')} - image directory`);
249
+ }
250
+
251
+ if (options.dryRun) {
252
+ console.log(chalk.yellow('\n(Dry run - no files written)'));
253
+ } else {
254
+ console.log(chalk.green('\nProject created!'));
255
+ console.log(chalk.cyan('\nNext steps:'));
256
+ if (outputDir !== process.cwd()) {
257
+ console.log(chalk.dim(` cd ${path.relative(process.cwd(), outputDir) || '.'}`));
258
+ }
259
+ console.log(chalk.dim(' # Edit rev.yaml to add authors and adjust settings'));
260
+ console.log(chalk.dim(' # Review and clean up section files'));
261
+ console.log(chalk.dim(' rev build # Build PDF and DOCX'));
262
+ }
263
+ } catch (err) {
264
+ const error = err as Error;
265
+ console.error(chalk.red(`Error: ${error.message}`));
266
+ if (process.env.DEBUG) console.error(error.stack);
267
+ process.exit(1);
268
+ }
269
+ }
270
+
271
/**
 * Default value of the `import` command's `--output` option.
 *
 * The option is documented as the output directory for bootstrap mode. In
 * diff mode this value is treated as "no explicit output requested".
 */
const DEFAULT_IMPORT_OUTPUT = '.';

/**
 * Decide where diff-mode `import` writes the annotated Markdown.
 *
 * Because `--output` defaults to `'.'`, the option is never empty; writing
 * straight to it would attempt to write to a directory and fail with EISDIR.
 *
 * @param output - Value of `--output` as parsed by the CLI.
 * @param original - Path of the original Markdown file being compared.
 * @returns `original` when no explicit output was given; a file named after
 *   `original` inside `output` when `output` is an existing directory;
 *   otherwise `output` itself.
 */
function resolveAnnotatedOutputPath(output: string | undefined, original: string): string {
  if (!output || output === DEFAULT_IMPORT_OUTPUT) {
    return original;
  }
  const isDirectory = fs.existsSync(output) && fs.statSync(output).isDirectory();
  return isDirectory ? path.join(output, path.basename(original)) : output;
}

/**
 * Register section commands with the program.
 *
 * Adds three commands:
 * - `import`  - bootstrap section files from a Word document, or (when an
 *   original Markdown file is given) annotate that file with the changes
 *   detected in the Word document.
 * - `extract` - write the text of a Word document to a file or stdout.
 * - `split`   - split an annotated paper file back into section files.
 *
 * Each handler reports failures on stderr and exits the process with code 1.
 *
 * @param program - Commander program to attach the commands to.
 */
export function register(program: Command): void {
  // ==========================================================================
  // IMPORT command - Import from Word (bootstrap or diff mode)
  // ==========================================================================

  program
    .command('import')
    .description('Import from Word: creates sections from scratch, or diffs against existing MD')
    .argument('<docx>', 'Word document')
    .argument('[original]', 'Optional: original Markdown file to compare against')
    .option('-o, --output <dir>', 'Output directory for bootstrap mode', DEFAULT_IMPORT_OUTPUT)
    .option('-a, --author <name>', 'Author name for changes (diff mode)', 'Reviewer')
    .option('--dry-run', 'Preview without saving')
    .action(async (docx: string, original: string | undefined, options: ImportOptions) => {
      if (!fs.existsSync(docx)) {
        console.error(chalk.red(`Error: Word file not found: ${docx}`));
        process.exit(1);
      }

      // No original Markdown given: bootstrap mode.
      if (!original) {
        await bootstrapFromWord(docx, options as BootstrapOptions);
        return;
      }

      if (!fs.existsSync(original)) {
        console.error(chalk.red(`Error: Original MD not found: ${original}`));
        process.exit(1);
      }

      console.log(chalk.cyan(`Comparing ${path.basename(docx)} against ${path.basename(original)}...`));

      // Warn if pandoc is missing
      const { hasPandoc: hasPandocImport, getInstallInstructions: getInstallImport } = await import('../dependencies.js');
      if (!hasPandocImport()) {
        console.log(chalk.yellow(`\n Warning: Pandoc not installed. Track changes extracted from XML (formatting may differ).`));
        console.log(chalk.dim(` Install for best results: ${getInstallImport('pandoc')}\n`));
      }

      try {
        const { importFromWord } = await import('../import.js');
        const { annotated, stats } = await importFromWord(docx, original, {
          author: options.author,
        });

        console.log(chalk.cyan('\nChanges detected:'));
        if (stats.insertions > 0) console.log(chalk.green(` + Insertions: ${stats.insertions}`));
        if (stats.deletions > 0) console.log(chalk.red(` - Deletions: ${stats.deletions}`));
        if (stats.substitutions > 0) console.log(chalk.yellow(` ~ Substitutions: ${stats.substitutions}`));
        if (stats.comments > 0) console.log(chalk.blue(` # Comments: ${stats.comments}`));

        if (stats.total === 0) {
          console.log(chalk.green('\nNo changes detected.'));
          return;
        }

        console.log(chalk.dim(`\n Total: ${stats.total}`));

        if (options.dryRun) {
          console.log(chalk.cyan('\n--- Preview (first 1000 chars) ---\n'));
          console.log(annotated.slice(0, 1000));
          if (annotated.length > 1000) console.log(chalk.dim('\n... (truncated)'));
          return;
        }

        // `--output` always has a value (its default is '.'), so resolve it
        // explicitly instead of relying on `options.output || original`.
        const outputPath = resolveAnnotatedOutputPath(options.output, original);
        fs.writeFileSync(outputPath, annotated, 'utf-8');
        console.log(chalk.green(`\nSaved annotated version to ${outputPath}`));
        console.log(chalk.cyan('\nNext steps:'));
        console.log(` 1. ${chalk.bold('rev review ' + outputPath)} - Accept/reject track changes`);
        console.log(` 2. Work with Claude to address comments`);
        console.log(` 3. ${chalk.bold('rev build docx')} - Rebuild Word doc`);
      } catch (err) {
        const error = err as Error;
        console.error(chalk.red(`Error: ${error.message}`));
        if (process.env.DEBUG) console.error(error.stack);
        process.exit(1);
      }
    });

  // ==========================================================================
  // EXTRACT command - Just extract text from Word
  // ==========================================================================

  program
    .command('extract')
    .description('Extract plain text from Word document (no diff)')
    .argument('<docx>', 'Word document')
    .option('-o, --output <file>', 'Output file (default: stdout)')
    .action(async (docx: string, options: ExtractOptions) => {
      if (!fs.existsSync(docx)) {
        console.error(chalk.red(`Error: File not found: ${docx}`));
        process.exit(1);
      }

      try {
        const { extractTextFromWord } = await import('../word.js');
        const text = await extractTextFromWord(docx);

        if (options.output) {
          fs.writeFileSync(options.output, text, 'utf-8');
          // Status goes to stderr, not stdout.
          console.error(chalk.green(`Extracted to ${options.output}`));
        } else {
          process.stdout.write(text);
        }
      } catch (err) {
        const error = err as Error;
        console.error(chalk.red(`Error: ${error.message}`));
        // Same DEBUG stack output as the import command's error handler.
        if (process.env.DEBUG) console.error(error.stack);
        process.exit(1);
      }
    });

  // ==========================================================================
  // SPLIT command - Split annotated paper.md back to section files
  // ==========================================================================

  program
    .command('split')
    .description('Split annotated paper.md back to section files')
    .argument('<file>', 'Annotated paper.md file')
    .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
    .option('-d, --dir <directory>', 'Output directory for section files', '.')
    .option('--dry-run', 'Preview without writing files')
    .action((file: string, options: SplitOptions) => {
      if (!fs.existsSync(file)) {
        console.error(chalk.red(`File not found: ${file}`));
        process.exit(1);
      }

      // The config path is resolved relative to the section directory.
      const configPath = path.resolve(options.dir, options.config);
      if (!fs.existsSync(configPath)) {
        console.error(chalk.red(`Config not found: ${configPath}`));
        console.error(chalk.dim('Run "rev init" first to generate sections.yaml'));
        process.exit(1);
      }

      console.log(chalk.cyan(`Splitting ${file} using ${options.config}...`));

      const config = loadConfig(configPath);
      const paperContent = fs.readFileSync(file, 'utf-8');
      const sections = splitAnnotatedPaper(paperContent, config.sections);

      if (sections.size === 0) {
        console.error(chalk.yellow('No sections detected.'));
        console.error(chalk.dim('Check that headers match sections.yaml'));
        process.exit(1);
      }

      console.log(chalk.green(`\nFound ${sections.size} sections:\n`));

      for (const [sectionFile, content] of sections) {
        const outputPath = path.join(options.dir, sectionFile);
        const lines = content.split('\n').length;
        const annotations = countAnnotations(content);

        console.log(` ${chalk.bold(sectionFile)} (${lines} lines)`);
        if (annotations.total > 0) {
          const parts: string[] = [];
          if (annotations.inserts > 0) parts.push(chalk.green(`+${annotations.inserts}`));
          if (annotations.deletes > 0) parts.push(chalk.red(`-${annotations.deletes}`));
          if (annotations.substitutes > 0) parts.push(chalk.yellow(`~${annotations.substitutes}`));
          if (annotations.comments > 0) parts.push(chalk.blue(`#${annotations.comments}`));
          console.log(chalk.dim(` Annotations: ${parts.join(' ')}`));
        }

        if (!options.dryRun) {
          fs.writeFileSync(outputPath, content, 'utf-8');
        }
      }

      if (options.dryRun) {
        console.log(chalk.yellow('\n(Dry run - no files written)'));
      } else {
        console.log(chalk.green('\nSection files updated.'));
        console.log(chalk.cyan('\nNext: rev review <section.md> for each section'));
      }
    });
}