docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134):
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
@@ -1,451 +1,451 @@
1
- /**
2
- * Section commands: import, extract, split
3
- *
4
- * Commands for importing Word documents and splitting section files.
5
- * Sync and merge commands are in sync.ts and merge-resolve.ts respectively.
6
- */
7
-
8
- import {
9
- chalk,
10
- fs,
11
- path,
12
- countAnnotations,
13
- loadConfig,
14
- splitAnnotatedPaper,
15
- } from './context.js';
16
- import type { Command } from 'commander';
17
-
18
- interface DetectedSection {
19
- header: string;
20
- content: string;
21
- file: string;
22
- }
23
-
24
- interface ImportStats {
25
- insertions: number;
26
- deletions: number;
27
- substitutions: number;
28
- comments: number;
29
- total: number;
30
- }
31
-
32
- interface BootstrapOptions {
33
- output: string;
34
- dryRun?: boolean;
35
- }
36
-
37
- interface ImportOptions {
38
- output?: string;
39
- author?: string;
40
- dryRun?: boolean;
41
- }
42
-
43
- interface ExtractOptions {
44
- output?: string;
45
- }
46
-
47
- interface SplitOptions {
48
- config: string;
49
- dir: string;
50
- dryRun?: boolean;
51
- }
52
-
53
- /**
54
- * Detect sections from Word document text
55
- * Looks for common academic paper section headers
56
- */
57
- function detectSectionsFromWord(text: string): DetectedSection[] {
58
- const lines = text.split('\n');
59
- const sections: DetectedSection[] = [];
60
-
61
- const headerPatterns = [
62
- /^(Abstract|Summary)$/i,
63
- /^(Introduction|Background)$/i,
64
- /^(Methods?|Materials?\s*(and|&)\s*Methods?|Methodology|Experimental\s*Methods?)$/i,
65
- /^(Results?)$/i,
66
- /^(Results?\s*(and|&)\s*Discussion)$/i,
67
- /^(Discussion)$/i,
68
- /^(Conclusions?|Summary\s*(and|&)?\s*Conclusions?)$/i,
69
- /^(Acknowledgements?|Acknowledgments?)$/i,
70
- /^(References|Bibliography|Literature\s*Cited|Works\s*Cited)$/i,
71
- /^(Appendix|Appendices|Supplementary\s*(Materials?|Information)?|Supporting\s*Information)$/i,
72
- /^(Literature\s*Review|Related\s*Work|Previous\s*Work)$/i,
73
- /^(Study\s*Area|Study\s*Site|Site\s*Description)$/i,
74
- /^(Data\s*Analysis|Statistical\s*Analysis|Data\s*Collection)$/i,
75
- /^(Theoretical\s*Framework|Conceptual\s*Framework)$/i,
76
- /^(Case\s*Study|Case\s*Studies)$/i,
77
- /^(Limitations?)$/i,
78
- /^(Future\s*Work|Future\s*Directions?)$/i,
79
- /^(Funding|Author\s*Contributions?|Conflict\s*of\s*Interest|Data\s*Availability)$/i,
80
- ];
81
-
82
- const numberedHeaderPattern = /^(\d+\.?\s+)(Abstract|Introduction|Background|Methods?|Materials|Results?|Discussion|Conclusions?|References|Acknowledgements?|Appendix)/i;
83
-
84
- let currentSection: string | null = null;
85
- let currentContent: string[] = [];
86
- let preambleContent: string[] = [];
87
-
88
- for (const line of lines) {
89
- const trimmed = line.trim();
90
- if (!trimmed) {
91
- if (currentSection) {
92
- currentContent.push(line);
93
- } else {
94
- preambleContent.push(line);
95
- }
96
- continue;
97
- }
98
-
99
- let isHeader = false;
100
- let headerText = trimmed;
101
-
102
- for (const pattern of headerPatterns) {
103
- if (pattern.test(trimmed)) {
104
- isHeader = true;
105
- break;
106
- }
107
- }
108
-
109
- if (!isHeader) {
110
- const match = trimmed.match(numberedHeaderPattern);
111
- if (match) {
112
- isHeader = true;
113
- headerText = trimmed.replace(/^\d+\.?\s+/, '');
114
- }
115
- }
116
-
117
- if (isHeader) {
118
- if (currentSection) {
119
- sections.push({
120
- header: currentSection,
121
- content: currentContent.join('\n'),
122
- file: headerToFilename(currentSection),
123
- });
124
- } else if (preambleContent.some(l => l.trim())) {
125
- sections.push({
126
- header: 'Preamble',
127
- content: preambleContent.join('\n'),
128
- file: 'preamble.md',
129
- });
130
- }
131
- currentSection = headerText;
132
- currentContent = [];
133
- } else if (currentSection) {
134
- currentContent.push(line);
135
- } else {
136
- preambleContent.push(line);
137
- }
138
- }
139
-
140
- if (currentSection) {
141
- sections.push({
142
- header: currentSection,
143
- content: currentContent.join('\n'),
144
- file: headerToFilename(currentSection),
145
- });
146
- }
147
-
148
- if (sections.length === 0) {
149
- const allContent = [...preambleContent, ...currentContent].join('\n');
150
- if (allContent.trim()) {
151
- sections.push({
152
- header: 'Content',
153
- content: allContent,
154
- file: 'content.md',
155
- });
156
- }
157
- }
158
-
159
- return sections;
160
- }
161
-
162
- /**
163
- * Convert a section header to a filename
164
- */
165
- function headerToFilename(header: string): string {
166
- return header
167
- .toLowerCase()
168
- .replace(/[^a-z0-9]+/g, '-')
169
- .replace(/^-|-$/g, '')
170
- .slice(0, 30) + '.md';
171
- }
172
-
173
- /**
174
- * Bootstrap a new project from a Word document
175
- */
176
- async function bootstrapFromWord(docx: string, options: BootstrapOptions): Promise<void> {
177
- const outputDir = path.resolve(options.output);
178
-
179
- console.log(chalk.cyan(`Bootstrapping project from ${path.basename(docx)}...\n`));
180
-
181
- try {
182
- const { extractTextFromWord } = await import('../word.js');
183
- const { default: YAML } = await import('yaml');
184
-
185
- const text = await extractTextFromWord(docx);
186
-
187
- const sections = detectSectionsFromWord(text);
188
-
189
- if (sections.length === 0) {
190
- console.error(chalk.yellow('No sections detected. Creating single content.md file.'));
191
- sections.push({ header: 'Content', content: text, file: 'content.md' });
192
- }
193
-
194
- console.log(chalk.green(`Detected ${sections.length} section(s):\n`));
195
-
196
- if (!fs.existsSync(outputDir)) {
197
- fs.mkdirSync(outputDir, { recursive: true });
198
- }
199
-
200
- const sectionFiles: string[] = [];
201
- for (const section of sections) {
202
- const filePath = path.join(outputDir, section.file);
203
- const content = `# ${section.header}\n\n${section.content.trim()}\n`;
204
-
205
- console.log(` ${chalk.bold(section.file)} - "${section.header}" (${section.content.split('\n').length} lines)`);
206
-
207
- if (!options.dryRun) {
208
- fs.writeFileSync(filePath, content, 'utf-8');
209
- }
210
- sectionFiles.push(section.file);
211
- }
212
-
213
- const docxName = path.basename(docx, '.docx');
214
- const title = docxName.replace(/[-_]/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());
215
-
216
- const config = {
217
- title: title,
218
- authors: [],
219
- sections: sectionFiles,
220
- bibliography: null,
221
- crossref: {
222
- figureTitle: 'Figure',
223
- tableTitle: 'Table',
224
- figPrefix: ['Fig.', 'Figs.'],
225
- tblPrefix: ['Table', 'Tables'],
226
- },
227
- pdf: {
228
- documentclass: 'article',
229
- fontsize: '12pt',
230
- geometry: 'margin=1in',
231
- linestretch: 1.5,
232
- },
233
- docx: {
234
- keepComments: true,
235
- },
236
- };
237
-
238
- const configPath = path.join(outputDir, 'rev.yaml');
239
- console.log(`\n ${chalk.bold('rev.yaml')} - project configuration`);
240
-
241
- if (!options.dryRun) {
242
- fs.writeFileSync(configPath, YAML.stringify(config), 'utf-8');
243
- }
244
-
245
- const figuresDir = path.join(outputDir, 'figures');
246
- if (!fs.existsSync(figuresDir) && !options.dryRun) {
247
- fs.mkdirSync(figuresDir, { recursive: true });
248
- console.log(` ${chalk.dim('figures/')} - image directory`);
249
- }
250
-
251
- if (options.dryRun) {
252
- console.log(chalk.yellow('\n(Dry run - no files written)'));
253
- } else {
254
- console.log(chalk.green('\nProject created!'));
255
- console.log(chalk.cyan('\nNext steps:'));
256
- if (outputDir !== process.cwd()) {
257
- console.log(chalk.dim(` cd ${path.relative(process.cwd(), outputDir) || '.'}`));
258
- }
259
- console.log(chalk.dim(' # Edit rev.yaml to add authors and adjust settings'));
260
- console.log(chalk.dim(' # Review and clean up section files'));
261
- console.log(chalk.dim(' rev build # Build PDF and DOCX'));
262
- }
263
- } catch (err) {
264
- const error = err as Error;
265
- console.error(chalk.red(`Error: ${error.message}`));
266
- if (process.env.DEBUG) console.error(error.stack);
267
- process.exit(1);
268
- }
269
- }
270
-
271
- /**
272
- * Register section commands with the program
273
- */
274
- export function register(program: Command): void {
275
- // ==========================================================================
276
- // IMPORT command - Import from Word (bootstrap or diff mode)
277
- // ==========================================================================
278
-
279
- program
280
- .command('import')
281
- .description('Import from Word: creates sections from scratch, or diffs against existing MD')
282
- .argument('<docx>', 'Word document')
283
- .argument('[original]', 'Optional: original Markdown file to compare against')
284
- .option('-o, --output <dir>', 'Output directory for bootstrap mode', '.')
285
- .option('-a, --author <name>', 'Author name for changes (diff mode)', 'Reviewer')
286
- .option('--dry-run', 'Preview without saving')
287
- .action(async (docx: string, original: string | undefined, options: ImportOptions) => {
288
- if (!fs.existsSync(docx)) {
289
- console.error(chalk.red(`Error: Word file not found: ${docx}`));
290
- process.exit(1);
291
- }
292
-
293
- if (!original) {
294
- await bootstrapFromWord(docx, options as BootstrapOptions);
295
- return;
296
- }
297
-
298
- if (!fs.existsSync(original)) {
299
- console.error(chalk.red(`Error: Original MD not found: ${original}`));
300
- process.exit(1);
301
- }
302
-
303
- console.log(chalk.cyan(`Comparing ${path.basename(docx)} against ${path.basename(original)}...`));
304
-
305
- // Warn if pandoc is missing
306
- const { hasPandoc: hasPandocImport, getInstallInstructions: getInstallImport } = await import('../dependencies.js');
307
- if (!hasPandocImport()) {
308
- console.log(chalk.yellow(`\n Warning: Pandoc not installed. Track changes extracted from XML (formatting may differ).`));
309
- console.log(chalk.dim(` Install for best results: ${getInstallImport('pandoc')}\n`));
310
- }
311
-
312
- try {
313
- const { importFromWord } = await import('../import.js');
314
- const { annotated, stats } = await importFromWord(docx, original, {
315
- author: options.author,
316
- });
317
-
318
- console.log(chalk.cyan('\nChanges detected:'));
319
- if (stats.insertions > 0) console.log(chalk.green(` + Insertions: ${stats.insertions}`));
320
- if (stats.deletions > 0) console.log(chalk.red(` - Deletions: ${stats.deletions}`));
321
- if (stats.substitutions > 0) console.log(chalk.yellow(` ~ Substitutions: ${stats.substitutions}`));
322
- if (stats.comments > 0) console.log(chalk.blue(` # Comments: ${stats.comments}`));
323
-
324
- if (stats.total === 0) {
325
- console.log(chalk.green('\nNo changes detected.'));
326
- return;
327
- }
328
-
329
- console.log(chalk.dim(`\n Total: ${stats.total}`));
330
-
331
- if (options.dryRun) {
332
- console.log(chalk.cyan('\n--- Preview (first 1000 chars) ---\n'));
333
- console.log(annotated.slice(0, 1000));
334
- if (annotated.length > 1000) console.log(chalk.dim('\n... (truncated)'));
335
- return;
336
- }
337
-
338
- const outputPath = options.output || original;
339
- fs.writeFileSync(outputPath, annotated, 'utf-8');
340
- console.log(chalk.green(`\nSaved annotated version to ${outputPath}`));
341
- console.log(chalk.cyan('\nNext steps:'));
342
- console.log(` 1. ${chalk.bold('rev review ' + outputPath)} - Accept/reject track changes`);
343
- console.log(` 2. Work with Claude to address comments`);
344
- console.log(` 3. ${chalk.bold('rev build docx')} - Rebuild Word doc`);
345
-
346
- } catch (err) {
347
- const error = err as Error;
348
- console.error(chalk.red(`Error: ${error.message}`));
349
- if (process.env.DEBUG) console.error(error.stack);
350
- process.exit(1);
351
- }
352
- });
353
-
354
- // ==========================================================================
355
- // EXTRACT command - Just extract text from Word
356
- // ==========================================================================
357
-
358
- program
359
- .command('extract')
360
- .description('Extract plain text from Word document (no diff)')
361
- .argument('<docx>', 'Word document')
362
- .option('-o, --output <file>', 'Output file (default: stdout)')
363
- .action(async (docx: string, options: ExtractOptions) => {
364
- if (!fs.existsSync(docx)) {
365
- console.error(chalk.red(`Error: File not found: ${docx}`));
366
- process.exit(1);
367
- }
368
-
369
- try {
370
- const { extractTextFromWord } = await import('../word.js');
371
- const text = await extractTextFromWord(docx);
372
-
373
- if (options.output) {
374
- fs.writeFileSync(options.output, text, 'utf-8');
375
- console.error(chalk.green(`Extracted to ${options.output}`));
376
- } else {
377
- process.stdout.write(text);
378
- }
379
- } catch (err) {
380
- const error = err as Error;
381
- console.error(chalk.red(`Error: ${error.message}`));
382
- process.exit(1);
383
- }
384
- });
385
-
386
- // ==========================================================================
387
- // SPLIT command - Split annotated paper.md back to section files
388
- // ==========================================================================
389
-
390
- program
391
- .command('split')
392
- .description('Split annotated paper.md back to section files')
393
- .argument('<file>', 'Annotated paper.md file')
394
- .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
395
- .option('-d, --dir <directory>', 'Output directory for section files', '.')
396
- .option('--dry-run', 'Preview without writing files')
397
- .action((file: string, options: SplitOptions) => {
398
- if (!fs.existsSync(file)) {
399
- console.error(chalk.red(`File not found: ${file}`));
400
- process.exit(1);
401
- }
402
-
403
- const configPath = path.resolve(options.dir, options.config);
404
- if (!fs.existsSync(configPath)) {
405
- console.error(chalk.red(`Config not found: ${configPath}`));
406
- console.error(chalk.dim('Run "rev init" first to generate sections.yaml'));
407
- process.exit(1);
408
- }
409
-
410
- console.log(chalk.cyan(`Splitting ${file} using ${options.config}...`));
411
-
412
- const config = loadConfig(configPath);
413
- const paperContent = fs.readFileSync(file, 'utf-8');
414
- const sections = splitAnnotatedPaper(paperContent, config.sections);
415
-
416
- if (sections.size === 0) {
417
- console.error(chalk.yellow('No sections detected.'));
418
- console.error(chalk.dim('Check that headers match sections.yaml'));
419
- process.exit(1);
420
- }
421
-
422
- console.log(chalk.green(`\nFound ${sections.size} sections:\n`));
423
-
424
- for (const [sectionFile, content] of sections) {
425
- const outputPath = path.join(options.dir, sectionFile);
426
- const lines = content.split('\n').length;
427
- const annotations = countAnnotations(content);
428
-
429
- console.log(` ${chalk.bold(sectionFile)} (${lines} lines)`);
430
- if (annotations.total > 0) {
431
- const parts: string[] = [];
432
- if (annotations.inserts > 0) parts.push(chalk.green(`+${annotations.inserts}`));
433
- if (annotations.deletes > 0) parts.push(chalk.red(`-${annotations.deletes}`));
434
- if (annotations.substitutes > 0) parts.push(chalk.yellow(`~${annotations.substitutes}`));
435
- if (annotations.comments > 0) parts.push(chalk.blue(`#${annotations.comments}`));
436
- console.log(chalk.dim(` Annotations: ${parts.join(' ')}`));
437
- }
438
-
439
- if (!options.dryRun) {
440
- fs.writeFileSync(outputPath, content, 'utf-8');
441
- }
442
- }
443
-
444
- if (options.dryRun) {
445
- console.log(chalk.yellow('\n(Dry run - no files written)'));
446
- } else {
447
- console.log(chalk.green('\nSection files updated.'));
448
- console.log(chalk.cyan('\nNext: rev review <section.md> for each section'));
449
- }
450
- });
451
- }
1
+ /**
2
+ * Section commands: import, extract, split
3
+ *
4
+ * Commands for importing Word documents and splitting section files.
5
+ * Sync and merge commands are in sync.ts and merge-resolve.ts respectively.
6
+ */
7
+
8
+ import {
9
+ chalk,
10
+ fs,
11
+ path,
12
+ countAnnotations,
13
+ resolveSectionsConfig,
14
+ splitAnnotatedPaper,
15
+ } from './context.js';
16
+ import type { Command } from 'commander';
17
+
18
/** One manuscript section recognised while scanning text extracted from Word. */
interface DetectedSection {
  /** Heading text of the section. */
  header: string;
  /** Body lines that followed the heading, joined with '\n'. */
  content: string;
  /** Markdown filename the section is written to. */
  file: string;
}

/**
 * Annotation counts for an import.
 * NOTE(review): not referenced in the visible part of this file; the field
 * names match the `stats` object printed by the import command.
 */
interface ImportStats {
  insertions: number;
  deletions: number;
  substitutions: number;
  comments: number;
  total: number;
}

/** Options consumed by `bootstrapFromWord`. */
interface BootstrapOptions {
  /** Directory the project files are written into (resolved with `path.resolve`). */
  output: string;
  /** When true, report what would be written without writing it. */
  dryRun?: boolean;
}

/** Parsed CLI options of the `import` command. */
interface ImportOptions {
  /** Value of `-o, --output`. */
  output?: string;
  /** Value of `-a, --author`; forwarded to `importFromWord` in diff mode. */
  author?: string;
  /** Set by `--dry-run`. */
  dryRun?: boolean;
}

/** Parsed CLI options of the `extract` command. */
interface ExtractOptions {
  /** Value of `-o, --output`. */
  output?: string;
}

/** Parsed CLI options of the `split` command. */
interface SplitOptions {
  /** Value of `-c, --config`. */
  config: string;
  /** Value of `-d, --dir`. */
  dir: string;
  /** Set by `--dry-run`. */
  dryRun?: boolean;
}
52
+
53
/**
 * Detect sections in plain text extracted from a Word document.
 *
 * A non-blank line is treated as a section heading when its trimmed text
 * either equals one of the common academic headings listed below, or is a
 * numbered heading such as "2. Methods" (the number prefix is dropped from
 * the stored header). Everything before the first heading becomes a
 * "Preamble" section; when no heading is found at all, the whole text is
 * returned as a single "Content" section.
 *
 * @param text - Plain document text; LF and CRLF line endings are accepted.
 * @returns Sections in document order. Every `file` is unique within the
 *   result, so writing them to one directory never overwrites a sibling.
 */
function detectSectionsFromWord(text: string): DetectedSection[] {
  // Split on LF or CRLF so content lines do not keep a stray '\r'.
  const lines = text.split(/\r?\n/);
  const sections: DetectedSection[] = [];

  const headerPatterns = [
    /^(Abstract|Summary)$/i,
    /^(Introduction|Background)$/i,
    /^(Methods?|Materials?\s*(and|&)\s*Methods?|Methodology|Experimental\s*Methods?)$/i,
    /^(Results?)$/i,
    /^(Results?\s*(and|&)\s*Discussion)$/i,
    /^(Discussion)$/i,
    /^(Conclusions?|Summary\s*(and|&)?\s*Conclusions?)$/i,
    /^(Acknowledgements?|Acknowledgments?)$/i,
    /^(References|Bibliography|Literature\s*Cited|Works\s*Cited)$/i,
    /^(Appendix|Appendices|Supplementary\s*(Materials?|Information)?|Supporting\s*Information)$/i,
    /^(Literature\s*Review|Related\s*Work|Previous\s*Work)$/i,
    /^(Study\s*Area|Study\s*Site|Site\s*Description)$/i,
    /^(Data\s*Analysis|Statistical\s*Analysis|Data\s*Collection)$/i,
    /^(Theoretical\s*Framework|Conceptual\s*Framework)$/i,
    /^(Case\s*Study|Case\s*Studies)$/i,
    /^(Limitations?)$/i,
    /^(Future\s*Work|Future\s*Directions?)$/i,
    /^(Funding|Author\s*Contributions?|Conflict\s*of\s*Interest|Data\s*Availability)$/i,
  ];

  const numberedHeaderPattern = /^(\d+\.?\s+)(Abstract|Introduction|Background|Methods?|Materials|Results?|Discussion|Conclusions?|References|Acknowledgements?|Appendix)/i;
  const numberPrefix = /^\d+\.?\s+/;

  // The numbered pattern is a prefix match, so on its own it also accepts body
  // sentences and list items ("1. Results were inconclusive."). Headings are
  // short and do not end in sentence punctuation; require both.
  const maxNumberedHeaderLength = 80;
  const sentenceEnding = /[.!?,;]$/;
  const isNumberedHeader = (candidate: string): boolean =>
    candidate.length <= maxNumberedHeaderLength &&
    !sentenceEnding.test(candidate) &&
    numberedHeaderPattern.test(candidate);

  // Two headings can map to the same filename (e.g. "Results" appearing twice).
  // Later duplicates get a numeric suffix instead of silently sharing a file.
  const usedFiles = new Set<string>();
  const claimFile = (file: string): string => {
    let candidate = file;
    if (usedFiles.has(candidate)) {
      const stem = file.replace(/\.md$/, '');
      let suffix = 2;
      while (usedFiles.has(`${stem}-${suffix}.md`)) suffix++;
      candidate = `${stem}-${suffix}.md`;
    }
    usedFiles.add(candidate);
    return candidate;
  };

  const pushSection = (header: string, body: string[], file: string): void => {
    sections.push({ header, content: body.join('\n'), file: claimFile(file) });
  };

  let currentSection: string | null = null;
  let currentContent: string[] = [];
  const preambleContent: string[] = [];

  for (const line of lines) {
    const trimmed = line.trim();

    let headerText: string | null = null;
    if (trimmed) {
      if (headerPatterns.some((pattern) => pattern.test(trimmed))) {
        headerText = trimmed;
      } else if (isNumberedHeader(trimmed)) {
        headerText = trimmed.replace(numberPrefix, '');
      }
    }

    if (headerText === null) {
      // Blank and ordinary lines belong to the open section, or to the
      // preamble while no heading has been seen yet.
      (currentSection !== null ? currentContent : preambleContent).push(line);
      continue;
    }

    if (currentSection !== null) {
      pushSection(currentSection, currentContent, headerToFilename(currentSection));
    } else if (preambleContent.some((l) => l.trim())) {
      pushSection('Preamble', preambleContent, 'preamble.md');
    }
    currentSection = headerText;
    currentContent = [];
  }

  if (currentSection !== null) {
    pushSection(currentSection, currentContent, headerToFilename(currentSection));
  }

  // No heading anywhere: return the whole text as one section (or nothing
  // when the text is blank).
  if (sections.length === 0 && preambleContent.some((l) => l.trim())) {
    pushSection('Content', preambleContent, 'content.md');
  }

  return sections;
}

/**
 * Convert a section header to a Markdown filename.
 *
 * The header is lower-cased, every run of characters outside `a-z0-9`
 * becomes a single '-', and the slug is limited to 30 characters.
 *
 * @param header - Section heading text.
 * @returns `<slug>.md`; the slug never starts or ends with '-', and falls
 *   back to "section" when the header has no ASCII letters or digits.
 */
function headerToFilename(header: string): string {
  const slug = header
    .toLowerCase()
    .replace(/[^a-z0-9]+/g, '-')
    .replace(/^-|-$/g, '')
    .slice(0, 30)
    // Truncation can cut right after a separator; drop the dangling '-'.
    .replace(/-$/, '');

  // An empty slug would produce the hidden file ".md".
  return `${slug || 'section'}.md`;
}
172
+
173
/**
 * Bootstrap a new project from a Word document.
 *
 * Extracts the document text, splits it into sections with
 * `detectSectionsFromWord`, then writes one Markdown file per section, a
 * `rev.yaml` project configuration and a `figures/` directory into
 * `options.output`. With `options.dryRun` the same report is printed but
 * nothing is created or written.
 *
 * Any failure is printed to stderr (with the stack when `DEBUG` is set) and
 * the process exits with status 1.
 *
 * @param docx - Path to the Word document.
 * @param options - Output directory and dry-run flag.
 */
async function bootstrapFromWord(docx: string, options: BootstrapOptions): Promise<void> {
  const outputDir = path.resolve(options.output);
  const dryRun = Boolean(options.dryRun);

  console.log(chalk.cyan(`Bootstrapping project from ${path.basename(docx)}...\n`));

  try {
    const { extractTextFromWord } = await import('../word.js');
    const { default: YAML } = await import('yaml');

    const text = await extractTextFromWord(docx);

    const sections = detectSectionsFromWord(text);

    if (sections.length === 0) {
      console.error(chalk.yellow('No sections detected. Creating single content.md file.'));
      sections.push({ header: 'Content', content: text, file: 'content.md' });
    }

    console.log(chalk.green(`Detected ${sections.length} section(s):\n`));

    // A dry run promises "no files written", so it must not create the
    // output directory either.
    if (!dryRun && !fs.existsSync(outputDir)) {
      fs.mkdirSync(outputDir, { recursive: true });
    }

    const sectionFiles: string[] = [];
    for (const section of sections) {
      const filePath = path.join(outputDir, section.file);
      const content = `# ${section.header}\n\n${section.content.trim()}\n`;

      console.log(`  ${chalk.bold(section.file)} - "${section.header}" (${section.content.split('\n').length} lines)`);

      if (!dryRun) {
        fs.writeFileSync(filePath, content, 'utf-8');
      }
      sectionFiles.push(section.file);
    }

    // Strip the extension case-insensitively so "Paper.DOCX" yields "Paper".
    const docxName = path.basename(docx).replace(/\.docx$/i, '');
    const title = docxName.replace(/[-_]/g, ' ').replace(/\b\w/g, (c) => c.toUpperCase());

    const config = {
      title: title,
      authors: [],
      sections: sectionFiles,
      bibliography: null,
      crossref: {
        figureTitle: 'Figure',
        tableTitle: 'Table',
        figPrefix: ['Fig.', 'Figs.'],
        tblPrefix: ['Table', 'Tables'],
      },
      pdf: {
        documentclass: 'article',
        fontsize: '12pt',
        geometry: 'margin=1in',
        linestretch: 1.5,
      },
      docx: {
        keepComments: true,
      },
    };

    const configPath = path.join(outputDir, 'rev.yaml');
    console.log(`\n  ${chalk.bold('rev.yaml')} - project configuration`);

    if (!dryRun) {
      fs.writeFileSync(configPath, YAML.stringify(config), 'utf-8');
    }

    const figuresDir = path.join(outputDir, 'figures');
    if (!dryRun && !fs.existsSync(figuresDir)) {
      fs.mkdirSync(figuresDir, { recursive: true });
      console.log(`  ${chalk.dim('figures/')} - image directory`);
    }

    if (dryRun) {
      console.log(chalk.yellow('\n(Dry run - no files written)'));
    } else {
      console.log(chalk.green('\nProject created!'));
      console.log(chalk.cyan('\nNext steps:'));
      if (outputDir !== process.cwd()) {
        console.log(chalk.dim(`  cd ${path.relative(process.cwd(), outputDir) || '.'}`));
      }
      console.log(chalk.dim('  # Edit rev.yaml to add authors and adjust settings'));
      console.log(chalk.dim('  # Review and clean up section files'));
      console.log(chalk.dim('  rev build          # Build PDF and DOCX'));
    }
  } catch (err) {
    // Anything can be thrown; normalise before reading .message/.stack.
    const error = err instanceof Error ? err : new Error(String(err));
    console.error(chalk.red(`Error: ${error.message}`));
    if (process.env.DEBUG) console.error(error.stack);
    process.exit(1);
  }
}
270
+
271
+ /**
272
+ * Register section commands with the program
273
+ */
274
+ export function register(program: Command): void {
275
+ // ==========================================================================
276
+ // IMPORT command - Import from Word (bootstrap or diff mode)
277
+ // ==========================================================================
278
+
279
program
  .command('import')
  .description('Import from Word: creates sections from scratch, or diffs against existing MD')
  .argument('<docx>', 'Word document')
  .argument('[original]', 'Optional: original Markdown file to compare against')
  // Deliberately no commander-level default for --output. A default of '.'
  // would also apply in diff mode, where the value is a file path: the
  // annotated text would be written to '.' (EISDIR) instead of the original.
  .option('-o, --output <dir>', 'Output directory for bootstrap mode (default: "."); output file in diff mode (default: overwrite original)')
  .option('-a, --author <name>', 'Author name for changes (diff mode)', 'Reviewer')
  .option('--dry-run', 'Preview without saving')
  .action(async (docx: string, original: string | undefined, options: ImportOptions) => {
    if (!fs.existsSync(docx)) {
      console.error(chalk.red(`Error: Word file not found: ${docx}`));
      process.exit(1);
    }

    // Bootstrap mode: no original Markdown given, build a project from scratch.
    if (!original) {
      await bootstrapFromWord(docx, {
        output: options.output || '.',
        dryRun: options.dryRun,
      });
      return;
    }

    // Diff mode: compare the Word document against the original Markdown.
    if (!fs.existsSync(original)) {
      console.error(chalk.red(`Error: Original MD not found: ${original}`));
      process.exit(1);
    }

    console.log(chalk.cyan(`Comparing ${path.basename(docx)} against ${path.basename(original)}...`));

    // Warn if pandoc is missing
    const { hasPandoc: hasPandocImport, getInstallInstructions: getInstallImport } = await import('../dependencies.js');
    if (!hasPandocImport()) {
      console.log(chalk.yellow(`\n  Warning: Pandoc not installed. Track changes extracted from XML (formatting may differ).`));
      console.log(chalk.dim(`  Install for best results: ${getInstallImport('pandoc')}\n`));
    }

    try {
      const { importFromWord } = await import('../import.js');
      const { annotated, stats } = await importFromWord(docx, original, {
        author: options.author,
      });

      console.log(chalk.cyan('\nChanges detected:'));
      if (stats.insertions > 0) console.log(chalk.green(`  + Insertions:    ${stats.insertions}`));
      if (stats.deletions > 0) console.log(chalk.red(`  - Deletions:     ${stats.deletions}`));
      if (stats.substitutions > 0) console.log(chalk.yellow(`  ~ Substitutions: ${stats.substitutions}`));
      if (stats.comments > 0) console.log(chalk.blue(`  # Comments:      ${stats.comments}`));

      if (stats.total === 0) {
        console.log(chalk.green('\nNo changes detected.'));
        return;
      }

      console.log(chalk.dim(`\n  Total: ${stats.total}`));

      if (options.dryRun) {
        console.log(chalk.cyan('\n--- Preview (first 1000 chars) ---\n'));
        console.log(annotated.slice(0, 1000));
        if (annotated.length > 1000) console.log(chalk.dim('\n... (truncated)'));
        return;
      }

      // Without -o the annotated text replaces the original Markdown file.
      const outputPath = options.output || original;
      fs.writeFileSync(outputPath, annotated, 'utf-8');
      console.log(chalk.green(`\nSaved annotated version to ${outputPath}`));
      console.log(chalk.cyan('\nNext steps:'));
      console.log(`  1. ${chalk.bold('rev review ' + outputPath)}  - Accept/reject track changes`);
      console.log(`  2. Work with Claude to address comments`);
      console.log(`  3. ${chalk.bold('rev build docx')}  - Rebuild Word doc`);

    } catch (err) {
      // Anything can be thrown; normalise before reading .message/.stack.
      const error = err instanceof Error ? err : new Error(String(err));
      console.error(chalk.red(`Error: ${error.message}`));
      if (process.env.DEBUG) console.error(error.stack);
      process.exit(1);
    }
  });
353
+
354
+ // ==========================================================================
355
+ // EXTRACT command - Just extract text from Word
356
+ // ==========================================================================
357
+
358
+ program
359
+ .command('extract')
360
+ .description('Extract plain text from Word document (no diff)')
361
+ .argument('<docx>', 'Word document')
362
+ .option('-o, --output <file>', 'Output file (default: stdout)')
363
+ .action(async (docx: string, options: ExtractOptions) => {
364
+ if (!fs.existsSync(docx)) {
365
+ console.error(chalk.red(`Error: File not found: ${docx}`));
366
+ process.exit(1);
367
+ }
368
+
369
+ try {
370
+ const { extractTextFromWord } = await import('../word.js');
371
+ const text = await extractTextFromWord(docx);
372
+
373
+ if (options.output) {
374
+ fs.writeFileSync(options.output, text, 'utf-8');
375
+ console.error(chalk.green(`Extracted to ${options.output}`));
376
+ } else {
377
+ process.stdout.write(text);
378
+ }
379
+ } catch (err) {
380
+ const error = err as Error;
381
+ console.error(chalk.red(`Error: ${error.message}`));
382
+ process.exit(1);
383
+ }
384
+ });
385
+
386
+ // ==========================================================================
387
+ // SPLIT command - Split annotated paper.md back to section files
388
+ // ==========================================================================
389
+
390
+ program
391
+ .command('split')
392
+ .description('Split annotated paper.md back to section files')
393
+ .argument('<file>', 'Annotated paper.md file')
394
+ .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
395
+ .option('-d, --dir <directory>', 'Output directory for section files', '.')
396
+ .option('--dry-run', 'Preview without writing files')
397
+ .action((file: string, options: SplitOptions) => {
398
+ if (!fs.existsSync(file)) {
399
+ console.error(chalk.red(`File not found: ${file}`));
400
+ process.exit(1);
401
+ }
402
+
403
+ const resolved = resolveSectionsConfig(options.dir, options.config);
404
+ if (!resolved) {
405
+ console.error(chalk.red(`No section config found in ${path.resolve(options.dir)}`));
406
+ console.error(chalk.dim('Add a `sections:` list to rev.yaml, or run "rev init" to generate sections.yaml.'));
407
+ process.exit(1);
408
+ }
409
+
410
+ console.log(chalk.cyan(`Splitting ${file}...`));
411
+
412
+ const config = resolved.config;
413
+ const paperContent = fs.readFileSync(file, 'utf-8');
414
+ const sections = splitAnnotatedPaper(paperContent, config.sections);
415
+
416
+ if (sections.size === 0) {
417
+ console.error(chalk.yellow('No sections detected.'));
418
+ console.error(chalk.dim('Check that headers match sections.yaml'));
419
+ process.exit(1);
420
+ }
421
+
422
+ console.log(chalk.green(`\nFound ${sections.size} sections:\n`));
423
+
424
+ for (const [sectionFile, content] of sections) {
425
+ const outputPath = path.join(options.dir, sectionFile);
426
+ const lines = content.split('\n').length;
427
+ const annotations = countAnnotations(content);
428
+
429
+ console.log(` ${chalk.bold(sectionFile)} (${lines} lines)`);
430
+ if (annotations.total > 0) {
431
+ const parts: string[] = [];
432
+ if (annotations.inserts > 0) parts.push(chalk.green(`+${annotations.inserts}`));
433
+ if (annotations.deletes > 0) parts.push(chalk.red(`-${annotations.deletes}`));
434
+ if (annotations.substitutes > 0) parts.push(chalk.yellow(`~${annotations.substitutes}`));
435
+ if (annotations.comments > 0) parts.push(chalk.blue(`#${annotations.comments}`));
436
+ console.log(chalk.dim(` Annotations: ${parts.join(' ')}`));
437
+ }
438
+
439
+ if (!options.dryRun) {
440
+ fs.writeFileSync(outputPath, content, 'utf-8');
441
+ }
442
+ }
443
+
444
+ if (options.dryRun) {
445
+ console.log(chalk.yellow('\n(Dry run - no files written)'));
446
+ } else {
447
+ console.log(chalk.green('\nSection files updated.'));
448
+ console.log(chalk.cyan('\nNext: rev review <section.md> for each section'));
449
+ }
450
+ });
451
+ }