docrev 0.6.7 → 0.7.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,497 @@
1
+ /**
2
+ * Citation commands: citations, figures, equations, pdf-comments
3
+ *
4
+ * Commands for validating citations, managing figures/tables, and equation handling.
5
+ */
6
+
7
+ import {
8
+ chalk,
9
+ fs,
10
+ path,
11
+ fmt,
12
+ } from './context.js';
13
+
14
/** Markdown files in the working directory that are never treated as manuscript sections. */
const NON_SECTION_FILES = ['README.md', 'CLAUDE.md'];

/** Maximum number of unused bibliography keys listed by the `citations` command. */
const UNUSED_KEY_LIMIT = 10;

/**
 * Pick the markdown files a command should operate on.
 *
 * @param {string[]|undefined} files - Files given on the command line.
 * @returns {string[]} `files` when it is non-empty; otherwise every `.md`
 *   file in the current directory except those in NON_SECTION_FILES.
 */
function resolveSectionFiles(files) {
  if (files && files.length > 0) return files;
  return fs.readdirSync('.').filter(
    (name) => name.endsWith('.md') && !NON_SECTION_FILES.includes(name)
  );
}

/**
 * Build the (uncoloured) lines that list unused bibliography keys.
 *
 * At most `limit` keys are listed; when there are more, a final
 * "... and N more" line reports how many were left out.
 *
 * @param {string[]} keys - Unused bibliography keys.
 * @param {number} [limit] - Maximum number of keys to list.
 * @returns {string[]} Lines ready to be printed.
 */
function describeUnusedKeys(keys, limit = UNUSED_KEY_LIMIT) {
  const lines = keys.slice(0, limit).map((key) => `  - ${key}`);
  if (keys.length > limit) {
    lines.push(`  ... and ${keys.length - limit} more`);
  }
  return lines;
}

/**
 * Count `@fig:label` and `@tbl:label` references found in `text`.
 *
 * @param {string} text - Markdown source to scan.
 * @param {Map<string, number>} counts - Running totals keyed by `fig:label`
 *   or `tbl:label`; updated in place.
 * @returns {Map<string, number>} The same `counts` map.
 */
function tallyCrossRefs(text, counts) {
  for (const [, kind, label] of text.matchAll(/@(fig|tbl):([a-zA-Z0-9_-]+)/g)) {
    const key = `${kind}:${label}`;
    counts.set(key, (counts.get(key) || 0) + 1);
  }
  return counts;
}

/**
 * Build the table rows for one half of the figure/table inventory.
 *
 * @param {string} kind - Reference prefix, `fig` or `tbl`.
 * @param {string} heading - Human-readable noun, `Figure` or `Table`.
 * @param {Map<string, {num: number, isSupp: boolean, file: string}>} entries
 *   - Registry entries keyed by label.
 * @param {Map<string, number>} refCounts - Reference totals from tallyCrossRefs.
 * @returns {string[][]} One `[number, label, file, refs]` row per entry.
 */
function inventoryRows(kind, heading, entries, refCounts) {
  return [...entries.entries()].map(([label, info]) => {
    const refs = refCounts.get(`${kind}:${label}`) || 0;
    const num = info.isSupp ? `S${info.num}` : info.num.toString();
    return [
      `${heading} ${num}`,
      chalk.cyan(`@${kind}:${label}`),
      info.file,
      refs > 0 ? chalk.green(refs.toString()) : chalk.yellow('0'),
    ];
  });
}

/**
 * Derive an output path from a markdown input path.
 *
 * Only a trailing `.md` extension is replaced. When the input has no such
 * extension the replacement is appended instead, so the result can never be
 * the input path itself (which would make the command overwrite its source).
 *
 * @param {string} input - Input file path.
 * @param {string} replacement - Text that takes the place of `.md`,
 *   e.g. `.docx` or `-equations.md`.
 * @returns {string} The derived output path.
 */
function swapMarkdownExtension(input, replacement) {
  const mdExtension = /\.md$/i;
  return mdExtension.test(input)
    ? input.replace(mdExtension, replacement)
    : `${input}${replacement}`;
}

/**
 * Recover the reviewer's note from a comment that carries highlighted text.
 *
 * With `--with-text` the comment text is stored as `"<highlight>" → <note>`,
 * or as just `"<highlight>"` when the reviewer wrote nothing.
 *
 * @param {{text?: string, highlightedText: string}} comment
 * @returns {string} The note without the quoted highlight; empty when the
 *   comment consists of the highlight only.
 */
function reviewerNote(comment) {
  const text = comment.text ?? '';
  const quoted = `"${comment.highlightedText}"`;
  const prefix = `${quoted} → `;
  if (text.startsWith(prefix)) return text.slice(prefix.length);
  if (text === quoted || text === comment.highlightedText) return '';
  return text;
}

/**
 * Register the citation-related commands (`citations`, `figures`,
 * `equations`, `pdf-comments`) with the program.
 *
 * Each action loads its implementation module with a dynamic `import()`.
 * Failures are reported through `fmt.status` and end the process with
 * exit code 1.
 *
 * @param {import('commander').Command} program
 */
export function register(program) {
  // ==========================================================================
  // CITATIONS command - Validate citations against .bib file
  // ==========================================================================

  program
    .command('citations')
    .alias('cite')
    .description('Validate citations against bibliography')
    .argument('[files...]', 'Markdown files to check (default: all section files)')
    .option('-b, --bib <file>', 'Bibliography file', 'references.bib')
    .action(async (files, options) => {
      const { getCitationStats } = await import('../citations.js');

      const mdFiles = resolveSectionFiles(files);

      if (!fs.existsSync(options.bib)) {
        console.error(fmt.status('error', `Bibliography not found: ${options.bib}`));
        process.exit(1);
      }

      const stats = getCitationStats(mdFiles, options.bib);

      console.log(fmt.header('Citation Check'));
      console.log();

      // Summary table
      const hasMissing = stats.missing > 0;
      const rows = [
        ['Total citations', stats.totalCitations.toString()],
        ['Unique keys cited', stats.uniqueCited.toString()],
        ['Bib entries', stats.bibEntries.toString()],
        [chalk.green('Valid'), chalk.green(stats.valid.toString())],
        [
          hasMissing ? chalk.red('Missing') : 'Missing',
          hasMissing ? chalk.red(stats.missing.toString()) : '0',
        ],
        [chalk.dim('Unused in bib'), chalk.dim(stats.unused.toString())],
      ];
      console.log(fmt.table(['Metric', 'Count'], rows));

      // Show missing keys
      if (stats.missingKeys.length > 0) {
        console.log();
        console.log(fmt.status('error', 'Missing citations:'));
        for (const key of stats.missingKeys) {
          console.log(chalk.red(`  - ${key}`));
        }
      }

      // Show unused keys: the first few, then a count of the remainder.
      if (stats.unusedKeys.length > 0) {
        console.log();
        console.log(chalk.dim('Unused bib entries:'));
        for (const line of describeUnusedKeys(stats.unusedKeys)) {
          console.log(chalk.dim(line));
        }
      }

      console.log();
      if (stats.missing === 0) {
        console.log(fmt.status('success', 'All citations valid'));
      } else {
        console.log(fmt.status('warning', `${stats.missing} citation(s) missing from ${options.bib}`));
        process.exit(1);
      }
    });

  // ==========================================================================
  // FIGURES command - Figure/table inventory
  // ==========================================================================

  program
    .command('figures')
    .alias('figs')
    .description('List all figures and tables with reference counts')
    .argument('[files...]', 'Markdown files to scan')
    .action(async (files) => {
      const { buildRegistry } = await import('../crossref.js');

      const mdFiles = resolveSectionFiles(files);

      // The registry is always built from the current directory; `files`
      // only restricts where references are counted.
      const registry = buildRegistry('.');

      // Count references in files
      const refCounts = new Map();
      for (const file of mdFiles) {
        if (!fs.existsSync(file)) continue;
        tallyCrossRefs(fs.readFileSync(file, 'utf-8'), refCounts);
      }

      console.log(fmt.header('Figure & Table Inventory'));
      console.log();

      const tableHead = ['#', 'Label', 'File', 'Refs'];

      // Figures
      if (registry.figures.size > 0) {
        console.log(fmt.table(tableHead, inventoryRows('fig', 'Figure', registry.figures, refCounts)));
        console.log();
      }

      // Tables
      if (registry.tables.size > 0) {
        console.log(fmt.table(tableHead, inventoryRows('tbl', 'Table', registry.tables, refCounts)));
        console.log();
      }

      if (registry.figures.size === 0 && registry.tables.size === 0) {
        console.log(chalk.dim('No figures or tables found.'));
        console.log(chalk.dim('Add anchors like {#fig:label} to your figures.'));
      }

      // Warn about unreferenced
      const unreferenced = [];
      for (const [label] of registry.figures) {
        if (!refCounts.get(`fig:${label}`)) unreferenced.push(`@fig:${label}`);
      }
      for (const [label] of registry.tables) {
        if (!refCounts.get(`tbl:${label}`)) unreferenced.push(`@tbl:${label}`);
      }

      if (unreferenced.length > 0) {
        console.log(fmt.status('warning', `${unreferenced.length} unreferenced figure(s)/table(s)`));
      }
    });

  // ==========================================================================
  // EQUATIONS command - Extract and convert equations
  // ==========================================================================

  program
    .command('equations')
    .alias('eq')
    .description('Extract equations or convert to Word')
    .argument('<action>', 'Action: list, extract, convert, from-word')
    .argument('[input]', 'Input file (.md for extract/convert, .docx for from-word)')
    .option('-o, --output <file>', 'Output file')
    .action(async (action, input, options) => {
      const { getEquationStats, createEquationsDoc, extractEquationsFromWord } = await import('../equations.js');

      if (action === 'from-word') {
        // Extract equations from Word document
        if (!input) {
          console.error(fmt.status('error', 'Word document required'));
          process.exit(1);
        }

        // Case-insensitive, matching the `.pdf` check in pdf-comments.
        if (!input.toLowerCase().endsWith('.docx')) {
          console.error(fmt.status('error', 'Input must be a .docx file'));
          process.exit(1);
        }

        const spin = fmt.spinner(`Extracting equations from ${path.basename(input)}...`).start();

        const result = await extractEquationsFromWord(input);

        if (!result.success) {
          spin.error(result.error);
          process.exit(1);
        }

        spin.stop();
        console.log(fmt.header('Equations from Word'));
        console.log();

        if (result.equations.length === 0) {
          console.log(chalk.dim('No equations found in document.'));
          return;
        }

        const display = result.equations.filter(e => e.type === 'display');
        const inline = result.equations.filter(e => e.type === 'inline');

        console.log(chalk.dim(`Found ${result.equations.length} equations (${display.length} display, ${inline.length} inline)`));
        console.log();

        // Show equations
        result.equations.forEach((eq, i) => {
          const typeLabel = eq.type === 'display' ? chalk.cyan('[display]') : chalk.yellow('[inline]');

          if (eq.latex) {
            // Long LaTeX is truncated to 80 characters for the listing only.
            const preview = eq.latex.length > 80 ? eq.latex.substring(0, 77) + '...' : eq.latex;
            console.log(`${chalk.bold(i + 1)}. ${typeLabel}`);
            console.log(chalk.dim('   LaTeX:'), preview);
          } else {
            console.log(`${chalk.bold(i + 1)}. ${typeLabel} ${chalk.red('[conversion failed]')}`);
          }
        });

        // Optionally save to file
        if (options.output) {
          const converted = result.equations.filter(e => e.latex);
          const latex = converted
            .map((e, i) => {
              const fence = e.type === 'display' ? '$$' : '$';
              return `%% Equation ${i + 1} (${e.type})\n${fence}${e.latex}${fence}`;
            })
            .join('\n\n');

          fs.writeFileSync(options.output, latex, 'utf-8');
          console.log();
          console.log(fmt.status('success', `Saved ${converted.length} equations to ${options.output}`));
        }

      } else if (action === 'list') {
        // List equations in all section files
        const stats = getEquationStats(resolveSectionFiles());

        console.log(fmt.header('Equations'));
        console.log();

        if (stats.byFile.length === 0) {
          console.log(chalk.dim('No equations found.'));
          return;
        }

        const rows = stats.byFile.map(f => [
          f.file,
          f.display > 0 ? chalk.cyan(f.display.toString()) : chalk.dim('-'),
          f.inline > 0 ? chalk.yellow(f.inline.toString()) : chalk.dim('-'),
        ]);
        rows.push([
          chalk.bold('Total'),
          chalk.bold.cyan(stats.display.toString()),
          chalk.bold.yellow(stats.inline.toString()),
        ]);

        console.log(fmt.table(['File', 'Display', 'Inline'], rows));

      } else if (action === 'extract') {
        if (!input) {
          console.error(fmt.status('error', 'Input file required'));
          process.exit(1);
        }

        const output = options.output || swapMarkdownExtension(input, '-equations.md');
        const result = await createEquationsDoc(input, output);

        if (result.success) {
          console.log(fmt.status('success', result.message));
          console.log(chalk.dim(`  ${result.stats.display} display, ${result.stats.inline} inline equations`));
        } else {
          console.error(fmt.status('error', result.message));
          process.exit(1);
        }

      } else if (action === 'convert') {
        if (!input) {
          console.error(fmt.status('error', 'Input file required'));
          process.exit(1);
        }

        const output = options.output || swapMarkdownExtension(input, '.docx');

        const spin = fmt.spinner(`Converting ${path.basename(input)} to Word...`).start();

        try {
          const { execFile } = await import('child_process');
          const { promisify } = await import('util');
          const execFileAsync = promisify(execFile);

          // Arguments are passed as an array, without a shell, so quotes or
          // `$()` in a file name cannot be interpreted as shell syntax.
          await execFileAsync('pandoc', [input, '-o', output, '--mathml']);
          spin.success(`Created ${output}`);
        } catch (err) {
          spin.error(err.message);
          process.exit(1);
        }
      } else {
        console.error(fmt.status('error', `Unknown action: ${action}`));
        console.log(chalk.dim('Actions: list, extract, convert, from-word'));
        process.exit(1);
      }
    });

  // ==========================================================================
  // PDF-COMMENTS command - Extract comments from PDF
  // ==========================================================================

  program
    .command('pdf-comments')
    .alias('pdf')
    .description('Extract and manage comments from annotated PDFs')
    .argument('<pdf>', 'PDF file with annotations')
    .option('-a, --append <file>', 'Append comments to markdown file')
    .option('--json', 'Output as JSON')
    .option('--by-page', 'Group comments by page')
    .option('--by-author', 'Group comments by author')
    .option('--with-text', 'Extract highlighted text (slower but shows what was highlighted)')
    .action(async (pdf, options) => {
      if (!fs.existsSync(pdf)) {
        console.error(fmt.status('error', `File not found: ${pdf}`));
        process.exit(1);
      }

      if (!pdf.toLowerCase().endsWith('.pdf')) {
        console.error(fmt.status('error', 'File must be a PDF'));
        process.exit(1);
      }

      const {
        extractPdfComments,
        extractPdfAnnotationsWithText,
        formatPdfComments,
        getPdfCommentStats,
        insertPdfCommentsIntoMarkdown,
      } = await import('../pdf-import.js');

      const spin = fmt.spinner(`Extracting comments from ${path.basename(pdf)}...`).start();

      try {
        let comments;

        if (options.withText) {
          // Use the new text extraction feature
          const annotations = await extractPdfAnnotationsWithText(pdf);
          spin.stop();

          if (annotations.length === 0) {
            console.log(fmt.status('info', 'No annotations found in PDF.'));
            return;
          }

          // Convert to comment format with highlighted text; annotations
          // with neither a highlight nor contents are dropped.
          comments = annotations.map(a => ({
            author: a.author || 'Reviewer',
            text: a.highlightedText
              ? `"${a.highlightedText}"${a.contents ? ' → ' + a.contents : ''}`
              : a.contents,
            page: a.page,
            type: a.type,
            date: a.date,
            highlightedText: a.highlightedText,
          })).filter(c => c.text);
        } else {
          comments = await extractPdfComments(pdf);
          spin.stop();
        }

        if (comments.length === 0) {
          console.log(fmt.status('info', 'No comments found in PDF.'));
          return;
        }

        const stats = getPdfCommentStats(comments);

        // JSON output
        if (options.json) {
          console.log(JSON.stringify({ comments, stats }, null, 2));
          return;
        }

        // Append to markdown file
        if (options.append) {
          if (!fs.existsSync(options.append)) {
            console.error(fmt.status('error', `Markdown file not found: ${options.append}`));
            process.exit(1);
          }

          const markdown = fs.readFileSync(options.append, 'utf-8');
          const updated = insertPdfCommentsIntoMarkdown(markdown, comments);
          fs.writeFileSync(options.append, updated, 'utf-8');

          console.log(fmt.status('success', `Added ${comments.length} comments to ${options.append}`));
          return;
        }

        // Display comments
        console.log(fmt.header(`PDF Comments: ${path.basename(pdf)}`));
        console.log();

        // Render `"highlight" → note`, leaving the arrow out when the
        // reviewer highlighted text without writing a note.
        const renderHighlighted = (c) => {
          const note = reviewerNote(c);
          return `${chalk.yellow(`"${c.highlightedText}"`)}${note ? ` → ${note}` : ''}`;
        };

        if (options.byAuthor) {
          // Group by author. A Map keeps author names taken from the PDF
          // (e.g. "__proto__") from colliding with Object.prototype members.
          const byAuthor = new Map();
          for (const c of comments) {
            const author = c.author || 'Unknown';
            if (!byAuthor.has(author)) byAuthor.set(author, []);
            byAuthor.get(author).push(c);
          }

          for (const [author, authorComments] of byAuthor) {
            console.log(chalk.bold(`${author} (${authorComments.length}):`));
            for (const c of authorComments) {
              const body = c.highlightedText ? renderHighlighted(c) : c.text;
              console.log(`  [p.${c.page}] ${body}`);
            }
            console.log();
          }
        } else if (options.withText) {
          // Default: by page. A page heading is printed whenever the page
          // number changes between consecutive comments.
          let currentPage = 0;
          for (const c of comments) {
            if (c.page !== currentPage) {
              if (currentPage > 0) console.log();
              console.log(`Page ${c.page}:`);
              currentPage = c.page;
            }
            console.log(`  ${c.highlightedText ? renderHighlighted(c) : c.text}`);
          }
          console.log();
        } else {
          console.log(formatPdfComments(comments));
          console.log();
        }

        // Summary
        const authorList = Object.entries(stats.byAuthor)
          .map(([author, count]) => `${author} (${count})`)
          .join(', ');
        console.log(chalk.dim(`Total: ${stats.total} comments from ${authorList}`));
        console.log();
        if (!options.withText) {
          console.log(chalk.dim(`Tip: Use --with-text to extract the highlighted text content`));
        }
        console.log(chalk.dim(`Tip: Use --append <file.md> to add comments to your markdown`));

      } catch (err) {
        spin.stop();
        console.error(fmt.status('error', `Failed to extract PDF comments: ${err.message}`));
        if (process.env.DEBUG) console.error(err.stack);
        process.exit(1);
      }
    });
}