docrev 0.5.2 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/bin/rev.js CHANGED
@@ -1524,6 +1524,7 @@ program
1524
1524
  .option('--no-crossref', 'Skip pandoc-crossref filter')
1525
1525
  .option('--toc', 'Include table of contents')
1526
1526
  .option('--show-changes', 'Export DOCX with visible track changes (audit mode)')
1527
+ .option('--dual', 'Output both clean DOCX and DOCX with Word comments (paper.docx + paper_comments.docx)')
1527
1528
  .action(async (formats, options) => {
1528
1529
  const dir = path.resolve(options.dir);
1529
1530
 
@@ -1558,6 +1559,7 @@ program
1558
1559
  console.log(chalk.dim(` Crossref: ${hasPandocCrossref() && options.crossref !== false ? 'enabled' : 'disabled'}`));
1559
1560
  if (tocEnabled) console.log(chalk.dim(` TOC: enabled`));
1560
1561
  if (options.showChanges) console.log(chalk.dim(` Track changes: visible`));
1562
+ if (options.dual) console.log(chalk.dim(` Dual output: clean + with comments`));
1561
1563
  console.log('');
1562
1564
 
1563
1565
  // Override config with CLI options
@@ -1643,6 +1645,34 @@ program
1643
1645
  process.exit(1);
1644
1646
  }
1645
1647
 
1648
+ // Handle --dual mode: create a second DOCX with proper Word comments
1649
+ if (options.dual) {
1650
+ const docxResult = results.find(r => r.format === 'docx' && r.success);
1651
+ if (docxResult) {
1652
+ const { injectComments } = await import('../lib/wordcomments.js');
1653
+
1654
+ // Read the combined paper.md (with comments still in it)
1655
+ const markdown = fs.readFileSync(paperPath, 'utf-8');
1656
+
1657
+ // Generate comments DOCX path
1658
+ const commentsDocxPath = docxResult.outputPath.replace(/\.docx$/, '_comments.docx');
1659
+
1660
+ const spinComments = fmt.spinner('Injecting Word comments...').start();
1661
+ const commentResult = await injectComments(docxResult.outputPath, markdown, commentsDocxPath);
1662
+ spinComments.stop();
1663
+
1664
+ if (commentResult.success) {
1665
+ console.log(chalk.cyan('\nDual output:'));
1666
+ console.log(` Clean: ${path.basename(docxResult.outputPath)}`);
1667
+ console.log(` Comments: ${path.basename(commentsDocxPath)} (${commentResult.commentCount} comments)`);
1668
+ } else {
1669
+ console.error(chalk.yellow(`\nWarning: Could not create comments DOCX: ${commentResult.error}`));
1670
+ }
1671
+ } else {
1672
+ console.error(chalk.yellow('\n--dual requires docx format to be built'));
1673
+ }
1674
+ }
1675
+
1646
1676
  console.log(chalk.green('\nBuild complete!'));
1647
1677
  } catch (err) {
1648
1678
  spin.stop();
@@ -0,0 +1,449 @@
1
+ /**
2
+ * Word comment injection - injects CriticMarkup comments as proper Word comments
3
+ *
4
+ * This module takes a clean DOCX and injects Word comments based on
5
+ * CriticMarkup annotations from the source markdown.
6
+ */
7
+
8
+ import * as fs from 'fs';
9
+ import AdmZip from 'adm-zip';
10
+ import { getComments, stripAnnotations } from './annotations.js';
11
+
12
/**
 * Escape the five XML special characters so a string can be embedded safely
 * in element content or in a quoted attribute value.
 *
 * @param {string} str - Raw text to escape.
 * @returns {string} The text with `&`, `<`, `>`, `"` and `'` replaced by
 *   their predefined XML entities.
 */
function escapeXml(str) {
  return str
    // Ampersands go first; otherwise the `&` of the entities produced by the
    // following replacements would itself be escaped a second time.
    .replace(/&/g, '&amp;')
    .replace(/</g, '&lt;')
    .replace(/>/g, '&gt;')
    .replace(/"/g, '&quot;')
    .replace(/'/g, '&apos;');
}
25
+
26
/**
 * Turn a sequential comment index into its identifier string.
 *
 * @param {number} index - Zero-based position of the comment in generation order.
 * @returns {string} The index converted to a string.
 */
function generateCommentId(index) {
  const id = String(index);
  return id;
}
34
+
35
/**
 * Serialize comments and their replies into the text of a comments part.
 *
 * Every comment and every reply becomes its own `<w:comment>` element; the
 * replies of a comment are written directly after it. All elements share one
 * timestamp taken when the function is called.
 *
 * @param {Array<{id: string, author: string, text: string, replies?: Array}>} comments
 * @returns {string} The complete XML document as a string.
 */
function createCommentsXml(comments) {
  const timestamp = new Date().toISOString();

  // One <w:comment> element holding a single paragraph with a single run.
  const renderEntry = (entry) =>
    [
      ` <w:comment w:id="${entry.id}" w:author="${escapeXml(entry.author)}" w:date="${timestamp}">\n`,
      ` <w:p>\n`,
      ` <w:r>\n`,
      ` <w:t>${escapeXml(entry.text)}</w:t>\n`,
      ` </w:r>\n`,
      ` </w:p>\n`,
      ` </w:comment>\n`,
    ].join('');

  const parts = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" ',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">\n',
  ];

  for (const comment of comments) {
    parts.push(renderEntry(comment));

    // Replies are emitted as ordinary comment elements right after the parent.
    if (comment.replies) {
      for (const reply of comment.replies) {
        parts.push(renderEntry(reply));
      }
    }
  }

  parts.push('</w:comments>');
  return parts.join('');
}
73
+
74
/**
 * Build the text of a `commentsExtended` part that links replies to parents.
 *
 * Each comment yields one `<w15:commentEx>` element marked `done="0"`; each of
 * its replies yields a further element whose `w15:paraIdParent` is the parent
 * comment's id.
 *
 * NOTE(review): `w15:paraId` is filled with the comment ids here. Confirm that
 * Word resolves these against the comment paragraphs as intended.
 *
 * @param {Array<{id: string, replies?: Array}>} comments
 * @returns {string} The complete XML document as a string.
 */
function createCommentsExtendedXml(comments) {
  const lines = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w15:commentsEx xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml">\n',
  ];

  for (const thread of comments) {
    // The thread's root comment, flagged as open (done=0).
    lines.push(` <w15:commentEx w15:paraId="${thread.id}" w15:done="0"/>\n`);

    // Each reply points back at the root through paraIdParent.
    for (const reply of thread.replies || []) {
      lines.push(
        ` <w15:commentEx w15:paraId="${reply.id}" w15:paraIdParent="${thread.id}" w15:done="0"/>\n`
      );
    }
  }

  lines.push('</w15:commentsEx>');
  return lines.join('');
}
98
+
99
/**
 * Decode the five predefined XML entities found in `<w:t>` text content.
 *
 * @param {string} text - Raw (still escaped) element text.
 * @returns {string} The text as it reads once unescaped.
 */
function decodeXmlEntities(text) {
  return text
    .replace(/&lt;/g, '<')
    .replace(/&gt;/g, '>')
    .replace(/&quot;/g, '"')
    .replace(/&apos;/g, "'")
    // Ampersand last so that `&amp;lt;` decodes to the literal text `&lt;`.
    .replace(/&amp;/g, '&');
}

/**
 * Find the `<w:t>` element in document.xml in which a piece of text begins.
 *
 * The text of all `<w:t>` elements at or after `startFrom` is concatenated
 * (entities decoded) and searched for the whitespace-normalized `searchText`.
 * If the whole text is not found, only its first 50 characters are tried.
 *
 * @param {string} documentXml - Content of word/document.xml.
 * @param {string} searchText - Plain text to locate.
 * @param {number} [startFrom=0] - XML offset; `<w:t>` elements starting before it are ignored.
 * @returns {{found: boolean, position: number, runStart: number, runEnd: number}|null}
 *   XML offsets of the `<w:t>…</w:t>` element containing the first matched
 *   character (`position` equals `runStart`), or null when nothing matches.
 */
function findTextPosition(documentXml, searchText, startFrom = 0) {
  const normalized = searchText.trim().replace(/\s+/g, ' ');
  if (!normalized) return null;

  // `<w:t>` or `<w:t attr=…>` only; the bare prefix `<w:t` would also begin
  // tags such as `<w:tab/>` or `<w:tbl>`.
  const textPattern = /<w:t(?:\s[^>]*)?>([^<]*)<\/w:t>/g;

  // Map every text element to the slice of the concatenated text it provides.
  const blocks = [];
  let fullText = '';
  for (const match of documentXml.matchAll(textPattern)) {
    if (match.index < startFrom) continue;

    // Compare against the text the reader sees, not its escaped XML form;
    // otherwise an anchor such as "R&D" never matches "R&amp;D".
    const text = decodeXmlEntities(match[1]);
    blocks.push({
      xmlStart: match.index,
      xmlEnd: match.index + match[0].length,
      textStart: fullText.length,
      textEnd: fullText.length + text.length,
    });
    fullText += text;
  }

  // Exact match first, then fall back to the first 50 characters.
  let idx = fullText.indexOf(normalized);
  if (idx === -1) {
    idx = fullText.indexOf(normalized.slice(0, 50));
  }
  if (idx === -1) return null;

  const block = blocks.find((b) => b.textStart <= idx && idx < b.textEnd);
  if (!block) return null;

  return {
    found: true,
    position: block.xmlStart,
    runStart: block.xmlStart,
    runEnd: block.xmlEnd,
  };
}
166
+
167
/**
 * Derive the anchor text for a comment from the markdown that precedes it.
 *
 * Looks at up to 200 characters before the comment, splits them on sentence
 * boundaries and returns the last segment that still has content after
 * `stripAnnotations`, capped to its final 100 characters.
 *
 * @param {string} markdown - Full markdown source.
 * @param {number} commentPosition - Offset at which the comment starts.
 * @returns {string} The anchor text, or '' when no usable text precedes the comment.
 */
function getAnchorText(markdown, commentPosition) {
  const textBefore = markdown.slice(Math.max(0, commentPosition - 200), commentPosition);

  // A comment placed right after "… end of sentence. " leaves an empty final
  // segment, so walk backwards until a segment with real content is found
  // instead of returning an empty anchor.
  const segments = textBefore.split(/[.!?]\s+/);
  for (let i = segments.length - 1; i >= 0; i--) {
    // presumably stripAnnotations returns the text without CriticMarkup —
    // verify against ./annotations.js
    const anchor = stripAnnotations(segments[i].trim());
    if (anchor.trim() === '') continue;

    // Keep only the tail closest to the comment.
    return anchor.length > 100 ? anchor.slice(-100) : anchor;
  }

  return '';
}
194
+
195
/**
 * Extract CriticMarkup comments (`{>>Author: text<<}`) and group replies.
 *
 * A comment that starts within a few characters after the closing `<<}` of a
 * previous one is attached to the most recent top-level comment as a reply;
 * every other comment starts a new thread and records its anchor text.
 *
 * @param {string} markdown - Markdown source containing CriticMarkup comments.
 * @returns {Array<{author: string, text: string, anchor: string, position: number, replies: Array}>}
 */
function parseCommentsWithReplies(markdown) {
  const threads = [];
  // Lazy match so that each `{>> … <<}` pair is captured separately.
  const pattern = /\{>>(.+?)<<\}/g;

  for (let hit = pattern.exec(markdown); hit !== null; hit = pattern.exec(markdown)) {
    const body = hit[1];
    const position = hit.index;

    // "Author: text" — a colon only counts as the separator when it appears
    // early in the comment; otherwise the author stays 'Unknown'.
    const colonIdx = body.indexOf(':');
    const hasAuthor = colonIdx > 0 && colonIdx < 30;
    const author = hasAuthor ? body.slice(0, colonIdx).trim() : 'Unknown';
    const text = hasAuthor ? body.slice(colonIdx + 1).trim() : body;

    // The few characters right before this comment tell whether it directly
    // follows another comment.
    const lead = markdown.slice(Math.max(0, position - 5), position).trim();
    const followsComment = lead.endsWith('<<}');

    if (!followsComment || threads.length === 0) {
      threads.push({
        author,
        text,
        anchor: getAnchorText(markdown, position),
        position,
        replies: [],
      });
      continue;
    }

    const parent = threads[threads.length - 1];
    if (!parent.replies) parent.replies = [];
    parent.replies.push({ author, text });
  }

  return threads;
}
245
+
246
/**
 * Locate the nearest `<w:r>` opening tag at or before an offset.
 *
 * A plain `lastIndexOf('<w:r')` also matches run-property tags such as
 * `<w:rPr>` or `<w:rStyle …/>`, so each candidate is checked to be exactly
 * `<w:r>` or `<w:r` followed by whitespace (attributes).
 *
 * @param {string} xml - document.xml content.
 * @param {number} before - Offset to search backwards from.
 * @returns {number} Offset of the run's opening tag, or -1 when none is found.
 */
function findRunStart(xml, before) {
  let from = before;
  while (from >= 0) {
    const idx = xml.lastIndexOf('<w:r', from);
    if (idx === -1) return -1;

    const next = xml.charAt(idx + '<w:r'.length);
    if (next === '>' || /\s/.test(next)) return idx;

    from = idx - 1;
  }
  return -1;
}

/**
 * Insert a fragment (plus newline) before the last occurrence of a closing tag.
 *
 * @param {string} xml - XML text to modify.
 * @param {string} closingTag - e.g. `</Types>`.
 * @param {string} fragment - Markup to insert.
 * @returns {string} The updated XML, or the input unchanged when the tag is absent.
 */
function insertBeforeClosingTag(xml, closingTag, fragment) {
  const at = xml.lastIndexOf(closingTag);
  if (at === -1) return xml;
  return `${xml.slice(0, at)}${fragment}\n${xml.slice(at)}`;
}

/**
 * Write a text entry into the zip, replacing it when it already exists.
 *
 * @param {object} zip - AdmZip instance.
 * @param {string} entryName - Path of the entry inside the archive.
 * @param {string} content - UTF-8 text to store.
 */
function putZipEntry(zip, entryName, content) {
  const data = Buffer.from(content, 'utf-8');
  if (zip.getEntry(entryName)) {
    zip.updateFile(entryName, data);
  } else {
    zip.addFile(entryName, data);
  }
}

/**
 * Inject comments into a DOCX file.
 *
 * Parses CriticMarkup comments from `markdown`, locates each comment's anchor
 * text in `word/document.xml`, wraps the containing run in comment range
 * markers, writes `word/comments.xml` (and `word/commentsExtended.xml` when
 * there are replies) and registers the new parts in `[Content_Types].xml` and
 * `word/_rels/document.xml.rels`. When the markdown contains no comments the
 * input file is copied to `outputPath` unchanged.
 *
 * Errors are not thrown; they are reported through the returned object.
 *
 * @param {string} docxPath - Path to the clean DOCX
 * @param {string} markdown - Source markdown with CriticMarkup comments
 * @param {string} outputPath - Path for output DOCX with comments
 * @returns {Promise<{success: boolean, commentCount: number, error?: string}>}
 *   `commentCount` counts every parsed comment and reply, including comments
 *   whose anchor could not be located in the document.
 */
export async function injectComments(docxPath, markdown, outputPath) {
  try {
    if (!fs.existsSync(docxPath)) {
      return { success: false, commentCount: 0, error: `File not found: ${docxPath}` };
    }

    const parsedComments = parseCommentsWithReplies(markdown);

    if (parsedComments.length === 0) {
      // No comments to inject, just copy the file
      fs.copyFileSync(docxPath, outputPath);
      return { success: true, commentCount: 0 };
    }

    const zip = new AdmZip(docxPath);

    const documentEntry = zip.getEntry('word/document.xml');
    if (!documentEntry) {
      return { success: false, commentCount: 0, error: 'Invalid DOCX: no document.xml' };
    }

    let documentXml = zip.readAsText(documentEntry);

    // Assign IDs: each comment first, immediately followed by its replies.
    let nextId = 0;
    const commentsWithIds = parsedComments.map((c) => {
      const comment = {
        ...c,
        id: generateCommentId(nextId++),
      };
      if (c.replies) {
        comment.replies = c.replies.map((r) => ({
          ...r,
          id: generateCommentId(nextId++),
        }));
      }
      return comment;
    });

    // Locate anchors in document order; each search resumes after the
    // previous hit so repeated anchor text maps to successive occurrences.
    const injections = [];
    let searchFrom = 0;

    for (const comment of commentsWithIds) {
      const pos = findTextPosition(documentXml, comment.anchor, searchFrom);

      if (pos && pos.found) {
        injections.push({
          comment,
          position: pos.position,
          runStart: pos.runStart,
          runEnd: pos.runEnd,
        });
        searchFrom = pos.position + 1;
      }
    }

    // Apply from the end of the document backwards so that earlier offsets
    // stay valid while markup is being inserted.
    injections.sort((a, b) => b.position - a.position);

    for (const { comment, runStart, runEnd } of injections) {
      const rStart = findRunStart(documentXml, runStart);
      if (rStart === -1) continue;

      const rEndMatch = documentXml.indexOf('</w:r>', runEnd);
      if (rEndMatch === -1) continue;
      const rEnd = rEndMatch + '</w:r>'.length;

      // The comment and each of its replies get their own range and reference
      // over the same run, so no entry in comments.xml is left unreferenced.
      const allIds = [comment.id, ...(comment.replies || []).map((r) => r.id)];

      const startMarkers = allIds
        .map((id) => `<w:commentRangeStart w:id="${id}"/>`)
        .join('');
      const endMarkers = allIds
        .map((id) => `<w:commentRangeEnd w:id="${id}"/><w:r><w:commentReference w:id="${id}"/></w:r>`)
        .join('');

      // Insert after the run first; the start offset is unaffected by that.
      documentXml = documentXml.slice(0, rEnd) + endMarkers + documentXml.slice(rEnd);
      documentXml = documentXml.slice(0, rStart) + startMarkers + documentXml.slice(rStart);
    }

    zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));

    putZipEntry(zip, 'word/comments.xml', createCommentsXml(commentsWithIds));

    // commentsExtended.xml is only needed for reply threading (Word 2013+)
    const hasReplies = commentsWithIds.some((c) => c.replies && c.replies.length > 0);
    if (hasReplies) {
      putZipEntry(zip, 'word/commentsExtended.xml', createCommentsExtendedXml(commentsWithIds));
    }

    // Register the new parts in [Content_Types].xml
    const contentTypesEntry = zip.getEntry('[Content_Types].xml');
    if (contentTypesEntry) {
      let contentTypes = zip.readAsText(contentTypesEntry);

      if (!contentTypes.includes('comments.xml')) {
        contentTypes = insertBeforeClosingTag(
          contentTypes,
          '</Types>',
          '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>'
        );
      }

      if (hasReplies && !contentTypes.includes('commentsExtended.xml')) {
        contentTypes = insertBeforeClosingTag(
          contentTypes,
          '</Types>',
          '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>'
        );
      }

      zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
    }

    // Register the relationships from document.xml to the new parts
    const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
    if (relsEntry) {
      let rels = zip.readAsText(relsEntry);

      // Continue numbering after the highest existing rId so ids stay unique.
      const usedIds = (rels.match(/rId(\d+)/g) || []).map((r) =>
        Number.parseInt(r.slice('rId'.length), 10)
      );
      let nextRelId = usedIds.reduce((max, n) => Math.max(max, n), 0) + 1;

      if (!rels.includes('comments.xml')) {
        rels = insertBeforeClosingTag(
          rels,
          '</Relationships>',
          `<Relationship Id="rId${nextRelId++}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>`
        );
      }

      if (hasReplies && !rels.includes('commentsExtended.xml')) {
        rels = insertBeforeClosingTag(
          rels,
          '</Relationships>',
          `<Relationship Id="rId${nextRelId++}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>`
        );
      }

      zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
    }

    zip.writeZip(outputPath);

    const totalComments = commentsWithIds.reduce(
      (sum, c) => sum + 1 + (c.replies ? c.replies.length : 0),
      0
    );

    return { success: true, commentCount: totalComments };
  } catch (err) {
    return { success: false, commentCount: 0, error: err.message };
  }
}
438
+
439
/**
 * Read a markdown file from disk and inject its CriticMarkup comments into a
 * clean DOCX as Word comments.
 *
 * The markdown file is read synchronously as UTF-8; a read failure surfaces
 * as a rejection of the returned promise.
 *
 * @param {string} cleanDocxPath - Path to clean DOCX (built without comments)
 * @param {string} markdownPath - Path to markdown with CriticMarkup comments
 * @param {string} outputPath - Path for output DOCX with Word comments
 * @returns {Promise<{success: boolean, commentCount: number, error?: string}>}
 */
export async function buildWithComments(cleanDocxPath, markdownPath, outputPath) {
  const source = fs.readFileSync(markdownPath, 'utf-8');
  const outcome = await injectComments(cleanDocxPath, source, outputPath);
  return outcome;
}
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docrev",
3
- "version": "0.5.2",
3
+ "version": "0.6.0",
4
4
  "description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
5
5
  "type": "module",
6
6
  "types": "types/index.d.ts",
@@ -68,6 +68,10 @@
68
68
  "./spelling": {
69
69
  "types": "./types/index.d.ts",
70
70
  "import": "./lib/spelling.js"
71
+ },
72
+ "./wordcomments": {
73
+ "types": "./types/index.d.ts",
74
+ "import": "./lib/wordcomments.js"
71
75
  }
72
76
  },
73
77
  "engines": {