docrev 0.6.1 → 0.6.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,19 +1,22 @@
1
1
  /**
2
- * Word comment injection - injects CriticMarkup comments as proper Word comments
2
+ * Word comment injection with reply threading
3
3
  *
4
- * This module takes a clean DOCX and injects Word comments based on
5
- * CriticMarkup annotations from the source markdown.
4
+ * Flow:
5
+ * 1. prepareMarkdownWithMarkers() - Parse comments, detect Guy→Gilles reply pairs
6
+ * - Guy comments get markers: ⟦CMS:n⟧anchor⟦CME:n⟧
7
+ * - Gilles replies: no markers (they attach to parent comment)
8
+ * 2. Pandoc converts to DOCX
9
+ * 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
10
+ * - Replies go in comments.xml with parent reference in commentsExtended.xml
6
11
  */
7
12
 
8
13
  import * as fs from 'fs';
9
14
  import AdmZip from 'adm-zip';
10
- import { getComments, stripAnnotations } from './annotations.js';
11
15
 
12
- /**
13
- * Escape XML special characters
14
- * @param {string} str
15
- * @returns {string}
16
- */
16
+ const MARKER_START_PREFIX = '⟦CMS:';
17
+ const MARKER_END_PREFIX = '⟦CME:';
18
+ const MARKER_SUFFIX = '⟧';
19
+
17
20
  function escapeXml(str) {
18
21
  return str
19
22
  .replace(/&/g, '&amp;')
@@ -23,72 +26,164 @@ function escapeXml(str) {
23
26
  .replace(/'/g, '&apos;');
24
27
  }
25
28
 
26
- /**
27
- * Generate a unique comment ID
28
- * @param {number} index
29
- * @returns {string}
30
- */
31
- function generateCommentId(index) {
32
- return String(index);
29
+ function generateParaId(commentIdx, paraNum) {
30
+ // Generate 8-character uppercase hex ID matching Word format
31
+ // Word uses IDs like "3F25BC58", "0331C187"
32
+ // Must be deterministic - same inputs always produce same output
33
+ const id = 0x10000000 + (commentIdx * 0x00100000) + (paraNum * 0x00001000);
34
+ return id.toString(16).toUpperCase().padStart(8, '0');
33
35
  }
34
36
 
35
37
  /**
36
- * Create the comments.xml content
37
- * @param {Array<{id: string, author: string, text: string, replies?: Array}>} comments
38
- * @returns {string}
38
+ * Parse comments and create markers
39
+ *
40
+ * Returns:
41
+ * - markedMarkdown: markdown with markers for parent comments only
42
+ * - comments: array with author, text, isReply, parentIdx
39
43
  */
44
+ export function prepareMarkdownWithMarkers(markdown) {
45
+ // Match all comments with optional anchor
46
+ const commentPattern = /\{>>(.+?)<<\}(?:\s*\[([^\]]+)\]\{\.mark\})?/g;
47
+
48
+ const rawMatches = [];
49
+ let match;
50
+ while ((match = commentPattern.exec(markdown)) !== null) {
51
+ const content = match[1];
52
+ let author = 'Unknown';
53
+ let text = content;
54
+ const colonIdx = content.indexOf(':');
55
+ if (colonIdx > 0 && colonIdx < 30) {
56
+ author = content.slice(0, colonIdx).trim();
57
+ text = content.slice(colonIdx + 1).trim();
58
+ }
59
+
60
+ rawMatches.push({
61
+ author,
62
+ text,
63
+ anchor: match[2] || null,
64
+ start: match.index,
65
+ end: match.index + match[0].length,
66
+ fullMatch: match[0]
67
+ });
68
+ }
69
+
70
+ if (rawMatches.length === 0) {
71
+ return { markedMarkdown: markdown, comments: [] };
72
+ }
73
+
74
+ // Detect reply relationships: Gilles immediately following Guy = reply
75
+ // Comments are "adjacent" if there's only whitespace between them (< 50 chars)
76
+ const ADJACENT_THRESHOLD = 50;
77
+ const comments = [];
78
+ let lastGuyIdx = -1;
79
+ let lastCommentEnd = -1;
80
+
81
+ for (let i = 0; i < rawMatches.length; i++) {
82
+ const m = rawMatches[i];
83
+ const isGuy = m.author === 'Guy Colling';
84
+ const isGilles = m.author === 'Gilles Colling';
85
+
86
+ // Check if this comment is adjacent to the previous one
87
+ const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
88
+ const isAdjacent = gap < ADJACENT_THRESHOLD;
89
+
90
+ // Reset lastGuyIdx if there's a gap (comments not in same cluster)
91
+ if (!isAdjacent) {
92
+ lastGuyIdx = -1;
93
+ }
94
+
95
+ if (isGuy) {
96
+ comments.push({
97
+ ...m,
98
+ isReply: false,
99
+ parentIdx: null,
100
+ commentIdx: comments.length
101
+ });
102
+ lastGuyIdx = comments.length - 1;
103
+ } else if (isGilles && lastGuyIdx >= 0 && isAdjacent) {
104
+ // Gilles immediately following Guy (same cluster) = reply
105
+ comments.push({
106
+ ...m,
107
+ isReply: true,
108
+ parentIdx: lastGuyIdx,
109
+ commentIdx: comments.length
110
+ });
111
+ // Don't reset lastGuyIdx - multiple replies could follow
112
+ } else {
113
+ // Standalone comment (not a reply)
114
+ comments.push({
115
+ ...m,
116
+ isReply: false,
117
+ parentIdx: null,
118
+ commentIdx: comments.length
119
+ });
120
+ }
121
+
122
+ lastCommentEnd = m.end;
123
+ }
124
+
125
+ // Build marked markdown - only parent comments get markers
126
+ // Process from end to start to preserve positions
127
+ let markedMarkdown = markdown;
128
+
129
+ for (let i = comments.length - 1; i >= 0; i--) {
130
+ const c = comments[i];
131
+
132
+ if (c.isReply) {
133
+ // Reply: remove from document entirely (will be in comments.xml only)
134
+ markedMarkdown = markedMarkdown.slice(0, c.start) + markedMarkdown.slice(c.end);
135
+ } else {
136
+ // Parent comment: replace with markers
137
+ const anchor = c.anchor || '';
138
+ const replacement = `${MARKER_START_PREFIX}${i}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${i}${MARKER_SUFFIX}`;
139
+ markedMarkdown = markedMarkdown.slice(0, c.start) + replacement + markedMarkdown.slice(c.end);
140
+ }
141
+ }
142
+
143
+ return { markedMarkdown, comments };
144
+ }
145
+
40
146
  function createCommentsXml(comments) {
41
- const now = new Date().toISOString();
147
+ // Word expects date without milliseconds: 2025-12-30T08:33:00Z
148
+ const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');
42
149
 
43
150
  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
44
- xml += '<w:comments xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" ';
45
- xml += 'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships">\n';
151
+ // Minimal namespaces matching golden file structure
152
+ xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';
153
+
154
+ // Use a consistent rsid (8-char hex) for all comments in this batch
155
+ const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');
46
156
 
47
157
  for (const comment of comments) {
48
- xml += ` <w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}">\n`;
49
- xml += ` <w:p>\n`;
50
- xml += ` <w:r>\n`;
51
- xml += ` <w:t>${escapeXml(comment.text)}</w:t>\n`;
52
- xml += ` </w:r>\n`;
53
- xml += ` </w:p>\n`;
54
- xml += ` </w:comment>\n`;
55
-
56
- // Add replies as separate comments with parent reference
57
- if (comment.replies) {
58
- for (const reply of comment.replies) {
59
- xml += ` <w:comment w:id="${reply.id}" w:author="${escapeXml(reply.author)}" w:date="${now}">\n`;
60
- xml += ` <w:p>\n`;
61
- xml += ` <w:r>\n`;
62
- xml += ` <w:t>${escapeXml(reply.text)}</w:t>\n`;
63
- xml += ` </w:r>\n`;
64
- xml += ` </w:p>\n`;
65
- xml += ` </w:comment>\n`;
66
- }
158
+ xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${comment.author.split(' ').map(n => n[0]).join('')}">`;
159
+ // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
160
+ xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
161
+ xml += `<w:r><w:annotationRef/></w:r>`;
162
+ xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
163
+ xml += `</w:p>`;
164
+ if (comment.isReply) {
165
+ // Second empty paragraph: rsidRDefault matches rsidR
166
+ xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
67
167
  }
168
+ xml += `</w:comment>`;
68
169
  }
69
170
 
70
171
  xml += '</w:comments>';
71
172
  return xml;
72
173
  }
73
174
 
74
- /**
75
- * Create commentsExtended.xml for reply threading
76
- * @param {Array<{id: string, replies?: Array}>} comments
77
- * @returns {string}
78
- */
79
175
  function createCommentsExtendedXml(comments) {
80
176
  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
81
- xml += '<w15:commentsEx xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml">\n';
177
+ // Minimal namespaces matching golden file structure
178
+ xml += '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';
82
179
 
83
180
  for (const comment of comments) {
84
- // Mark the parent comment as done=0 (open)
85
- xml += ` <w15:commentEx w15:paraId="${comment.id}" w15:done="0"/>\n`;
86
-
87
- if (comment.replies) {
88
- for (const reply of comment.replies) {
89
- // Link replies to parent
90
- xml += ` <w15:commentEx w15:paraId="${reply.id}" w15:paraIdParent="${comment.id}" w15:done="0"/>\n`;
91
- }
181
+ if (comment.isReply && comment.parentParaId) {
182
+ // Reply: use paraId2 (the second/empty paragraph) and link to parent's paraId
183
+ xml += `<w15:commentEx w15:paraId="${comment.paraId2}" w15:paraIdParent="${comment.parentParaId}" w15:done="0"/>`;
184
+ } else {
185
+ // Parent comment: use paraId (first paragraph)
186
+ xml += `<w15:commentEx w15:paraId="${comment.paraId}" w15:done="0"/>`;
92
187
  }
93
188
  }
94
189
 
@@ -96,354 +191,417 @@ function createCommentsExtendedXml(comments) {
96
191
  return xml;
97
192
  }
98
193
 
99
- /**
100
- * Find text in document.xml and get surrounding context for anchor matching
101
- * @param {string} documentXml
102
- * @param {string} searchText
103
- * @param {number} startFrom - position to start searching from
104
- * @returns {{found: boolean, runIndex: number, textIndex: number, position: number}|null}
105
- */
106
- function findTextPosition(documentXml, searchText, startFrom = 0) {
107
- // Normalize search text
108
- const normalized = searchText.trim().replace(/\s+/g, ' ');
109
- if (!normalized) return null;
110
-
111
- // Extract all text content and map to XML positions
112
- const textPattern = /<w:t[^>]*>([^<]*)<\/w:t>/g;
113
- let match;
114
- let fullText = '';
115
- const positions = [];
194
+ function generateDurableId(index) {
195
+ // Generate unique 8-char hex ID for durableId
196
+ // CRITICAL: Must stay within signed 32-bit range (< 0x7FFFFFFF = 2147483647)
197
+ // Word interprets durableIds as signed 32-bit integers
198
+ const base = 0x10000000 + (Date.now() % 0x40000000); // Base between 0x10000000 and 0x50000000
199
+ const id = (base + index * 0x01000000) % 0x7FFFFFFF; // Keep under signed 32-bit max
200
+ return id.toString(16).toUpperCase().padStart(8, '0');
201
+ }
116
202
 
117
- while ((match = textPattern.exec(documentXml)) !== null) {
118
- if (match.index < startFrom) continue;
203
+ function createCommentsIdsXml(comments) {
204
+ let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
205
+ // Minimal namespaces matching golden file structure
206
+ xml += '<w16cid:commentsIds ';
207
+ xml += 'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ';
208
+ xml += 'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ';
209
+ xml += 'mc:Ignorable="w16cid">';
119
210
 
120
- positions.push({
121
- xmlStart: match.index,
122
- xmlEnd: match.index + match[0].length,
123
- textStart: fullText.length,
124
- text: match[1],
125
- });
126
- fullText += match[1];
211
+ for (const comment of comments) {
212
+ // ONE entry per comment using the LAST paragraph's paraId:
213
+ // - Parent comments (1 paragraph): use paraId
214
+ // - Reply comments (2 paragraphs): use paraId2 (the second/empty paragraph)
215
+ const useParaId = comment.isReply ? comment.paraId2 : comment.paraId;
216
+ xml += `<w16cid:commentId w16cid:paraId="${useParaId}" w16cid:durableId="${comment.durableId}"/>`;
127
217
  }
128
218
 
129
- // Find the search text in the combined text
130
- const idx = fullText.indexOf(normalized);
131
- if (idx === -1) {
132
- // Try partial match (first 50 chars)
133
- const partial = normalized.slice(0, 50);
134
- const partialIdx = fullText.indexOf(partial);
135
- if (partialIdx === -1) return null;
136
-
137
- // Find which position block contains this
138
- for (let i = 0; i < positions.length; i++) {
139
- const pos = positions[i];
140
- if (pos.textStart <= partialIdx && pos.textStart + pos.text.length > partialIdx) {
141
- return {
142
- found: true,
143
- position: pos.xmlStart,
144
- runStart: pos.xmlStart,
145
- runEnd: pos.xmlEnd,
146
- };
147
- }
148
- }
149
- }
219
+ xml += '</w16cid:commentsIds>';
220
+ return xml;
221
+ }
150
222
 
151
- // Find which position block contains the start of the match
152
- for (let i = 0; i < positions.length; i++) {
153
- const pos = positions[i];
154
- if (pos.textStart <= idx && pos.textStart + pos.text.length > idx) {
155
- return {
156
- found: true,
157
- position: pos.xmlStart,
158
- runStart: pos.xmlStart,
159
- runEnd: pos.xmlEnd,
160
- };
161
- }
162
- }
223
+ function createCommentsExtensibleXml(comments) {
224
+ const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');
163
225
 
164
- return null;
165
- }
226
+ let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
227
+ // Minimal namespaces matching golden file structure
228
+ xml += '<w16cex:commentsExtensible ';
229
+ xml += 'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ';
230
+ xml += 'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ';
231
+ xml += 'mc:Ignorable="w16cex">';
166
232
 
167
- /**
168
- * Get context text before a comment in the markdown
169
- * @param {string} markdown
170
- * @param {number} commentPosition
171
- * @returns {string}
172
- */
173
- function getAnchorText(markdown, commentPosition) {
174
- // Look backwards from comment position to find anchor text
175
- // The anchor is typically the text immediately before the comment
176
- const textBefore = markdown.slice(Math.max(0, commentPosition - 200), commentPosition);
177
-
178
- // Get the last sentence or phrase before the comment
179
- // Split on sentence boundaries
180
- const sentences = textBefore.split(/[.!?]\s+/);
181
- if (sentences.length > 0) {
182
- let anchor = sentences[sentences.length - 1].trim();
183
- // Clean up any markup
184
- anchor = stripAnnotations(anchor);
185
- // Take last 100 chars max
186
- if (anchor.length > 100) {
187
- anchor = anchor.slice(-100);
188
- }
189
- return anchor;
233
+ for (const comment of comments) {
234
+ // ONE entry per comment using the durableId
235
+ xml += `<w16cex:commentExtensible w16cex:durableId="${comment.durableId}" w16cex:dateUtc="${now}"/>`;
190
236
  }
191
237
 
192
- return textBefore.slice(-50);
238
+ xml += '</w16cex:commentsExtensible>';
239
+ return xml;
193
240
  }
194
241
 
195
- /**
196
- * Parse CriticMarkup comments including replies
197
- * Format: {>>Author: comment<<} {>>Replier: reply<<}
198
- * @param {string} markdown
199
- * @returns {Array<{author: string, text: string, anchor: string, position: number, replies: Array}>}
200
- */
201
- function parseCommentsWithReplies(markdown) {
202
- const comments = [];
203
- // Use non-greedy match to find content between {>> and <<}
204
- const commentPattern = /\{>>(.+?)<<\}/g;
205
- let match;
206
-
207
- while ((match = commentPattern.exec(markdown)) !== null) {
208
- const fullMatch = match[1];
209
- const position = match.index;
242
+ // Known Windows Live user IDs for authors (from manual_comments.docx)
243
+ const AUTHOR_USER_IDS = {
244
+ 'Guy Colling': '9ff4d97962428673',
245
+ 'Gilles Colling': '46e930a4c4b85dfd',
246
+ };
210
247
 
211
- // Parse author and text
212
- let author = 'Unknown';
213
- let text = fullMatch;
248
+ function createPeopleXml(comments) {
249
+ // Extract unique authors
250
+ const authors = [...new Set(comments.map(c => c.author))];
214
251
 
215
- const colonIdx = fullMatch.indexOf(':');
216
- if (colonIdx > 0 && colonIdx < 30) {
217
- author = fullMatch.slice(0, colonIdx).trim();
218
- text = fullMatch.slice(colonIdx + 1).trim();
219
- }
252
+ let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
253
+ xml += '<w15:people ';
254
+ xml += 'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ';
255
+ xml += 'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main" ';
256
+ xml += 'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" ';
257
+ xml += 'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" ';
258
+ xml += 'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" ';
259
+ xml += 'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" ';
260
+ xml += 'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ';
261
+ xml += 'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ';
262
+ xml += 'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" ';
263
+ xml += 'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" ';
264
+ xml += 'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" ';
265
+ xml += 'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh">';
266
+
267
+ for (const author of authors) {
268
+ const userId = AUTHOR_USER_IDS[author] || generateUserId(author);
269
+ xml += `<w15:person w15:author="${escapeXml(author)}">`;
270
+ xml += `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>`;
271
+ xml += `</w15:person>`;
272
+ }
220
273
 
221
- // Check if this is a reply to the previous comment (immediately follows another comment)
222
- const textBefore = markdown.slice(Math.max(0, position - 5), position).trim();
223
- const isReply = textBefore.endsWith('<<}');
274
+ xml += '</w15:people>';
275
+ return xml;
276
+ }
224
277
 
225
- if (isReply && comments.length > 0) {
226
- // Add as reply to previous comment
227
- const parent = comments[comments.length - 1];
228
- if (!parent.replies) parent.replies = [];
229
- parent.replies.push({ author, text });
230
- } else {
231
- // New comment
232
- const anchor = getAnchorText(markdown, position);
233
- comments.push({
234
- author,
235
- text,
236
- anchor,
237
- position,
238
- replies: [],
239
- });
240
- }
278
+ function generateUserId(author) {
279
+ // Generate a deterministic 16-char hex ID from author name
280
+ let hash = 0;
281
+ for (let i = 0; i < author.length; i++) {
282
+ hash = ((hash << 5) - hash) + author.charCodeAt(i);
283
+ hash = hash & hash;
241
284
  }
242
-
243
- return comments;
285
+ return Math.abs(hash).toString(16).padStart(16, '0').slice(0, 16);
244
286
  }
245
287
 
246
288
  /**
247
- * Inject comments into a DOCX file
248
- * @param {string} docxPath - Path to the clean DOCX
249
- * @param {string} markdown - Source markdown with CriticMarkup comments
250
- * @param {string} outputPath - Path for output DOCX with comments
251
- * @returns {Promise<{success: boolean, commentCount: number, error?: string}>}
289
+ * Inject comments at marker positions
252
290
  */
253
- export async function injectComments(docxPath, markdown, outputPath) {
291
+ export async function injectCommentsAtMarkers(docxPath, comments, outputPath) {
254
292
  try {
255
293
  if (!fs.existsSync(docxPath)) {
256
- return { success: false, commentCount: 0, error: `File not found: ${docxPath}` };
294
+ return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
257
295
  }
258
296
 
259
- // Parse comments from markdown
260
- const parsedComments = parseCommentsWithReplies(markdown);
261
-
262
- if (parsedComments.length === 0) {
263
- // No comments to inject, just copy the file
297
+ if (comments.length === 0) {
264
298
  fs.copyFileSync(docxPath, outputPath);
265
- return { success: true, commentCount: 0 };
299
+ return { success: true, commentCount: 0, skippedComments: 0 };
266
300
  }
267
301
 
268
- // Read the DOCX
269
302
  const zip = new AdmZip(docxPath);
270
-
271
- // Get document.xml
272
303
  const documentEntry = zip.getEntry('word/document.xml');
273
304
  if (!documentEntry) {
274
- return { success: false, commentCount: 0, error: 'Invalid DOCX: no document.xml' };
305
+ return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
275
306
  }
276
307
 
277
308
  let documentXml = zip.readAsText(documentEntry);
278
309
 
279
- // Assign IDs to comments and replies
280
- let nextId = 0;
281
- const commentsWithIds = parsedComments.map(c => {
282
- const comment = {
283
- ...c,
284
- id: generateCommentId(nextId++),
285
- };
286
- if (c.replies) {
287
- comment.replies = c.replies.map(r => ({
288
- ...r,
289
- id: generateCommentId(nextId++),
290
- }));
291
- }
292
- return comment;
293
- });
294
-
295
- // Find anchor positions and inject comment ranges
296
- const injections = [];
297
- let searchFrom = 0;
298
-
299
- for (const comment of commentsWithIds) {
300
- const pos = findTextPosition(documentXml, comment.anchor, searchFrom);
301
-
302
- if (pos && pos.found) {
303
- // We'll inject the comment range around this position
304
- injections.push({
305
- comment,
306
- position: pos.position,
307
- runStart: pos.runStart,
308
- runEnd: pos.runEnd,
309
- });
310
- searchFrom = pos.position + 1;
310
+ // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
311
+ const commentsWithIds = comments.map((c, idx) => ({
312
+ ...c,
313
+ id: String(idx + 1),
314
+ paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
315
+ paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
316
+ durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
317
+ }));
318
+
319
+ // Link replies to parent paraIds
320
+ for (const c of commentsWithIds) {
321
+ if (c.isReply && c.parentIdx !== null) {
322
+ c.parentParaId = commentsWithIds[c.parentIdx].paraId;
311
323
  }
312
324
  }
313
325
 
314
- // Sort injections by position (reverse order for safe modification)
315
- injections.sort((a, b) => b.position - a.position);
326
+ const injectedIds = new Set();
327
+
328
+ // Process only parent comments (non-replies) for document ranges
329
+ const parentComments = commentsWithIds.filter(c => !c.isReply);
330
+
331
+ for (let i = parentComments.length - 1; i >= 0; i--) {
332
+ const comment = parentComments[i];
333
+ const idx = comment.commentIdx;
334
+
335
+ const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
336
+ const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
316
337
 
317
- // Inject comment range markers into document.xml
318
- for (const inj of injections) {
319
- const { comment, runStart, runEnd } = inj;
338
+ const startPos = documentXml.indexOf(startMarker);
339
+ const endPos = documentXml.indexOf(endMarker);
320
340
 
321
- // Find the <w:r> element containing this text
322
- // Insert commentRangeStart before the run and commentRangeEnd after
341
+ if (startPos === -1 || endPos === -1) continue;
323
342
 
324
- // Find the start of the <w:r> containing this position
325
- const rStartMatch = documentXml.lastIndexOf('<w:r', runStart);
326
- if (rStartMatch === -1) continue;
343
+ // Find the <w:r> containing the markers
344
+ const rStartBefore = documentXml.lastIndexOf('<w:r>', startPos);
345
+ const rStartOpen = documentXml.lastIndexOf('<w:r ', startPos);
346
+ const rStart = Math.max(rStartBefore, rStartOpen);
347
+ const rEndPos = documentXml.indexOf('</w:r>', endPos);
327
348
 
328
- // Find the end of this </w:r>
329
- const rEndMatch = documentXml.indexOf('</w:r>', runEnd);
330
- if (rEndMatch === -1) continue;
331
- const rEnd = rEndMatch + '</w:r>'.length;
349
+ if (rStart === -1 || rEndPos === -1) continue;
332
350
 
333
- // Generate comment IDs list (main + replies for reference linking)
334
- const allIds = [comment.id];
335
- if (comment.replies) {
336
- allIds.push(...comment.replies.map(r => r.id));
351
+ const rEnd = rEndPos + '</w:r>'.length;
352
+ const runContent = documentXml.slice(rStart, rEnd);
353
+
354
+ // Extract styling
355
+ const rPrMatch = runContent.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
356
+ const rPr = rPrMatch ? rPrMatch[0] : '';
357
+
358
+ // Extract text
359
+ const textMatch = runContent.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
360
+ if (!textMatch) continue;
361
+
362
+ const fullText = textMatch[1];
363
+ const tElement = textMatch[0].match(/<w:t[^>]*>/)[0];
364
+
365
+ const startInText = fullText.indexOf(startMarker);
366
+ const endInText = fullText.indexOf(endMarker);
367
+ if (startInText === -1 || endInText === -1) continue;
368
+
369
+ const textBefore = fullText.slice(0, startInText);
370
+ const anchorText = fullText.slice(startInText + startMarker.length, endInText);
371
+ const textAfter = fullText.slice(endInText + endMarker.length);
372
+
373
+ // Build replacement
374
+ let replacement = '';
375
+
376
+ if (textBefore) {
377
+ replacement += `<w:r>${rPr}${tElement}${textBefore}</w:t></w:r>`;
378
+ }
379
+
380
+ // Find replies to this comment
381
+ const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment.commentIdx);
382
+
383
+ // Start ranges for parent AND all replies (nested)
384
+ replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
385
+ for (const reply of replies) {
386
+ replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
337
387
  }
338
388
 
339
- // Insert commentRangeEnd and commentReference after the run
340
- let endMarker = `<w:commentRangeEnd w:id="${comment.id}"/>`;
341
- endMarker += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
389
+ // Anchor text
390
+ if (anchorText) {
391
+ replacement += `<w:r>${rPr}${tElement}${anchorText}</w:t></w:r>`;
392
+ }
393
+
394
+ // End parent range and reference (NO rStyle wrapper - required for threading)
395
+ replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
396
+ replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
397
+
398
+ // End reply ranges and references (same position as parent, NO rStyle wrapper)
399
+ for (const reply of replies) {
400
+ replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
401
+ replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
402
+ injectedIds.add(reply.id);
403
+ }
404
+
405
+ if (textAfter) {
406
+ replacement += `<w:r>${rPr}${tElement}${textAfter}</w:t></w:r>`;
407
+ }
342
408
 
343
- documentXml = documentXml.slice(0, rEnd) + endMarker + documentXml.slice(rEnd);
409
+ documentXml = documentXml.slice(0, rStart) + replacement + documentXml.slice(rEnd);
410
+ injectedIds.add(comment.id);
411
+ }
344
412
 
345
- // Insert commentRangeStart before the run
346
- const startMarker = `<w:commentRangeStart w:id="${comment.id}"/>`;
347
- documentXml = documentXml.slice(0, rStartMatch) + startMarker + documentXml.slice(rStartMatch);
413
+ // Add required namespaces to document.xml for comment threading
414
+ const requiredNs = {
415
+ 'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
416
+ 'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
417
+ 'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
418
+ 'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
419
+ 'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
420
+ };
421
+
422
+ // Find <w:document and add namespaces
423
+ const docTagMatch = documentXml.match(/<w:document[^>]*>/);
424
+ if (docTagMatch) {
425
+ let docTag = docTagMatch[0];
426
+ let modified = false;
427
+ for (const [attr, val] of Object.entries(requiredNs)) {
428
+ if (!docTag.includes(attr)) {
429
+ docTag = docTag.replace('>', ` ${attr}="${val}">`);
430
+ modified = true;
431
+ }
432
+ }
433
+ // Add mc:Ignorable if mc namespace was added
434
+ if (modified && !docTag.includes('mc:Ignorable')) {
435
+ docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
436
+ }
437
+ documentXml = documentXml.replace(docTagMatch[0], docTag);
348
438
  }
349
439
 
350
- // Update document.xml in the zip
440
+ // Update document.xml
351
441
  zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
352
442
 
353
- // Create comments.xml
354
- const commentsXml = createCommentsXml(commentsWithIds);
443
+ // All comments (parents + replies) go in comments.xml
444
+ // But only include if parent was injected
445
+ const includedComments = commentsWithIds.filter(c => {
446
+ if (!c.isReply) {
447
+ return injectedIds.has(c.id);
448
+ } else {
449
+ // Include reply if its parent was injected
450
+ return c.parentIdx !== null && injectedIds.has(commentsWithIds[c.parentIdx].id);
451
+ }
452
+ });
355
453
 
356
- // Check if comments.xml already exists
454
+ // Create comments.xml
455
+ const commentsXml = createCommentsXml(includedComments);
357
456
  if (zip.getEntry('word/comments.xml')) {
358
457
  zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
359
458
  } else {
360
459
  zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
361
460
  }
362
461
 
363
- // Create commentsExtended.xml for reply threading (Word 2013+)
364
- const hasReplies = commentsWithIds.some(c => c.replies && c.replies.length > 0);
365
- if (hasReplies) {
366
- const commentsExtXml = createCommentsExtendedXml(commentsWithIds);
367
- if (zip.getEntry('word/commentsExtended.xml')) {
368
- zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
369
- } else {
370
- zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
371
- }
462
+ // Create commentsExtended.xml with reply threading
463
+ const commentsExtXml = createCommentsExtendedXml(includedComments);
464
+ if (zip.getEntry('word/commentsExtended.xml')) {
465
+ zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
466
+ } else {
467
+ zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
468
+ }
469
+
470
+ // Create commentsIds.xml (Word 2016+)
471
+ const commentsIdsXml = createCommentsIdsXml(includedComments);
472
+ if (zip.getEntry('word/commentsIds.xml')) {
473
+ zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
474
+ } else {
475
+ zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
372
476
  }
373
477
 
374
- // Update [Content_Types].xml to include comments
478
+ // Create commentsExtensible.xml (Word 2018+)
479
+ const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
480
+ if (zip.getEntry('word/commentsExtensible.xml')) {
481
+ zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
482
+ } else {
483
+ zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
484
+ }
485
+
486
+ // Create people.xml (author definitions with Windows Live IDs)
487
+ const peopleXml = createPeopleXml(includedComments);
488
+ if (zip.getEntry('word/people.xml')) {
489
+ zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
490
+ } else {
491
+ zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
492
+ }
493
+
494
+ // Update [Content_Types].xml
375
495
  const contentTypesEntry = zip.getEntry('[Content_Types].xml');
376
496
  if (contentTypesEntry) {
377
497
  let contentTypes = zip.readAsText(contentTypesEntry);
378
498
 
379
- // Add comments content type if not present
380
499
  if (!contentTypes.includes('comments.xml')) {
381
500
  const insertPoint = contentTypes.lastIndexOf('</Types>');
382
- const commentType = '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>';
383
- contentTypes = contentTypes.slice(0, insertPoint) + commentType + '\n' + contentTypes.slice(insertPoint);
501
+ contentTypes = contentTypes.slice(0, insertPoint) +
502
+ '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
503
+ contentTypes.slice(insertPoint);
384
504
  }
385
505
 
386
- // Add commentsExtended if we have replies
387
- if (hasReplies && !contentTypes.includes('commentsExtended.xml')) {
506
+ if (!contentTypes.includes('commentsExtended.xml')) {
388
507
  const insertPoint = contentTypes.lastIndexOf('</Types>');
389
- const extType = '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>';
390
- contentTypes = contentTypes.slice(0, insertPoint) + extType + '\n' + contentTypes.slice(insertPoint);
508
+ contentTypes = contentTypes.slice(0, insertPoint) +
509
+ '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
510
+ contentTypes.slice(insertPoint);
511
+ }
512
+
513
+ if (!contentTypes.includes('commentsIds.xml')) {
514
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
515
+ contentTypes = contentTypes.slice(0, insertPoint) +
516
+ '<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
517
+ contentTypes.slice(insertPoint);
518
+ }
519
+
520
+ if (!contentTypes.includes('commentsExtensible.xml')) {
521
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
522
+ contentTypes = contentTypes.slice(0, insertPoint) +
523
+ '<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
524
+ contentTypes.slice(insertPoint);
525
+ }
526
+
527
+ if (!contentTypes.includes('people.xml')) {
528
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
529
+ contentTypes = contentTypes.slice(0, insertPoint) +
530
+ '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
531
+ contentTypes.slice(insertPoint);
391
532
  }
392
533
 
393
534
  zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
394
535
  }
395
536
 
396
- // Update word/_rels/document.xml.rels to include comments relationship
537
+ // Update relationships
397
538
  const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
398
539
  if (relsEntry) {
399
540
  let rels = zip.readAsText(relsEntry);
400
541
 
401
- // Find max rId
402
542
  const rIdMatches = rels.match(/rId(\d+)/g) || [];
403
- const maxId = rIdMatches.reduce((max, r) => {
404
- const num = parseInt(r.replace('rId', ''));
405
- return num > max ? num : max;
406
- }, 0);
543
+ const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
407
544
 
408
- // Add comments relationship if not present
409
545
  if (!rels.includes('comments.xml')) {
410
546
  const insertPoint = rels.lastIndexOf('</Relationships>');
411
- const commentRel = `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>`;
412
- rels = rels.slice(0, insertPoint) + commentRel + '\n' + rels.slice(insertPoint);
547
+ rels = rels.slice(0, insertPoint) +
548
+ `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
549
+ rels.slice(insertPoint);
413
550
  }
414
551
 
415
- // Add commentsExtended relationship if needed
416
- if (hasReplies && !rels.includes('commentsExtended.xml')) {
552
+ if (!rels.includes('commentsExtended.xml')) {
417
553
  const insertPoint = rels.lastIndexOf('</Relationships>');
418
- const extRel = `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>`;
419
- rels = rels.slice(0, insertPoint) + extRel + '\n' + rels.slice(insertPoint);
554
+ rels = rels.slice(0, insertPoint) +
555
+ `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
556
+ rels.slice(insertPoint);
557
+ }
558
+
559
+ if (!rels.includes('commentsIds.xml')) {
560
+ const insertPoint = rels.lastIndexOf('</Relationships>');
561
+ rels = rels.slice(0, insertPoint) +
562
+ `<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
563
+ rels.slice(insertPoint);
564
+ }
565
+
566
+ if (!rels.includes('commentsExtensible.xml')) {
567
+ const insertPoint = rels.lastIndexOf('</Relationships>');
568
+ rels = rels.slice(0, insertPoint) +
569
+ `<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
570
+ rels.slice(insertPoint);
571
+ }
572
+
573
+ if (!rels.includes('people.xml')) {
574
+ const insertPoint = rels.lastIndexOf('</Relationships>');
575
+ rels = rels.slice(0, insertPoint) +
576
+ `<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
577
+ rels.slice(insertPoint);
420
578
  }
421
579
 
422
580
  zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
423
581
  }
424
582
 
425
- // Write the output file
426
583
  zip.writeZip(outputPath);
427
584
 
428
- const totalComments = commentsWithIds.reduce((sum, c) => {
429
- return sum + 1 + (c.replies ? c.replies.length : 0);
430
- }, 0);
585
+ const parentCount = includedComments.filter(c => !c.isReply).length;
586
+ const replyCount = includedComments.filter(c => c.isReply).length;
431
587
 
432
- return { success: true, commentCount: totalComments };
588
+ return {
589
+ success: true,
590
+ commentCount: parentCount,
591
+ replyCount: replyCount,
592
+ skippedComments: comments.length - includedComments.length,
593
+ };
433
594
 
434
595
  } catch (err) {
435
- return { success: false, commentCount: 0, error: err.message };
596
+ return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
436
597
  }
437
598
  }
438
599
 
439
- /**
440
- * Build DOCX with proper Word comments from markdown
441
- * @param {string} cleanDocxPath - Path to clean DOCX (built without comments)
442
- * @param {string} markdownPath - Path to markdown with CriticMarkup comments
443
- * @param {string} outputPath - Path for output DOCX with Word comments
444
- * @returns {Promise<{success: boolean, commentCount: number, error?: string}>}
445
- */
446
- export async function buildWithComments(cleanDocxPath, markdownPath, outputPath) {
447
- const markdown = fs.readFileSync(markdownPath, 'utf-8');
448
- return injectComments(cleanDocxPath, markdown, outputPath);
600
/**
 * Legacy entry point retained so existing imports keep resolving.
 *
 * Performs no injection and writes no file: it logs a warning and resolves
 * to a failure result that points callers at the marker-based flow
 * (prepareMarkdownWithMarkers + injectCommentsAtMarkers).
 *
 * @deprecated Use prepareMarkdownWithMarkers + injectCommentsAtMarkers instead.
 * @param {string} docxPath - Unused.
 * @param {string} markdown - Unused.
 * @param {string} outputPath - Unused; nothing is written.
 * @returns {Promise<{success: boolean, commentCount: number, replyCount: number, skippedComments: number, error: string}>}
 *   Always a failure result with all counts set to zero.
 */
export async function injectComments(docxPath, markdown, outputPath) {
  console.warn('Warning: Use prepareMarkdownWithMarkers + injectCommentsAtMarkers instead');
  return {
    success: false,
    commentCount: 0,
    // Included so the failure result exposes the same count fields as the
    // success result built elsewhere in this file (commentCount, replyCount,
    // skippedComments); callers can read replyCount without an undefined check.
    replyCount: 0,
    skippedComments: 0,
    error: 'Use marker-based flow',
  };
}
604
+
605
/**
 * Convenience wrapper around injectCommentsAtMarkers.
 *
 * Forwards its three arguments unchanged and resolves to whatever that call
 * yields; it adds no processing of its own.
 *
 * @param {string} cleanDocxPath - Passed through as the first argument.
 * @param {*} comments - Passed through as the second argument
 *   (presumably the comment list produced by the marker-preparation step;
 *   TODO confirm against injectCommentsAtMarkers).
 * @param {string} outputPath - Passed through as the third argument.
 * @returns {Promise<*>} The result of injectCommentsAtMarkers.
 */
export async function buildWithComments(cleanDocxPath, comments, outputPath) {
  const injectionResult = injectCommentsAtMarkers(cleanDocxPath, comments, outputPath);
  return injectionResult;
}