docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
@@ -1,840 +1,840 @@
1
- /**
2
- * Word comment injection with reply threading
3
- *
4
- * Flow:
5
- * 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
6
- * - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
7
- * - Subsequent adjacent comments = replies (no markers, attach to parent)
8
- * 2. Pandoc converts to DOCX
9
- * 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
10
- * - Replies go in comments.xml with parent reference in commentsExtended.xml
11
- */
12
-
13
- import * as fs from 'fs';
14
- import AdmZip from 'adm-zip';
15
- import { escapeXml } from './utils.js';
16
-
17
// Sentinel tokens written into the markdown around a comment's anchor text.
// A start marker reads `⟦CMS:<index>⟧` and an end marker reads `⟦CME:<index>⟧`;
// the same strings are searched for again in the generated document XML.
const MARKER_SUFFIX = '⟧';
const MARKER_START_PREFIX = '⟦CMS:';
const MARKER_END_PREFIX = '⟦CME:';
20
-
21
/** A single `{>>author: text<<}` comment as found in the markdown source. */
interface ParsedComment {
  /** Text before the first colon of the comment body, or 'Unknown'. */
  author: string;
  /** Comment body after the author prefix. */
  text: string;
  /** Contents of a trailing `[anchor]{.mark}` span, or null when absent. */
  anchor: string | null;
  /** Offset in the markdown where the comment starts. */
  start: number;
  /** Offset just past the comment (and its trailing anchor span, if any). */
  end: number;
  /** The markdown slice from `start` to `end`. */
  fullMatch: string;
}

/** A parsed comment annotated with its position in a reply thread. */
interface PreparedComment extends ParsedComment {
  /** True when the comment is attached to an earlier comment in its cluster. */
  isReply: boolean;
  /** Index of the parent in the comments array; null for parents. */
  parentIdx: number | null;
  /** Index of this comment in the comments array. */
  commentIdx: number;
  /** Set on a parent whose anchor was taken over from one of its replies. */
  anchorFromReply?: boolean;
  /** Set on the reply that must emit its parent's markers. */
  placesParentMarkers?: boolean;
}

/** Output of the marker-preparation pass. */
interface PrepareResult {
  /** Markdown with comments replaced by markers or removed. */
  markedMarkdown: string;
  /** All comments in document order. */
  comments: PreparedComment[];
}

/** A prepared comment plus the identifiers written to the comment XML parts. */
interface CommentWithIds extends PreparedComment {
  /** Value used for `w:id`. */
  id: string;
  /** paraId of the comment's first paragraph. */
  paraId: string;
  /** paraId of the second (empty) paragraph emitted for replies. */
  paraId2: string;
  /** Identifier used in the commentsIds / commentsExtensible parts. */
  durableId: string;
  /** paraId of the parent comment; set for replies only. */
  parentParaId?: string;
}

/** Outcome reported by the comment injection step. */
interface InjectionResult {
  success: boolean;
  // NOTE(review): presumably the number of comments written — confirm against the injector.
  commentCount: number;
  replyCount?: number;
  // NOTE(review): presumably comments whose markers could not be located — confirm.
  skippedComments: number;
  /** Failure description when `success` is false. */
  error?: string;
}
58
-
59
/**
 * Build a deterministic, upper-case hex paragraph ID for a comment paragraph.
 *
 * The value is `0x10000000 + commentIdx * 0x100000 + paraNum * 0x1000`,
 * left-padded to eight characters, so identical inputs always give the same ID.
 *
 * NOTE(review): from `commentIdx` 0xF00 (3840) onward the sum no longer fits in
 * eight hex digits — confirm that document sizes stay below that.
 *
 * @param commentIdx Zero-based index of the comment.
 * @param paraNum Paragraph number within the comment.
 * @returns The ID as an upper-case hex string.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const BASE = 0x10000000;
  const commentOffset = commentIdx * 0x00100000;
  const paragraphOffset = paraNum * 0x00001000;
  const hex = (BASE + commentOffset + paragraphOffset).toString(16);
  return hex.toUpperCase().padStart(8, '0');
}
66
-
67
/**
 * Parse CriticMarkup comments (`{>>author: text<<}`) and replace them with
 * position markers that survive conversion to DOCX.
 *
 * Threading:
 * - The first comment of a cluster is the parent. It is replaced by
 *   `⟦CMS:n⟧anchor⟦CME:n⟧`, where `n` is its index in the returned array.
 * - Later comments of the cluster are replies. They are removed from the
 *   markdown and only carry `parentIdx`.
 * - If a reply has a trailing `[anchor]{.mark}` span and its parent has none,
 *   the anchor moves to the parent and the reply's position receives the
 *   parent's markers.
 *
 * @param markdown Markdown source containing CriticMarkup comments.
 * @returns `markedMarkdown` (comments replaced or removed) and `comments`
 *   (every comment in document order, with author, text, isReply, parentIdx).
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Only the comment block is matched by regex. The optional trailing
  // `[anchor]{.mark}` span is parsed by hand because a `[^\]]+` pattern stops
  // at the inner `]` of nested syntax such as `[[0..9]]{.mark}`.
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;

  // Walk balanced brackets starting at `fromIdx` (after optional whitespace)
  // and accept the span only when it is immediately followed by `{.mark}`.
  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let i = fromIdx;
    while (i < text.length && /\s/.test(text[i] ?? '')) i++;
    if (text[i] !== '[') return null;
    let depth = 1;
    let j = i + 1;
    while (j < text.length) {
      const ch = text[j];
      if (ch === '[') depth++;
      else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      j++;
    }
    if (depth !== 0) return null;
    if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
    return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
  }

  const REPLY_PREFIX = '↪ ';
  const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    // A colon within the first 30 characters separates author from body.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    // The `↪ ` author prefix is an explicit reply signal; strip it so the
    // real author name is what ends up in the document.
    let explicitReply = false;
    if (author.startsWith(REPLY_PREFIX)) {
      explicitReply = true;
      author = author.slice(REPLY_PREFIX.length).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
      explicitReply,
    });

    // Skip past the consumed anchor so its body is not scanned for comments.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // Two-mode reply detection:
  // - if any comment carries the `↪ ` prefix, only prefixed comments are replies;
  // - otherwise a comment starting fewer than 10 characters after the
  //   previous one is treated as a reply (hand-typed CriticMarkup).
  const ADJACENT_THRESHOLD = 10;
  const useExplicitMode = rawMatches.some(m => m.explicitReply);
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // Index of the first comment in the current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    const isAdjacent = useExplicitMode ? m.explicitReply : gap < ADJACENT_THRESHOLD;

    // A non-adjacent comment starts a new cluster.
    if (!isAdjacent) {
      clusterParentIdx = -1;
    }

    // With no open cluster the comment becomes a parent, regardless of author.
    const isReply = clusterParentIdx !== -1;
    comments.push({
      author: m.author,
      text: m.text,
      anchor: m.anchor,
      start: m.start,
      end: m.end,
      fullMatch: m.fullMatch,
      isReply,
      parentIdx: isReply ? clusterParentIdx : null,
      commentIdx: comments.length,
    });
    if (!isReply) {
      clusterParentIdx = comments.length - 1;
    }

    lastCommentEnd = m.end;
  }

  // Propagate anchors from replies to anchor-less parents. The anchor text is
  // remembered per reply so marker generation can reuse exactly what the
  // bracket walker captured. Re-extracting it from `fullMatch` with a regex
  // fails for anchors containing `]` and would drop the whole thread.
  const propagatedAnchors = new Map<number, string>();
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true; // markers are placed by the reply
        c.placesParentMarkers = true;
        propagatedAnchors.set(c.commentIdx, c.anchor);
        c.anchor = null;
      }
    }
  }

  const wrapInMarkers = (idx: number, anchorText: string): string =>
    `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}${anchorText}${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

  // Process from end to start so earlier offsets stay valid.
  let markedMarkdown = markdown;

  // When a comment is removed, also consume at most one preceding whitespace
  // character to avoid a double space. Consuming more would shift positions
  // that lower-index comments still depend on.
  const withOneLeadingSpace = (start: number): number =>
    start > 0 && /\s/.test(markedMarkdown[start - 1] ?? '') ? start - 1 : start;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    let from: number;
    let replacement = '';

    if (c.isReply) {
      // Replies live only in comments.xml. A reply that donated its anchor
      // writes the PARENT's markers around that anchor text instead.
      // NOTE(review): a reply that keeps its own anchor is removed together
      // with its `[anchor]{.mark}` text — confirm this is intended.
      from = withOneLeadingSpace(c.start);
      const anchorText = c.placesParentMarkers ? propagatedAnchors.get(c.commentIdx) : undefined;
      if (anchorText !== undefined && c.parentIdx !== null) {
        replacement = wrapInMarkers(c.parentIdx, anchorText);
      }
    } else if (c.anchorFromReply) {
      // Markers are placed by the reply; just remove this comment.
      from = withOneLeadingSpace(c.start);
    } else {
      // Normal parent: replace the comment (and anchor span) with markers.
      from = c.start;
      replacement = wrapInMarkers(i, c.anchor ?? '');
    }

    markedMarkdown = markedMarkdown.slice(0, from) + replacement + markedMarkdown.slice(c.end);
  }

  return { markedMarkdown, comments };
}
284
-
285
/**
 * Build the comments part: one `<w:comment>` element per comment.
 *
 * Every comment gets a first paragraph holding the annotation reference and
 * the comment text. Replies get a second, empty paragraph identified by
 * `paraId2`.
 *
 * @param comments Comments with their assigned ids and paragraph ids.
 * @returns The complete XML document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Date without milliseconds, e.g. 2025-12-30T08:33:00Z
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  // One rsid (8-char hex) shared by all comments in this batch
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  for (const comment of comments) {
    // First code point of each space-separated name part. Taking whole code
    // points avoids emitting half of a surrogate pair, and the result is
    // escaped because it is written into an XML attribute (a name part may
    // start with `<`, `&` or `"`).
    const initials = comment.author
      .split(' ')
      .map(part => Array.from(part)[0] ?? '')
      .join('');

    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second empty paragraph: rsidRDefault matches rsidR
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
313
-
314
/**
 * Build the commentsExtended part that records reply threading.
 *
 * A reply with a known parent paragraph id is written with its `paraId2` and
 * a `paraIdParent` link. Every other comment is written with its `paraId`.
 *
 * @param comments Comments with paragraph ids (and `parentParaId` on replies).
 * @returns The complete XML document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const prolog = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map(entry => {
    const linkedReply = entry.isReply && entry.parentParaId;
    return linkedReply
      ? `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`
      : `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
  });

  return prolog + rootOpen + entries.join('') + '</w15:commentsEx>';
}
332
-
333
/**
 * Produce an 8-character upper-case hex durable ID for a comment.
 *
 * The value starts from a time-derived base between 0x10000000 and
 * 0x50000000, is advanced by 0x01000000 per index, and is reduced modulo
 * 0x7FFFFFFF so it always stays within the signed 32-bit range.
 *
 * @param index Zero-based comment index.
 * @returns The ID as a zero-padded, upper-case hex string.
 */
function generateDurableId(index: number): string {
  const SIGNED_32_MAX = 0x7FFFFFFF;
  const timeSeed = Date.now() % 0x40000000;
  const start = 0x10000000 + timeSeed;
  const wrapped = (start + index * 0x01000000) % SIGNED_32_MAX;
  return wrapped.toString(16).toUpperCase().padStart(8, '0');
}
341
-
342
/**
 * Build the commentsIds part that maps paragraph ids to durable ids.
 *
 * One entry is written per comment, keyed by the comment's LAST paragraph:
 * `paraId` for parents (one paragraph) and `paraId2` for replies (two).
 *
 * @param comments Comments with paragraph ids and durable ids.
 * @returns The complete XML document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cid:commentsIds ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ',
    'mc:Ignorable="w16cid">',
  ].join('');

  const entries = comments.map(entry => {
    const lastParaId = entry.isReply ? entry.paraId2 : entry.paraId;
    return `<w16cid:commentId w16cid:paraId="${lastParaId}" w16cid:durableId="${entry.durableId}"/>`;
  });

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
    + rootOpen
    + entries.join('')
    + '</w16cid:commentsIds>';
}
361
-
362
/**
 * Build the commentsExtensible part: one entry per comment carrying its
 * durable id and the current UTC timestamp (without milliseconds).
 *
 * @param comments Comments with durable ids.
 * @returns The complete XML document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cex:commentsExtensible ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ',
    'mc:Ignorable="w16cex">',
  ].join('');

  const entries = comments.map(
    entry => `<w16cex:commentExtensible w16cex:durableId="${entry.durableId}" w16cex:dateUtc="${timestamp}"/>`,
  );

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
    + rootOpen
    + entries.join('')
    + '</w16cex:commentsExtensible>';
}
380
-
381
- // Generate deterministic user IDs for authors (no hardcoded personal data)
382
-
383
/**
 * Build the people part listing every distinct comment author once, in order
 * of first appearance, each with a user id derived from the author name.
 *
 * @param comments Comments whose authors should be listed.
 * @returns The complete XML document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  const rootOpen = [
    '<w15:people ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main" ',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" ',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" ',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" ',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" ',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" ',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" ',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" ',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh">',
  ].join('');

  // A Set keeps insertion order, so authors appear in first-seen order.
  const uniqueAuthors = new Set<string>();
  for (const entry of comments) {
    uniqueAuthors.add(entry.author);
  }

  const people: string[] = [];
  for (const name of uniqueAuthors) {
    const userId = generateUserId(name);
    people.push(
      `<w15:person w15:author="${escapeXml(name)}">`
        + `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>`
        + `</w15:person>`,
    );
  }

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n'
    + rootOpen
    + people.join('')
    + '</w15:people>';
}
412
-
413
/**
 * Derive a deterministic 16-character hex identifier from an author name.
 *
 * Folds the name's UTF-16 code units into a 32-bit hash (`hash * 31 + unit`),
 * takes the absolute value, and left-pads its lower-case hex form with zeros.
 *
 * @param author Author display name.
 * @returns A 16-character, zero-padded hex string.
 */
function generateUserId(author: string): string {
  let acc = 0;
  for (let pos = 0; pos < author.length; pos += 1) {
    // `| 0` wraps to a signed 32-bit integer after every step.
    acc = (acc * 31 + author.charCodeAt(pos)) | 0;
  }
  const hex = Math.abs(acc).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
422
-
423
- /**
424
- * Inject comments at marker positions
425
- */
426
- export async function injectCommentsAtMarkers(
427
- docxPath: string,
428
- comments: PreparedComment[],
429
- outputPath: string
430
- ): Promise<InjectionResult> {
431
- try {
432
- if (!fs.existsSync(docxPath)) {
433
- return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
434
- }
435
-
436
- if (comments.length === 0) {
437
- fs.copyFileSync(docxPath, outputPath);
438
- return { success: true, commentCount: 0, skippedComments: 0 };
439
- }
440
-
441
- const zip = new AdmZip(docxPath);
442
- const documentEntry = zip.getEntry('word/document.xml');
443
- if (!documentEntry) {
444
- return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
445
- }
446
-
447
- let documentXml = zip.readAsText(documentEntry);
448
-
449
- // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
450
- const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
451
- ...c,
452
- id: String(idx + 1),
453
- paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
454
- paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
455
- durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
456
- }));
457
-
458
- // Link replies to parent paraIds
459
- for (const c of commentsWithIds) {
460
- if (c.isReply && c.parentIdx !== null) {
461
- const parent = commentsWithIds[c.parentIdx];
462
- if (parent) {
463
- c.parentParaId = parent.paraId;
464
- }
465
- }
466
- }
467
-
468
- const injectedIds = new Set<string>();
469
-
470
- // Process only parent comments (non-replies) for document ranges
471
- const parentComments = commentsWithIds.filter(c => !c.isReply);
472
-
473
- for (let i = parentComments.length - 1; i >= 0; i--) {
474
- const comment = parentComments[i];
475
- if (!comment) continue;
476
- const idx = comment.commentIdx;
477
-
478
- const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
479
- const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
480
-
481
- // Pandoc duplicates inline image alt-text into <wp:docPr descr="...">
482
- // metadata attributes AND into the visible caption paragraph. A naive
483
- // indexOf hits the metadata-attribute occurrence first, where there is
484
- // no <w:t> element so dissectRun fails. Skip occurrences whose position
485
- // is inside an XML tag (last unbalanced '<' before position).
486
- // See: https://github.com/gcol33/docrev/issues/4
487
- function findInTextContent(haystack: string, needle: string, fromIdx = 0): number {
488
- let i = fromIdx;
489
- while (true) {
490
- const p = haystack.indexOf(needle, i);
491
- if (p < 0) return -1;
492
- const lastLt = haystack.lastIndexOf('<', p);
493
- const lastGt = haystack.lastIndexOf('>', p);
494
- if (lastLt > lastGt) {
495
- i = p + 1;
496
- continue;
497
- }
498
- return p;
499
- }
500
- }
501
-
502
- const startPos = findInTextContent(documentXml, startMarker);
503
- const endPos = startPos === -1
504
- ? -1
505
- : findInTextContent(documentXml, endMarker, startPos + startMarker.length);
506
-
507
- if (startPos === -1 || endPos === -1) continue;
508
-
509
- // Find the runs containing each marker. Pandoc may split a single
510
- // markdown anchor across multiple <w:r> blocks when it applies styling
511
- // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
512
- // The same-run path (current happy path) collapses into the multi-run
513
- // path when start and end runs coincide.
514
- const startRunOpen = Math.max(
515
- documentXml.lastIndexOf('<w:r>', startPos),
516
- documentXml.lastIndexOf('<w:r ', startPos),
517
- );
518
- const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
519
- const endRunOpen = Math.max(
520
- documentXml.lastIndexOf('<w:r>', endPos),
521
- documentXml.lastIndexOf('<w:r ', endPos),
522
- );
523
- const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
524
-
525
- if (
526
- startRunOpen === -1 || startRunCloseIdx === -1 ||
527
- endRunOpen === -1 || endRunCloseIdx === -1
528
- ) continue;
529
-
530
- const startRunClose = startRunCloseIdx + '</w:r>'.length;
531
- const endRunClose = endRunCloseIdx + '</w:r>'.length;
532
-
533
- const startRunFull = documentXml.slice(startRunOpen, startRunClose);
534
- const endRunFull = documentXml.slice(endRunOpen, endRunClose);
535
-
536
- // Extract <w:rPr> and <w:t> element shape from each run. Both pieces
537
- // are needed verbatim so a textBefore split keeps its original styling
538
- // and so the post-anchor textAfter render keeps the end run's styling.
539
- function dissectRun(runXml: string, marker: string): {
540
- rPr: string;
541
- tElement: string;
542
- textBefore: string;
543
- textAfter: string;
544
- } | null {
545
- const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
546
- const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
547
- if (!tMatch) return null;
548
- const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
549
- if (!tOpenMatch) return null;
550
- const tContent = tMatch[1] ?? '';
551
- const markerInT = tContent.indexOf(marker);
552
- if (markerInT === -1) return null;
553
- return {
554
- rPr: rPrMatch ? rPrMatch[0] : '',
555
- tElement: tOpenMatch[0],
556
- textBefore: tContent.slice(0, markerInT),
557
- textAfter: tContent.slice(markerInT + marker.length),
558
- };
559
- }
560
-
561
- let replacement = '';
562
- const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
563
-
564
- const emitRangeStarts = () => {
565
- replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
566
- for (const reply of replies) {
567
- replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
568
- }
569
- };
570
-
571
- const emitRangeEnds = () => {
572
- replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
573
- replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
574
- for (const reply of replies) {
575
- replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
576
- replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
577
- injectedIds.add(reply.id);
578
- }
579
- };
580
-
581
- if (startRunOpen === endRunOpen) {
582
- // Same-run path: both markers live inside one <w:t>. Original logic.
583
- const startInfo = dissectRun(startRunFull, startMarker);
584
- if (!startInfo) continue;
585
- const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
586
- const endInTextRel = startInfo.textAfter.indexOf(endMarker);
587
- if (endInTextRel === -1) continue;
588
- const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
589
- let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
590
- let anchorText = anchorTextSame;
591
- let textBefore = startInfo.textBefore;
592
-
593
- // Empty anchor: borrow the next word so the comment has something
594
- // to anchor on. Then normalize the trailing double space.
595
- if (!anchorText && textAfter) {
596
- const wordMatch = textAfter.match(/^\s*(\S+)/);
597
- if (wordMatch) {
598
- anchorText = wordMatch[1] ?? '';
599
- textAfter = textAfter.slice(wordMatch[0].length);
600
- }
601
- }
602
- if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
603
- textAfter = textAfter.slice(1);
604
- }
605
- // Suppress unused warning for pre-empty-anchor fullText var
606
- void fullText;
607
-
608
- if (textBefore) {
609
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
610
- }
611
- emitRangeStarts();
612
- if (anchorText) {
613
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
614
- }
615
- emitRangeEnds();
616
- if (textAfter) {
617
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
618
- }
619
- documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
620
- injectedIds.add(comment.id);
621
- continue;
622
- }
623
-
624
- // Multi-run path: markers sit in different <w:r> blocks because pandoc
625
- // applied mid-anchor styling. Split the start run at the start marker,
626
- // keep all middle runs verbatim (they carry the styled anchor portions),
627
- // split the end run at the end marker.
628
- const startInfo = dissectRun(startRunFull, startMarker);
629
- const endInfo = dissectRun(endRunFull, endMarker);
630
- if (!startInfo || !endInfo) continue;
631
-
632
- const middle = documentXml.slice(startRunClose, endRunOpen);
633
-
634
- if (startInfo.textBefore) {
635
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
636
- }
637
- emitRangeStarts();
638
- if (startInfo.textAfter) {
639
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
640
- }
641
- replacement += middle;
642
- if (endInfo.textBefore) {
643
- replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
644
- }
645
- emitRangeEnds();
646
- if (endInfo.textAfter) {
647
- replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
648
- }
649
-
650
- documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
651
- injectedIds.add(comment.id);
652
- }
653
-
654
- // Add required namespaces to document.xml for comment threading
655
- const requiredNs: Record<string, string> = {
656
- 'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
657
- 'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
658
- 'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
659
- 'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
660
- 'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
661
- };
662
-
663
- // Find <w:document and add namespaces
664
- const docTagMatch = documentXml.match(/<w:document[^>]*>/);
665
- if (docTagMatch) {
666
- let docTag = docTagMatch[0];
667
- let modified = false;
668
- for (const [attr, val] of Object.entries(requiredNs)) {
669
- if (!docTag.includes(attr)) {
670
- docTag = docTag.replace('>', ` ${attr}="${val}">`);
671
- modified = true;
672
- }
673
- }
674
- // Add mc:Ignorable if mc namespace was added
675
- if (modified && !docTag.includes('mc:Ignorable')) {
676
- docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
677
- }
678
- documentXml = documentXml.replace(docTagMatch[0], docTag);
679
- }
680
-
681
- // Update document.xml
682
- zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
683
-
684
- // All comments (parents + replies) go in comments.xml
685
- // But only include if parent was injected
686
- const includedComments = commentsWithIds.filter(c => {
687
- if (!c.isReply) {
688
- return injectedIds.has(c.id);
689
- } else {
690
- // Include reply if its parent was injected
691
- const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
692
- return parent && injectedIds.has(parent.id);
693
- }
694
- });
695
-
696
- // Create comments.xml
697
- const commentsXml = createCommentsXml(includedComments);
698
- if (zip.getEntry('word/comments.xml')) {
699
- zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
700
- } else {
701
- zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
702
- }
703
-
704
- // Create commentsExtended.xml with reply threading
705
- const commentsExtXml = createCommentsExtendedXml(includedComments);
706
- if (zip.getEntry('word/commentsExtended.xml')) {
707
- zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
708
- } else {
709
- zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
710
- }
711
-
712
- // Create commentsIds.xml (Word 2016+)
713
- const commentsIdsXml = createCommentsIdsXml(includedComments);
714
- if (zip.getEntry('word/commentsIds.xml')) {
715
- zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
716
- } else {
717
- zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
718
- }
719
-
720
- // Create commentsExtensible.xml (Word 2018+)
721
- const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
722
- if (zip.getEntry('word/commentsExtensible.xml')) {
723
- zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
724
- } else {
725
- zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
726
- }
727
-
728
- // Create people.xml (author definitions with Windows Live IDs)
729
- const peopleXml = createPeopleXml(includedComments);
730
- if (zip.getEntry('word/people.xml')) {
731
- zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
732
- } else {
733
- zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
734
- }
735
-
736
- // Update [Content_Types].xml
737
- const contentTypesEntry = zip.getEntry('[Content_Types].xml');
738
- if (contentTypesEntry) {
739
- let contentTypes = zip.readAsText(contentTypesEntry);
740
-
741
- if (!contentTypes.includes('comments.xml')) {
742
- const insertPoint = contentTypes.lastIndexOf('</Types>');
743
- contentTypes = contentTypes.slice(0, insertPoint) +
744
- '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
745
- contentTypes.slice(insertPoint);
746
- }
747
-
748
- if (!contentTypes.includes('commentsExtended.xml')) {
749
- const insertPoint = contentTypes.lastIndexOf('</Types>');
750
- contentTypes = contentTypes.slice(0, insertPoint) +
751
- '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
752
- contentTypes.slice(insertPoint);
753
- }
754
-
755
- if (!contentTypes.includes('commentsIds.xml')) {
756
- const insertPoint = contentTypes.lastIndexOf('</Types>');
757
- contentTypes = contentTypes.slice(0, insertPoint) +
758
- '<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
759
- contentTypes.slice(insertPoint);
760
- }
761
-
762
- if (!contentTypes.includes('commentsExtensible.xml')) {
763
- const insertPoint = contentTypes.lastIndexOf('</Types>');
764
- contentTypes = contentTypes.slice(0, insertPoint) +
765
- '<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
766
- contentTypes.slice(insertPoint);
767
- }
768
-
769
- if (!contentTypes.includes('people.xml')) {
770
- const insertPoint = contentTypes.lastIndexOf('</Types>');
771
- contentTypes = contentTypes.slice(0, insertPoint) +
772
- '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
773
- contentTypes.slice(insertPoint);
774
- }
775
-
776
- zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
777
- }
778
-
779
- // Update relationships
780
- const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
781
- if (relsEntry) {
782
- let rels = zip.readAsText(relsEntry);
783
-
784
- const rIdMatches = rels.match(/rId(\d+)/g) || [];
785
- const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
786
-
787
- if (!rels.includes('comments.xml')) {
788
- const insertPoint = rels.lastIndexOf('</Relationships>');
789
- rels = rels.slice(0, insertPoint) +
790
- `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
791
- rels.slice(insertPoint);
792
- }
793
-
794
- if (!rels.includes('commentsExtended.xml')) {
795
- const insertPoint = rels.lastIndexOf('</Relationships>');
796
- rels = rels.slice(0, insertPoint) +
797
- `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
798
- rels.slice(insertPoint);
799
- }
800
-
801
- if (!rels.includes('commentsIds.xml')) {
802
- const insertPoint = rels.lastIndexOf('</Relationships>');
803
- rels = rels.slice(0, insertPoint) +
804
- `<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
805
- rels.slice(insertPoint);
806
- }
807
-
808
- if (!rels.includes('commentsExtensible.xml')) {
809
- const insertPoint = rels.lastIndexOf('</Relationships>');
810
- rels = rels.slice(0, insertPoint) +
811
- `<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
812
- rels.slice(insertPoint);
813
- }
814
-
815
- if (!rels.includes('people.xml')) {
816
- const insertPoint = rels.lastIndexOf('</Relationships>');
817
- rels = rels.slice(0, insertPoint) +
818
- `<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
819
- rels.slice(insertPoint);
820
- }
821
-
822
- zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
823
- }
824
-
825
- zip.writeZip(outputPath);
826
-
827
- const parentCount = includedComments.filter(c => !c.isReply).length;
828
- const replyCount = includedComments.filter(c => c.isReply).length;
829
-
830
- return {
831
- success: true,
832
- commentCount: parentCount,
833
- replyCount: replyCount,
834
- skippedComments: comments.length - includedComments.length,
835
- };
836
-
837
- } catch (err: any) {
838
- return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
839
- }
840
- }
1
+ /**
2
+ * Word comment injection with reply threading
3
+ *
4
+ * Flow:
5
+ * 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
6
+ * - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
7
+ * - Subsequent adjacent comments = replies (no markers, attach to parent)
8
+ * 2. Pandoc converts to DOCX
9
+ * 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
10
+ * - Replies go in comments.xml with parent reference in commentsExtended.xml
11
+ */
12
+
13
+ import * as fs from 'fs';
14
+ import AdmZip from 'adm-zip';
15
+ import { escapeXml } from './utils.js';
16
+
17
+ const MARKER_START_PREFIX = '⟦CMS:'; // Opens a range-start marker; followed by the comment index and MARKER_SUFFIX
18
+ const MARKER_END_PREFIX = '⟦CME:'; // Opens a range-end marker; followed by the comment index and MARKER_SUFFIX
19
+ const MARKER_SUFFIX = '⟧'; // Closes both the start and the end marker
20
+
21
+ interface ParsedComment { // One `{>>...<<}` comment as matched in the markdown source
22
+ author: string; // Text before the first colon of the comment body (trimmed, reply prefix stripped); 'Unknown' when no author is parsed
23
+ text: string; // Comment body after the author prefix, or the whole body when no author is parsed
24
+ anchor: string | null; // Contents of a trailing `[anchor]{.mark}` span, or null when the comment has none
25
+ start: number; // Offset of the comment's `{>>` in the original markdown
26
+ end: number; // Offset just past the comment, or past the trailing anchor span when one was consumed
27
+ fullMatch: string; // The markdown slice from start to end
28
+ }
29
+
30
+ interface PreparedComment extends ParsedComment { // A parsed comment plus its position in a reply thread
31
+ isReply: boolean; // false for the first comment of a cluster (the parent), true for the following ones
32
+ parentIdx: number | null; // Index of the cluster's parent in the comments array; null for parents
33
+ commentIdx: number; // This comment's own index in the comments array; also the number embedded in its markers
34
+ anchorFromReply?: boolean; // Set on a parent whose anchor was moved over from one of its replies
35
+ placesParentMarkers?: boolean; // Set on the reply that gave up its anchor and therefore emits the parent's markers
36
+ }
37
+
38
+ interface PrepareResult { // Return value of prepareMarkdownWithMarkers
39
+ markedMarkdown: string; // Markdown with comments removed and range markers inserted for parent comments
40
+ comments: PreparedComment[]; // All comments (parents and replies) in document order
41
+ }
42
+
43
+ interface CommentWithIds extends PreparedComment { // A prepared comment plus the IDs written into the DOCX comment parts
44
+ id: string; // w:id of the comment; assigned as array index + 1
45
+ paraId: string; // paraId of the comment's first paragraph (from generateParaId(idx, 1))
46
+ paraId2: string; // paraId of the second, empty paragraph that is emitted for replies (from generateParaId(idx, 2))
47
+ durableId: string; // durableId written to commentsIds.xml and commentsExtensible.xml
48
+ parentParaId?: string; // For replies: the parent's paraId, used as w15:paraIdParent
49
+ }
50
+
51
+ interface InjectionResult { // Outcome reported by injectCommentsAtMarkers
52
+ success: boolean; // false when the input file or document.xml is missing, or when an error was caught
53
+ commentCount: number; // Number of injected parent (non-reply) comments; 0 on failure
54
+ replyCount?: number; // Number of injected replies; omitted on the early-return and failure paths
55
+ skippedComments: number; // Comments passed in but not written (input count minus included count, per the visible return site)
56
+ error?: string; // Failure description; only set when success is false
57
+ }
58
+
59
/**
 * Build a deterministic 8-character uppercase hex paragraph ID for one
 * paragraph of a comment (Word uses IDs such as "3F25BC58", "0331C187").
 *
 * Layout: 0x10000000 + commentIdx * 0x00100000 + paraNum * 0x00001000, so the
 * same (commentIdx, paraNum) pair always yields the same ID.
 *
 * The raw value is folded with `% 0x7FFFFFFF`, the same bound generateDurableId
 * applies. Without the fold, commentIdx >= 1792 produced values at or above
 * 0x80000000 and commentIdx >= 3840 produced 9-digit IDs. Results for smaller
 * indices are unchanged. Folded values carry the wrap count in their low 12
 * bits, which are zero for unfolded IDs with small paraNum, so they do not
 * collide with them.
 *
 * @param commentIdx - Zero-based index of the comment in the comments array.
 * @param paraNum - Paragraph number within the comment (callers pass 1 or 2).
 * @returns The ID as exactly 8 uppercase hex digits.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const MAX_ID = 0x7FFFFFFF;
  const raw = 0x10000000 + (commentIdx * 0x00100000) + (paraNum * 0x00001000);
  const id = raw % MAX_ID;
  return id.toString(16).toUpperCase().padStart(8, '0');
}
66
+
67
/**
 * Parse CriticMarkup comments (`{>>Author: text<<}`) and replace them with
 * inline range markers.
 *
 * A comment may be followed by an optional `[anchor]{.mark}` span; that span is
 * consumed together with the comment and its contents become the anchor.
 *
 * Reply detection has two modes, chosen from the markdown itself:
 *  - If any comment author carries the `↪ ` prefix, only prefixed comments are
 *    replies (prefix-only mode); the prefix is stripped from the author.
 *  - Otherwise a comment starting fewer than 10 characters after the previous
 *    comment's end is treated as a reply to the first comment of its cluster.
 *
 * Only parent comments get markers (`⟦CMS:n⟧anchor⟦CME:n⟧`, n = index in the
 * returned array). Replies are removed from the text.
 *
 * @param markdown - Markdown source containing CriticMarkup comments.
 * @returns `markedMarkdown` (markers inserted, comments removed) and
 *   `comments` (parents and replies in document order).
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Match the comment block first; the optional trailing `[anchor]{.mark}` is
  // parsed by hand because a regex such as `[^\]]+` stops at the inner `]` of
  // nested syntax (e.g. `[[0..9]]{.mark}`).
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;

  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let i = fromIdx;
    while (i < text.length && /\s/.test(text[i] ?? '')) i++;
    if (text[i] !== '[') return null;
    let depth = 1;
    let j = i + 1;
    while (j < text.length) {
      const ch = text[j];
      if (ch === '[') depth++;
      else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      j++;
    }
    if (depth !== 0) return null;
    if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
    return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
  }

  // Marker pair for comment `idx`, wrapped around `anchor`.
  const rangeMarkers = (idx: number, anchor: string): string =>
    `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

  // Start offset for deleting [start, end) from `text`. One preceding
  // whitespace char is swallowed to avoid a double space, but only when the
  // text after the span begins with whitespace (or the span ends the text).
  // Otherwise that whitespace is the only separator between the neighbouring
  // words and removing it would glue them together. At most one char is
  // consumed so offsets of earlier comments stay valid.
  const removalStart = (text: string, start: number, end: number): number => {
    const precededBySpace = start > 0 && /\s/.test(text[start - 1] ?? '');
    const followedBySpaceOrEnd = end >= text.length || /\s/.test(text[end] ?? '');
    return precededBySpace && followedBySpaceOrEnd ? start - 1 : start;
  };

  const REPLY_PREFIX = '↪ ';
  const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    const colonIdx = content.indexOf(':');
    // Only a colon near the start is taken as the author separator.
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    // The `↪ ` prefix is the authoritative reply signal; strip it so the
    // real author name is what ends up in the document.
    let explicitReply = false;
    if (author.startsWith(REPLY_PREFIX)) {
      explicitReply = true;
      author = author.slice(REPLY_PREFIX.length).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
      explicitReply,
    });

    // Skip past the consumed anchor so the next search does not start inside it.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // Prefix-only mode when any comment carries the reply prefix; otherwise
  // fall back to gap-based adjacency for hand-typed CriticMarkup.
  const ADJACENT_THRESHOLD = 10;
  const useExplicitMode = rawMatches.some(m => m.explicitReply);
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // Index of the first comment in the current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    const isAdjacent = useExplicitMode ? m.explicitReply : gap < ADJACENT_THRESHOLD;

    // A non-adjacent comment starts a new cluster.
    if (!isAdjacent) {
      clusterParentIdx = -1;
    }

    // First comment of a cluster is the parent (regardless of author);
    // everything after it in the cluster replies to that parent.
    const startsCluster = clusterParentIdx === -1;
    comments.push({
      author: m.author,
      text: m.text,
      anchor: m.anchor,
      start: m.start,
      end: m.end,
      fullMatch: m.fullMatch,
      isReply: !startsCluster,
      parentIdx: startsCluster ? null : clusterParentIdx,
      commentIdx: comments.length,
    });
    if (startsCluster) {
      clusterParentIdx = comments.length - 1;
    }

    lastCommentEnd = m.end;
  }

  // If a reply has an anchor but its parent does not, move the anchor to the
  // parent. The reply then becomes responsible for emitting the parent's
  // markers, because the anchor text sits at the reply's position.
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true;
        c.placesParentMarkers = true;
        c.anchor = null;
      }
    }
  }

  // Rewrite from the end of the document backwards so the recorded offsets of
  // earlier comments remain valid.
  let markedMarkdown = markdown;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (c.isReply) {
      const parentIdx = c.parentIdx;
      const parent = parentIdx !== null ? comments[parentIdx] : undefined;
      // Use the anchor stored on the parent instead of re-extracting it from
      // fullMatch with a regex: the regex cannot match anchors that contain
      // brackets, which silently dropped the parent's markers.
      const propagatedAnchor = c.placesParentMarkers ? parent?.anchor : null;

      if (parentIdx !== null && propagatedAnchor) {
        // Markers are inserted here, so the whitespace before the reply is
        // kept as the separator in front of the anchor text.
        markedMarkdown =
          markedMarkdown.slice(0, c.start) +
          rangeMarkers(parentIdx, propagatedAnchor) +
          markedMarkdown.slice(c.end);
      } else {
        // Plain reply: remove it from the text (it lives in comments.xml only).
        const from = removalStart(markedMarkdown, c.start, c.end);
        markedMarkdown = markedMarkdown.slice(0, from) + markedMarkdown.slice(c.end);
      }
      continue;
    }

    if (c.anchorFromReply) {
      // The reply emits this parent's markers; just remove the comment.
      const from = removalStart(markedMarkdown, c.start, c.end);
      markedMarkdown = markedMarkdown.slice(0, from) + markedMarkdown.slice(c.end);
    } else {
      // Normal parent: replace the comment (and anchor span) with markers.
      markedMarkdown =
        markedMarkdown.slice(0, c.start) +
        rangeMarkers(i, c.anchor ?? '') +
        markedMarkdown.slice(c.end);
    }
  }

  return { markedMarkdown, comments };
}
284
+
285
/**
 * Serialize comments (parents and replies) into the body of word/comments.xml.
 *
 * Every comment gets one paragraph holding the annotation reference and the
 * comment text. Replies get an additional empty second paragraph whose paraId
 * is `paraId2`.
 *
 * @param comments - Comments with their assigned IDs, in output order.
 * @returns The complete comments.xml document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Word expects the date without milliseconds: 2025-12-30T08:33:00Z
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  // One rsid (8-char hex) shared by all comments in this batch
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  for (const comment of comments) {
    // First character of each space-separated word. Array.from iterates by
    // code point, so an astral character is not cut into a lone surrogate.
    const initials = comment.author
      .split(' ')
      .map(word => Array.from(word)[0] ?? '')
      .join('');

    // Initials are derived from the author name, so they need the same
    // escaping as the author attribute (a word may start with & < or ").
    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second empty paragraph: rsidRDefault matches rsidR
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
313
+
314
/**
 * Serialize word/commentsExtended.xml, the part that carries reply threading.
 *
 * A reply with a known parent paraId is written with its second-paragraph ID
 * (`paraId2`) and a `paraIdParent` link. Every other comment is written with
 * its first-paragraph ID only.
 *
 * @param comments - Comments with their assigned IDs, in output order.
 * @returns The complete commentsExtended.xml document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const prolog = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map(entry => {
    const isLinkedReply = entry.isReply && entry.parentParaId;
    return isLinkedReply
      ? `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`
      : `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
  });

  return prolog + rootOpen + entries.join('') + '</w15:commentsEx>';
}
332
+
333
/**
 * Produce an 8-character uppercase hex durableId for the comment at `index`.
 *
 * The value is seeded from the current time and spaced by 0x01000000 per
 * index, then reduced modulo 0x7FFFFFFF so it stays inside the positive
 * signed 32-bit range (Word reads durableIds as signed 32-bit integers).
 *
 * @param index - Zero-based index of the comment.
 * @returns The ID as 8 uppercase hex digits.
 */
function generateDurableId(index: number): string {
  const SIGNED_32_LIMIT = 0x7FFFFFFF;
  // Time-derived offset keeps the starting point between 0x10000000 and 0x50000000
  const timeOffset = Date.now() % 0x40000000;
  const unbounded = 0x10000000 + timeOffset + index * 0x01000000;
  const bounded = unbounded % SIGNED_32_LIMIT;
  return bounded.toString(16).toUpperCase().padStart(8, '0');
}
341
+
342
/**
 * Serialize word/commentsIds.xml, which pairs each comment's last paragraph
 * ID with its durableId.
 *
 * One entry is written per comment: parents (one paragraph) use `paraId`,
 * replies (two paragraphs) use `paraId2`.
 *
 * @param comments - Comments with their assigned IDs, in output order.
 * @returns The complete commentsIds.xml document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  // Minimal namespaces matching golden file structure
  const pieces: string[] = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w16cid:commentsIds ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ',
    'mc:Ignorable="w16cid">',
  ];

  for (const { isReply, paraId, paraId2, durableId } of comments) {
    const lastParaId = isReply ? paraId2 : paraId;
    pieces.push(`<w16cid:commentId w16cid:paraId="${lastParaId}" w16cid:durableId="${durableId}"/>`);
  }

  pieces.push('</w16cid:commentsIds>');
  return pieces.join('');
}
361
+
362
/**
 * Serialize word/commentsExtensible.xml: one entry per comment carrying its
 * durableId and a UTC timestamp (the current time, without milliseconds).
 *
 * @param comments - Comments with their assigned IDs, in output order.
 * @returns The complete commentsExtensible.xml document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cex:commentsExtensible ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ',
    'mc:Ignorable="w16cex">',
  ].join('');

  const body = comments
    .map(({ durableId }) => `<w16cex:commentExtensible w16cex:durableId="${durableId}" w16cex:dateUtc="${timestamp}"/>`)
    .join('');

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + rootOpen + body + '</w16cex:commentsExtensible>';
}
380
+
381
+ // Generate deterministic user IDs for authors (no hardcoded personal data)
382
+
383
/**
 * Serialize word/people.xml: one `w15:person` entry per distinct comment
 * author, in order of first appearance, each with a "Windows Live" presence
 * record whose userId comes from generateUserId.
 *
 * @param comments - Comments with their assigned IDs.
 * @returns The complete people.xml document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  // A Set keeps the first occurrence of each author and preserves order
  const uniqueAuthors = Array.from(new Set(comments.map(c => c.author)));

  const rootOpen = [
    '<w15:people ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main" ',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships" ',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" ',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" ',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" ',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml" ',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash" ',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex" ',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh">',
  ].join('');

  const people = uniqueAuthors.map(name => {
    const userId = generateUserId(name);
    return (
      `<w15:person w15:author="${escapeXml(name)}">` +
      `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>` +
      `</w15:person>`
    );
  });

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + rootOpen + people.join('') + '</w15:people>';
}
412
+
413
/**
 * Derive a deterministic 16-character lowercase hex ID from an author name.
 *
 * The name's UTF-16 code units are folded into a 32-bit rolling hash
 * (hash * 31 + unit, truncated to a signed 32-bit integer at every step).
 * The absolute value is rendered in hex and left-padded with zeros to 16
 * characters.
 *
 * @param author - Author display name.
 * @returns The 16-character ID; the same name always yields the same ID.
 */
function generateUserId(author: string): string {
  // split('') yields UTF-16 code units, matching a charCodeAt index walk
  const folded = author
    .split('')
    .reduce((acc, unit) => (((acc << 5) - acc) + unit.charCodeAt(0)) | 0, 0);
  const hex = Math.abs(folded).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
422
+
423
+ /**
424
+ * Inject comments at marker positions
425
+ */
426
+ export async function injectCommentsAtMarkers(
427
+ docxPath: string,
428
+ comments: PreparedComment[],
429
+ outputPath: string
430
+ ): Promise<InjectionResult> {
431
+ try {
432
+ if (!fs.existsSync(docxPath)) {
433
+ return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
434
+ }
435
+
436
+ if (comments.length === 0) {
437
+ fs.copyFileSync(docxPath, outputPath);
438
+ return { success: true, commentCount: 0, skippedComments: 0 };
439
+ }
440
+
441
+ const zip = new AdmZip(docxPath);
442
+ const documentEntry = zip.getEntry('word/document.xml');
443
+ if (!documentEntry) {
444
+ return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
445
+ }
446
+
447
+ let documentXml = zip.readAsText(documentEntry);
448
+
449
+ // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
450
+ const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
451
+ ...c,
452
+ id: String(idx + 1),
453
+ paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
454
+ paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
455
+ durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
456
+ }));
457
+
458
+ // Link replies to parent paraIds
459
+ for (const c of commentsWithIds) {
460
+ if (c.isReply && c.parentIdx !== null) {
461
+ const parent = commentsWithIds[c.parentIdx];
462
+ if (parent) {
463
+ c.parentParaId = parent.paraId;
464
+ }
465
+ }
466
+ }
467
+
468
+ const injectedIds = new Set<string>();
469
+
470
+ // Process only parent comments (non-replies) for document ranges
471
+ const parentComments = commentsWithIds.filter(c => !c.isReply);
472
+
473
+ for (let i = parentComments.length - 1; i >= 0; i--) {
474
+ const comment = parentComments[i];
475
+ if (!comment) continue;
476
+ const idx = comment.commentIdx;
477
+
478
+ const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
479
+ const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
480
+
481
+ // Pandoc duplicates inline image alt-text into <wp:docPr descr="...">
482
+ // metadata attributes AND into the visible caption paragraph. A naive
483
+ // indexOf hits the metadata-attribute occurrence first, where there is
484
+ // no <w:t> element so dissectRun fails. Skip occurrences whose position
485
+ // is inside an XML tag (last unbalanced '<' before position).
486
+ // See: https://github.com/gcol33/docrev/issues/4
487
+ function findInTextContent(haystack: string, needle: string, fromIdx = 0): number {
488
+ let i = fromIdx;
489
+ while (true) {
490
+ const p = haystack.indexOf(needle, i);
491
+ if (p < 0) return -1;
492
+ const lastLt = haystack.lastIndexOf('<', p);
493
+ const lastGt = haystack.lastIndexOf('>', p);
494
+ if (lastLt > lastGt) {
495
+ i = p + 1;
496
+ continue;
497
+ }
498
+ return p;
499
+ }
500
+ }
501
+
502
+ const startPos = findInTextContent(documentXml, startMarker);
503
+ const endPos = startPos === -1
504
+ ? -1
505
+ : findInTextContent(documentXml, endMarker, startPos + startMarker.length);
506
+
507
+ if (startPos === -1 || endPos === -1) continue;
508
+
509
+ // Find the runs containing each marker. Pandoc may split a single
510
+ // markdown anchor across multiple <w:r> blocks when it applies styling
511
+ // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
512
+ // The same-run path (current happy path) collapses into the multi-run
513
+ // path when start and end runs coincide.
514
+ const startRunOpen = Math.max(
515
+ documentXml.lastIndexOf('<w:r>', startPos),
516
+ documentXml.lastIndexOf('<w:r ', startPos),
517
+ );
518
+ const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
519
+ const endRunOpen = Math.max(
520
+ documentXml.lastIndexOf('<w:r>', endPos),
521
+ documentXml.lastIndexOf('<w:r ', endPos),
522
+ );
523
+ const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
524
+
525
+ if (
526
+ startRunOpen === -1 || startRunCloseIdx === -1 ||
527
+ endRunOpen === -1 || endRunCloseIdx === -1
528
+ ) continue;
529
+
530
+ const startRunClose = startRunCloseIdx + '</w:r>'.length;
531
+ const endRunClose = endRunCloseIdx + '</w:r>'.length;
532
+
533
+ const startRunFull = documentXml.slice(startRunOpen, startRunClose);
534
+ const endRunFull = documentXml.slice(endRunOpen, endRunClose);
535
+
536
// Split a single <w:r> run around `marker`.
//
// Returns the run's <w:rPr> block (or '' when it has none), the opening
// <w:t ...> tag verbatim, and the text on either side of the marker, so the
// caller can rebuild runs that keep the original styling and xml:space
// attribute. Returns null when the run has no <w:t> element or when the
// first <w:t> does not contain the marker.
function dissectRun(runXml: string, marker: string): {
  rPr: string;
  tElement: string;
  textBefore: string;
  textAfter: string;
} | null {
  const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);

  // The open tag must be exactly `w:t` (optionally followed by attributes).
  // A bare `<w:t[^>]*>` would also match siblings such as <w:tab/>, which
  // would then be returned as the text element and corrupt the rebuilt run.
  const tMatch = runXml.match(/(<w:t(?:\s[^>]*)?>)([\s\S]*?)<\/w:t>/);
  if (!tMatch) return null;

  const tElement = tMatch[1] ?? '';
  const tContent = tMatch[2] ?? '';
  const markerInT = tContent.indexOf(marker);
  if (markerInT === -1) return null;

  return {
    rPr: rPrMatch ? rPrMatch[0] : '',
    tElement,
    textBefore: tContent.slice(0, markerInT),
    textAfter: tContent.slice(markerInT + marker.length),
  };
}
560
+
561
+ let replacement = '';
562
+ const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
563
+
564
// Append a <w:commentRangeStart> for the parent comment followed by one for
// each of its replies, in that order, to the pending replacement XML.
const emitRangeStarts = () => {
  const openers = [`<w:commentRangeStart w:id="${comment.id}"/>`];
  replies.forEach(reply => {
    openers.push(`<w:commentRangeStart w:id="${reply.id}"/>`);
  });
  replacement += openers.join('');
};
570
+
571
// Append the <w:commentRangeEnd> and the <w:commentReference> run for the
// parent comment, then the same pair for every reply. Each reply id is also
// recorded in injectedIds as it is emitted.
const emitRangeEnds = () => {
  replacement +=
    `<w:commentRangeEnd w:id="${comment.id}"/>` +
    `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
  replies.forEach(reply => {
    replacement +=
      `<w:commentRangeEnd w:id="${reply.id}"/>` +
      `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
    injectedIds.add(reply.id);
  });
};
580
+
581
+ if (startRunOpen === endRunOpen) {
582
+ // Same-run path: both markers live inside one <w:t>. Original logic.
583
+ const startInfo = dissectRun(startRunFull, startMarker);
584
+ if (!startInfo) continue;
585
+ const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
586
+ const endInTextRel = startInfo.textAfter.indexOf(endMarker);
587
+ if (endInTextRel === -1) continue;
588
+ const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
589
+ let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
590
+ let anchorText = anchorTextSame;
591
+ let textBefore = startInfo.textBefore;
592
+
593
+ // Empty anchor: borrow the next word so the comment has something
594
+ // to anchor on. Then normalize the trailing double space.
595
+ if (!anchorText && textAfter) {
596
+ const wordMatch = textAfter.match(/^\s*(\S+)/);
597
+ if (wordMatch) {
598
+ anchorText = wordMatch[1] ?? '';
599
+ textAfter = textAfter.slice(wordMatch[0].length);
600
+ }
601
+ }
602
+ if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
603
+ textAfter = textAfter.slice(1);
604
+ }
605
+ // Suppress unused warning for pre-empty-anchor fullText var
606
+ void fullText;
607
+
608
+ if (textBefore) {
609
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
610
+ }
611
+ emitRangeStarts();
612
+ if (anchorText) {
613
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
614
+ }
615
+ emitRangeEnds();
616
+ if (textAfter) {
617
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
618
+ }
619
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
620
+ injectedIds.add(comment.id);
621
+ continue;
622
+ }
623
+
624
+ // Multi-run path: markers sit in different <w:r> blocks because pandoc
625
+ // applied mid-anchor styling. Split the start run at the start marker,
626
+ // keep all middle runs verbatim (they carry the styled anchor portions),
627
+ // split the end run at the end marker.
628
+ const startInfo = dissectRun(startRunFull, startMarker);
629
+ const endInfo = dissectRun(endRunFull, endMarker);
630
+ if (!startInfo || !endInfo) continue;
631
+
632
+ const middle = documentXml.slice(startRunClose, endRunOpen);
633
+
634
+ if (startInfo.textBefore) {
635
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
636
+ }
637
+ emitRangeStarts();
638
+ if (startInfo.textAfter) {
639
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
640
+ }
641
+ replacement += middle;
642
+ if (endInfo.textBefore) {
643
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
644
+ }
645
+ emitRangeEnds();
646
+ if (endInfo.textAfter) {
647
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
648
+ }
649
+
650
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
651
+ injectedIds.add(comment.id);
652
+ }
653
+
654
+ // Add required namespaces to document.xml for comment threading
655
+ const requiredNs: Record<string, string> = {
656
+ 'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
657
+ 'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
658
+ 'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
659
+ 'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
660
+ 'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
661
+ };
662
+
663
+ // Find <w:document and add namespaces
664
+ const docTagMatch = documentXml.match(/<w:document[^>]*>/);
665
+ if (docTagMatch) {
666
+ let docTag = docTagMatch[0];
667
+ let modified = false;
668
+ for (const [attr, val] of Object.entries(requiredNs)) {
669
+ if (!docTag.includes(attr)) {
670
+ docTag = docTag.replace('>', ` ${attr}="${val}">`);
671
+ modified = true;
672
+ }
673
+ }
674
+ // Add mc:Ignorable if mc namespace was added
675
+ if (modified && !docTag.includes('mc:Ignorable')) {
676
+ docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
677
+ }
678
+ documentXml = documentXml.replace(docTagMatch[0], docTag);
679
+ }
680
+
681
+ // Update document.xml
682
+ zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
683
+
684
+ // All comments (parents + replies) go in comments.xml
685
+ // But only include if parent was injected
686
+ const includedComments = commentsWithIds.filter(c => {
687
+ if (!c.isReply) {
688
+ return injectedIds.has(c.id);
689
+ } else {
690
+ // Include reply if its parent was injected
691
+ const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
692
+ return parent && injectedIds.has(parent.id);
693
+ }
694
+ });
695
+
696
+ // Create comments.xml
697
+ const commentsXml = createCommentsXml(includedComments);
698
+ if (zip.getEntry('word/comments.xml')) {
699
+ zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
700
+ } else {
701
+ zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
702
+ }
703
+
704
+ // Create commentsExtended.xml with reply threading
705
+ const commentsExtXml = createCommentsExtendedXml(includedComments);
706
+ if (zip.getEntry('word/commentsExtended.xml')) {
707
+ zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
708
+ } else {
709
+ zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
710
+ }
711
+
712
+ // Create commentsIds.xml (Word 2016+)
713
+ const commentsIdsXml = createCommentsIdsXml(includedComments);
714
+ if (zip.getEntry('word/commentsIds.xml')) {
715
+ zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
716
+ } else {
717
+ zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
718
+ }
719
+
720
+ // Create commentsExtensible.xml (Word 2018+)
721
+ const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
722
+ if (zip.getEntry('word/commentsExtensible.xml')) {
723
+ zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
724
+ } else {
725
+ zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
726
+ }
727
+
728
+ // Create people.xml (author definitions with Windows Live IDs)
729
+ const peopleXml = createPeopleXml(includedComments);
730
+ if (zip.getEntry('word/people.xml')) {
731
+ zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
732
+ } else {
733
+ zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
734
+ }
735
+
736
+ // Update [Content_Types].xml
737
+ const contentTypesEntry = zip.getEntry('[Content_Types].xml');
738
+ if (contentTypesEntry) {
739
+ let contentTypes = zip.readAsText(contentTypesEntry);
740
+
741
+ if (!contentTypes.includes('comments.xml')) {
742
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
743
+ contentTypes = contentTypes.slice(0, insertPoint) +
744
+ '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
745
+ contentTypes.slice(insertPoint);
746
+ }
747
+
748
+ if (!contentTypes.includes('commentsExtended.xml')) {
749
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
750
+ contentTypes = contentTypes.slice(0, insertPoint) +
751
+ '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
752
+ contentTypes.slice(insertPoint);
753
+ }
754
+
755
+ if (!contentTypes.includes('commentsIds.xml')) {
756
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
757
+ contentTypes = contentTypes.slice(0, insertPoint) +
758
+ '<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
759
+ contentTypes.slice(insertPoint);
760
+ }
761
+
762
+ if (!contentTypes.includes('commentsExtensible.xml')) {
763
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
764
+ contentTypes = contentTypes.slice(0, insertPoint) +
765
+ '<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
766
+ contentTypes.slice(insertPoint);
767
+ }
768
+
769
+ if (!contentTypes.includes('people.xml')) {
770
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
771
+ contentTypes = contentTypes.slice(0, insertPoint) +
772
+ '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
773
+ contentTypes.slice(insertPoint);
774
+ }
775
+
776
+ zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
777
+ }
778
+
779
+ // Update relationships
780
+ const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
781
+ if (relsEntry) {
782
+ let rels = zip.readAsText(relsEntry);
783
+
784
+ const rIdMatches = rels.match(/rId(\d+)/g) || [];
785
+ const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
786
+
787
+ if (!rels.includes('comments.xml')) {
788
+ const insertPoint = rels.lastIndexOf('</Relationships>');
789
+ rels = rels.slice(0, insertPoint) +
790
+ `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
791
+ rels.slice(insertPoint);
792
+ }
793
+
794
+ if (!rels.includes('commentsExtended.xml')) {
795
+ const insertPoint = rels.lastIndexOf('</Relationships>');
796
+ rels = rels.slice(0, insertPoint) +
797
+ `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
798
+ rels.slice(insertPoint);
799
+ }
800
+
801
+ if (!rels.includes('commentsIds.xml')) {
802
+ const insertPoint = rels.lastIndexOf('</Relationships>');
803
+ rels = rels.slice(0, insertPoint) +
804
+ `<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
805
+ rels.slice(insertPoint);
806
+ }
807
+
808
+ if (!rels.includes('commentsExtensible.xml')) {
809
+ const insertPoint = rels.lastIndexOf('</Relationships>');
810
+ rels = rels.slice(0, insertPoint) +
811
+ `<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
812
+ rels.slice(insertPoint);
813
+ }
814
+
815
+ if (!rels.includes('people.xml')) {
816
+ const insertPoint = rels.lastIndexOf('</Relationships>');
817
+ rels = rels.slice(0, insertPoint) +
818
+ `<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
819
+ rels.slice(insertPoint);
820
+ }
821
+
822
+ zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
823
+ }
824
+
825
+ zip.writeZip(outputPath);
826
+
827
+ const parentCount = includedComments.filter(c => !c.isReply).length;
828
+ const replyCount = includedComments.filter(c => c.isReply).length;
829
+
830
+ return {
831
+ success: true,
832
+ commentCount: parentCount,
833
+ replyCount: replyCount,
834
+ skippedComments: comments.length - includedComments.length,
835
+ };
836
+
837
+ } catch (err: any) {
838
+ return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
839
+ }
840
+ }