docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
@@ -1,798 +1,817 @@
1
- /**
2
- * Word comment injection with reply threading
3
- *
4
- * Flow:
5
- * 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
6
- * - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
7
- * - Subsequent adjacent comments = replies (no markers, attach to parent)
8
- * 2. Pandoc converts to DOCX
9
- * 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
10
- * - Replies go in comments.xml with parent reference in commentsExtended.xml
11
- */
12
-
13
- import * as fs from 'fs';
14
- import AdmZip from 'adm-zip';
15
- import { escapeXml } from './utils.js';
16
-
17
/** Leading part of a range-start marker; a full marker reads `⟦CMS:<index>⟧`. */
const MARKER_START_PREFIX = '⟦CMS:';

/** Leading part of a range-end marker; a full marker reads `⟦CME:<index>⟧`. */
const MARKER_END_PREFIX = '⟦CME:';

/** Closing delimiter shared by both marker kinds. */
const MARKER_SUFFIX = '⟧';
20
-
21
/** A single `{>>...<<}` comment as found in the markdown source. */
interface ParsedComment {
  /** Text before the first colon of the comment body, or `'Unknown'` when no author prefix is detected. */
  author: string;
  /** Comment body with the author prefix removed. */
  text: string;
  /** Content of a trailing `[anchor]{.mark}` span, or `null` when the comment has none. */
  anchor: string | null;
  /** Offset in the markdown where the comment block begins. */
  start: number;
  /** Offset just past the comment block, including the trailing anchor span when present. */
  end: number;
  /** The markdown slice from `start` to `end`. */
  fullMatch: string;
}

/** A parsed comment after reply detection. */
interface PreparedComment extends ParsedComment {
  /** `true` when the comment follows another comment closely enough to be treated as its reply. */
  isReply: boolean;
  /** Index (into the prepared comment list) of the thread's first comment; `null` for parents. */
  parentIdx: number | null;
  /** Position of this comment in the prepared comment list; also the number used in its markers. */
  commentIdx: number;
  /** Set on a parent whose anchor was taken over from one of its replies. */
  anchorFromReply?: boolean;
  /** Set on the reply that writes the parent's markers into the markdown. */
  placesParentMarkers?: boolean;
}

/** Output of the marker preparation step. */
interface PrepareResult {
  /** Markdown with comment blocks replaced by markers (parents) or removed (replies). */
  markedMarkdown: string;
  /** Every comment found, in document order. */
  comments: PreparedComment[];
}

/** A prepared comment enriched with the identifiers written into the DOCX comment parts. */
interface CommentWithIds extends PreparedComment {
  /** Value used for `w:id` on the comment and its range/reference elements. */
  id: string;
  /** `w14:paraId` of the comment's first paragraph. */
  paraId: string;
  /** `w14:paraId` of the extra empty paragraph emitted for replies. */
  paraId2: string;
  /** Identifier used in the commentsIds and commentsExtensible parts. */
  durableId: string;
  /** `paraId` of the parent comment; only set on replies. */
  parentParaId?: string;
}

/** Outcome reported by the injection step. */
interface InjectionResult {
  success: boolean;
  commentCount: number;
  replyCount?: number;
  skippedComments: number;
  /** Human-readable failure reason, present when `success` is `false`. */
  error?: string;
}
58
-
59
/**
 * Build the paragraph id for one paragraph of one comment.
 *
 * The id is purely arithmetic, so identical inputs always give the identical
 * string: 0x10000000, plus 0x00100000 per comment index, plus 0x00001000 per
 * paragraph number, rendered as uppercase hex and left-padded to 8 characters.
 *
 * @param commentIdx Zero-based index of the comment.
 * @param paraNum Paragraph number within the comment.
 * @returns Uppercase hexadecimal id, at least 8 characters long.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const BASE = 0x10000000;
  const PER_COMMENT = 0x00100000;
  const PER_PARAGRAPH = 0x00001000;

  const numericId = BASE + commentIdx * PER_COMMENT + paraNum * PER_PARAGRAPH;
  const hex = numericId.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
66
-
67
/**
 * Parse `{>>author: text<<}` comment blocks and replace them with markers.
 *
 * Steps:
 * 1. Collect every comment block, together with an optional trailing
 *    `[anchor]{.mark}` span.
 * 2. Group comments into threads: a comment that starts fewer than 10
 *    characters after the previous one ended is a reply to the first comment
 *    of that run.
 * 3. If a reply carries an anchor and its parent has none, the anchor is moved
 *    to the parent.
 * 4. Rewrite the markdown back to front: parents become
 *    `⟦CMS:n⟧anchor⟦CME:n⟧`, replies are removed.
 *
 * @param markdown Markdown source containing zero or more comment blocks.
 * @returns `markedMarkdown` with markers for parent comments only, and
 *   `comments` describing every comment (author, text, isReply, parentIdx, ...).
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Match the comment block first; the trailing `[anchor]{.mark}` span is
  // parsed by hand. A regex such as `[^\]]+` would stop at the inner `]` of
  // nested syntax (e.g. `[[0..9]]{.mark}`), so brackets are counted explicitly
  // and the `{.mark}` suffix is verified afterwards.
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;

  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let i = fromIdx;
    while (i < text.length && /\s/.test(text[i] ?? '')) i++;
    if (text[i] !== '[') return null;
    let depth = 1;
    let j = i + 1;
    while (j < text.length) {
      const ch = text[j];
      if (ch === '[') depth++;
      else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      j++;
    }
    if (depth !== 0) return null;
    // '{.mark}' is 7 characters, so the span ends at j + 8.
    if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
    return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
  }

  const rawMatches: ParsedComment[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    // Only treat a colon near the start as an author separator.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
    });

    // Skip the consumed anchor so the next iteration does not look for a
    // comment block inside it.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // Thread detection: the first comment of a run is the parent, every
  // following comment closer than ADJACENT_THRESHOLD characters is a reply.
  const ADJACENT_THRESHOLD = 10;
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // Index of first comment in current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    if (gap >= ADJACENT_THRESHOLD) {
      clusterParentIdx = -1;
    }

    const isReply = clusterParentIdx !== -1;
    comments.push({
      ...m,
      isReply,
      parentIdx: isReply ? clusterParentIdx : null,
      commentIdx: comments.length,
    });
    if (!isReply) {
      clusterParentIdx = comments.length - 1;
    }

    lastCommentEnd = m.end;
  }

  // Move an anchor from a reply to its parent when the parent has none. The
  // reply keeps its position in the text, so it is the one that later writes
  // the parent's markers.
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true;
        c.placesParentMarkers = true;
        c.anchor = null;
      }
    }
  }

  const markersFor = (idx: number, anchor: string): string =>
    `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

  // Rewrite from the end so earlier offsets stay valid.
  let markedMarkdown = markdown;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (!c.isReply && !c.anchorFromReply) {
      // Ordinary parent: swap the block for its markers in place.
      markedMarkdown =
        markedMarkdown.slice(0, c.start) + markersFor(i, c.anchor ?? '') + markedMarkdown.slice(c.end);
      continue;
    }

    // Replies, and parents whose markers are written by a reply, disappear
    // from the text. Consume at most one preceding whitespace character to
    // avoid a double space; going further back would shift offsets that
    // lower-index comments still rely on.
    let removeStart = c.start;
    if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
      removeStart--;
    }

    let replacement = '';
    if (c.isReply && c.placesParentMarkers && c.parentIdx !== null) {
      // Use the anchor already stored on the parent. Re-extracting it from
      // `fullMatch` with a `[^\]]+` regex fails on nested brackets and can
      // start at a stray `[` inside the comment text, which dropped the
      // markers for the whole thread.
      const parentAnchor = comments[c.parentIdx]?.anchor;
      if (parentAnchor) {
        replacement = markersFor(c.parentIdx, parentAnchor);
      }
    }

    markedMarkdown = markedMarkdown.slice(0, removeStart) + replacement + markedMarkdown.slice(c.end);
  }

  return { markedMarkdown, comments };
}
265
-
266
/**
 * Render the comments part (`w:comments`) for the given comments.
 *
 * Every comment gets one paragraph holding the annotation reference and the
 * comment text; replies additionally get a second, empty paragraph identified
 * by `paraId2`. All comments share one timestamp and one rsid per call.
 *
 * @param comments Comments (parents and replies) to serialize, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Word expects date without milliseconds: 2025-12-30T08:33:00Z
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  // Use a consistent rsid (8-char hex) for all comments in this batch
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  for (const comment of comments) {
    // First character of each space-separated word. Array.from iterates by
    // code point, so an astral first character is not cut into a lone
    // surrogate; empty words (from repeated spaces) contribute nothing.
    const initials = comment.author
      .split(' ')
      .map(word => Array.from(word)[0] ?? '')
      .join('');

    // The initials come from user-supplied text and land in an attribute, so
    // they need the same escaping as the author name.
    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second empty paragraph: rsidRDefault matches rsidR
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
294
-
295
/**
 * Render the commentsExtended part (`w15:commentsEx`), which carries the
 * parent/reply links between comments.
 *
 * A reply with a known `parentParaId` is listed under its `paraId2` and
 * points at the parent's paragraph id; every other comment is listed under
 * its `paraId` without a parent reference.
 *
 * @param comments Comments to list, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map((entry) => {
    const isLinkedReply = entry.isReply && Boolean(entry.parentParaId);
    return isLinkedReply
      ? `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`
      : `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
  });

  return declaration + rootOpen + entries.join('') + '</w15:commentsEx>';
}
313
-
314
/**
 * Produce an 8-character uppercase hex durable id for a comment.
 *
 * The id combines a time-derived base (0x10000000 plus the current
 * millisecond timestamp modulo 0x40000000) with a 0x01000000 step per index,
 * then reduces the sum modulo 0x7FFFFFFF so it stays below the signed 32-bit
 * maximum. Because the clock is involved, results differ between runs.
 *
 * @param index Zero-based index of the comment.
 * @returns Uppercase hexadecimal id padded to 8 characters.
 */
function generateDurableId(index: number): string {
  const SIGNED_32_MAX = 0x7FFFFFFF;
  const PER_INDEX_STEP = 0x01000000;

  // Base falls between 0x10000000 and 0x50000000.
  const timeBase = 0x10000000 + (Date.now() % 0x40000000);
  const bounded = (timeBase + index * PER_INDEX_STEP) % SIGNED_32_MAX;

  return bounded.toString(16).toUpperCase().padStart(8, '0');
}
322
-
323
/**
 * Render the commentsIds part (`w16cid:commentsIds`), pairing each comment's
 * last paragraph id with its durable id.
 *
 * Exactly one entry is written per comment: parents (one paragraph) use
 * `paraId`, replies (two paragraphs) use `paraId2`.
 *
 * @param comments Comments to list, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cid:commentsIds',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'mc:Ignorable="w16cid">',
  ].join(' ');

  const entries = comments.map((entry) => {
    const lastParaId = entry.isReply ? entry.paraId2 : entry.paraId;
    return `<w16cid:commentId w16cid:paraId="${lastParaId}" w16cid:durableId="${entry.durableId}"/>`;
  });

  return (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' +
    rootOpen +
    entries.join('') +
    '</w16cid:commentsIds>'
  );
}
342
-
343
/**
 * Render the commentsExtensible part (`w16cex:commentsExtensible`), which
 * records a UTC timestamp per durable id.
 *
 * All entries share the timestamp taken when the function is called, written
 * without milliseconds.
 *
 * @param comments Comments to list, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cex:commentsExtensible',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'mc:Ignorable="w16cex">',
  ].join(' ');

  // ONE entry per comment using the durableId
  const entries = comments.map(
    (entry) => `<w16cex:commentExtensible w16cex:durableId="${entry.durableId}" w16cex:dateUtc="${timestamp}"/>`,
  );

  return (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' +
    rootOpen +
    entries.join('') +
    '</w16cex:commentsExtensible>'
  );
}
361
-
362
- // Generate deterministic user IDs for authors (no hardcoded personal data)
363
-
364
/**
 * Render the people part (`w15:people`) listing each distinct comment author
 * once, in order of first appearance.
 *
 * Each person entry carries a presence record with provider "Windows Live"
 * and a user id derived from the author name via `generateUserId`.
 *
 * @param comments Comments whose authors should be listed.
 * @returns The complete XML document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  const uniqueAuthors = Array.from(new Set(comments.map((entry) => entry.author)));

  const rootOpen = [
    '<w15:people',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml"',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash"',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex"',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh">',
  ].join(' ');

  let people = '';
  for (const name of uniqueAuthors) {
    const userId = generateUserId(name);
    people +=
      `<w15:person w15:author="${escapeXml(name)}">` +
      `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>` +
      `</w15:person>`;
  }

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + rootOpen + people + '</w15:people>';
}
393
-
394
/**
 * Derive a deterministic 16-character lowercase hex id from an author name.
 *
 * The name's UTF-16 code units are folded into a 32-bit integer hash
 * (`hash * 31 + unit`, truncated to 32 bits at every step); the absolute
 * value is rendered as hex and left-padded with zeros to 16 characters.
 *
 * @param author Author display name; an empty string yields sixteen zeros.
 * @returns 16-character hexadecimal string.
 */
function generateUserId(author: string): string {
  let acc = 0;
  // split('') walks UTF-16 code units, matching index-based charCodeAt access.
  for (const unit of author.split('')) {
    acc = ((acc << 5) - acc + unit.charCodeAt(0)) | 0;
  }
  const hex = Math.abs(acc).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
403
-
404
- /**
405
- * Inject comments at marker positions
406
- */
407
- export async function injectCommentsAtMarkers(
408
- docxPath: string,
409
- comments: PreparedComment[],
410
- outputPath: string
411
- ): Promise<InjectionResult> {
412
- try {
413
- if (!fs.existsSync(docxPath)) {
414
- return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
415
- }
416
-
417
- if (comments.length === 0) {
418
- fs.copyFileSync(docxPath, outputPath);
419
- return { success: true, commentCount: 0, skippedComments: 0 };
420
- }
421
-
422
- const zip = new AdmZip(docxPath);
423
- const documentEntry = zip.getEntry('word/document.xml');
424
- if (!documentEntry) {
425
- return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
426
- }
427
-
428
- let documentXml = zip.readAsText(documentEntry);
429
-
430
- // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
431
- const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
432
- ...c,
433
- id: String(idx + 1),
434
- paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
435
- paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
436
- durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
437
- }));
438
-
439
- // Link replies to parent paraIds
440
- for (const c of commentsWithIds) {
441
- if (c.isReply && c.parentIdx !== null) {
442
- const parent = commentsWithIds[c.parentIdx];
443
- if (parent) {
444
- c.parentParaId = parent.paraId;
445
- }
446
- }
447
- }
448
-
449
- const injectedIds = new Set<string>();
450
-
451
- // Process only parent comments (non-replies) for document ranges
452
- const parentComments = commentsWithIds.filter(c => !c.isReply);
453
-
454
- for (let i = parentComments.length - 1; i >= 0; i--) {
455
- const comment = parentComments[i];
456
- if (!comment) continue;
457
- const idx = comment.commentIdx;
458
-
459
- const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
460
- const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
461
-
462
- const startPos = documentXml.indexOf(startMarker);
463
- const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
464
-
465
- if (startPos === -1 || endPos === -1) continue;
466
-
467
- // Find the runs containing each marker. Pandoc may split a single
468
- // markdown anchor across multiple <w:r> blocks when it applies styling
469
- // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
470
- // The same-run path (current happy path) collapses into the multi-run
471
- // path when start and end runs coincide.
472
- const startRunOpen = Math.max(
473
- documentXml.lastIndexOf('<w:r>', startPos),
474
- documentXml.lastIndexOf('<w:r ', startPos),
475
- );
476
- const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
477
- const endRunOpen = Math.max(
478
- documentXml.lastIndexOf('<w:r>', endPos),
479
- documentXml.lastIndexOf('<w:r ', endPos),
480
- );
481
- const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
482
-
483
- if (
484
- startRunOpen === -1 || startRunCloseIdx === -1 ||
485
- endRunOpen === -1 || endRunCloseIdx === -1
486
- ) continue;
487
-
488
- const startRunClose = startRunCloseIdx + '</w:r>'.length;
489
- const endRunClose = endRunCloseIdx + '</w:r>'.length;
490
-
491
- const startRunFull = documentXml.slice(startRunOpen, startRunClose);
492
- const endRunFull = documentXml.slice(endRunOpen, endRunClose);
493
-
494
- // Extract <w:rPr> and <w:t> element shape from each run. Both pieces
495
- // are needed verbatim so a textBefore split keeps its original styling
496
- // and so the post-anchor textAfter render keeps the end run's styling.
497
- function dissectRun(runXml: string, marker: string): {
498
- rPr: string;
499
- tElement: string;
500
- textBefore: string;
501
- textAfter: string;
502
- } | null {
503
- const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
504
- const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
505
- if (!tMatch) return null;
506
- const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
507
- if (!tOpenMatch) return null;
508
- const tContent = tMatch[1] ?? '';
509
- const markerInT = tContent.indexOf(marker);
510
- if (markerInT === -1) return null;
511
- return {
512
- rPr: rPrMatch ? rPrMatch[0] : '',
513
- tElement: tOpenMatch[0],
514
- textBefore: tContent.slice(0, markerInT),
515
- textAfter: tContent.slice(markerInT + marker.length),
516
- };
517
- }
518
-
519
- let replacement = '';
520
- const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
521
-
522
- const emitRangeStarts = () => {
523
- replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
524
- for (const reply of replies) {
525
- replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
526
- }
527
- };
528
-
529
- const emitRangeEnds = () => {
530
- replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
531
- replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
532
- for (const reply of replies) {
533
- replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
534
- replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
535
- injectedIds.add(reply.id);
536
- }
537
- };
538
-
539
- if (startRunOpen === endRunOpen) {
540
- // Same-run path: both markers live inside one <w:t>. Original logic.
541
- const startInfo = dissectRun(startRunFull, startMarker);
542
- if (!startInfo) continue;
543
- const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
544
- const endInTextRel = startInfo.textAfter.indexOf(endMarker);
545
- if (endInTextRel === -1) continue;
546
- const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
547
- let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
548
- let anchorText = anchorTextSame;
549
- let textBefore = startInfo.textBefore;
550
-
551
- // Empty anchor: borrow the next word so the comment has something
552
- // to anchor on. Then normalize the trailing double space.
553
- if (!anchorText && textAfter) {
554
- const wordMatch = textAfter.match(/^\s*(\S+)/);
555
- if (wordMatch) {
556
- anchorText = wordMatch[1] ?? '';
557
- textAfter = textAfter.slice(wordMatch[0].length);
558
- }
559
- }
560
- if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
561
- textAfter = textAfter.slice(1);
562
- }
563
- // Suppress unused warning for pre-empty-anchor fullText var
564
- void fullText;
565
-
566
- if (textBefore) {
567
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
568
- }
569
- emitRangeStarts();
570
- if (anchorText) {
571
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
572
- }
573
- emitRangeEnds();
574
- if (textAfter) {
575
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
576
- }
577
- documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
578
- injectedIds.add(comment.id);
579
- continue;
580
- }
581
-
582
- // Multi-run path: markers sit in different <w:r> blocks because pandoc
583
- // applied mid-anchor styling. Split the start run at the start marker,
584
- // keep all middle runs verbatim (they carry the styled anchor portions),
585
- // split the end run at the end marker.
586
- const startInfo = dissectRun(startRunFull, startMarker);
587
- const endInfo = dissectRun(endRunFull, endMarker);
588
- if (!startInfo || !endInfo) continue;
589
-
590
- const middle = documentXml.slice(startRunClose, endRunOpen);
591
-
592
- if (startInfo.textBefore) {
593
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
594
- }
595
- emitRangeStarts();
596
- if (startInfo.textAfter) {
597
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
598
- }
599
- replacement += middle;
600
- if (endInfo.textBefore) {
601
- replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
602
- }
603
- emitRangeEnds();
604
- if (endInfo.textAfter) {
605
- replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
606
- }
607
-
608
- documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
609
- injectedIds.add(comment.id);
610
- }
611
-
612
- // Add required namespaces to document.xml for comment threading
613
- const requiredNs: Record<string, string> = {
614
- 'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
615
- 'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
616
- 'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
617
- 'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
618
- 'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
619
- };
620
-
621
- // Find <w:document and add namespaces
622
- const docTagMatch = documentXml.match(/<w:document[^>]*>/);
623
- if (docTagMatch) {
624
- let docTag = docTagMatch[0];
625
- let modified = false;
626
- for (const [attr, val] of Object.entries(requiredNs)) {
627
- if (!docTag.includes(attr)) {
628
- docTag = docTag.replace('>', ` ${attr}="${val}">`);
629
- modified = true;
630
- }
631
- }
632
- // Add mc:Ignorable if mc namespace was added
633
- if (modified && !docTag.includes('mc:Ignorable')) {
634
- docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
635
- }
636
- documentXml = documentXml.replace(docTagMatch[0], docTag);
637
- }
638
-
639
- // Update document.xml
640
- zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
641
-
642
- // All comments (parents + replies) go in comments.xml
643
- // But only include if parent was injected
644
- const includedComments = commentsWithIds.filter(c => {
645
- if (!c.isReply) {
646
- return injectedIds.has(c.id);
647
- } else {
648
- // Include reply if its parent was injected
649
- const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
650
- return parent && injectedIds.has(parent.id);
651
- }
652
- });
653
-
654
- // Create comments.xml
655
- const commentsXml = createCommentsXml(includedComments);
656
- if (zip.getEntry('word/comments.xml')) {
657
- zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
658
- } else {
659
- zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
660
- }
661
-
662
- // Create commentsExtended.xml with reply threading
663
- const commentsExtXml = createCommentsExtendedXml(includedComments);
664
- if (zip.getEntry('word/commentsExtended.xml')) {
665
- zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
666
- } else {
667
- zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
668
- }
669
-
670
- // Create commentsIds.xml (Word 2016+)
671
- const commentsIdsXml = createCommentsIdsXml(includedComments);
672
- if (zip.getEntry('word/commentsIds.xml')) {
673
- zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
674
- } else {
675
- zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
676
- }
677
-
678
- // Create commentsExtensible.xml (Word 2018+)
679
- const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
680
- if (zip.getEntry('word/commentsExtensible.xml')) {
681
- zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
682
- } else {
683
- zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
684
- }
685
-
686
- // Create people.xml (author definitions with Windows Live IDs)
687
- const peopleXml = createPeopleXml(includedComments);
688
- if (zip.getEntry('word/people.xml')) {
689
- zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
690
- } else {
691
- zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
692
- }
693
-
694
- // Update [Content_Types].xml
695
- const contentTypesEntry = zip.getEntry('[Content_Types].xml');
696
- if (contentTypesEntry) {
697
- let contentTypes = zip.readAsText(contentTypesEntry);
698
-
699
- if (!contentTypes.includes('comments.xml')) {
700
- const insertPoint = contentTypes.lastIndexOf('</Types>');
701
- contentTypes = contentTypes.slice(0, insertPoint) +
702
- '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
703
- contentTypes.slice(insertPoint);
704
- }
705
-
706
- if (!contentTypes.includes('commentsExtended.xml')) {
707
- const insertPoint = contentTypes.lastIndexOf('</Types>');
708
- contentTypes = contentTypes.slice(0, insertPoint) +
709
- '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
710
- contentTypes.slice(insertPoint);
711
- }
712
-
713
- if (!contentTypes.includes('commentsIds.xml')) {
714
- const insertPoint = contentTypes.lastIndexOf('</Types>');
715
- contentTypes = contentTypes.slice(0, insertPoint) +
716
- '<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
717
- contentTypes.slice(insertPoint);
718
- }
719
-
720
- if (!contentTypes.includes('commentsExtensible.xml')) {
721
- const insertPoint = contentTypes.lastIndexOf('</Types>');
722
- contentTypes = contentTypes.slice(0, insertPoint) +
723
- '<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
724
- contentTypes.slice(insertPoint);
725
- }
726
-
727
- if (!contentTypes.includes('people.xml')) {
728
- const insertPoint = contentTypes.lastIndexOf('</Types>');
729
- contentTypes = contentTypes.slice(0, insertPoint) +
730
- '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
731
- contentTypes.slice(insertPoint);
732
- }
733
-
734
- zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
735
- }
736
-
737
- // Update relationships
738
- const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
739
- if (relsEntry) {
740
- let rels = zip.readAsText(relsEntry);
741
-
742
- const rIdMatches = rels.match(/rId(\d+)/g) || [];
743
- const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
744
-
745
- if (!rels.includes('comments.xml')) {
746
- const insertPoint = rels.lastIndexOf('</Relationships>');
747
- rels = rels.slice(0, insertPoint) +
748
- `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
749
- rels.slice(insertPoint);
750
- }
751
-
752
- if (!rels.includes('commentsExtended.xml')) {
753
- const insertPoint = rels.lastIndexOf('</Relationships>');
754
- rels = rels.slice(0, insertPoint) +
755
- `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
756
- rels.slice(insertPoint);
757
- }
758
-
759
- if (!rels.includes('commentsIds.xml')) {
760
- const insertPoint = rels.lastIndexOf('</Relationships>');
761
- rels = rels.slice(0, insertPoint) +
762
- `<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
763
- rels.slice(insertPoint);
764
- }
765
-
766
- if (!rels.includes('commentsExtensible.xml')) {
767
- const insertPoint = rels.lastIndexOf('</Relationships>');
768
- rels = rels.slice(0, insertPoint) +
769
- `<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
770
- rels.slice(insertPoint);
771
- }
772
-
773
- if (!rels.includes('people.xml')) {
774
- const insertPoint = rels.lastIndexOf('</Relationships>');
775
- rels = rels.slice(0, insertPoint) +
776
- `<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
777
- rels.slice(insertPoint);
778
- }
779
-
780
- zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
781
- }
782
-
783
- zip.writeZip(outputPath);
784
-
785
- const parentCount = includedComments.filter(c => !c.isReply).length;
786
- const replyCount = includedComments.filter(c => c.isReply).length;
787
-
788
- return {
789
- success: true,
790
- commentCount: parentCount,
791
- replyCount: replyCount,
792
- skippedComments: comments.length - includedComments.length,
793
- };
794
-
795
- } catch (err: any) {
796
- return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
797
- }
798
- }
1
+ /**
2
+ * Word comment injection with reply threading
3
+ *
4
+ * Flow:
5
+ * 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
6
+ * - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
7
+ * - Subsequent adjacent comments = replies (no markers, attach to parent)
8
+ * 2. Pandoc converts to DOCX
9
+ * 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
10
+ * - Replies go in comments.xml with parent reference in commentsExtended.xml
11
+ */
12
+
13
+ import * as fs from 'fs';
14
+ import AdmZip from 'adm-zip';
15
+ import { escapeXml } from './utils.js';
16
+
17
/** Opening part of a range-start marker; the full marker is `⟦CMS:<index>⟧`. */
const MARKER_START_PREFIX = '⟦CMS:';
/** Opening part of a range-end marker; the full marker is `⟦CME:<index>⟧`. */
const MARKER_END_PREFIX = '⟦CME:';
/** Closing bracket shared by start and end markers. */
const MARKER_SUFFIX = '⟧';

/** One `{>>author: text<<}` comment as found in the markdown source. */
interface ParsedComment {
  /** Text before the first colon of the comment body, or `'Unknown'` when no usable colon was found. */
  author: string;
  /** Comment body after the author prefix (the whole body when no author was detected). */
  text: string;
  /** Content of a trailing `[anchor]{.mark}` span, or `null` when the comment has none. */
  anchor: string | null;
  /** Offset of the comment's first character in the original markdown. */
  start: number;
  /** Offset just past the comment (and past its trailing anchor span, when present). */
  end: number;
  /** The markdown slice `[start, end)`. */
  fullMatch: string;
}

/** A parsed comment annotated with its threading role. */
interface PreparedComment extends ParsedComment {
  /** `true` when the comment is threaded under an earlier comment. */
  isReply: boolean;
  /** Index (into the prepared comments array) of the thread parent; `null` for parents. */
  parentIdx: number | null;
  /** Position of this comment in the prepared comments array; also the number used in its markers. */
  commentIdx: number;
  /** Set on a parent whose anchor was taken over from one of its replies. */
  anchorFromReply?: boolean;
  /** Set on the reply that must emit the parent's markers at its own position. */
  placesParentMarkers?: boolean;
}

/** Result of `prepareMarkdownWithMarkers`. */
interface PrepareResult {
  /** Markdown with comments stripped and parent comments replaced by markers. */
  markedMarkdown: string;
  /** Every comment found, parents and replies, in document order. */
  comments: PreparedComment[];
}

/** A prepared comment with the identifiers written into the DOCX comment parts. */
interface CommentWithIds extends PreparedComment {
  /** `w:id` of the comment. */
  id: string;
  /** `w14:paraId` of the comment's first paragraph. */
  paraId: string;
  /** `w14:paraId` of the second (empty) paragraph emitted for replies. */
  paraId2: string;
  /** Identifier shared between commentsIds.xml and commentsExtensible.xml. */
  durableId: string;
  /** `paraId` of the parent comment; only set on replies whose parent was resolved. */
  parentParaId?: string;
}

/** Outcome reported by `injectCommentsAtMarkers`. */
interface InjectionResult {
  success: boolean;
  /** Number of parent (non-reply) comments written. */
  commentCount: number;
  /** Number of replies written. */
  replyCount?: number;
  /** Number of input comments that were not written. */
  skippedComments: number;
  /** Failure description when `success` is `false`. */
  error?: string;
}
58
+
59
/**
 * Build the uppercase hex paragraph id for one paragraph of one comment.
 *
 * The id is a pure function of its inputs, so repeated runs over the same
 * comments yield the same ids. Word-style ids look like "3F25BC58".
 *
 * Note: `padStart` only pads, it never truncates. Once the computed value
 * reaches 0x100000000 (commentIdx >= 3840) the result is longer than 8 digits.
 *
 * @param commentIdx Zero-based index of the comment.
 * @param paraNum Paragraph number within the comment (callers pass 1 or 2).
 * @returns Hex string, left-padded with zeros to at least 8 characters.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const BASE = 0x10000000;
  const COMMENT_STRIDE = 0x00100000;
  const PARAGRAPH_STRIDE = 0x00001000;

  const numericId = BASE + commentIdx * COMMENT_STRIDE + paraNum * PARAGRAPH_STRIDE;
  const hex = numericId.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
66
+
67
/**
 * Parse CriticMarkup comments (`{>>author: text<<}`, optionally followed by an
 * `[anchor]{.mark}` span) and replace them with position markers.
 *
 * Threading:
 * - If any comment's author starts with `↪ `, that prefix is the only reply
 *   signal (explicit mode).
 * - Otherwise a comment that starts fewer than 10 characters after the end of
 *   the previous one is treated as a reply to the first comment of its cluster.
 *
 * Marker placement:
 * - A parent comment is replaced by `⟦CMS:n⟧anchor⟦CME:n⟧`, `n` being its index
 *   in the returned array.
 * - A reply is removed from the text. When the parent had no anchor of its
 *   own and the reply has one, the anchor is moved to the parent and the
 *   parent's markers are emitted at the reply's position instead.
 *
 * @param markdown Markdown source that may contain CriticMarkup comments.
 * @returns `markedMarkdown` (comments replaced/removed) and `comments` (every
 *   comment in document order with `isReply` / `parentIdx` filled in). When no
 *   comment is found the input is returned unchanged with an empty array.
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Match the comment block first; the optional trailing `[anchor]{.mark}`
  // span is captured by walking brackets manually, because a `[^\]]+` regex
  // stops at the inner `]` of nested syntax such as `[[0..9]]{.mark}`.
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;

  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let i = fromIdx;
    while (i < text.length && /\s/.test(text[i] ?? '')) i++;
    if (text[i] !== '[') return null;
    let depth = 1;
    let j = i + 1;
    while (j < text.length) {
      const ch = text[j];
      if (ch === '[') depth++;
      else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      j++;
    }
    if (depth !== 0) return null;
    // '{.mark}' is 7 characters long, hence the j + 8 end offset.
    if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
    return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
  }

  const REPLY_PREFIX = '↪ ';
  const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    // Only a colon near the start is taken as the author separator.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    // The `↪ ` prefix is the authoritative reply signal; strip it so the
    // real author name is what ends up in the document.
    let explicitReply = false;
    if (author.startsWith(REPLY_PREFIX)) {
      explicitReply = true;
      author = author.slice(REPLY_PREFIX.length).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
      explicitReply,
    });

    // Skip past the consumed anchor so the next search does not start
    // inside the anchor span.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // Explicit mode when any comment carries the reply prefix; otherwise fall
  // back to "gap < 10 characters" adjacency for hand-typed CriticMarkup.
  const ADJACENT_THRESHOLD = 10;
  const useExplicitMode = rawMatches.some(m => m.explicitReply);
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // Index of the first comment in the current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    const isAdjacent = useExplicitMode ? m.explicitReply : gap < ADJACENT_THRESHOLD;

    // A non-adjacent comment starts a new cluster.
    if (!isAdjacent) {
      clusterParentIdx = -1;
    }

    // First comment of a cluster is the parent (regardless of author);
    // every later one replies to that parent.
    const isReply = clusterParentIdx !== -1;
    comments.push({
      author: m.author,
      text: m.text,
      anchor: m.anchor,
      start: m.start,
      end: m.end,
      fullMatch: m.fullMatch,
      isReply,
      parentIdx: isReply ? clusterParentIdx : null,
      commentIdx: comments.length,
    });
    if (!isReply) {
      clusterParentIdx = comments.length - 1;
    }

    lastCommentEnd = m.end;
  }

  // Propagate anchors from replies to parents: if a reply has an anchor but
  // its parent does not, the parent takes the anchor and the reply is
  // flagged as the place where the parent's markers are emitted.
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true;
        c.placesParentMarkers = true;
        c.anchor = null;
      }
    }
  }

  // Build the marked markdown from the end backwards so the offsets of the
  // comments still to be processed stay valid.
  let markedMarkdown = markdown;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (c.isReply) {
      // Replies leave the text entirely. Consume at most one preceding
      // whitespace character: walking further back would shift positions
      // that lower-index comments still depend on.
      let removeStart = c.start;
      if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
        removeStart--;
      }

      let replacement = '';
      if (c.placesParentMarkers && c.parentIdx !== null) {
        // Use the anchor that was propagated onto the parent. Re-extracting
        // it from `fullMatch` with a `[^\]]+` regex fails on nested brackets
        // (dropping the anchor text from the document) and can capture part
        // of the comment body when the body contains an unclosed `[`.
        const parentIdx = c.parentIdx;
        const anchorText = comments[parentIdx]?.anchor;
        if (anchorText) {
          replacement = `${MARKER_START_PREFIX}${parentIdx}${MARKER_SUFFIX}${anchorText}${MARKER_END_PREFIX}${parentIdx}${MARKER_SUFFIX}`;
        }
      }
      markedMarkdown = markedMarkdown.slice(0, removeStart) + replacement + markedMarkdown.slice(c.end);
    } else if (c.anchorFromReply) {
      // The reply emits this parent's markers; just remove the comment,
      // again consuming at most one preceding whitespace character.
      let removeStart = c.start;
      if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
        removeStart--;
      }
      markedMarkdown = markedMarkdown.slice(0, removeStart) + markedMarkdown.slice(c.end);
    } else {
      // Normal parent: replace comment (and anchor span) with markers.
      const anchor = c.anchor || '';
      const replacement = `${MARKER_START_PREFIX}${i}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${i}${MARKER_SUFFIX}`;
      markedMarkdown = markedMarkdown.slice(0, c.start) + replacement + markedMarkdown.slice(c.end);
    }
  }

  return { markedMarkdown, comments };
}
284
+
285
/**
 * Serialize comments into the contents of `word/comments.xml`.
 *
 * Every comment gets one paragraph holding the annotation reference and the
 * comment text; replies get an additional empty second paragraph whose id
 * (`paraId2`) is what the other comment parts refer to.
 *
 * All author-derived attribute values (author name and initials) and the
 * comment text are XML-escaped.
 *
 * @param comments Comments (parents and replies) to write, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Word expects date without milliseconds: 2025-12-30T08:33:00Z
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  // Use a consistent rsid (8-char hex) for all comments in this batch
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  for (const comment of comments) {
    // First code point of each space-separated name part. Spreading the
    // string iterates by code point, so a surrogate pair is never split;
    // empty parts (double spaces) contribute nothing.
    const initials = comment.author
      .split(' ')
      .map(part => [...part][0] ?? '')
      .join('');

    // Initials come from user-supplied text and must be escaped like the
    // author name: an author such as "R & D" would otherwise emit a bare `&`.
    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second empty paragraph: rsidRDefault matches rsidR
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
313
+
314
/**
 * Serialize the threading information for `word/commentsExtended.xml`.
 *
 * A reply with a known parent is written with its second-paragraph id
 * (`paraId2`) and a `paraIdParent` link to the parent's `paraId`. Every other
 * comment (parents, and replies without a resolved parent) is written with
 * its first-paragraph `paraId` only.
 *
 * @param comments Comments to describe, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const prolog = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map((entry) => {
    const threaded = entry.isReply && entry.parentParaId;
    if (!threaded) {
      return `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
    }
    return `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`;
  });

  return prolog + rootOpen + entries.join('') + '</w15:commentsEx>';
}
332
+
333
/**
 * Produce an 8-character uppercase hex durable id for the comment at `index`.
 *
 * The value mixes the current time with the index, so it differs between
 * runs. The final modulo keeps it below 0x7FFFFFFF; the original author's
 * note says Word reads durable ids as signed 32-bit integers.
 *
 * @param index Zero-based comment index.
 * @returns Hex string, zero-padded to 8 characters.
 */
function generateDurableId(index: number): string {
  const SIGNED_32_MAX = 0x7FFFFFFF;

  // Time-derived starting point between 0x10000000 and 0x50000000.
  const timeOffset = Date.now() % 0x40000000;
  const startingPoint = 0x10000000 + timeOffset;

  // Spread successive indices apart, then fold back under the signed maximum.
  const spread = index * 0x01000000;
  const value = (startingPoint + spread) % SIGNED_32_MAX;

  return value.toString(16).toUpperCase().padStart(8, '0');
}
341
+
342
/**
 * Serialize the paraId → durableId mapping for `word/commentsIds.xml`.
 *
 * Exactly one entry is written per comment, keyed by the id of the comment's
 * last paragraph: `paraId` for parents (one paragraph), `paraId2` for replies
 * (which carry a second, empty paragraph).
 *
 * @param comments Comments to list, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cid:commentsIds ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ',
    'mc:Ignorable="w16cid">',
  ].join('');

  const pieces: string[] = ['<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n', rootOpen];
  comments.forEach((entry) => {
    const lastParagraphId = entry.isReply ? entry.paraId2 : entry.paraId;
    pieces.push(`<w16cid:commentId w16cid:paraId="${lastParagraphId}" w16cid:durableId="${entry.durableId}"/>`);
  });
  pieces.push('</w16cid:commentsIds>');

  return pieces.join('');
}
361
+
362
/**
 * Serialize per-comment timestamps for `word/commentsExtensible.xml`.
 *
 * One entry is written per comment, keyed by `durableId`; all entries share
 * the same UTC timestamp (current time, milliseconds stripped).
 *
 * @param comments Comments to list, in output order.
 * @returns The complete XML document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // Minimal namespaces matching golden file structure
  const rootOpen =
    '<w16cex:commentsExtensible ' +
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ' +
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ' +
    'mc:Ignorable="w16cex">';

  const body = comments
    .map(({ durableId }) => `<w16cex:commentExtensible w16cex:durableId="${durableId}" w16cex:dateUtc="${timestamp}"/>`)
    .join('');

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + rootOpen + body + '</w16cex:commentsExtensible>';
}
380
+
381
+ // people.xml: each author's user ID is derived deterministically from the author name by generateUserId() (no hardcoded personal data)
382
+
383
/**
 * Serialize the author list for `word/people.xml`.
 *
 * Each distinct author name (first-seen order) becomes one `w15:person`
 * element with a `w15:presenceInfo` child whose user id comes from
 * `generateUserId(author)`. Author names are XML-escaped.
 *
 * @param comments Comments whose authors should be listed.
 * @returns The complete XML document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  // Collect distinct authors; a Set keeps insertion order.
  const distinctAuthors = new Set<string>();
  for (const { author } of comments) {
    distinctAuthors.add(author);
  }

  const rootAttributes = [
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml"',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash"',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex"',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh"',
  ];

  const pieces: string[] = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w15:people ' + rootAttributes.join(' ') + '>',
  ];

  distinctAuthors.forEach((author) => {
    const userId = generateUserId(author);
    pieces.push(
      `<w15:person w15:author="${escapeXml(author)}">` +
        `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>` +
        `</w15:person>`,
    );
  });

  pieces.push('</w15:people>');
  return pieces.join('');
}
412
+
413
/**
 * Derive a 16-character lowercase hex id from an author name.
 *
 * The name's UTF-16 code units are folded into a 32-bit "times 31" string
 * hash; its absolute value is written in hex and left-padded with zeros.
 * The same name always yields the same id.
 *
 * @param author Author display name.
 * @returns 16-character hex string.
 */
function generateUserId(author: string): string {
  let acc = 0;
  for (let pos = 0; pos < author.length; pos += 1) {
    // Math.imul keeps the multiplication in 32-bit space; `| 0` wraps the sum.
    acc = (Math.imul(acc, 31) + author.charCodeAt(pos)) | 0;
  }
  const hex = Math.abs(acc).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
422
+
423
+ /**
424
+ * Inject comments at marker positions
425
+ */
426
+ export async function injectCommentsAtMarkers(
427
+ docxPath: string,
428
+ comments: PreparedComment[],
429
+ outputPath: string
430
+ ): Promise<InjectionResult> {
431
+ try {
432
+ if (!fs.existsSync(docxPath)) {
433
+ return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
434
+ }
435
+
436
+ if (comments.length === 0) {
437
+ fs.copyFileSync(docxPath, outputPath);
438
+ return { success: true, commentCount: 0, skippedComments: 0 };
439
+ }
440
+
441
+ const zip = new AdmZip(docxPath);
442
+ const documentEntry = zip.getEntry('word/document.xml');
443
+ if (!documentEntry) {
444
+ return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
445
+ }
446
+
447
+ let documentXml = zip.readAsText(documentEntry);
448
+
449
+ // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
450
+ const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
451
+ ...c,
452
+ id: String(idx + 1),
453
+ paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
454
+ paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
455
+ durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
456
+ }));
457
+
458
+ // Link replies to parent paraIds
459
+ for (const c of commentsWithIds) {
460
+ if (c.isReply && c.parentIdx !== null) {
461
+ const parent = commentsWithIds[c.parentIdx];
462
+ if (parent) {
463
+ c.parentParaId = parent.paraId;
464
+ }
465
+ }
466
+ }
467
+
468
+ const injectedIds = new Set<string>();
469
+
470
+ // Process only parent comments (non-replies) for document ranges
471
+ const parentComments = commentsWithIds.filter(c => !c.isReply);
472
+
473
+ for (let i = parentComments.length - 1; i >= 0; i--) {
474
+ const comment = parentComments[i];
475
+ if (!comment) continue;
476
+ const idx = comment.commentIdx;
477
+
478
+ const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
479
+ const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
480
+
481
+ const startPos = documentXml.indexOf(startMarker);
482
+ const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
483
+
484
+ if (startPos === -1 || endPos === -1) continue;
485
+
486
+ // Find the runs containing each marker. Pandoc may split a single
487
+ // markdown anchor across multiple <w:r> blocks when it applies styling
488
+ // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
489
+ // The same-run path (current happy path) collapses into the multi-run
490
+ // path when start and end runs coincide.
491
+ const startRunOpen = Math.max(
492
+ documentXml.lastIndexOf('<w:r>', startPos),
493
+ documentXml.lastIndexOf('<w:r ', startPos),
494
+ );
495
+ const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
496
+ const endRunOpen = Math.max(
497
+ documentXml.lastIndexOf('<w:r>', endPos),
498
+ documentXml.lastIndexOf('<w:r ', endPos),
499
+ );
500
+ const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
501
+
502
+ if (
503
+ startRunOpen === -1 || startRunCloseIdx === -1 ||
504
+ endRunOpen === -1 || endRunCloseIdx === -1
505
+ ) continue;
506
+
507
+ const startRunClose = startRunCloseIdx + '</w:r>'.length;
508
+ const endRunClose = endRunCloseIdx + '</w:r>'.length;
509
+
510
+ const startRunFull = documentXml.slice(startRunOpen, startRunClose);
511
+ const endRunFull = documentXml.slice(endRunOpen, endRunClose);
512
+
513
+ // Extract <w:rPr> and <w:t> element shape from each run. Both pieces
514
+ // are needed verbatim so a textBefore split keeps its original styling
515
+ // and so the post-anchor textAfter render keeps the end run's styling.
516
+ function dissectRun(runXml: string, marker: string): {
517
+ rPr: string;
518
+ tElement: string;
519
+ textBefore: string;
520
+ textAfter: string;
521
+ } | null {
522
+ const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
523
+ const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
524
+ if (!tMatch) return null;
525
+ const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
526
+ if (!tOpenMatch) return null;
527
+ const tContent = tMatch[1] ?? '';
528
+ const markerInT = tContent.indexOf(marker);
529
+ if (markerInT === -1) return null;
530
+ return {
531
+ rPr: rPrMatch ? rPrMatch[0] : '',
532
+ tElement: tOpenMatch[0],
533
+ textBefore: tContent.slice(0, markerInT),
534
+ textAfter: tContent.slice(markerInT + marker.length),
535
+ };
536
+ }
537
+
538
+ let replacement = '';
539
+ const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
540
+
541
+ const emitRangeStarts = () => {
542
+ replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
543
+ for (const reply of replies) {
544
+ replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
545
+ }
546
+ };
547
+
548
+ const emitRangeEnds = () => {
549
+ replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
550
+ replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
551
+ for (const reply of replies) {
552
+ replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
553
+ replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
554
+ injectedIds.add(reply.id);
555
+ }
556
+ };
557
+
558
+ if (startRunOpen === endRunOpen) {
559
+ // Same-run path: both markers live inside one <w:t>. Original logic.
560
+ const startInfo = dissectRun(startRunFull, startMarker);
561
+ if (!startInfo) continue;
562
+ const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
563
+ const endInTextRel = startInfo.textAfter.indexOf(endMarker);
564
+ if (endInTextRel === -1) continue;
565
+ const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
566
+ let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
567
+ let anchorText = anchorTextSame;
568
+ let textBefore = startInfo.textBefore;
569
+
570
+ // Empty anchor: borrow the next word so the comment has something
571
+ // to anchor on. Then normalize the trailing double space.
572
+ if (!anchorText && textAfter) {
573
+ const wordMatch = textAfter.match(/^\s*(\S+)/);
574
+ if (wordMatch) {
575
+ anchorText = wordMatch[1] ?? '';
576
+ textAfter = textAfter.slice(wordMatch[0].length);
577
+ }
578
+ }
579
+ if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
580
+ textAfter = textAfter.slice(1);
581
+ }
582
+ // Suppress unused warning for pre-empty-anchor fullText var
583
+ void fullText;
584
+
585
+ if (textBefore) {
586
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
587
+ }
588
+ emitRangeStarts();
589
+ if (anchorText) {
590
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
591
+ }
592
+ emitRangeEnds();
593
+ if (textAfter) {
594
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
595
+ }
596
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
597
+ injectedIds.add(comment.id);
598
+ continue;
599
+ }
600
+
601
+ // Multi-run path: markers sit in different <w:r> blocks because pandoc
602
+ // applied mid-anchor styling. Split the start run at the start marker,
603
+ // keep all middle runs verbatim (they carry the styled anchor portions),
604
+ // split the end run at the end marker.
605
+ const startInfo = dissectRun(startRunFull, startMarker);
606
+ const endInfo = dissectRun(endRunFull, endMarker);
607
+ if (!startInfo || !endInfo) continue;
608
+
609
+ const middle = documentXml.slice(startRunClose, endRunOpen);
610
+
611
+ if (startInfo.textBefore) {
612
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
613
+ }
614
+ emitRangeStarts();
615
+ if (startInfo.textAfter) {
616
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
617
+ }
618
+ replacement += middle;
619
+ if (endInfo.textBefore) {
620
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
621
+ }
622
+ emitRangeEnds();
623
+ if (endInfo.textAfter) {
624
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
625
+ }
626
+
627
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
628
+ injectedIds.add(comment.id);
629
+ }
630
+
631
+ // Add required namespaces to document.xml for comment threading
632
+ const requiredNs: Record<string, string> = {
633
+ 'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
634
+ 'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
635
+ 'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
636
+ 'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
637
+ 'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
638
+ };
639
+
640
+ // Find <w:document and add namespaces
641
+ const docTagMatch = documentXml.match(/<w:document[^>]*>/);
642
+ if (docTagMatch) {
643
+ let docTag = docTagMatch[0];
644
+ let modified = false;
645
+ for (const [attr, val] of Object.entries(requiredNs)) {
646
+ if (!docTag.includes(attr)) {
647
+ docTag = docTag.replace('>', ` ${attr}="${val}">`);
648
+ modified = true;
649
+ }
650
+ }
651
+ // Add mc:Ignorable if mc namespace was added
652
+ if (modified && !docTag.includes('mc:Ignorable')) {
653
+ docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
654
+ }
655
+ documentXml = documentXml.replace(docTagMatch[0], docTag);
656
+ }
657
+
658
+ // Update document.xml
659
+ zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
660
+
661
+ // All comments (parents + replies) go in comments.xml
662
+ // But only include if parent was injected
663
+ const includedComments = commentsWithIds.filter(c => {
664
+ if (!c.isReply) {
665
+ return injectedIds.has(c.id);
666
+ } else {
667
+ // Include reply if its parent was injected
668
+ const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
669
+ return parent && injectedIds.has(parent.id);
670
+ }
671
+ });
672
+
673
+ // Create comments.xml
674
+ const commentsXml = createCommentsXml(includedComments);
675
+ if (zip.getEntry('word/comments.xml')) {
676
+ zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
677
+ } else {
678
+ zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
679
+ }
680
+
681
+ // Create commentsExtended.xml with reply threading
682
+ const commentsExtXml = createCommentsExtendedXml(includedComments);
683
+ if (zip.getEntry('word/commentsExtended.xml')) {
684
+ zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
685
+ } else {
686
+ zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
687
+ }
688
+
689
+ // Create commentsIds.xml (Word 2016+)
690
+ const commentsIdsXml = createCommentsIdsXml(includedComments);
691
+ if (zip.getEntry('word/commentsIds.xml')) {
692
+ zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
693
+ } else {
694
+ zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
695
+ }
696
+
697
+ // Create commentsExtensible.xml (Word 2018+)
698
+ const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
699
+ if (zip.getEntry('word/commentsExtensible.xml')) {
700
+ zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
701
+ } else {
702
+ zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
703
+ }
704
+
705
+ // Create people.xml (author definitions with Windows Live IDs)
706
+ const peopleXml = createPeopleXml(includedComments);
707
+ if (zip.getEntry('word/people.xml')) {
708
+ zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
709
+ } else {
710
+ zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
711
+ }
712
+
713
+ // Update [Content_Types].xml
714
+ const contentTypesEntry = zip.getEntry('[Content_Types].xml');
715
+ if (contentTypesEntry) {
716
+ let contentTypes = zip.readAsText(contentTypesEntry);
717
+
718
+ if (!contentTypes.includes('comments.xml')) {
719
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
720
+ contentTypes = contentTypes.slice(0, insertPoint) +
721
+ '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
722
+ contentTypes.slice(insertPoint);
723
+ }
724
+
725
+ if (!contentTypes.includes('commentsExtended.xml')) {
726
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
727
+ contentTypes = contentTypes.slice(0, insertPoint) +
728
+ '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
729
+ contentTypes.slice(insertPoint);
730
+ }
731
+
732
+ if (!contentTypes.includes('commentsIds.xml')) {
733
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
734
+ contentTypes = contentTypes.slice(0, insertPoint) +
735
+ '<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
736
+ contentTypes.slice(insertPoint);
737
+ }
738
+
739
+ if (!contentTypes.includes('commentsExtensible.xml')) {
740
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
741
+ contentTypes = contentTypes.slice(0, insertPoint) +
742
+ '<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
743
+ contentTypes.slice(insertPoint);
744
+ }
745
+
746
+ if (!contentTypes.includes('people.xml')) {
747
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
748
+ contentTypes = contentTypes.slice(0, insertPoint) +
749
+ '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
750
+ contentTypes.slice(insertPoint);
751
+ }
752
+
753
+ zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
754
+ }
755
+
756
+ // Update relationships
757
+ const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
758
+ if (relsEntry) {
759
+ let rels = zip.readAsText(relsEntry);
760
+
761
+ const rIdMatches = rels.match(/rId(\d+)/g) || [];
762
+ const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
763
+
764
+ if (!rels.includes('comments.xml')) {
765
+ const insertPoint = rels.lastIndexOf('</Relationships>');
766
+ rels = rels.slice(0, insertPoint) +
767
+ `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
768
+ rels.slice(insertPoint);
769
+ }
770
+
771
+ if (!rels.includes('commentsExtended.xml')) {
772
+ const insertPoint = rels.lastIndexOf('</Relationships>');
773
+ rels = rels.slice(0, insertPoint) +
774
+ `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
775
+ rels.slice(insertPoint);
776
+ }
777
+
778
+ if (!rels.includes('commentsIds.xml')) {
779
+ const insertPoint = rels.lastIndexOf('</Relationships>');
780
+ rels = rels.slice(0, insertPoint) +
781
+ `<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
782
+ rels.slice(insertPoint);
783
+ }
784
+
785
+ if (!rels.includes('commentsExtensible.xml')) {
786
+ const insertPoint = rels.lastIndexOf('</Relationships>');
787
+ rels = rels.slice(0, insertPoint) +
788
+ `<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
789
+ rels.slice(insertPoint);
790
+ }
791
+
792
+ if (!rels.includes('people.xml')) {
793
+ const insertPoint = rels.lastIndexOf('</Relationships>');
794
+ rels = rels.slice(0, insertPoint) +
795
+ `<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
796
+ rels.slice(insertPoint);
797
+ }
798
+
799
+ zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
800
+ }
801
+
802
+ zip.writeZip(outputPath);
803
+
804
+ const parentCount = includedComments.filter(c => !c.isReply).length;
805
+ const replyCount = includedComments.filter(c => c.isReply).length;
806
+
807
+ return {
808
+ success: true,
809
+ commentCount: parentCount,
810
+ replyCount: replyCount,
811
+ skippedComments: comments.length - includedComments.length,
812
+ };
813
+
814
+ } catch (err: any) {
815
+ return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
816
+ }
817
+ }