docrev 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +411 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +38 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +68 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/pdf-comments.js +44 -44
  43. package/dist/lib/plugins.js +57 -57
  44. package/dist/lib/pptx-themes.js +115 -115
  45. package/dist/lib/spelling.js +2 -2
  46. package/dist/lib/templates.js +387 -387
  47. package/dist/lib/themes.js +51 -51
  48. package/eslint.config.js +27 -27
  49. package/lib/anchor-match.ts +276 -276
  50. package/lib/annotations.ts +644 -644
  51. package/lib/build.ts +1300 -1251
  52. package/lib/citations.ts +160 -160
  53. package/lib/commands/build.ts +833 -801
  54. package/lib/commands/citations.ts +515 -515
  55. package/lib/commands/comments.ts +1050 -1050
  56. package/lib/commands/context.ts +174 -174
  57. package/lib/commands/core.ts +309 -309
  58. package/lib/commands/doi.ts +435 -435
  59. package/lib/commands/file-ops.ts +372 -372
  60. package/lib/commands/history.ts +320 -320
  61. package/lib/commands/index.ts +87 -87
  62. package/lib/commands/init.ts +259 -259
  63. package/lib/commands/merge-resolve.ts +378 -378
  64. package/lib/commands/preview.ts +178 -178
  65. package/lib/commands/project-info.ts +244 -244
  66. package/lib/commands/quality.ts +517 -517
  67. package/lib/commands/response.ts +454 -454
  68. package/lib/commands/section-boundaries.ts +82 -82
  69. package/lib/commands/sections.ts +451 -451
  70. package/lib/commands/sync.ts +706 -706
  71. package/lib/commands/text-ops.ts +449 -449
  72. package/lib/commands/utilities.ts +448 -448
  73. package/lib/commands/verify-anchors.ts +272 -272
  74. package/lib/commands/word-tools.ts +340 -340
  75. package/lib/comment-realign.ts +517 -517
  76. package/lib/config.ts +84 -84
  77. package/lib/crossref.ts +781 -781
  78. package/lib/csl.ts +191 -191
  79. package/lib/dependencies.ts +98 -98
  80. package/lib/diff-engine.ts +465 -465
  81. package/lib/doi-cache.ts +115 -115
  82. package/lib/doi.ts +897 -897
  83. package/lib/equations.ts +506 -506
  84. package/lib/errors.ts +346 -346
  85. package/lib/format.ts +541 -541
  86. package/lib/git.ts +326 -326
  87. package/lib/grammar.ts +303 -303
  88. package/lib/image-registry.ts +180 -180
  89. package/lib/import.ts +911 -911
  90. package/lib/journals.ts +543 -543
  91. package/lib/merge.ts +633 -633
  92. package/lib/orcid.ts +144 -144
  93. package/lib/pdf-comments.ts +263 -263
  94. package/lib/pdf-import.ts +524 -524
  95. package/lib/plugins.ts +362 -362
  96. package/lib/postprocess.ts +188 -188
  97. package/lib/pptx-color-filter.lua +37 -37
  98. package/lib/pptx-template.ts +469 -469
  99. package/lib/pptx-themes.ts +483 -483
  100. package/lib/protect-restore.ts +520 -520
  101. package/lib/rate-limiter.ts +94 -94
  102. package/lib/response.ts +197 -197
  103. package/lib/restore-references.ts +240 -240
  104. package/lib/review.ts +327 -327
  105. package/lib/schema.ts +417 -417
  106. package/lib/scientific-words.ts +73 -73
  107. package/lib/sections.ts +335 -335
  108. package/lib/slides.ts +756 -756
  109. package/lib/spelling.ts +334 -334
  110. package/lib/templates.ts +526 -526
  111. package/lib/themes.ts +742 -742
  112. package/lib/trackchanges.ts +247 -247
  113. package/lib/tui.ts +450 -450
  114. package/lib/types.ts +550 -550
  115. package/lib/undo.ts +250 -250
  116. package/lib/utils.ts +69 -69
  117. package/lib/variables.ts +179 -179
  118. package/lib/word-extraction.ts +806 -806
  119. package/lib/word.ts +643 -643
  120. package/lib/wordcomments.ts +817 -817
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +28 -28
  123. package/skill/REFERENCE.md +473 -431
  124. package/skill/SKILL.md +274 -258
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
@@ -1,817 +1,817 @@
1
- /**
2
- * Word comment injection with reply threading
3
- *
4
- * Flow:
5
- * 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
6
- * - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
7
- * - Subsequent adjacent comments = replies (no markers, attach to parent)
8
- * 2. Pandoc converts to DOCX
9
- * 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
10
- * - Replies go in comments.xml with parent reference in commentsExtended.xml
11
- */
12
-
13
- import * as fs from 'fs';
14
- import AdmZip from 'adm-zip';
15
- import { escapeXml } from './utils.js';
16
-
17
// Sentinel tokens written into the markdown before pandoc runs.
// A comment with index n is bracketed as `⟦CMS:n⟧anchor⟦CME:n⟧`; the same
// tokens are searched for in document.xml when the comment ranges are injected.
const MARKER_START_PREFIX = '⟦CMS:'; // opens a comment range
const MARKER_END_PREFIX = '⟦CME:'; // closes a comment range
const MARKER_SUFFIX = '⟧'; // terminates either marker after the index
20
-
21
/** One `{>>…<<}` comment found in the markdown, plus its optional trailing anchor span. */
interface ParsedComment {
  /** Text before the first colon of the comment body; `'Unknown'` when no author prefix is found. */
  author: string;
  /** Comment body with the author prefix removed. */
  text: string;
  /** Contents of a trailing `[anchor]{.mark}` span, or `null` when none follows the comment. */
  anchor: string | null;
  /** Offset in the markdown where the comment begins. */
  start: number;
  /** Offset just past the comment, including the trailing anchor span when one was parsed. */
  end: number;
  /** The markdown slice from `start` to `end`. */
  fullMatch: string;
}
29
-
30
/** A parsed comment annotated with its position in a reply thread. */
interface PreparedComment extends ParsedComment {
  /** `true` when the comment is threaded under an earlier comment of the same cluster. */
  isReply: boolean;
  /** Index (into the prepared comments array) of the cluster's first comment; `null` for parents. */
  parentIdx: number | null;
  /** This comment's own index in the prepared comments array; also used as its marker number. */
  commentIdx: number;
  /** Set on a parent whose anchor was taken over from one of its replies. */
  anchorFromReply?: boolean;
  /** Set on the reply that handed its anchor to the parent and therefore emits the parent's markers. */
  placesParentMarkers?: boolean;
}
37
-
38
/** Return value of `prepareMarkdownWithMarkers`. */
interface PrepareResult {
  /** Markdown with every comment removed and parent comments replaced by range markers. */
  markedMarkdown: string;
  /** All comments in document order, parents and replies alike. */
  comments: PreparedComment[];
}
42
-
43
/** A prepared comment with the identifiers written into the generated comment XML parts. */
interface CommentWithIds extends PreparedComment {
  /** Value of `w:id` on the comment and its range/reference elements. */
  id: string;
  /** `w14:paraId` of the comment's first paragraph. */
  paraId: string;
  /** `w14:paraId` of the second, empty paragraph that is emitted for replies. */
  paraId2: string;
  /** `durableId` shared between the commentsIds and commentsExtensible entries. */
  durableId: string;
  /** `paraId` of the parent comment; only set on replies whose parent exists. */
  parentParaId?: string;
}
50
-
51
/** Outcome reported by `injectCommentsAtMarkers`. */
interface InjectionResult {
  /** `false` when injection aborted; `error` then carries the reason. */
  success: boolean;
  /** Count of comments reported by the injector (0 on the early-exit paths). */
  commentCount: number;
  /** NOTE(review): presumably the number of replies among the injected comments — confirm against the injector. */
  replyCount?: number;
  /** NOTE(review): presumably the number of comments whose markers were not found — confirm against the injector. */
  skippedComments: number;
  /** Human-readable failure message, present only on failure. */
  error?: string;
}
58
-
59
/**
 * Derive a deterministic paragraph id for a comment paragraph.
 *
 * The id is `0x10000000 + commentIdx * 0x100000 + paraNum * 0x1000`, rendered
 * as upper-case hex and left-padded to 8 characters (Word-style ids such as
 * "3F25BC58"). Identical inputs always yield the identical id.
 *
 * NOTE(review): the sum grows past 8 hex digits once commentIdx reaches 0xF00;
 * nothing here clamps it.
 *
 * @param commentIdx Zero-based index of the comment.
 * @param paraNum Paragraph number within the comment (callers pass 1 or 2).
 * @returns Upper-case hexadecimal id, at least 8 characters long.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const BASE = 0x10000000;
  const COMMENT_STRIDE = 0x00100000;
  const PARAGRAPH_STRIDE = 0x00001000;

  const numericId = BASE + commentIdx * COMMENT_STRIDE + paraNum * PARAGRAPH_STRIDE;
  const hex = numericId.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
66
-
67
/**
 * Parse CriticMarkup comments and replace them with injection markers.
 *
 * Every `{>>author: text<<}` block (optionally followed by an
 * `[anchor]{.mark}` span) is removed from the markdown. The first comment of
 * a cluster is the parent and is replaced by `⟦CMS:n⟧anchor⟦CME:n⟧`; later
 * comments of the same cluster are replies and leave no marker behind.
 *
 * Reply detection has two modes, chosen from the markdown itself:
 * - if any author carries the `↪ ` prefix, only prefixed comments are replies;
 * - otherwise a comment starting fewer than 10 characters after the previous
 *   comment's end is treated as a reply (hand-typed CriticMarkup).
 *
 * When a reply carries an anchor and its parent has none, the anchor moves to
 * the parent and the markers (numbered with the parent's index) are emitted
 * at the reply's position.
 *
 * @param markdown Markdown source containing CriticMarkup comments.
 * @returns
 * - markedMarkdown: markdown with markers for parent comments only
 * - comments: array with author, text, isReply, parentIdx, in document order
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Match the comment block first; the optional trailing `[anchor]{.mark}`
  // span is captured by walking brackets manually, because a `[^\]]+` regex
  // would stop at the inner `]` of nested syntax such as `[[0..9]]{.mark}`.
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;
  const MARK_SUFFIX = '{.mark}';

  // Returns the bracket-balanced anchor that starts (after optional
  // whitespace) at fromIdx and is immediately followed by `{.mark}`,
  // or null when no such span exists.
  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let i = fromIdx;
    while (i < text.length && /\s/.test(text[i] ?? '')) i++;
    if (text[i] !== '[') return null;
    let depth = 1;
    let j = i + 1;
    while (j < text.length) {
      const ch = text[j];
      if (ch === '[') depth++;
      else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      j++;
    }
    if (depth !== 0) return null;
    if (!text.startsWith(MARK_SUFFIX, j + 1)) return null;
    return { anchor: text.slice(i + 1, j), endIdx: j + 1 + MARK_SUFFIX.length };
  }

  const wrapWithMarkers = (idx: number, anchor: string): string =>
    `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

  const REPLY_PREFIX = '↪ ';
  const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    // Only a colon within the first 30 characters counts as an author separator.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    // The `↪ ` prefix is the authoritative reply signal emitted by
    // `insertCommentsIntoMarkdown`. Strip it so Word displays the real name.
    let explicitReply = false;
    if (author.startsWith(REPLY_PREFIX)) {
      explicitReply = true;
      author = author.slice(REPLY_PREFIX.length).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
      explicitReply,
    });

    // Skip the consumed anchor so the next search does not start inside it.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // Explicit mode trusts the `↪ ` prefix only, so distinct clusters that
  // happen to sit back to back are not misthreaded; hand-typed markdown falls
  // back to gap-based adjacency.
  const ADJACENT_THRESHOLD = 10;
  const useExplicitMode = rawMatches.some(m => m.explicitReply);
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // Index of first comment in current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    const continuesCluster = useExplicitMode ? m.explicitReply : gap < ADJACENT_THRESHOLD;
    if (!continuesCluster) {
      clusterParentIdx = -1;
    }

    // First comment in a cluster is the parent (regardless of author);
    // everything after it replies to that first comment.
    const isReply = clusterParentIdx !== -1;
    comments.push({
      author: m.author,
      text: m.text,
      anchor: m.anchor,
      start: m.start,
      end: m.end,
      fullMatch: m.fullMatch,
      isReply,
      parentIdx: isReply ? clusterParentIdx : null,
      commentIdx: comments.length,
    });
    if (!isReply) {
      clusterParentIdx = comments.length - 1;
    }

    lastCommentEnd = m.end;
  }

  // Propagate anchors from replies to anchor-less parents. The reply keeps a
  // flag so that it emits the parent's markers at its own position.
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true;
        c.placesParentMarkers = true;
        c.anchor = null;
      }
    }
  }

  // Rewrite from the last comment to the first so earlier offsets stay valid.
  let markedMarkdown = markdown;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (!c.isReply && !c.anchorFromReply) {
      // Ordinary parent: the comment (and its anchor span) becomes the marker pair.
      markedMarkdown =
        markedMarkdown.slice(0, c.start) + wrapWithMarkers(i, c.anchor ?? '') + markedMarkdown.slice(c.end);
      continue;
    }

    // Reply, or parent whose markers are placed by a reply: drop the comment
    // from the text. At most one preceding whitespace character is consumed to
    // avoid a double space; walking further back would shift positions that
    // lower-index comments still depend on.
    let removeStart = c.start;
    if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
      removeStart--;
    }

    let replacement = '';
    if (c.isReply && c.placesParentMarkers && c.parentIdx !== null) {
      // Use the anchor already handed to the parent instead of re-parsing
      // fullMatch: a `[^\]]+` regex cannot see nested-bracket anchors and can
      // mis-capture when the comment text itself contains `[`.
      const propagatedAnchor = comments[c.parentIdx]?.anchor;
      if (propagatedAnchor) {
        replacement = wrapWithMarkers(c.parentIdx, propagatedAnchor);
      }
    }
    markedMarkdown = markedMarkdown.slice(0, removeStart) + replacement + markedMarkdown.slice(c.end);
  }

  return { markedMarkdown, comments };
}
284
-
285
/**
 * Render the `<w:comments>` part (comments.xml) for parents and replies.
 *
 * Each comment gets one paragraph holding the annotation reference and the
 * escaped comment text; replies additionally get a second, empty paragraph
 * whose `paraId2` is what the threading parts point at.
 *
 * @param comments Comments with their ids already assigned.
 * @returns The complete XML document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Word expects date without milliseconds: 2025-12-30T08:33:00Z
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  // Use a consistent rsid (8-char hex) for all comments in this batch
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  for (const comment of comments) {
    // First character of every space-separated name part. Array.from keeps a
    // surrogate pair together, and the result is escaped because a name part
    // may start with `&`, `<` or `"`, which would otherwise break the attribute.
    const initials = comment.author
      .split(' ')
      .map(part => Array.from(part)[0] ?? '')
      .join('');

    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second empty paragraph: rsidRDefault matches rsidR
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
313
-
314
/**
 * Render the `<w15:commentsEx>` part that carries reply threading.
 *
 * A reply with a known parent is listed under its second paragraph id and
 * points at the parent's first paragraph id; every other comment is listed
 * under its own first paragraph id. All entries are written with `done="0"`.
 *
 * @param comments Comments with ids (and `parentParaId` on linked replies).
 * @returns The complete XML document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map(entry => {
    const threaded = entry.isReply && entry.parentParaId;
    return threaded
      ? `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`
      : `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
  });

  return declaration + rootOpen + entries.join('') + '</w15:commentsEx>';
}
332
-
333
/**
 * Produce an 8-character upper-case hex durable id for a comment.
 *
 * The value is seeded from the current time (so it differs between runs),
 * offset by `index * 0x01000000`, and reduced modulo 0x7FFFFFFF because Word
 * reads durable ids as signed 32-bit integers.
 *
 * @param index Zero-based index of the comment.
 * @returns Hex string, zero-padded to 8 characters.
 */
function generateDurableId(index: number): string {
  const SIGNED_INT32_MAX = 0x7FFFFFFF;
  // Time-derived seed between 0x10000000 and 0x50000000
  const seed = 0x10000000 + (Date.now() % 0x40000000);
  const wrapped = (seed + index * 0x01000000) % SIGNED_INT32_MAX;
  const hex = wrapped.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
341
-
342
/**
 * Render the `<w16cid:commentsIds>` part mapping paragraph ids to durable ids.
 *
 * Exactly one entry is written per comment, keyed by the id of the comment's
 * LAST paragraph: `paraId` for parents (one paragraph), `paraId2` for replies
 * (two paragraphs).
 *
 * @param comments Comments with ids already assigned.
 * @returns The complete XML document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  // Minimal namespaces matching golden file structure
  const head = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w16cid:commentsIds ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ',
    'mc:Ignorable="w16cid">',
  ].join('');

  const body = comments
    .map(entry => {
      const lastParaId = entry.isReply ? entry.paraId2 : entry.paraId;
      return `<w16cid:commentId w16cid:paraId="${lastParaId}" w16cid:durableId="${entry.durableId}"/>`;
    })
    .join('');

  return `${head}${body}</w16cid:commentsIds>`;
}
361
-
362
/**
 * Render the `<w16cex:commentsExtensible>` part.
 *
 * One entry per comment, keyed by its durable id and stamped with the current
 * UTC time (ISO 8601 without milliseconds).
 *
 * @param comments Comments with durable ids already assigned.
 * @returns The complete XML document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // Minimal namespaces matching golden file structure
  const pieces: string[] = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w16cex:commentsExtensible ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ',
    'mc:Ignorable="w16cex">',
  ];

  for (const entry of comments) {
    pieces.push(
      `<w16cex:commentExtensible w16cex:durableId="${entry.durableId}" w16cex:dateUtc="${timestamp}"/>`,
    );
  }

  pieces.push('</w16cex:commentsExtensible>');
  return pieces.join('');
}
380
-
381
- // Generate deterministic user IDs for authors (no hardcoded personal data)
382
-
383
/**
 * Render the `<w15:people>` part listing each distinct comment author once.
 *
 * Every author gets a `presenceInfo` entry with provider "Windows Live" and a
 * user id derived from the author name via `generateUserId`.
 *
 * @param comments Comments whose `author` fields are collected.
 * @returns The complete XML document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  const rootAttributes = [
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml"',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash"',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex"',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh"',
  ];

  // Set preserves first-seen order, so authors appear in comment order.
  const uniqueAuthors = Array.from(new Set(comments.map(c => c.author)));

  const people = uniqueAuthors.map(name => {
    const userId = generateUserId(name);
    return (
      `<w15:person w15:author="${escapeXml(name)}">` +
      `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>` +
      `</w15:person>`
    );
  });

  return (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' +
    '<w15:people ' + rootAttributes.join(' ') + '>' +
    people.join('') +
    '</w15:people>'
  );
}
412
-
413
/**
 * Derive a deterministic 16-character hex id from an author name.
 *
 * Folds the name's UTF-16 code units into a 32-bit hash (`hash * 31 + unit`,
 * truncated to a signed 32-bit integer at every step), takes the absolute
 * value and left-pads its lower-case hex form with zeros to 16 characters.
 *
 * @param author Author display name; the empty string hashes to all zeros.
 * @returns 16-character lower-case hex string.
 */
function generateUserId(author: string): string {
  const hash = author
    .split('')
    .reduce((acc, unit) => ((acc << 5) - acc + unit.charCodeAt(0)) | 0, 0);

  const hex = Math.abs(hash).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
422
-
423
- /**
424
- * Inject comments at marker positions
425
- */
426
- export async function injectCommentsAtMarkers(
427
- docxPath: string,
428
- comments: PreparedComment[],
429
- outputPath: string
430
- ): Promise<InjectionResult> {
431
- try {
432
- if (!fs.existsSync(docxPath)) {
433
- return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
434
- }
435
-
436
- if (comments.length === 0) {
437
- fs.copyFileSync(docxPath, outputPath);
438
- return { success: true, commentCount: 0, skippedComments: 0 };
439
- }
440
-
441
- const zip = new AdmZip(docxPath);
442
- const documentEntry = zip.getEntry('word/document.xml');
443
- if (!documentEntry) {
444
- return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
445
- }
446
-
447
- let documentXml = zip.readAsText(documentEntry);
448
-
449
- // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
450
- const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
451
- ...c,
452
- id: String(idx + 1),
453
- paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
454
- paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
455
- durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
456
- }));
457
-
458
- // Link replies to parent paraIds
459
- for (const c of commentsWithIds) {
460
- if (c.isReply && c.parentIdx !== null) {
461
- const parent = commentsWithIds[c.parentIdx];
462
- if (parent) {
463
- c.parentParaId = parent.paraId;
464
- }
465
- }
466
- }
467
-
468
- const injectedIds = new Set<string>();
469
-
470
- // Process only parent comments (non-replies) for document ranges
471
- const parentComments = commentsWithIds.filter(c => !c.isReply);
472
-
473
- for (let i = parentComments.length - 1; i >= 0; i--) {
474
- const comment = parentComments[i];
475
- if (!comment) continue;
476
- const idx = comment.commentIdx;
477
-
478
- const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
479
- const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
480
-
481
- const startPos = documentXml.indexOf(startMarker);
482
- const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
483
-
484
- if (startPos === -1 || endPos === -1) continue;
485
-
486
- // Find the runs containing each marker. Pandoc may split a single
487
- // markdown anchor across multiple <w:r> blocks when it applies styling
488
- // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
489
- // The same-run path (current happy path) collapses into the multi-run
490
- // path when start and end runs coincide.
491
- const startRunOpen = Math.max(
492
- documentXml.lastIndexOf('<w:r>', startPos),
493
- documentXml.lastIndexOf('<w:r ', startPos),
494
- );
495
- const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
496
- const endRunOpen = Math.max(
497
- documentXml.lastIndexOf('<w:r>', endPos),
498
- documentXml.lastIndexOf('<w:r ', endPos),
499
- );
500
- const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
501
-
502
- if (
503
- startRunOpen === -1 || startRunCloseIdx === -1 ||
504
- endRunOpen === -1 || endRunCloseIdx === -1
505
- ) continue;
506
-
507
- const startRunClose = startRunCloseIdx + '</w:r>'.length;
508
- const endRunClose = endRunCloseIdx + '</w:r>'.length;
509
-
510
- const startRunFull = documentXml.slice(startRunOpen, startRunClose);
511
- const endRunFull = documentXml.slice(endRunOpen, endRunClose);
512
-
513
- // Extract <w:rPr> and <w:t> element shape from each run. Both pieces
514
- // are needed verbatim so a textBefore split keeps its original styling
515
- // and so the post-anchor textAfter render keeps the end run's styling.
516
- function dissectRun(runXml: string, marker: string): {
517
- rPr: string;
518
- tElement: string;
519
- textBefore: string;
520
- textAfter: string;
521
- } | null {
522
- const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
523
- const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
524
- if (!tMatch) return null;
525
- const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
526
- if (!tOpenMatch) return null;
527
- const tContent = tMatch[1] ?? '';
528
- const markerInT = tContent.indexOf(marker);
529
- if (markerInT === -1) return null;
530
- return {
531
- rPr: rPrMatch ? rPrMatch[0] : '',
532
- tElement: tOpenMatch[0],
533
- textBefore: tContent.slice(0, markerInT),
534
- textAfter: tContent.slice(markerInT + marker.length),
535
- };
536
- }
537
-
538
- let replacement = '';
539
- const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
540
-
541
- const emitRangeStarts = () => {
542
- replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
543
- for (const reply of replies) {
544
- replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
545
- }
546
- };
547
-
548
- const emitRangeEnds = () => {
549
- replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
550
- replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
551
- for (const reply of replies) {
552
- replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
553
- replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
554
- injectedIds.add(reply.id);
555
- }
556
- };
557
-
558
- if (startRunOpen === endRunOpen) {
559
- // Same-run path: both markers live inside one <w:t>. Original logic.
560
- const startInfo = dissectRun(startRunFull, startMarker);
561
- if (!startInfo) continue;
562
- const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
563
- const endInTextRel = startInfo.textAfter.indexOf(endMarker);
564
- if (endInTextRel === -1) continue;
565
- const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
566
- let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
567
- let anchorText = anchorTextSame;
568
- let textBefore = startInfo.textBefore;
569
-
570
- // Empty anchor: borrow the next word so the comment has something
571
- // to anchor on. Then normalize the trailing double space.
572
- if (!anchorText && textAfter) {
573
- const wordMatch = textAfter.match(/^\s*(\S+)/);
574
- if (wordMatch) {
575
- anchorText = wordMatch[1] ?? '';
576
- textAfter = textAfter.slice(wordMatch[0].length);
577
- }
578
- }
579
- if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
580
- textAfter = textAfter.slice(1);
581
- }
582
- // Suppress unused warning for pre-empty-anchor fullText var
583
- void fullText;
584
-
585
- if (textBefore) {
586
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
587
- }
588
- emitRangeStarts();
589
- if (anchorText) {
590
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
591
- }
592
- emitRangeEnds();
593
- if (textAfter) {
594
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
595
- }
596
- documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
597
- injectedIds.add(comment.id);
598
- continue;
599
- }
600
-
601
- // Multi-run path: markers sit in different <w:r> blocks because pandoc
602
- // applied mid-anchor styling. Split the start run at the start marker,
603
- // keep all middle runs verbatim (they carry the styled anchor portions),
604
- // split the end run at the end marker.
605
- const startInfo = dissectRun(startRunFull, startMarker);
606
- const endInfo = dissectRun(endRunFull, endMarker);
607
- if (!startInfo || !endInfo) continue;
608
-
609
- const middle = documentXml.slice(startRunClose, endRunOpen);
610
-
611
- if (startInfo.textBefore) {
612
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
613
- }
614
- emitRangeStarts();
615
- if (startInfo.textAfter) {
616
- replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
617
- }
618
- replacement += middle;
619
- if (endInfo.textBefore) {
620
- replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
621
- }
622
- emitRangeEnds();
623
- if (endInfo.textAfter) {
624
- replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
625
- }
626
-
627
- documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
628
- injectedIds.add(comment.id);
629
- }
630
-
631
- // Add required namespaces to document.xml for comment threading
632
- const requiredNs: Record<string, string> = {
633
- 'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
634
- 'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
635
- 'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
636
- 'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
637
- 'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
638
- };
639
-
640
- // Find <w:document and add namespaces
641
- const docTagMatch = documentXml.match(/<w:document[^>]*>/);
642
- if (docTagMatch) {
643
- let docTag = docTagMatch[0];
644
- let modified = false;
645
- for (const [attr, val] of Object.entries(requiredNs)) {
646
- if (!docTag.includes(attr)) {
647
- docTag = docTag.replace('>', ` ${attr}="${val}">`);
648
- modified = true;
649
- }
650
- }
651
- // Add mc:Ignorable if mc namespace was added
652
- if (modified && !docTag.includes('mc:Ignorable')) {
653
- docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
654
- }
655
- documentXml = documentXml.replace(docTagMatch[0], docTag);
656
- }
657
-
658
- // Update document.xml
659
- zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
660
-
661
- // All comments (parents + replies) go in comments.xml
662
- // But only include if parent was injected
663
- const includedComments = commentsWithIds.filter(c => {
664
- if (!c.isReply) {
665
- return injectedIds.has(c.id);
666
- } else {
667
- // Include reply if its parent was injected
668
- const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
669
- return parent && injectedIds.has(parent.id);
670
- }
671
- });
672
-
673
- // Create comments.xml
674
- const commentsXml = createCommentsXml(includedComments);
675
- if (zip.getEntry('word/comments.xml')) {
676
- zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
677
- } else {
678
- zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
679
- }
680
-
681
- // Create commentsExtended.xml with reply threading
682
- const commentsExtXml = createCommentsExtendedXml(includedComments);
683
- if (zip.getEntry('word/commentsExtended.xml')) {
684
- zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
685
- } else {
686
- zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
687
- }
688
-
689
- // Create commentsIds.xml (Word 2016+)
690
- const commentsIdsXml = createCommentsIdsXml(includedComments);
691
- if (zip.getEntry('word/commentsIds.xml')) {
692
- zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
693
- } else {
694
- zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
695
- }
696
-
697
- // Create commentsExtensible.xml (Word 2018+)
698
- const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
699
- if (zip.getEntry('word/commentsExtensible.xml')) {
700
- zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
701
- } else {
702
- zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
703
- }
704
-
705
- // Create people.xml (author definitions with Windows Live IDs)
706
- const peopleXml = createPeopleXml(includedComments);
707
- if (zip.getEntry('word/people.xml')) {
708
- zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
709
- } else {
710
- zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
711
- }
712
-
713
- // Update [Content_Types].xml
714
- const contentTypesEntry = zip.getEntry('[Content_Types].xml');
715
- if (contentTypesEntry) {
716
- let contentTypes = zip.readAsText(contentTypesEntry);
717
-
718
- if (!contentTypes.includes('comments.xml')) {
719
- const insertPoint = contentTypes.lastIndexOf('</Types>');
720
- contentTypes = contentTypes.slice(0, insertPoint) +
721
- '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
722
- contentTypes.slice(insertPoint);
723
- }
724
-
725
- if (!contentTypes.includes('commentsExtended.xml')) {
726
- const insertPoint = contentTypes.lastIndexOf('</Types>');
727
- contentTypes = contentTypes.slice(0, insertPoint) +
728
- '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
729
- contentTypes.slice(insertPoint);
730
- }
731
-
732
- if (!contentTypes.includes('commentsIds.xml')) {
733
- const insertPoint = contentTypes.lastIndexOf('</Types>');
734
- contentTypes = contentTypes.slice(0, insertPoint) +
735
- '<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
736
- contentTypes.slice(insertPoint);
737
- }
738
-
739
- if (!contentTypes.includes('commentsExtensible.xml')) {
740
- const insertPoint = contentTypes.lastIndexOf('</Types>');
741
- contentTypes = contentTypes.slice(0, insertPoint) +
742
- '<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
743
- contentTypes.slice(insertPoint);
744
- }
745
-
746
- if (!contentTypes.includes('people.xml')) {
747
- const insertPoint = contentTypes.lastIndexOf('</Types>');
748
- contentTypes = contentTypes.slice(0, insertPoint) +
749
- '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
750
- contentTypes.slice(insertPoint);
751
- }
752
-
753
- zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
754
- }
755
-
756
- // Update relationships
757
- const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
758
- if (relsEntry) {
759
- let rels = zip.readAsText(relsEntry);
760
-
761
- const rIdMatches = rels.match(/rId(\d+)/g) || [];
762
- const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
763
-
764
- if (!rels.includes('comments.xml')) {
765
- const insertPoint = rels.lastIndexOf('</Relationships>');
766
- rels = rels.slice(0, insertPoint) +
767
- `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
768
- rels.slice(insertPoint);
769
- }
770
-
771
- if (!rels.includes('commentsExtended.xml')) {
772
- const insertPoint = rels.lastIndexOf('</Relationships>');
773
- rels = rels.slice(0, insertPoint) +
774
- `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
775
- rels.slice(insertPoint);
776
- }
777
-
778
- if (!rels.includes('commentsIds.xml')) {
779
- const insertPoint = rels.lastIndexOf('</Relationships>');
780
- rels = rels.slice(0, insertPoint) +
781
- `<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
782
- rels.slice(insertPoint);
783
- }
784
-
785
- if (!rels.includes('commentsExtensible.xml')) {
786
- const insertPoint = rels.lastIndexOf('</Relationships>');
787
- rels = rels.slice(0, insertPoint) +
788
- `<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
789
- rels.slice(insertPoint);
790
- }
791
-
792
- if (!rels.includes('people.xml')) {
793
- const insertPoint = rels.lastIndexOf('</Relationships>');
794
- rels = rels.slice(0, insertPoint) +
795
- `<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
796
- rels.slice(insertPoint);
797
- }
798
-
799
- zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
800
- }
801
-
802
- zip.writeZip(outputPath);
803
-
804
- const parentCount = includedComments.filter(c => !c.isReply).length;
805
- const replyCount = includedComments.filter(c => c.isReply).length;
806
-
807
- return {
808
- success: true,
809
- commentCount: parentCount,
810
- replyCount: replyCount,
811
- skippedComments: comments.length - includedComments.length,
812
- };
813
-
814
- } catch (err: any) {
815
- return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
816
- }
817
- }
1
+ /**
2
+ * Word comment injection with reply threading
3
+ *
4
+ * Flow:
5
+ * 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
6
+ * - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
7
+ * - Subsequent adjacent comments = replies (no markers, attach to parent)
8
+ * 2. Pandoc converts to DOCX
9
+ * 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
10
+ * - Replies go in comments.xml with parent reference in commentsExtended.xml
11
+ */
12
+
13
+ import * as fs from 'fs';
14
+ import AdmZip from 'adm-zip';
15
+ import { escapeXml } from './utils.js';
16
+
17
+ const MARKER_START_PREFIX = '⟦CMS:';
18
+ const MARKER_END_PREFIX = '⟦CME:';
19
+ const MARKER_SUFFIX = '⟧';
20
+
21
+ interface ParsedComment {
22
+ author: string;
23
+ text: string;
24
+ anchor: string | null;
25
+ start: number;
26
+ end: number;
27
+ fullMatch: string;
28
+ }
29
+
30
+ interface PreparedComment extends ParsedComment {
31
+ isReply: boolean;
32
+ parentIdx: number | null;
33
+ commentIdx: number;
34
+ anchorFromReply?: boolean;
35
+ placesParentMarkers?: boolean;
36
+ }
37
+
38
+ interface PrepareResult {
39
+ markedMarkdown: string;
40
+ comments: PreparedComment[];
41
+ }
42
+
43
+ interface CommentWithIds extends PreparedComment {
44
+ id: string;
45
+ paraId: string;
46
+ paraId2: string;
47
+ durableId: string;
48
+ parentParaId?: string;
49
+ }
50
+
51
+ interface InjectionResult {
52
+ success: boolean;
53
+ commentCount: number;
54
+ replyCount?: number;
55
+ skippedComments: number;
56
+ error?: string;
57
+ }
58
+
59
/**
 * Builds a deterministic 8-character uppercase hex `w14:paraId` for one
 * paragraph of a generated comment (Word uses IDs like "3F25BC58").
 *
 * The same inputs always produce the same output. For comment indices below
 * 0x700 (1792) the value is exactly
 * `0x10000000 + commentIdx * 0x00100000 + paraNum * 0x00001000`.
 *
 * That formula alone reaches 0x80000000 at index 1792 and needs nine hex
 * digits from index 3840. Larger indices therefore wrap back into the same
 * slot range and are told apart by a "lap" counter stored in the low 12 bits,
 * which the base formula always leaves at zero. The result stays unique and
 * below 0x80000000 while `paraNum < 0x100` and `commentIdx < 0x700 * 0x1000`.
 *
 * @param commentIdx - Zero-based index of the comment.
 * @param paraNum - Paragraph number inside the comment (1 or 2 in this file).
 * @returns The ID as 8 uppercase hex characters.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  // Number of 0x00100000-wide slots that fit between the 0x10000000 base and
  // the 0x80000000 ceiling.
  const slotCount = 0x700;
  const slot = commentIdx % slotCount;
  const lap = Math.floor(commentIdx / slotCount);
  const id = 0x10000000 + slot * 0x00100000 + paraNum * 0x00001000 + lap;
  return id.toString(16).toUpperCase().padStart(8, '0');
}
66
+
67
/**
 * Parse CriticMarkup comments (`{>>Author: text<<}`) and replace each
 * thread's parent with position markers.
 *
 * Reply detection has two modes, chosen from the markdown itself:
 * - If any comment's author carries the `↪ ` prefix, only prefixed comments
 *   are replies (explicit mode). The prefix is stripped from the author.
 * - Otherwise a comment that starts fewer than 10 characters after the
 *   previous comment ends is treated as a reply (hand-typed CriticMarkup).
 * The first comment of a cluster is always the parent.
 *
 * @param markdown - Markdown source containing CriticMarkup comments.
 * @returns
 * - `markedMarkdown`: the markdown with every comment removed; each parent is
 *   replaced by `⟦CMS:n⟧anchor⟦CME:n⟧`, where `n` is the parent's index in
 *   `comments`.
 * - `comments`: all comments in document order with author, text, `isReply`
 *   and `parentIdx` resolved.
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Match the comment block first; the optional trailing `[anchor]{.mark}`
  // span is captured manually. A regex `[^\]]+` for the anchor would bail on
  // the inner `]` of nested syntax (e.g. `[[0..9]]{.mark}`), so the brackets
  // are walked by hand and the `{.mark}` suffix is verified.
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;

  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let i = fromIdx;
    while (i < text.length && /\s/.test(text[i] ?? '')) i++;
    if (text[i] !== '[') return null;
    let depth = 1;
    let j = i + 1;
    while (j < text.length) {
      const ch = text[j];
      if (ch === '[') depth++;
      else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      j++;
    }
    if (depth !== 0) return null;
    if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
    return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
  }

  const REPLY_PREFIX = '↪ ';
  const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    // Only an early colon is treated as the author separator.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    // The `↪ ` prefix is the authoritative reply signal. Strip it from the
    // author so the real name is what ends up in the comment.
    let explicitReply = false;
    if (author.startsWith(REPLY_PREFIX)) {
      explicitReply = true;
      author = author.slice(REPLY_PREFIX.length).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
      explicitReply,
    });

    // Continue scanning after the consumed anchor so its body is never
    // searched for another `{>>...<<}`.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  const ADJACENT_THRESHOLD = 10;
  const useExplicitMode = rawMatches.some(m => m.explicitReply);
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // Index of the first comment in the current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    const isAdjacent = useExplicitMode ? m.explicitReply : gap < ADJACENT_THRESHOLD;

    // A non-adjacent comment starts a new cluster.
    if (!isAdjacent) {
      clusterParentIdx = -1;
    }

    // First comment in a cluster is the parent (regardless of author);
    // every later one replies to it.
    const isReply = clusterParentIdx !== -1;
    comments.push({
      author: m.author,
      text: m.text,
      anchor: m.anchor,
      start: m.start,
      end: m.end,
      fullMatch: m.fullMatch,
      isReply,
      parentIdx: isReply ? clusterParentIdx : null,
      commentIdx: comments.length,
    });
    if (!isReply) {
      clusterParentIdx = comments.length - 1;
    }

    lastCommentEnd = m.end;
  }

  // Propagate anchors from replies to parents: if a reply has an anchor but
  // its parent does not, the anchor moves to the parent and the reply is
  // flagged as the place where the parent's markers are written.
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true;
        c.placesParentMarkers = true;
        c.anchor = null;
      }
    }
  }

  // Build the marked markdown from the end backwards so that the offsets of
  // earlier comments stay valid.
  let markedMarkdown = markdown;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (!c.isReply && !c.anchorFromReply) {
      // Normal parent: replace the comment (and its anchor span) with markers.
      const anchor = c.anchor || '';
      const replacement = `${MARKER_START_PREFIX}${i}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${i}${MARKER_SUFFIX}`;
      markedMarkdown = markedMarkdown.slice(0, c.start) + replacement + markedMarkdown.slice(c.end);
      continue;
    }

    // Replies, and parents whose markers are written by a reply, are removed
    // from the text. At most one preceding whitespace character is consumed
    // to avoid double spaces; walking further back would shift positions
    // that lower-index comments still depend on.
    let removeStart = c.start;
    if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
      removeStart--;
    }

    let replacement = '';
    if (c.isReply && c.placesParentMarkers && c.parentIdx !== null) {
      // The anchor was moved onto the parent during propagation, so read it
      // from there. Re-extracting it from `fullMatch` with a `[^\]]+` regex
      // fails on nested brackets and would silently drop both the anchor
      // text and the parent's markers.
      const parentIdx = c.parentIdx;
      const anchorText = comments[parentIdx]?.anchor;
      if (anchorText) {
        replacement = `${MARKER_START_PREFIX}${parentIdx}${MARKER_SUFFIX}${anchorText}${MARKER_END_PREFIX}${parentIdx}${MARKER_SUFFIX}`;
      }
    }
    markedMarkdown = markedMarkdown.slice(0, removeStart) + replacement + markedMarkdown.slice(c.end);
  }

  return { markedMarkdown, comments };
}
284
+
285
/**
 * Serializes the comments into the text of `word/comments.xml`.
 *
 * Every comment gets one paragraph holding the annotation reference and the
 * comment text. Replies get an additional empty second paragraph whose
 * `paraId2` is what the threading parts refer to.
 *
 * @param comments - Comments (parents and replies) to serialize, in order.
 * @returns The complete XML document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Word expects the date without milliseconds: 2025-12-30T08:33:00Z
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  // One rsid (8-char hex) shared by all comments in this batch
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  for (const comment of comments) {
    // First character of each space-separated word. String destructuring
    // iterates by code point, so astral characters (e.g. emoji) are not cut
    // into lone surrogates; empty words contribute nothing.
    const initials = comment.author
      .split(' ')
      .map(word => {
        const [first = ''] = word;
        return first;
      })
      .join('');

    // Initials are derived from user-supplied text and must be escaped like
    // the author itself, otherwise a leading `"`, `<` or `&` breaks the XML.
    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second empty paragraph: rsidRDefault matches rsidR
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
313
+
314
/**
 * Serializes the threading information into the text of
 * `word/commentsExtended.xml`.
 *
 * A reply with a known parent is listed under its second (empty) paragraph's
 * ID and points at the parent's first-paragraph ID. Every other comment is
 * listed under its own first-paragraph ID.
 *
 * @param comments - Comments (parents and replies) to describe, in order.
 * @returns The complete XML document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const declaration = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map(entry => {
    const isLinkedReply = entry.isReply && entry.parentParaId;
    return isLinkedReply
      ? `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`
      : `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
  });

  return declaration + rootOpen + entries.join('') + '</w15:commentsEx>';
}
332
+
333
/**
 * Produces an 8-character uppercase hex durable ID for a comment.
 *
 * The value mixes the current time with the comment index and is reduced
 * modulo 0x7FFFFFFF so it stays inside the signed 32-bit range, because Word
 * interprets durable IDs as signed 32-bit integers.
 *
 * @param index - Zero-based index of the comment.
 * @returns The ID as 8 uppercase hex characters.
 */
function generateDurableId(index: number): string {
  const signedInt32Max = 0x7FFFFFFF;
  // Time-derived seed, landing between 0x10000000 and 0x50000000.
  const timeSeed = 0x10000000 + (Date.now() % 0x40000000);
  // Each comment index shifts the seed by a fixed stride before wrapping.
  const indexOffset = index * 0x01000000;
  const value = (timeSeed + indexOffset) % signedInt32Max;
  return value.toString(16).toUpperCase().padStart(8, '0');
}
341
+
342
/**
 * Serializes the paragraph-ID to durable-ID mapping into the text of
 * `word/commentsIds.xml`.
 *
 * There is exactly one entry per comment, keyed by the ID of the comment's
 * last paragraph: `paraId` for parents (one paragraph) and `paraId2` for
 * replies (two paragraphs).
 *
 * @param comments - Comments (parents and replies) to list, in order.
 * @returns The complete XML document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cid:commentsIds',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'mc:Ignorable="w16cid">',
  ].join(' ');

  const parts: string[] = ['<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n', rootOpen];
  comments.forEach(entry => {
    const lastParaId = entry.isReply ? entry.paraId2 : entry.paraId;
    parts.push(`<w16cid:commentId w16cid:paraId="${lastParaId}" w16cid:durableId="${entry.durableId}"/>`);
  });
  parts.push('</w16cid:commentsIds>');

  return parts.join('');
}
361
+
362
/**
 * Serializes per-comment metadata into the text of
 * `word/commentsExtensible.xml`.
 *
 * There is exactly one entry per comment, keyed by its durable ID. All
 * entries share a single UTC timestamp taken when the function runs, written
 * without milliseconds.
 *
 * @param comments - Comments (parents and replies) to list, in order.
 * @returns The complete XML document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // Minimal namespaces matching golden file structure
  const rootOpen = [
    '<w16cex:commentsExtensible',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'mc:Ignorable="w16cex">',
  ].join(' ');

  const body = comments
    .map(entry => `<w16cex:commentExtensible w16cex:durableId="${entry.durableId}" w16cex:dateUtc="${timestamp}"/>`)
    .join('');

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + rootOpen + body + '</w16cex:commentsExtensible>';
}
380
+
381
/**
 * Serializes the author list into the text of `word/people.xml`.
 *
 * Each distinct author name appears once, in order of first appearance, with
 * a "Windows Live" presence entry whose user ID comes from `generateUserId`
 * (derived from the name, so no hardcoded personal data is needed).
 *
 * @param comments - Comments whose authors should be listed.
 * @returns The complete XML document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  const rootOpen = [
    '<w15:people',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml"',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash"',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex"',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh">',
  ].join(' ');

  // A Set keeps the first occurrence of each name and preserves that order.
  const uniqueAuthors = new Set<string>();
  for (const entry of comments) {
    uniqueAuthors.add(entry.author);
  }

  const people = Array.from(uniqueAuthors, name => {
    const userId = generateUserId(name);
    return (
      `<w15:person w15:author="${escapeXml(name)}">` +
      `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>` +
      `</w15:person>`
    );
  });

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + rootOpen + people.join('') + '</w15:people>';
}
412
+
413
/**
 * Derives a deterministic 16-character lowercase hex user ID from an author
 * name.
 *
 * The name's UTF-16 code units are folded into a 32-bit rolling hash
 * (`hash * 31 + codeUnit`, truncated to a signed 32-bit integer each step).
 * The absolute value is rendered as hex and left-padded with zeros.
 *
 * @param author - Author display name.
 * @returns A 16-character hex string; the same name always gives the same ID.
 */
function generateUserId(author: string): string {
  const codeUnits = Array.from({ length: author.length }, (_, pos) => author.charCodeAt(pos));
  // `(acc << 5) - acc` is `acc * 31`; `| 0` truncates back to signed 32 bits.
  const folded = codeUnits.reduce((acc, unit) => ((acc << 5) - acc + unit) | 0, 0);
  const hex = Math.abs(folded).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
422
+
423
+ /**
424
+ * Inject comments at marker positions
425
+ */
426
+ export async function injectCommentsAtMarkers(
427
+ docxPath: string,
428
+ comments: PreparedComment[],
429
+ outputPath: string
430
+ ): Promise<InjectionResult> {
431
+ try {
432
+ if (!fs.existsSync(docxPath)) {
433
+ return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
434
+ }
435
+
436
+ if (comments.length === 0) {
437
+ fs.copyFileSync(docxPath, outputPath);
438
+ return { success: true, commentCount: 0, skippedComments: 0 };
439
+ }
440
+
441
+ const zip = new AdmZip(docxPath);
442
+ const documentEntry = zip.getEntry('word/document.xml');
443
+ if (!documentEntry) {
444
+ return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
445
+ }
446
+
447
+ let documentXml = zip.readAsText(documentEntry);
448
+
449
+ // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
450
+ const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
451
+ ...c,
452
+ id: String(idx + 1),
453
+ paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
454
+ paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
455
+ durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
456
+ }));
457
+
458
+ // Link replies to parent paraIds
459
+ for (const c of commentsWithIds) {
460
+ if (c.isReply && c.parentIdx !== null) {
461
+ const parent = commentsWithIds[c.parentIdx];
462
+ if (parent) {
463
+ c.parentParaId = parent.paraId;
464
+ }
465
+ }
466
+ }
467
+
468
+ const injectedIds = new Set<string>();
469
+
470
+ // Process only parent comments (non-replies) for document ranges
471
+ const parentComments = commentsWithIds.filter(c => !c.isReply);
472
+
473
+ for (let i = parentComments.length - 1; i >= 0; i--) {
474
+ const comment = parentComments[i];
475
+ if (!comment) continue;
476
+ const idx = comment.commentIdx;
477
+
478
+ const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
479
+ const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
480
+
481
+ const startPos = documentXml.indexOf(startMarker);
482
+ const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
483
+
484
+ if (startPos === -1 || endPos === -1) continue;
485
+
486
+ // Find the runs containing each marker. Pandoc may split a single
487
+ // markdown anchor across multiple <w:r> blocks when it applies styling
488
+ // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
489
+ // The same-run path (current happy path) collapses into the multi-run
490
+ // path when start and end runs coincide.
491
+ const startRunOpen = Math.max(
492
+ documentXml.lastIndexOf('<w:r>', startPos),
493
+ documentXml.lastIndexOf('<w:r ', startPos),
494
+ );
495
+ const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
496
+ const endRunOpen = Math.max(
497
+ documentXml.lastIndexOf('<w:r>', endPos),
498
+ documentXml.lastIndexOf('<w:r ', endPos),
499
+ );
500
+ const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
501
+
502
+ if (
503
+ startRunOpen === -1 || startRunCloseIdx === -1 ||
504
+ endRunOpen === -1 || endRunCloseIdx === -1
505
+ ) continue;
506
+
507
+ const startRunClose = startRunCloseIdx + '</w:r>'.length;
508
+ const endRunClose = endRunCloseIdx + '</w:r>'.length;
509
+
510
+ const startRunFull = documentXml.slice(startRunOpen, startRunClose);
511
+ const endRunFull = documentXml.slice(endRunOpen, endRunClose);
512
+
513
+ // Extract <w:rPr> and <w:t> element shape from each run. Both pieces
514
+ // are needed verbatim so a textBefore split keeps its original styling
515
+ // and so the post-anchor textAfter render keeps the end run's styling.
516
+ function dissectRun(runXml: string, marker: string): {
517
+ rPr: string;
518
+ tElement: string;
519
+ textBefore: string;
520
+ textAfter: string;
521
+ } | null {
522
+ const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
523
+ const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
524
+ if (!tMatch) return null;
525
+ const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
526
+ if (!tOpenMatch) return null;
527
+ const tContent = tMatch[1] ?? '';
528
+ const markerInT = tContent.indexOf(marker);
529
+ if (markerInT === -1) return null;
530
+ return {
531
+ rPr: rPrMatch ? rPrMatch[0] : '',
532
+ tElement: tOpenMatch[0],
533
+ textBefore: tContent.slice(0, markerInT),
534
+ textAfter: tContent.slice(markerInT + marker.length),
535
+ };
536
+ }
537
+
538
+ let replacement = '';
539
+ const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
540
+
541
+ const emitRangeStarts = () => {
542
+ replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
543
+ for (const reply of replies) {
544
+ replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
545
+ }
546
+ };
547
+
548
+ const emitRangeEnds = () => {
549
+ replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
550
+ replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
551
+ for (const reply of replies) {
552
+ replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
553
+ replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
554
+ injectedIds.add(reply.id);
555
+ }
556
+ };
557
+
558
+ if (startRunOpen === endRunOpen) {
559
+ // Same-run path: both markers live inside one <w:t>. Original logic.
560
+ const startInfo = dissectRun(startRunFull, startMarker);
561
+ if (!startInfo) continue;
562
+ const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
563
+ const endInTextRel = startInfo.textAfter.indexOf(endMarker);
564
+ if (endInTextRel === -1) continue;
565
+ const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
566
+ let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
567
+ let anchorText = anchorTextSame;
568
+ let textBefore = startInfo.textBefore;
569
+
570
+ // Empty anchor: borrow the next word so the comment has something
571
+ // to anchor on. Then normalize the trailing double space.
572
+ if (!anchorText && textAfter) {
573
+ const wordMatch = textAfter.match(/^\s*(\S+)/);
574
+ if (wordMatch) {
575
+ anchorText = wordMatch[1] ?? '';
576
+ textAfter = textAfter.slice(wordMatch[0].length);
577
+ }
578
+ }
579
+ if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
580
+ textAfter = textAfter.slice(1);
581
+ }
582
+ // Suppress unused warning for pre-empty-anchor fullText var
583
+ void fullText;
584
+
585
+ if (textBefore) {
586
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
587
+ }
588
+ emitRangeStarts();
589
+ if (anchorText) {
590
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
591
+ }
592
+ emitRangeEnds();
593
+ if (textAfter) {
594
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
595
+ }
596
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
597
+ injectedIds.add(comment.id);
598
+ continue;
599
+ }
600
+
601
+ // Multi-run path: markers sit in different <w:r> blocks because pandoc
602
+ // applied mid-anchor styling. Split the start run at the start marker,
603
+ // keep all middle runs verbatim (they carry the styled anchor portions),
604
+ // split the end run at the end marker.
605
+ const startInfo = dissectRun(startRunFull, startMarker);
606
+ const endInfo = dissectRun(endRunFull, endMarker);
607
+ if (!startInfo || !endInfo) continue;
608
+
609
+ const middle = documentXml.slice(startRunClose, endRunOpen);
610
+
611
+ if (startInfo.textBefore) {
612
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
613
+ }
614
+ emitRangeStarts();
615
+ if (startInfo.textAfter) {
616
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
617
+ }
618
+ replacement += middle;
619
+ if (endInfo.textBefore) {
620
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
621
+ }
622
+ emitRangeEnds();
623
+ if (endInfo.textAfter) {
624
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
625
+ }
626
+
627
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
628
+ injectedIds.add(comment.id);
629
+ }
630
+
631
+ // Add required namespaces to document.xml for comment threading
632
+ const requiredNs: Record<string, string> = {
633
+ 'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
634
+ 'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
635
+ 'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
636
+ 'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
637
+ 'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
638
+ };
639
+
640
+ // Find <w:document and add namespaces
641
+ const docTagMatch = documentXml.match(/<w:document[^>]*>/);
642
+ if (docTagMatch) {
643
+ let docTag = docTagMatch[0];
644
+ let modified = false;
645
+ for (const [attr, val] of Object.entries(requiredNs)) {
646
+ if (!docTag.includes(attr)) {
647
+ docTag = docTag.replace('>', ` ${attr}="${val}">`);
648
+ modified = true;
649
+ }
650
+ }
651
+ // Add mc:Ignorable if mc namespace was added
652
+ if (modified && !docTag.includes('mc:Ignorable')) {
653
+ docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
654
+ }
655
+ documentXml = documentXml.replace(docTagMatch[0], docTag);
656
+ }
657
+
658
+ // Update document.xml
659
+ zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
660
+
661
+ // All comments (parents + replies) go in comments.xml
662
+ // But only include if parent was injected
663
+ const includedComments = commentsWithIds.filter(c => {
664
+ if (!c.isReply) {
665
+ return injectedIds.has(c.id);
666
+ } else {
667
+ // Include reply if its parent was injected
668
+ const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
669
+ return parent && injectedIds.has(parent.id);
670
+ }
671
+ });
672
+
673
+ // Create comments.xml
674
+ const commentsXml = createCommentsXml(includedComments);
675
+ if (zip.getEntry('word/comments.xml')) {
676
+ zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
677
+ } else {
678
+ zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
679
+ }
680
+
681
+ // Create commentsExtended.xml with reply threading
682
+ const commentsExtXml = createCommentsExtendedXml(includedComments);
683
+ if (zip.getEntry('word/commentsExtended.xml')) {
684
+ zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
685
+ } else {
686
+ zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
687
+ }
688
+
689
+ // Create commentsIds.xml (Word 2016+)
690
+ const commentsIdsXml = createCommentsIdsXml(includedComments);
691
+ if (zip.getEntry('word/commentsIds.xml')) {
692
+ zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
693
+ } else {
694
+ zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
695
+ }
696
+
697
+ // Create commentsExtensible.xml (Word 2018+)
698
+ const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
699
+ if (zip.getEntry('word/commentsExtensible.xml')) {
700
+ zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
701
+ } else {
702
+ zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
703
+ }
704
+
705
+ // Create people.xml (author definitions with Windows Live IDs)
706
+ const peopleXml = createPeopleXml(includedComments);
707
+ if (zip.getEntry('word/people.xml')) {
708
+ zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
709
+ } else {
710
+ zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
711
+ }
712
+
713
+ // Update [Content_Types].xml
714
+ const contentTypesEntry = zip.getEntry('[Content_Types].xml');
715
+ if (contentTypesEntry) {
716
+ let contentTypes = zip.readAsText(contentTypesEntry);
717
+
718
+ if (!contentTypes.includes('comments.xml')) {
719
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
720
+ contentTypes = contentTypes.slice(0, insertPoint) +
721
+ '<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
722
+ contentTypes.slice(insertPoint);
723
+ }
724
+
725
+ if (!contentTypes.includes('commentsExtended.xml')) {
726
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
727
+ contentTypes = contentTypes.slice(0, insertPoint) +
728
+ '<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
729
+ contentTypes.slice(insertPoint);
730
+ }
731
+
732
+ if (!contentTypes.includes('commentsIds.xml')) {
733
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
734
+ contentTypes = contentTypes.slice(0, insertPoint) +
735
+ '<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
736
+ contentTypes.slice(insertPoint);
737
+ }
738
+
739
+ if (!contentTypes.includes('commentsExtensible.xml')) {
740
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
741
+ contentTypes = contentTypes.slice(0, insertPoint) +
742
+ '<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
743
+ contentTypes.slice(insertPoint);
744
+ }
745
+
746
+ if (!contentTypes.includes('people.xml')) {
747
+ const insertPoint = contentTypes.lastIndexOf('</Types>');
748
+ contentTypes = contentTypes.slice(0, insertPoint) +
749
+ '<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
750
+ contentTypes.slice(insertPoint);
751
+ }
752
+
753
+ zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
754
+ }
755
+
756
+ // Update relationships
757
+ const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
758
+ if (relsEntry) {
759
+ let rels = zip.readAsText(relsEntry);
760
+
761
+ const rIdMatches = rels.match(/rId(\d+)/g) || [];
762
+ const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
763
+
764
+ if (!rels.includes('comments.xml')) {
765
+ const insertPoint = rels.lastIndexOf('</Relationships>');
766
+ rels = rels.slice(0, insertPoint) +
767
+ `<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
768
+ rels.slice(insertPoint);
769
+ }
770
+
771
+ if (!rels.includes('commentsExtended.xml')) {
772
+ const insertPoint = rels.lastIndexOf('</Relationships>');
773
+ rels = rels.slice(0, insertPoint) +
774
+ `<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
775
+ rels.slice(insertPoint);
776
+ }
777
+
778
+ if (!rels.includes('commentsIds.xml')) {
779
+ const insertPoint = rels.lastIndexOf('</Relationships>');
780
+ rels = rels.slice(0, insertPoint) +
781
+ `<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
782
+ rels.slice(insertPoint);
783
+ }
784
+
785
+ if (!rels.includes('commentsExtensible.xml')) {
786
+ const insertPoint = rels.lastIndexOf('</Relationships>');
787
+ rels = rels.slice(0, insertPoint) +
788
+ `<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
789
+ rels.slice(insertPoint);
790
+ }
791
+
792
+ if (!rels.includes('people.xml')) {
793
+ const insertPoint = rels.lastIndexOf('</Relationships>');
794
+ rels = rels.slice(0, insertPoint) +
795
+ `<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
796
+ rels.slice(insertPoint);
797
+ }
798
+
799
+ zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
800
+ }
801
+
802
+ zip.writeZip(outputPath);
803
+
804
+ const parentCount = includedComments.filter(c => !c.isReply).length;
805
+ const replyCount = includedComments.filter(c => c.isReply).length;
806
+
807
+ return {
808
+ success: true,
809
+ commentCount: parentCount,
810
+ replyCount: replyCount,
811
+ skippedComments: comments.length - includedComments.length,
812
+ };
813
+
814
+ } catch (err: any) {
815
+ return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
816
+ }
817
+ }