docrev 0.9.13 → 0.9.14
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -9
- package/.gitattributes +1 -1
- package/CHANGELOG.md +149 -149
- package/PLAN-tables-and-postprocess.md +850 -850
- package/README.md +391 -391
- package/bin/rev.js +11 -11
- package/bin/rev.ts +145 -145
- package/completions/rev.bash +127 -127
- package/completions/rev.ps1 +210 -210
- package/completions/rev.zsh +207 -207
- package/dev_notes/stress2/build_adversarial.ts +186 -186
- package/dev_notes/stress2/drift_matcher.ts +62 -62
- package/dev_notes/stress2/probe_anchors.ts +35 -35
- package/dev_notes/stress2/project/discussion.before.md +3 -3
- package/dev_notes/stress2/project/discussion.md +3 -3
- package/dev_notes/stress2/project/methods.before.md +20 -20
- package/dev_notes/stress2/project/methods.md +20 -20
- package/dev_notes/stress2/project/rev.yaml +5 -5
- package/dev_notes/stress2/project/sections.yaml +4 -4
- package/dev_notes/stress2/sections.yaml +5 -5
- package/dev_notes/stress2/trace_placement.ts +50 -50
- package/dev_notes/stresstest_boundaries.ts +27 -27
- package/dev_notes/stresstest_drift_apply.ts +43 -43
- package/dev_notes/stresstest_drift_compare.ts +43 -43
- package/dev_notes/stresstest_drift_v2.ts +54 -54
- package/dev_notes/stresstest_inspect.ts +54 -54
- package/dev_notes/stresstest_pstyle.ts +55 -55
- package/dev_notes/stresstest_section_debug.ts +23 -23
- package/dev_notes/stresstest_split.ts +70 -70
- package/dev_notes/stresstest_trace.ts +19 -19
- package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
- package/dist/lib/build.d.ts +38 -1
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +68 -30
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/build.d.ts.map +1 -1
- package/dist/lib/commands/build.js +38 -5
- package/dist/lib/commands/build.js.map +1 -1
- package/dist/lib/commands/utilities.js +164 -164
- package/dist/lib/commands/word-tools.js +8 -8
- package/dist/lib/grammar.js +3 -3
- package/dist/lib/pdf-comments.js +44 -44
- package/dist/lib/plugins.js +57 -57
- package/dist/lib/pptx-themes.js +115 -115
- package/dist/lib/spelling.js +2 -2
- package/dist/lib/templates.js +387 -387
- package/dist/lib/themes.js +51 -51
- package/eslint.config.js +27 -27
- package/lib/anchor-match.ts +276 -276
- package/lib/annotations.ts +644 -644
- package/lib/build.ts +1300 -1251
- package/lib/citations.ts +160 -160
- package/lib/commands/build.ts +833 -801
- package/lib/commands/citations.ts +515 -515
- package/lib/commands/comments.ts +1050 -1050
- package/lib/commands/context.ts +174 -174
- package/lib/commands/core.ts +309 -309
- package/lib/commands/doi.ts +435 -435
- package/lib/commands/file-ops.ts +372 -372
- package/lib/commands/history.ts +320 -320
- package/lib/commands/index.ts +87 -87
- package/lib/commands/init.ts +259 -259
- package/lib/commands/merge-resolve.ts +378 -378
- package/lib/commands/preview.ts +178 -178
- package/lib/commands/project-info.ts +244 -244
- package/lib/commands/quality.ts +517 -517
- package/lib/commands/response.ts +454 -454
- package/lib/commands/section-boundaries.ts +82 -82
- package/lib/commands/sections.ts +451 -451
- package/lib/commands/sync.ts +706 -706
- package/lib/commands/text-ops.ts +449 -449
- package/lib/commands/utilities.ts +448 -448
- package/lib/commands/verify-anchors.ts +272 -272
- package/lib/commands/word-tools.ts +340 -340
- package/lib/comment-realign.ts +517 -517
- package/lib/config.ts +84 -84
- package/lib/crossref.ts +781 -781
- package/lib/csl.ts +191 -191
- package/lib/dependencies.ts +98 -98
- package/lib/diff-engine.ts +465 -465
- package/lib/doi-cache.ts +115 -115
- package/lib/doi.ts +897 -897
- package/lib/equations.ts +506 -506
- package/lib/errors.ts +346 -346
- package/lib/format.ts +541 -541
- package/lib/git.ts +326 -326
- package/lib/grammar.ts +303 -303
- package/lib/image-registry.ts +180 -180
- package/lib/import.ts +911 -911
- package/lib/journals.ts +543 -543
- package/lib/merge.ts +633 -633
- package/lib/orcid.ts +144 -144
- package/lib/pdf-comments.ts +263 -263
- package/lib/pdf-import.ts +524 -524
- package/lib/plugins.ts +362 -362
- package/lib/postprocess.ts +188 -188
- package/lib/pptx-color-filter.lua +37 -37
- package/lib/pptx-template.ts +469 -469
- package/lib/pptx-themes.ts +483 -483
- package/lib/protect-restore.ts +520 -520
- package/lib/rate-limiter.ts +94 -94
- package/lib/response.ts +197 -197
- package/lib/restore-references.ts +240 -240
- package/lib/review.ts +327 -327
- package/lib/schema.ts +417 -417
- package/lib/scientific-words.ts +73 -73
- package/lib/sections.ts +335 -335
- package/lib/slides.ts +756 -756
- package/lib/spelling.ts +334 -334
- package/lib/templates.ts +526 -526
- package/lib/themes.ts +742 -742
- package/lib/trackchanges.ts +247 -247
- package/lib/tui.ts +450 -450
- package/lib/types.ts +550 -550
- package/lib/undo.ts +250 -250
- package/lib/utils.ts +69 -69
- package/lib/variables.ts +179 -179
- package/lib/word-extraction.ts +806 -806
- package/lib/word.ts +643 -643
- package/lib/wordcomments.ts +817 -817
- package/package.json +137 -137
- package/scripts/postbuild.js +28 -28
- package/skill/REFERENCE.md +431 -431
- package/skill/SKILL.md +258 -258
- package/tsconfig.json +26 -26
- package/types/index.d.ts +525 -525
package/lib/wordcomments.ts
CHANGED
|
@@ -1,817 +1,817 @@
|
|
|
1
|
-
/**
|
|
2
|
-
* Word comment injection with reply threading
|
|
3
|
-
*
|
|
4
|
-
* Flow:
|
|
5
|
-
* 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
|
|
6
|
-
* - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
|
|
7
|
-
* - Subsequent adjacent comments = replies (no markers, attach to parent)
|
|
8
|
-
* 2. Pandoc converts to DOCX
|
|
9
|
-
* 3. injectCommentsAtMarkers() - Insert comment ranges for parents only
|
|
10
|
-
* - Replies go in comments.xml with parent reference in commentsExtended.xml
|
|
11
|
-
*/
|
|
12
|
-
|
|
13
|
-
import * as fs from 'fs';
|
|
14
|
-
import AdmZip from 'adm-zip';
|
|
15
|
-
import { escapeXml } from './utils.js';
|
|
16
|
-
|
|
17
|
-
// Text sentinels written into the markdown around a comment's anchor and later
// searched for in the generated document XML. A full marker is
// `<prefix><comment index><suffix>`, e.g. `⟦CMS:3⟧anchor text⟦CME:3⟧`.
const MARKER_START_PREFIX = '⟦CMS:';
const MARKER_END_PREFIX = '⟦CME:';
// Closing bracket shared by both the start and the end marker.
const MARKER_SUFFIX = '⟧';
|
|
20
|
-
|
|
21
|
-
/**
 * One `{>>...<<}` comment as found in the markdown, before any reply
 * threading is decided.
 */
interface ParsedComment {
  /** Offset in the source markdown where the `{>>` of this comment begins. */
  start: number;
  /** Offset just past the comment, or past its trailing `[anchor]{.mark}` span when one was found. */
  end: number;
  /** The exact markdown slice between `start` and `end`. */
  fullMatch: string;
  /** Name taken from before the first colon of the comment body; 'Unknown' when no usable colon is present. */
  author: string;
  /** Comment body with the author prefix removed. */
  text: string;
  /** Text of the trailing `[anchor]{.mark}` span, or null when the comment has none. */
  anchor: string | null;
}
|
|
29
|
-
|
|
30
|
-
/**
 * A parsed comment after threading: it is either the parent of a cluster or
 * a reply attached to one.
 */
interface PreparedComment extends ParsedComment {
  /** Position of this comment in the prepared comments array; also the number used in its markers. */
  commentIdx: number;
  /** True when this comment is attached to an earlier comment instead of owning markers itself. */
  isReply: boolean;
  /** Index (into the prepared comments array) of the parent comment; null for parents. */
  parentIdx: number | null;
  /** Set on a parent whose anchor was taken over from one of its replies. */
  anchorFromReply?: boolean;
  /** Set on the reply that donated its anchor; that reply's position receives the parent's markers. */
  placesParentMarkers?: boolean;
}
|
|
37
|
-
|
|
38
|
-
/** What `prepareMarkdownWithMarkers` hands back to the caller. */
interface PrepareResult {
  /** Every comment found, parents and replies alike, in document order. */
  comments: PreparedComment[];
  /** The markdown with comments stripped and anchor markers inserted. */
  markedMarkdown: string;
}
|
|
42
|
-
|
|
43
|
-
/**
 * A prepared comment plus the identifiers written into the generated
 * comment XML parts.
 */
interface CommentWithIds extends PreparedComment {
  /** Value used for `w:id` on the comment and on its range/reference elements. */
  id: string;
  /** Paragraph id of the comment's first (text) paragraph. */
  paraId: string;
  /** Paragraph id of the extra empty paragraph that is emitted for replies. */
  paraId2: string;
  /** Id written to the commentsIds and commentsExtensible parts. */
  durableId: string;
  /** For replies: the `paraId` of the parent comment. */
  parentParaId?: string;
}
|
|
50
|
-
|
|
51
|
-
/** Outcome reported by `injectCommentsAtMarkers`. */
interface InjectionResult {
  /** False when injection could not be carried out; `error` then carries the reason. */
  success: boolean;
  /** Human-readable failure description; present on the failure returns. */
  error?: string;
  /** Count of comments reported as injected (0 on the early-exit paths). */
  commentCount: number;
  /** Optional count of replies. */
  replyCount?: number;
  /** Count of comments reported as skipped (0 on the early-exit paths). */
  skippedComments: number;
}
|
|
58
|
-
|
|
59
|
-
/**
 * Build the paragraph id for one paragraph of a comment.
 *
 * The id is a pure function of its inputs (no clock, no randomness), so the
 * same comment index and paragraph number always give the same string. It is
 * rendered as uppercase hex, left-padded to eight characters, in the shape of
 * ids such as "3F25BC58".
 *
 * @param commentIdx Zero-based index of the comment.
 * @param paraNum Paragraph number within the comment (callers pass 1 or 2).
 * @returns Uppercase hexadecimal id, at least eight characters long.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const BASE = 0x10000000;
  const PER_COMMENT = 0x00100000;
  const PER_PARAGRAPH = 0x00001000;

  const numericId = BASE + commentIdx * PER_COMMENT + paraNum * PER_PARAGRAPH;
  const hex = numericId.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
|
|
66
|
-
|
|
67
|
-
/**
 * Parse CriticMarkup comments out of markdown and replace them with anchor
 * markers.
 *
 * Steps:
 *  1. Find every `{>>author: text<<}` block, plus an optional trailing
 *     `[anchor]{.mark}` span (brackets are walked manually so nested `[`/`]`
 *     inside the anchor are handled).
 *  2. Decide which comments are replies. If any comment's author starts with
 *     the `↪ ` prefix, only prefixed comments are replies. Otherwise a comment
 *     that starts fewer than 10 characters after the previous one ends is a
 *     reply. A reply always attaches to the first comment of its cluster.
 *  3. If a reply carries an anchor and its parent has none, the anchor is
 *     moved to the parent.
 *  4. Rewrite the markdown back-to-front: parents become
 *     `⟦CMS:n⟧anchor⟦CME:n⟧`, replies are removed (the reply that donated an
 *     anchor leaves the parent's markers in its place).
 *
 * @param markdown Markdown source possibly containing `{>>...<<}` comments.
 * @returns The rewritten markdown and the list of comments in document order,
 *   each with `isReply`, `parentIdx` and `commentIdx` filled in. When no
 *   comment is found the input is returned unchanged with an empty list.
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  // Match the comment block first; the optional trailing `[anchor]{.mark}`
  // span is captured by walking brackets manually. A regex such as `[^\]]+`
  // would stop at the inner `]` of nested syntax (e.g. `[[0..9]]{.mark}`).
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;

  // Returns the anchor text and the offset just past `{.mark}`, or null when
  // no well-formed `[ ... ]{.mark}` span follows `fromIdx` (leading whitespace
  // is skipped).
  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let i = fromIdx;
    while (i < text.length && /\s/.test(text[i] ?? '')) i++;
    if (text[i] !== '[') return null;
    let depth = 1;
    let j = i + 1;
    while (j < text.length) {
      const ch = text[j];
      if (ch === '[') depth++;
      else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      j++;
    }
    if (depth !== 0) return null;
    if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
    return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
  }

  const REPLY_PREFIX = '↪ ';
  const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;
    // Only treat an early colon as the author separator, so a colon deep
    // inside the comment body is not mistaken for one.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    // The `↪ ` prefix is the explicit reply signal. Strip it from the author
    // so the real name is what ends up in the comment.
    let explicitReply = false;
    if (author.startsWith(REPLY_PREFIX)) {
      explicitReply = true;
      author = author.slice(REPLY_PREFIX.length).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
      explicitReply,
    });

    // Resume scanning after the consumed anchor so its body is never
    // searched for another `{>>...<<}`.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // Two-mode reply detection driven by the markdown itself:
  //  - prefix mode when any comment carries `↪ `: only prefixed comments are
  //    replies, so unrelated comments that happen to touch are not threaded;
  //  - adjacency mode otherwise: gap < ADJACENT_THRESHOLD characters.
  const ADJACENT_THRESHOLD = 10;
  const useExplicitMode = rawMatches.some(m => m.explicitReply);
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // index of the first comment in the current cluster
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    const isAdjacent = useExplicitMode ? m.explicitReply : gap < ADJACENT_THRESHOLD;

    // A non-adjacent comment starts a new cluster.
    if (!isAdjacent) {
      clusterParentIdx = -1;
    }

    // First comment of a cluster is the parent; the rest reply to it.
    const isReply = clusterParentIdx !== -1;
    comments.push({
      author: m.author,
      text: m.text,
      anchor: m.anchor,
      start: m.start,
      end: m.end,
      fullMatch: m.fullMatch,
      isReply,
      parentIdx: isReply ? clusterParentIdx : null,
      commentIdx: comments.length,
    });
    if (!isReply) {
      clusterParentIdx = comments.length - 1;
    }

    lastCommentEnd = m.end;
  }

  // Move an anchor from a reply to its parent when the parent has none. The
  // flags tell the rewrite step that the reply's position, not the parent's,
  // is where the parent's markers go.
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true;
        c.placesParentMarkers = true;
        c.anchor = null;
      }
    }
  }

  const wrapInMarkers = (idx: number, anchorText: string): string =>
    `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}${anchorText}${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

  // Rewrite from the last comment to the first so that the recorded offsets
  // of earlier comments stay valid.
  let markedMarkdown = markdown;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (!c.isReply && !c.anchorFromReply) {
      // Ordinary parent: the comment (and its anchor span) becomes markers
      // wrapped around the anchor text.
      markedMarkdown =
        markedMarkdown.slice(0, c.start) + wrapInMarkers(i, c.anchor || '') + markedMarkdown.slice(c.end);
      continue;
    }

    // Replies, and parents whose markers are placed by a reply, are removed
    // from the text. Consume at most one preceding whitespace character to
    // avoid a double space; walking further back could shift offsets that
    // earlier comments still depend on.
    let removeStart = c.start;
    if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
      removeStart--;
    }

    let replacement = '';
    if (c.isReply && c.placesParentMarkers && c.parentIdx !== null) {
      // Use the anchor that was bracket-walked at parse time and moved to the
      // parent. Re-extracting it from `fullMatch` with a `[^\]]+` regex fails
      // on nested brackets (dropping the markers entirely) and mis-captures
      // when the reply text itself contains `[`.
      const anchorText = comments[c.parentIdx]?.anchor ?? '';
      if (anchorText) {
        replacement = wrapInMarkers(c.parentIdx, anchorText);
      }
    }

    markedMarkdown = markedMarkdown.slice(0, removeStart) + replacement + markedMarkdown.slice(c.end);
  }

  return { markedMarkdown, comments };
}
|
|
284
|
-
|
|
285
|
-
/**
 * Serialize the comment list as the `<w:comments>` XML part.
 *
 * Every comment gets one paragraph holding an annotation reference and the
 * comment text. Replies additionally get a second, empty paragraph whose id
 * is `paraId2`. All comments share one timestamp (current time, without
 * milliseconds) and one rsid derived from the clock.
 *
 * Author, initials and text are XML-escaped. Initials are built from the
 * first code point of each space-separated word of the author name, so a name
 * word starting with `&`, `<` or `"` cannot break the attribute, and a
 * leading non-BMP character is not cut into a lone surrogate.
 *
 * @param comments Comments with their ids already assigned.
 * @returns The complete XML document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Date without milliseconds, e.g. 2025-12-30T08:33:00Z.
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // One 8-char hex rsid shared by every comment of this batch.
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  for (const comment of comments) {
    // Spread iterates by code point, so astral characters stay intact; empty
    // words (from repeated spaces) contribute nothing.
    const initials = comment.author
      .split(' ')
      .map(word => [...word][0] ?? '')
      .join('');

    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper.
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second, empty paragraph: rsidRDefault matches rsidR.
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
|
|
313
|
-
|
|
314
|
-
/**
 * Serialize the `<w15:commentsEx>` XML part, which carries the reply
 * threading.
 *
 * A reply that has a `parentParaId` is written with its `paraId2` and a
 * `paraIdParent` pointing at the parent. Every other comment is written with
 * its own `paraId` only. All entries are marked `done="0"`.
 *
 * @param comments Comments with ids (and, for replies, parent ids) assigned.
 * @returns The complete XML document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const prolog = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map(entry => {
    const linkedReply = entry.isReply && entry.parentParaId;
    if (linkedReply) {
      // Reply: identified by its second paragraph and linked to the parent.
      return `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`;
    }
    // Parent (or unlinked reply): identified by its first paragraph.
    return `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
  });

  return prolog + rootOpen + entries.join('') + '</w15:commentsEx>';
}
|
|
332
|
-
|
|
333
|
-
/**
 * Produce an 8-character uppercase hex durable id for a comment.
 *
 * The value starts from a clock-derived base (0x10000000 plus the current
 * time modulo 0x40000000), is offset by `index * 0x01000000`, and is then
 * reduced modulo 0x7FFFFFFF so it always stays below the signed 32-bit
 * maximum. Because the clock is involved, two calls with the same index can
 * return different ids.
 *
 * @param index Zero-based comment index.
 * @returns Hex id, left-padded with zeros to eight characters.
 */
function generateDurableId(index: number): string {
  const SIGNED_32_MAX = 0x7FFFFFFF;
  const PER_INDEX_STEP = 0x01000000;

  const clockPart = Date.now() % 0x40000000;
  const seeded = 0x10000000 + clockPart + index * PER_INDEX_STEP;
  // Wrap instead of overflowing past the signed 32-bit range.
  const bounded = seeded % SIGNED_32_MAX;

  const hex = bounded.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
|
|
341
|
-
|
|
342
|
-
/**
 * Serialize the `<w16cid:commentsIds>` XML part, which pairs a paragraph id
 * with each comment's durable id.
 *
 * Exactly one entry is written per comment, keyed by the id of the comment's
 * last paragraph: `paraId` for parents (one paragraph) and `paraId2` for
 * replies (which have a second, empty paragraph).
 *
 * @param comments Comments with ids assigned.
 * @returns The complete XML document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  const rootOpen = [
    '<w16cid:commentsIds',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'mc:Ignorable="w16cid">',
  ].join(' ');

  const entries: string[] = [];
  for (const { isReply, paraId, paraId2, durableId } of comments) {
    const lastParagraphId = isReply ? paraId2 : paraId;
    entries.push(`<w16cid:commentId w16cid:paraId="${lastParagraphId}" w16cid:durableId="${durableId}"/>`);
  }

  return (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' +
    rootOpen +
    entries.join('') +
    '</w16cid:commentsIds>'
  );
}
|
|
361
|
-
|
|
362
|
-
/**
 * Serialize the `<w16cex:commentsExtensible>` XML part.
 *
 * One entry is written per comment, carrying the comment's durable id and a
 * UTC timestamp. The timestamp is the current time without milliseconds and
 * is the same for every entry of one call.
 *
 * @param comments Comments with durable ids assigned.
 * @returns The complete XML document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  const rootOpen = [
    '<w16cex:commentsExtensible',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'mc:Ignorable="w16cex">',
  ].join(' ');

  const entries = comments
    .map(({ durableId }) => `<w16cex:commentExtensible w16cex:durableId="${durableId}" w16cex:dateUtc="${timestamp}"/>`)
    .join('');

  return `<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n${rootOpen}${entries}</w16cex:commentsExtensible>`;
}
|
|
380
|
-
|
|
381
|
-
// Generate deterministic user IDs for authors (no hardcoded personal data)
|
|
382
|
-
|
|
383
|
-
/**
 * Serialize the `<w15:people>` XML part listing every distinct comment
 * author.
 *
 * Authors are de-duplicated by exact name and kept in order of first
 * appearance. Each person gets a presence entry whose user id comes from
 * `generateUserId(author)`; the author name is XML-escaped.
 *
 * @param comments Comments whose authors should be listed.
 * @returns The complete XML document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  const uniqueAuthors = Array.from(new Set(comments.map(entry => entry.author)));

  const rootOpen = [
    '<w15:people',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml"',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash"',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex"',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh">',
  ].join(' ');

  const people = uniqueAuthors.map(name => {
    // Resolve the id before escaping the name, mirroring the per-author order of calls.
    const userId = generateUserId(name);
    return (
      `<w15:person w15:author="${escapeXml(name)}">` +
      `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>` +
      `</w15:person>`
    );
  });

  return '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' + rootOpen + people.join('') + '</w15:people>';
}
|
|
412
|
-
|
|
413
|
-
/**
 * Derive a stable 16-character lowercase hex id from an author name.
 *
 * The name's UTF-16 code units are folded into a 32-bit integer with the
 * recurrence `h = h * 31 + unit` (written as a shift and subtract, wrapped to
 * 32 bits on each step). The absolute value is rendered as hex and
 * left-padded with zeros to 16 characters. The same name always yields the
 * same id.
 *
 * @param author Author display name; may be empty.
 * @returns 16-character hexadecimal string.
 */
function generateUserId(author: string): string {
  // split('') yields one element per UTF-16 code unit.
  const folded = author
    .split('')
    .reduce((acc, unit) => (((acc << 5) - acc) + unit.charCodeAt(0)) | 0, 0);

  const hex = Math.abs(folded).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
|
|
422
|
-
|
|
423
|
-
/**
|
|
424
|
-
* Inject comments at marker positions
|
|
425
|
-
*/
|
|
426
|
-
export async function injectCommentsAtMarkers(
|
|
427
|
-
docxPath: string,
|
|
428
|
-
comments: PreparedComment[],
|
|
429
|
-
outputPath: string
|
|
430
|
-
): Promise<InjectionResult> {
|
|
431
|
-
try {
|
|
432
|
-
if (!fs.existsSync(docxPath)) {
|
|
433
|
-
return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
|
|
434
|
-
}
|
|
435
|
-
|
|
436
|
-
if (comments.length === 0) {
|
|
437
|
-
fs.copyFileSync(docxPath, outputPath);
|
|
438
|
-
return { success: true, commentCount: 0, skippedComments: 0 };
|
|
439
|
-
}
|
|
440
|
-
|
|
441
|
-
const zip = new AdmZip(docxPath);
|
|
442
|
-
const documentEntry = zip.getEntry('word/document.xml');
|
|
443
|
-
if (!documentEntry) {
|
|
444
|
-
return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
|
|
445
|
-
}
|
|
446
|
-
|
|
447
|
-
let documentXml = zip.readAsText(documentEntry);
|
|
448
|
-
|
|
449
|
-
// Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
|
|
450
|
-
const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
|
|
451
|
-
...c,
|
|
452
|
-
id: String(idx + 1),
|
|
453
|
-
paraId: generateParaId(idx, 1), // First paragraph (e.g., 10000001)
|
|
454
|
-
paraId2: generateParaId(idx, 2), // Second paragraph (e.g., 10000002)
|
|
455
|
-
durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
|
|
456
|
-
}));
|
|
457
|
-
|
|
458
|
-
// Link replies to parent paraIds
|
|
459
|
-
for (const c of commentsWithIds) {
|
|
460
|
-
if (c.isReply && c.parentIdx !== null) {
|
|
461
|
-
const parent = commentsWithIds[c.parentIdx];
|
|
462
|
-
if (parent) {
|
|
463
|
-
c.parentParaId = parent.paraId;
|
|
464
|
-
}
|
|
465
|
-
}
|
|
466
|
-
}
|
|
467
|
-
|
|
468
|
-
const injectedIds = new Set<string>();
|
|
469
|
-
|
|
470
|
-
// Process only parent comments (non-replies) for document ranges
|
|
471
|
-
const parentComments = commentsWithIds.filter(c => !c.isReply);
|
|
472
|
-
|
|
473
|
-
for (let i = parentComments.length - 1; i >= 0; i--) {
|
|
474
|
-
const comment = parentComments[i];
|
|
475
|
-
if (!comment) continue;
|
|
476
|
-
const idx = comment.commentIdx;
|
|
477
|
-
|
|
478
|
-
const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
|
|
479
|
-
const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
|
|
480
|
-
|
|
481
|
-
const startPos = documentXml.indexOf(startMarker);
|
|
482
|
-
const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
|
|
483
|
-
|
|
484
|
-
if (startPos === -1 || endPos === -1) continue;
|
|
485
|
-
|
|
486
|
-
// Find the runs containing each marker. Pandoc may split a single
|
|
487
|
-
// markdown anchor across multiple <w:r> blocks when it applies styling
|
|
488
|
-
// mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
|
|
489
|
-
// The same-run path (current happy path) collapses into the multi-run
|
|
490
|
-
// path when start and end runs coincide.
|
|
491
|
-
const startRunOpen = Math.max(
|
|
492
|
-
documentXml.lastIndexOf('<w:r>', startPos),
|
|
493
|
-
documentXml.lastIndexOf('<w:r ', startPos),
|
|
494
|
-
);
|
|
495
|
-
const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
|
|
496
|
-
const endRunOpen = Math.max(
|
|
497
|
-
documentXml.lastIndexOf('<w:r>', endPos),
|
|
498
|
-
documentXml.lastIndexOf('<w:r ', endPos),
|
|
499
|
-
);
|
|
500
|
-
const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
|
|
501
|
-
|
|
502
|
-
if (
|
|
503
|
-
startRunOpen === -1 || startRunCloseIdx === -1 ||
|
|
504
|
-
endRunOpen === -1 || endRunCloseIdx === -1
|
|
505
|
-
) continue;
|
|
506
|
-
|
|
507
|
-
const startRunClose = startRunCloseIdx + '</w:r>'.length;
|
|
508
|
-
const endRunClose = endRunCloseIdx + '</w:r>'.length;
|
|
509
|
-
|
|
510
|
-
const startRunFull = documentXml.slice(startRunOpen, startRunClose);
|
|
511
|
-
const endRunFull = documentXml.slice(endRunOpen, endRunClose);
|
|
512
|
-
|
|
513
|
-
// Extract <w:rPr> and <w:t> element shape from each run. Both pieces
|
|
514
|
-
// are needed verbatim so a textBefore split keeps its original styling
|
|
515
|
-
// and so the post-anchor textAfter render keeps the end run's styling.
|
|
516
|
-
function dissectRun(runXml: string, marker: string): {
|
|
517
|
-
rPr: string;
|
|
518
|
-
tElement: string;
|
|
519
|
-
textBefore: string;
|
|
520
|
-
textAfter: string;
|
|
521
|
-
} | null {
|
|
522
|
-
const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
|
|
523
|
-
const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
|
|
524
|
-
if (!tMatch) return null;
|
|
525
|
-
const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
|
|
526
|
-
if (!tOpenMatch) return null;
|
|
527
|
-
const tContent = tMatch[1] ?? '';
|
|
528
|
-
const markerInT = tContent.indexOf(marker);
|
|
529
|
-
if (markerInT === -1) return null;
|
|
530
|
-
return {
|
|
531
|
-
rPr: rPrMatch ? rPrMatch[0] : '',
|
|
532
|
-
tElement: tOpenMatch[0],
|
|
533
|
-
textBefore: tContent.slice(0, markerInT),
|
|
534
|
-
textAfter: tContent.slice(markerInT + marker.length),
|
|
535
|
-
};
|
|
536
|
-
}
|
|
537
|
-
|
|
538
|
-
let replacement = '';
|
|
539
|
-
const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
|
|
540
|
-
|
|
541
|
-
const emitRangeStarts = () => {
|
|
542
|
-
replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
|
|
543
|
-
for (const reply of replies) {
|
|
544
|
-
replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
|
|
545
|
-
}
|
|
546
|
-
};
|
|
547
|
-
|
|
548
|
-
const emitRangeEnds = () => {
|
|
549
|
-
replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
|
|
550
|
-
replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
|
|
551
|
-
for (const reply of replies) {
|
|
552
|
-
replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
|
|
553
|
-
replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
|
|
554
|
-
injectedIds.add(reply.id);
|
|
555
|
-
}
|
|
556
|
-
};
|
|
557
|
-
|
|
558
|
-
if (startRunOpen === endRunOpen) {
|
|
559
|
-
// Same-run path: both markers live inside one <w:t>. Original logic.
|
|
560
|
-
const startInfo = dissectRun(startRunFull, startMarker);
|
|
561
|
-
if (!startInfo) continue;
|
|
562
|
-
const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
|
|
563
|
-
const endInTextRel = startInfo.textAfter.indexOf(endMarker);
|
|
564
|
-
if (endInTextRel === -1) continue;
|
|
565
|
-
const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
|
|
566
|
-
let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
|
|
567
|
-
let anchorText = anchorTextSame;
|
|
568
|
-
let textBefore = startInfo.textBefore;
|
|
569
|
-
|
|
570
|
-
// Empty anchor: borrow the next word so the comment has something
|
|
571
|
-
// to anchor on. Then normalize the trailing double space.
|
|
572
|
-
if (!anchorText && textAfter) {
|
|
573
|
-
const wordMatch = textAfter.match(/^\s*(\S+)/);
|
|
574
|
-
if (wordMatch) {
|
|
575
|
-
anchorText = wordMatch[1] ?? '';
|
|
576
|
-
textAfter = textAfter.slice(wordMatch[0].length);
|
|
577
|
-
}
|
|
578
|
-
}
|
|
579
|
-
if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
|
|
580
|
-
textAfter = textAfter.slice(1);
|
|
581
|
-
}
|
|
582
|
-
// Suppress unused warning for pre-empty-anchor fullText var
|
|
583
|
-
void fullText;
|
|
584
|
-
|
|
585
|
-
if (textBefore) {
|
|
586
|
-
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
|
|
587
|
-
}
|
|
588
|
-
emitRangeStarts();
|
|
589
|
-
if (anchorText) {
|
|
590
|
-
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
|
|
591
|
-
}
|
|
592
|
-
emitRangeEnds();
|
|
593
|
-
if (textAfter) {
|
|
594
|
-
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
|
|
595
|
-
}
|
|
596
|
-
documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
|
|
597
|
-
injectedIds.add(comment.id);
|
|
598
|
-
continue;
|
|
599
|
-
}
|
|
600
|
-
|
|
601
|
-
// Multi-run path: markers sit in different <w:r> blocks because pandoc
|
|
602
|
-
// applied mid-anchor styling. Split the start run at the start marker,
|
|
603
|
-
// keep all middle runs verbatim (they carry the styled anchor portions),
|
|
604
|
-
// split the end run at the end marker.
|
|
605
|
-
const startInfo = dissectRun(startRunFull, startMarker);
|
|
606
|
-
const endInfo = dissectRun(endRunFull, endMarker);
|
|
607
|
-
if (!startInfo || !endInfo) continue;
|
|
608
|
-
|
|
609
|
-
const middle = documentXml.slice(startRunClose, endRunOpen);
|
|
610
|
-
|
|
611
|
-
if (startInfo.textBefore) {
|
|
612
|
-
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
|
|
613
|
-
}
|
|
614
|
-
emitRangeStarts();
|
|
615
|
-
if (startInfo.textAfter) {
|
|
616
|
-
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
|
|
617
|
-
}
|
|
618
|
-
replacement += middle;
|
|
619
|
-
if (endInfo.textBefore) {
|
|
620
|
-
replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
|
|
621
|
-
}
|
|
622
|
-
emitRangeEnds();
|
|
623
|
-
if (endInfo.textAfter) {
|
|
624
|
-
replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
|
|
625
|
-
}
|
|
626
|
-
|
|
627
|
-
documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
|
|
628
|
-
injectedIds.add(comment.id);
|
|
629
|
-
}
|
|
630
|
-
|
|
631
|
-
// Add required namespaces to document.xml for comment threading
|
|
632
|
-
const requiredNs: Record<string, string> = {
|
|
633
|
-
'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
|
|
634
|
-
'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
|
|
635
|
-
'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
|
|
636
|
-
'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
|
|
637
|
-
'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
|
|
638
|
-
};
|
|
639
|
-
|
|
640
|
-
// Find <w:document and add namespaces
|
|
641
|
-
const docTagMatch = documentXml.match(/<w:document[^>]*>/);
|
|
642
|
-
if (docTagMatch) {
|
|
643
|
-
let docTag = docTagMatch[0];
|
|
644
|
-
let modified = false;
|
|
645
|
-
for (const [attr, val] of Object.entries(requiredNs)) {
|
|
646
|
-
if (!docTag.includes(attr)) {
|
|
647
|
-
docTag = docTag.replace('>', ` ${attr}="${val}">`);
|
|
648
|
-
modified = true;
|
|
649
|
-
}
|
|
650
|
-
}
|
|
651
|
-
// Add mc:Ignorable if mc namespace was added
|
|
652
|
-
if (modified && !docTag.includes('mc:Ignorable')) {
|
|
653
|
-
docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
|
|
654
|
-
}
|
|
655
|
-
documentXml = documentXml.replace(docTagMatch[0], docTag);
|
|
656
|
-
}
|
|
657
|
-
|
|
658
|
-
// Update document.xml
|
|
659
|
-
zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
|
|
660
|
-
|
|
661
|
-
// All comments (parents + replies) go in comments.xml
|
|
662
|
-
// But only include if parent was injected
|
|
663
|
-
const includedComments = commentsWithIds.filter(c => {
|
|
664
|
-
if (!c.isReply) {
|
|
665
|
-
return injectedIds.has(c.id);
|
|
666
|
-
} else {
|
|
667
|
-
// Include reply if its parent was injected
|
|
668
|
-
const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
|
|
669
|
-
return parent && injectedIds.has(parent.id);
|
|
670
|
-
}
|
|
671
|
-
});
|
|
672
|
-
|
|
673
|
-
// Create comments.xml
|
|
674
|
-
const commentsXml = createCommentsXml(includedComments);
|
|
675
|
-
if (zip.getEntry('word/comments.xml')) {
|
|
676
|
-
zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
|
|
677
|
-
} else {
|
|
678
|
-
zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
|
|
679
|
-
}
|
|
680
|
-
|
|
681
|
-
// Create commentsExtended.xml with reply threading
|
|
682
|
-
const commentsExtXml = createCommentsExtendedXml(includedComments);
|
|
683
|
-
if (zip.getEntry('word/commentsExtended.xml')) {
|
|
684
|
-
zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
|
|
685
|
-
} else {
|
|
686
|
-
zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
|
|
687
|
-
}
|
|
688
|
-
|
|
689
|
-
// Create commentsIds.xml (Word 2016+)
|
|
690
|
-
const commentsIdsXml = createCommentsIdsXml(includedComments);
|
|
691
|
-
if (zip.getEntry('word/commentsIds.xml')) {
|
|
692
|
-
zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
|
|
693
|
-
} else {
|
|
694
|
-
zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
|
|
695
|
-
}
|
|
696
|
-
|
|
697
|
-
// Create commentsExtensible.xml (Word 2018+)
|
|
698
|
-
const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
|
|
699
|
-
if (zip.getEntry('word/commentsExtensible.xml')) {
|
|
700
|
-
zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
|
|
701
|
-
} else {
|
|
702
|
-
zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
|
|
703
|
-
}
|
|
704
|
-
|
|
705
|
-
// Create people.xml (author definitions with Windows Live IDs)
|
|
706
|
-
const peopleXml = createPeopleXml(includedComments);
|
|
707
|
-
if (zip.getEntry('word/people.xml')) {
|
|
708
|
-
zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
|
|
709
|
-
} else {
|
|
710
|
-
zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
|
|
711
|
-
}
|
|
712
|
-
|
|
713
|
-
// Update [Content_Types].xml
|
|
714
|
-
const contentTypesEntry = zip.getEntry('[Content_Types].xml');
|
|
715
|
-
if (contentTypesEntry) {
|
|
716
|
-
let contentTypes = zip.readAsText(contentTypesEntry);
|
|
717
|
-
|
|
718
|
-
if (!contentTypes.includes('comments.xml')) {
|
|
719
|
-
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
720
|
-
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
721
|
-
'<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
|
|
722
|
-
contentTypes.slice(insertPoint);
|
|
723
|
-
}
|
|
724
|
-
|
|
725
|
-
if (!contentTypes.includes('commentsExtended.xml')) {
|
|
726
|
-
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
727
|
-
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
728
|
-
'<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
|
|
729
|
-
contentTypes.slice(insertPoint);
|
|
730
|
-
}
|
|
731
|
-
|
|
732
|
-
if (!contentTypes.includes('commentsIds.xml')) {
|
|
733
|
-
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
734
|
-
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
735
|
-
'<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
|
|
736
|
-
contentTypes.slice(insertPoint);
|
|
737
|
-
}
|
|
738
|
-
|
|
739
|
-
if (!contentTypes.includes('commentsExtensible.xml')) {
|
|
740
|
-
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
741
|
-
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
742
|
-
'<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
|
|
743
|
-
contentTypes.slice(insertPoint);
|
|
744
|
-
}
|
|
745
|
-
|
|
746
|
-
if (!contentTypes.includes('people.xml')) {
|
|
747
|
-
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
748
|
-
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
749
|
-
'<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
|
|
750
|
-
contentTypes.slice(insertPoint);
|
|
751
|
-
}
|
|
752
|
-
|
|
753
|
-
zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
|
|
754
|
-
}
|
|
755
|
-
|
|
756
|
-
// Update relationships
|
|
757
|
-
const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
|
|
758
|
-
if (relsEntry) {
|
|
759
|
-
let rels = zip.readAsText(relsEntry);
|
|
760
|
-
|
|
761
|
-
const rIdMatches = rels.match(/rId(\d+)/g) || [];
|
|
762
|
-
const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
|
|
763
|
-
|
|
764
|
-
if (!rels.includes('comments.xml')) {
|
|
765
|
-
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
766
|
-
rels = rels.slice(0, insertPoint) +
|
|
767
|
-
`<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
|
|
768
|
-
rels.slice(insertPoint);
|
|
769
|
-
}
|
|
770
|
-
|
|
771
|
-
if (!rels.includes('commentsExtended.xml')) {
|
|
772
|
-
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
773
|
-
rels = rels.slice(0, insertPoint) +
|
|
774
|
-
`<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
|
|
775
|
-
rels.slice(insertPoint);
|
|
776
|
-
}
|
|
777
|
-
|
|
778
|
-
if (!rels.includes('commentsIds.xml')) {
|
|
779
|
-
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
780
|
-
rels = rels.slice(0, insertPoint) +
|
|
781
|
-
`<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
|
|
782
|
-
rels.slice(insertPoint);
|
|
783
|
-
}
|
|
784
|
-
|
|
785
|
-
if (!rels.includes('commentsExtensible.xml')) {
|
|
786
|
-
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
787
|
-
rels = rels.slice(0, insertPoint) +
|
|
788
|
-
`<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
|
|
789
|
-
rels.slice(insertPoint);
|
|
790
|
-
}
|
|
791
|
-
|
|
792
|
-
if (!rels.includes('people.xml')) {
|
|
793
|
-
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
794
|
-
rels = rels.slice(0, insertPoint) +
|
|
795
|
-
`<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
|
|
796
|
-
rels.slice(insertPoint);
|
|
797
|
-
}
|
|
798
|
-
|
|
799
|
-
zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
|
|
800
|
-
}
|
|
801
|
-
|
|
802
|
-
zip.writeZip(outputPath);
|
|
803
|
-
|
|
804
|
-
const parentCount = includedComments.filter(c => !c.isReply).length;
|
|
805
|
-
const replyCount = includedComments.filter(c => c.isReply).length;
|
|
806
|
-
|
|
807
|
-
return {
|
|
808
|
-
success: true,
|
|
809
|
-
commentCount: parentCount,
|
|
810
|
-
replyCount: replyCount,
|
|
811
|
-
skippedComments: comments.length - includedComments.length,
|
|
812
|
-
};
|
|
813
|
-
|
|
814
|
-
} catch (err: any) {
|
|
815
|
-
return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
|
|
816
|
-
}
|
|
817
|
-
}
|
|
1
|
+
/**
|
|
2
|
+
* Word comment injection with reply threading
|
|
3
|
+
*
|
|
4
|
+
* Flow:
|
|
5
|
+
* 1. prepareMarkdownWithMarkers() - Parse comments, detect reply relationships
|
|
6
|
+
* - First comment in a cluster = parent (gets markers: ⟦CMS:n⟧anchor⟦CME:n⟧)
|
|
7
|
+
* - Subsequent adjacent comments = replies (no markers, attach to parent)
|
|
8
|
+
* 2. Pandoc converts to DOCX
|
|
9
|
+
* 3. injectCommentsAtMarkers() - Insert comment ranges at the parent markers (each reply gets its own range over the same anchor)
|
|
10
|
+
* - Replies go in comments.xml with parent reference in commentsExtended.xml
|
|
11
|
+
*/
|
|
12
|
+
|
|
13
|
+
import * as fs from 'fs';
|
|
14
|
+
import AdmZip from 'adm-zip';
|
|
15
|
+
import { escapeXml } from './utils.js';
|
|
16
|
+
|
|
17
|
+
/** Opening part of the marker that starts a comment's anchor range. */
const MARKER_START_PREFIX = '⟦CMS:';
/** Opening part of the marker that ends a comment's anchor range. */
const MARKER_END_PREFIX = '⟦CME:';
/** Closing part shared by both marker kinds; the comment index sits in between. */
const MARKER_SUFFIX = '⟧';

/** A CriticMarkup comment as found in the markdown source. */
interface ParsedComment {
  /** Author name taken from the text before the first colon, or `'Unknown'`. */
  author: string;
  /** Comment body. */
  text: string;
  /** Text of the trailing `[anchor]{.mark}` span, or `null` when there is none. */
  anchor: string | null;
  /** Offset of the comment's first character in the markdown. */
  start: number;
  /** Offset just past the comment, including its trailing anchor span if any. */
  end: number;
  /** The markdown slice from `start` to `end`. */
  fullMatch: string;
}

/** A parsed comment annotated with its position in a reply thread. */
interface PreparedComment extends ParsedComment {
  /** `true` when the comment is threaded under an earlier comment. */
  isReply: boolean;
  /** Index of the thread parent in the comments array; `null` for parents. */
  parentIdx: number | null;
  /** Index of this comment in the comments array. */
  commentIdx: number;
  /** Set on a parent whose anchor was taken over from one of its replies. */
  anchorFromReply?: boolean;
  /** Set on the reply that has to emit its parent's markers. */
  placesParentMarkers?: boolean;
}

/** Result of `prepareMarkdownWithMarkers`. */
interface PrepareResult {
  /** Markdown with comments replaced by markers (parents) or removed (replies). */
  markedMarkdown: string;
  /** All comments in document order, parents and replies alike. */
  comments: PreparedComment[];
}

/** A prepared comment plus the identifiers written into the DOCX parts. */
interface CommentWithIds extends PreparedComment {
  /** Value written as `w:id` on the comment and on its range/reference elements. */
  id: string;
  /** `w14:paraId` of the comment's first paragraph. */
  paraId: string;
  /** `w14:paraId` of the extra empty paragraph emitted for replies. */
  paraId2: string;
  /** Identifier written to commentsIds.xml and commentsExtensible.xml. */
  durableId: string;
  /** Written as `w15:paraIdParent` in commentsExtended.xml for replies. */
  parentParaId?: string;
}

/** Outcome reported by `injectCommentsAtMarkers`. */
interface InjectionResult {
  success: boolean;
  /** Number of parent (non-reply) comments written. */
  commentCount: number;
  /** Number of reply comments written. */
  replyCount?: number;
  /** Number of input comments that did not end up in the output. */
  skippedComments: number;
  /** Failure message when `success` is `false`. */
  error?: string;
}
|
|
58
|
+
|
|
59
|
+
/**
 * Build a deterministic uppercase hex paragraph ID, zero-padded to at least
 * eight characters, in the style Word uses (e.g. "3F25BC58").
 *
 * The same `(commentIdx, paraNum)` pair always yields the same ID.
 *
 * @param commentIdx - Index of the comment the paragraph belongs to.
 * @param paraNum - Ordinal of the paragraph within that comment.
 */
function generateParaId(commentIdx: number, paraNum: number): string {
  const base = 0x10000000;
  const perComment = 0x00100000;
  const perParagraph = 0x00001000;

  const numericId = base + commentIdx * perComment + paraNum * perParagraph;
  const hex = numericId.toString(16).toUpperCase();
  return hex.padStart(8, '0');
}
|
|
66
|
+
|
|
67
|
+
/**
 * Parse CriticMarkup comments and replace them with injection markers.
 *
 * A comment is `{>>author: text<<}`, optionally followed by an
 * `[anchor]{.mark}` span. Comments are grouped into threads: the first
 * comment of a thread is the parent, later ones are replies.
 *
 * Reply detection has two modes, chosen from the markdown itself:
 * - If any author carries the `↪ ` prefix, only prefixed comments are
 *   replies (prefix-only mode). The prefix is stripped from the author.
 * - Otherwise a comment starting fewer than 10 characters after the previous
 *   comment's end is treated as a reply (hand-typed CriticMarkup).
 *
 * If a reply has an anchor and its parent has none, the anchor moves to the
 * parent and the reply's position is where the parent's markers are emitted.
 *
 * @param markdown - Markdown source containing CriticMarkup comments.
 * @returns
 *   - `markedMarkdown`: parents replaced by `⟦CMS:n⟧anchor⟦CME:n⟧`, replies
 *     removed (together with at most one preceding whitespace character).
 *   - `comments`: every comment in document order with `isReply`,
 *     `parentIdx` and `commentIdx` filled in.
 */
export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
  const commentPattern = /\{>>([\s\S]+?)<<\}/g;
  const whitespace = /\s/;
  const MARK_SUFFIX = '{.mark}';
  const REPLY_PREFIX = '↪ ';
  const ADJACENT_THRESHOLD = 10;

  // Walk the brackets by hand instead of using `[^\]]+`: nested syntax such
  // as `[[0..9]]{.mark}` or `[*phrase*]{.mark}` contains inner `]` characters
  // that a character-class regex would stop at.
  function tryParseTrailingAnchor(
    text: string,
    fromIdx: number,
  ): { anchor: string; endIdx: number } | null {
    let open = fromIdx;
    while (open < text.length && whitespace.test(text[open] ?? '')) open++;
    if (text[open] !== '[') return null;

    let depth = 1;
    let close = open + 1;
    while (close < text.length) {
      const ch = text[close];
      if (ch === '[') {
        depth++;
      } else if (ch === ']') {
        depth--;
        if (depth === 0) break;
      }
      close++;
    }
    if (depth !== 0) return null;
    if (!text.startsWith(MARK_SUFFIX, close + 1)) return null;
    return { anchor: text.slice(open + 1, close), endIdx: close + 1 + MARK_SUFFIX.length };
  }

  const markersFor = (idx: number, anchor: string): string =>
    `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}${anchor}${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

  // --- 1. Collect raw comments -------------------------------------------
  const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
  let match: RegExpExecArray | null;
  while ((match = commentPattern.exec(markdown)) !== null) {
    const content = match[1] ?? '';
    let author = 'Unknown';
    let text = content;

    // Only a colon within the first 30 characters is taken as the
    // author/text separator.
    const colonIdx = content.indexOf(':');
    if (colonIdx > 0 && colonIdx < 30) {
      author = content.slice(0, colonIdx).trim();
      text = content.slice(colonIdx + 1).trim();
    }

    // The `↪ ` prefix is the authoritative reply signal; strip it so the
    // real author name is what ends up in the document.
    let explicitReply = false;
    if (author.startsWith(REPLY_PREFIX)) {
      explicitReply = true;
      author = author.slice(REPLY_PREFIX.length).trim();
    }

    const commentEnd = match.index + match[0].length;
    const trailing = tryParseTrailingAnchor(markdown, commentEnd);
    const end = trailing ? trailing.endIdx : commentEnd;

    rawMatches.push({
      author,
      text,
      anchor: trailing ? trailing.anchor : null,
      start: match.index,
      end,
      fullMatch: markdown.slice(match.index, end),
      explicitReply,
    });

    // Resume scanning after the consumed anchor span so its body is never
    // searched for another `{>>...<<}`.
    if (trailing) {
      commentPattern.lastIndex = trailing.endIdx;
    }
  }

  if (rawMatches.length === 0) {
    return { markedMarkdown: markdown, comments: [] };
  }

  // --- 2. Group into threads ---------------------------------------------
  const useExplicitMode = rawMatches.some(m => m.explicitReply);
  const comments: PreparedComment[] = [];
  let clusterParentIdx = -1; // index of the current thread's parent, -1 = none
  let lastCommentEnd = -1;

  for (const m of rawMatches) {
    const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
    const isAdjacent = useExplicitMode ? m.explicitReply : gap < ADJACENT_THRESHOLD;
    if (!isAdjacent) {
      clusterParentIdx = -1;
    }

    // The first comment of a thread is the parent regardless of author.
    const isReply = clusterParentIdx !== -1;
    const commentIdx = comments.length;
    comments.push({
      author: m.author,
      text: m.text,
      anchor: m.anchor,
      start: m.start,
      end: m.end,
      fullMatch: m.fullMatch,
      isReply,
      parentIdx: isReply ? clusterParentIdx : null,
      commentIdx,
    });
    if (!isReply) {
      clusterParentIdx = commentIdx;
    }

    lastCommentEnd = m.end;
  }

  // --- 3. Move anchors from replies to anchor-less parents ----------------
  for (const c of comments) {
    if (c.isReply && c.anchor && c.parentIdx !== null) {
      const parent = comments[c.parentIdx];
      if (parent && !parent.anchor) {
        parent.anchor = c.anchor;
        parent.anchorFromReply = true; // markers are emitted at the reply's position
        c.placesParentMarkers = true;
        c.anchor = null;
      }
    }
  }

  // --- 4. Rewrite the markdown, last comment first ------------------------
  // Working backwards keeps the offsets of earlier comments valid.
  let markedMarkdown = markdown;

  for (let i = comments.length - 1; i >= 0; i--) {
    const c = comments[i];
    if (!c) continue;

    if (!c.isReply && !c.anchorFromReply) {
      // Ordinary parent: swap the comment (and its anchor span) for markers.
      markedMarkdown =
        markedMarkdown.slice(0, c.start) + markersFor(i, c.anchor ?? '') + markedMarkdown.slice(c.end);
      continue;
    }

    // Replies, and parents whose markers are placed by a reply, are removed.
    // Consume at most one preceding whitespace character to avoid a double
    // space; walking further back would shift offsets that lower-index
    // comments still depend on.
    let removeStart = c.start;
    if (removeStart > 0 && whitespace.test(markedMarkdown[removeStart - 1] ?? '')) {
      removeStart--;
    }

    let replacement = '';
    if (c.isReply && c.placesParentMarkers && c.parentIdx !== null) {
      // Use the anchor captured by the bracket walker (now stored on the
      // parent). Re-extracting it from `fullMatch` with `[^\]]+` failed on
      // nested brackets, which dropped the markers and lost the comment.
      const parentAnchor = comments[c.parentIdx]?.anchor;
      if (parentAnchor) {
        replacement = markersFor(c.parentIdx, parentAnchor);
      }
    }

    markedMarkdown = markedMarkdown.slice(0, removeStart) + replacement + markedMarkdown.slice(c.end);
  }

  return { markedMarkdown, comments };
}
|
|
284
|
+
|
|
285
|
+
/**
 * Serialize comments into the `word/comments.xml` part.
 *
 * Every comment gets one paragraph holding the annotation reference and the
 * comment text. Replies get an additional empty paragraph identified by
 * `paraId2`.
 *
 * All comments share one timestamp and one rsid, both derived from the
 * current time, so the output differs between calls.
 *
 * @param comments - Comments to write, parents and replies alike.
 * @returns The complete XML document as a string.
 */
function createCommentsXml(comments: CommentWithIds[]): string {
  // Word expects date without milliseconds: 2025-12-30T08:33:00Z
  const now = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // One 8-char hex rsid shared by every comment in this batch.
  const rsid = '00' + (Date.now() % 0xFFFFFF).toString(16).toUpperCase().padStart(6, '0');

  let xml = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  xml += '<w:comments xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  for (const comment of comments) {
    // First character of each space-separated name part. The result is
    // escaped like the author itself: a part starting with `&`, `<` or `"`
    // would otherwise produce malformed XML.
    const initials = comment.author
      .split(' ')
      .map(part => part.charAt(0))
      .join('');

    xml += `<w:comment w:id="${comment.id}" w:author="${escapeXml(comment.author)}" w:date="${now}" w:initials="${escapeXml(initials)}">`;
    // First paragraph: rsidRDefault="00000000", annotationRef without rStyle wrapper
    xml += `<w:p w14:paraId="${comment.paraId}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="00000000">`;
    xml += `<w:r><w:annotationRef/></w:r>`;
    xml += `<w:r><w:t>${escapeXml(comment.text)}</w:t></w:r>`;
    xml += `</w:p>`;
    if (comment.isReply) {
      // Second empty paragraph: rsidRDefault matches rsidR
      xml += `<w:p w14:paraId="${comment.paraId2}" w14:textId="77777777" w:rsidR="${rsid}" w:rsidRDefault="${rsid}"/>`;
    }
    xml += `</w:comment>`;
  }

  xml += '</w:comments>';
  return xml;
}
|
|
313
|
+
|
|
314
|
+
/**
 * Serialize the `word/commentsExtended.xml` part, which carries the reply
 * threading.
 *
 * A reply with a known `parentParaId` is listed under its second (empty)
 * paragraph's ID and points at the parent's paragraph ID; every other comment
 * is listed under its first paragraph's ID with no parent link.
 *
 * @param comments - Comments to write, parents and replies alike.
 * @returns The complete XML document as a string.
 */
function createCommentsExtendedXml(comments: CommentWithIds[]): string {
  const prolog = '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n';
  // Minimal namespaces matching golden file structure
  const rootOpen = '<w15:commentsEx xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main" xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml" xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml" mc:Ignorable="w14 w15">';

  const entries = comments.map(entry => {
    const isThreadedReply = entry.isReply && entry.parentParaId;
    return isThreadedReply
      ? `<w15:commentEx w15:paraId="${entry.paraId2}" w15:paraIdParent="${entry.parentParaId}" w15:done="0"/>`
      : `<w15:commentEx w15:paraId="${entry.paraId}" w15:done="0"/>`;
  });

  return prolog + rootOpen + entries.join('') + '</w15:commentsEx>';
}
|
|
332
|
+
|
|
333
|
+
/**
 * Produce an uppercase hex `durableId`, zero-padded to eight characters.
 *
 * The value is seeded from the current time, so it is not reproducible
 * between calls. It is reduced modulo 0x7FFFFFFF so it always stays below the
 * signed 32-bit maximum; Word interprets durableIds as signed 32-bit integers.
 *
 * @param index - Position of the comment; offsets the time-based seed.
 */
function generateDurableId(index: number): string {
  const signedInt32Max = 0x7FFFFFFF;

  // Seed lands between 0x10000000 and 0x50000000 before the index offset.
  const timeSeed = 0x10000000 + (Date.now() % 0x40000000);
  const shifted = timeSeed + index * 0x01000000;

  const bounded = shifted % signedInt32Max;
  return bounded.toString(16).toUpperCase().padStart(8, '0');
}
|
|
341
|
+
|
|
342
|
+
/**
 * Serialize the `word/commentsIds.xml` part (Word 2016+), which maps each
 * comment's paragraph ID to its durable ID.
 *
 * Each comment contributes exactly one entry, keyed by its LAST paragraph:
 * `paraId` for parents (one paragraph) and `paraId2` for replies (whose
 * second paragraph is the empty one).
 *
 * @param comments - Comments to write, parents and replies alike.
 * @returns The complete XML document as a string.
 */
function createCommentsIdsXml(comments: CommentWithIds[]): string {
  // Minimal namespaces matching golden file structure
  const header = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w16cid:commentsIds ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid" ',
    'mc:Ignorable="w16cid">',
  ].join('');

  const body = comments
    .map(entry => {
      const lastParaId = entry.isReply ? entry.paraId2 : entry.paraId;
      return `<w16cid:commentId w16cid:paraId="${lastParaId}" w16cid:durableId="${entry.durableId}"/>`;
    })
    .join('');

  return `${header}${body}</w16cid:commentsIds>`;
}
|
|
361
|
+
|
|
362
|
+
/**
 * Serialize the `word/commentsExtensible.xml` part (Word 2018+).
 *
 * Each comment contributes one entry carrying its durable ID and a UTC
 * timestamp. The timestamp is the current time without milliseconds and is
 * shared by all entries of one call.
 *
 * @param comments - Comments to write, parents and replies alike.
 * @returns The complete XML document as a string.
 */
function createCommentsExtensibleXml(comments: CommentWithIds[]): string {
  const timestamp = new Date().toISOString().replace(/\.\d{3}Z$/, 'Z');

  // Minimal namespaces matching golden file structure
  const pieces: string[] = [
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n',
    '<w16cex:commentsExtensible ',
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006" ',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex" ',
    'mc:Ignorable="w16cex">',
  ];

  for (const entry of comments) {
    pieces.push(
      `<w16cex:commentExtensible w16cex:durableId="${entry.durableId}" w16cex:dateUtc="${timestamp}"/>`,
    );
  }

  pieces.push('</w16cex:commentsExtensible>');
  return pieces.join('');
}
|
|
380
|
+
|
|
381
|
+
// Generate deterministic user IDs for authors (no hardcoded personal data)
|
|
382
|
+
|
|
383
|
+
/**
 * Serialize the `word/people.xml` part: one `<w15:person>` per distinct
 * comment author, in order of first appearance.
 *
 * Each person gets a "Windows Live" presence entry whose user ID comes from
 * `generateUserId(author)`.
 *
 * @param comments - Comments whose authors should be listed.
 * @returns The complete XML document as a string.
 */
function createPeopleXml(comments: CommentWithIds[]): string {
  const rootAttributes = [
    'xmlns:mc="http://schemas.openxmlformats.org/markup-compatibility/2006"',
    'xmlns:x="http://schemas.openxmlformats.org/spreadsheetml/2006/main"',
    'xmlns:r="http://schemas.openxmlformats.org/officeDocument/2006/relationships"',
    'xmlns:w="http://schemas.openxmlformats.org/wordprocessingml/2006/main"',
    'xmlns:w14="http://schemas.microsoft.com/office/word/2010/wordml"',
    'xmlns:w15="http://schemas.microsoft.com/office/word/2012/wordml"',
    'xmlns:w16cex="http://schemas.microsoft.com/office/word/2018/wordml/cex"',
    'xmlns:w16cid="http://schemas.microsoft.com/office/word/2016/wordml/cid"',
    'xmlns:w16="http://schemas.microsoft.com/office/word/2018/wordml"',
    'xmlns:w16sdtdh="http://schemas.microsoft.com/office/word/2020/wordml/sdtdatahash"',
    'xmlns:w16se="http://schemas.microsoft.com/office/word/2015/wordml/symex"',
    'mc:Ignorable="w14 w15 w16se w16cid w16 w16cex w16sdtdh"',
  ];

  // A Set keeps the first occurrence of each author and preserves order.
  const uniqueAuthors = Array.from(new Set(comments.map(entry => entry.author)));

  const people = uniqueAuthors.map(name => {
    const userId = generateUserId(name);
    return (
      `<w15:person w15:author="${escapeXml(name)}">` +
      `<w15:presenceInfo w15:providerId="Windows Live" w15:userId="${userId}"/>` +
      `</w15:person>`
    );
  });

  return (
    '<?xml version="1.0" encoding="UTF-8" standalone="yes"?>\n' +
    `<w15:people ${rootAttributes.join(' ')}>` +
    people.join('') +
    '</w15:people>'
  );
}
|
|
412
|
+
|
|
413
|
+
/**
 * Derive a deterministic 16-character hexadecimal ID from an author name.
 *
 * The name is folded, one UTF-16 code unit at a time, into a signed 32-bit
 * rolling hash (`hash * 31 + unit`). The absolute value is rendered as hex
 * and left-padded with zeros to 16 characters.
 *
 * @param author Author display name; the empty string yields all zeros.
 * @returns A 16-character lowercase hex string.
 */
function generateUserId(author: string): string {
  // split('') yields UTF-16 code units, matching an index-based charCodeAt walk.
  const folded = author
    .split('')
    .reduce((acc, unit) => (((acc << 5) - acc) + unit.charCodeAt(0)) | 0, 0);

  const hex = Math.abs(folded).toString(16);
  return hex.padStart(16, '0').slice(0, 16);
}
|
|
422
|
+
|
|
423
|
+
/**
 * Inject Word comments into a DOCX at the positions of text markers.
 *
 * For every non-reply comment the function looks up a start and an end marker
 * in `word/document.xml` (built from `MARKER_START_PREFIX` / `MARKER_END_PREFIX`,
 * the comment's `commentIdx`, and `MARKER_SUFFIX`), removes the markers, and
 * wraps the text between them in `commentRangeStart` / `commentRangeEnd` /
 * `commentReference` elements. Replies share their parent's range. The
 * comment parts (`comments.xml`, `commentsExtended.xml`, `commentsIds.xml`,
 * `commentsExtensible.xml`, `people.xml`) are then written, and
 * `[Content_Types].xml` and `word/_rels/document.xml.rels` are updated to
 * reference them.
 *
 * Comments whose markers cannot be located or parsed are skipped and reported
 * through `skippedComments`; a reply is kept only if its parent was injected.
 *
 * @param docxPath   Path of the DOCX to read.
 * @param comments   Comments to inject. When empty, the file is copied unchanged.
 * @param outputPath Path the resulting DOCX is written to.
 * @returns An `InjectionResult`. Failures (missing file, missing
 *          `word/document.xml`, or any thrown error) are reported via
 *          `success: false` and `error` rather than by rejecting.
 */
export async function injectCommentsAtMarkers(
  docxPath: string,
  comments: PreparedComment[],
  outputPath: string
): Promise<InjectionResult> {
  try {
    if (!fs.existsSync(docxPath)) {
      return { success: false, commentCount: 0, skippedComments: 0, error: `File not found: ${docxPath}` };
    }

    if (comments.length === 0) {
      fs.copyFileSync(docxPath, outputPath);
      return { success: true, commentCount: 0, skippedComments: 0 };
    }

    const zip = new AdmZip(docxPath);
    const documentEntry = zip.getEntry('word/document.xml');
    if (!documentEntry) {
      return { success: false, commentCount: 0, skippedComments: 0, error: 'Invalid DOCX: no document.xml' };
    }

    let documentXml = zip.readAsText(documentEntry);

    // Assign IDs and paraIds (IDs start at 1, not 0 - Word convention)
    const commentsWithIds: CommentWithIds[] = comments.map((c, idx) => ({
      ...c,
      id: String(idx + 1),
      paraId: generateParaId(idx, 1),    // First paragraph of the comment body
      paraId2: generateParaId(idx, 2),   // Second paragraph of the comment body
      durableId: generateDurableId(idx), // Unique ID for commentsIds/commentsExtensible
    }));

    // Link replies to parent paraIds
    for (const c of commentsWithIds) {
      if (c.isReply && c.parentIdx !== null) {
        const parent = commentsWithIds[c.parentIdx];
        if (parent) {
          c.parentParaId = parent.paraId;
        }
      }
    }

    const injectedIds = new Set<string>();

    // Extract <w:rPr> and the <w:t> opening tag from a run, and split the
    // run's text around `marker`. Both pieces are needed verbatim so that the
    // fragments re-emitted on either side of the comment range keep the
    // original run's styling. Returns null when the run has no <w:t> or the
    // marker is not inside the first <w:t>.
    const dissectRun = (
      runXml: string,
      marker: string
    ): { rPr: string; tElement: string; textBefore: string; textAfter: string } | null => {
      const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
      // The opening tag must be exactly <w:t> or <w:t ...>. A bare
      // `<w:t[^>]*>` would also match <w:tab/>, <w:tbl>, etc.
      const tMatch = runXml.match(/(<w:t(?:\s[^>]*)?>)([\s\S]*?)<\/w:t>/);
      if (!tMatch) return null;
      const tElement = tMatch[1] ?? '';
      const tContent = tMatch[2] ?? '';
      if (!tElement) return null;
      const markerInT = tContent.indexOf(marker);
      if (markerInT === -1) return null;
      return {
        rPr: rPrMatch ? rPrMatch[0] : '',
        tElement,
        textBefore: tContent.slice(0, markerInT),
        textAfter: tContent.slice(markerInT + marker.length),
      };
    };

    // Process only parent comments (non-replies) for document ranges
    const parentComments = commentsWithIds.filter(c => !c.isReply);

    for (let i = parentComments.length - 1; i >= 0; i--) {
      const comment = parentComments[i];
      if (!comment) continue;
      const idx = comment.commentIdx;

      const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
      const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;

      const startPos = documentXml.indexOf(startMarker);
      const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);

      if (startPos === -1 || endPos === -1) continue;

      // Find the runs containing each marker. Pandoc may split a single
      // markdown anchor across multiple <w:r> blocks when it applies styling
      // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
      // Both '<w:r>' and '<w:r ' are probed so that '<w:rPr>' is never
      // mistaken for a run opening.
      const startRunOpen = Math.max(
        documentXml.lastIndexOf('<w:r>', startPos),
        documentXml.lastIndexOf('<w:r ', startPos),
      );
      const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
      const endRunOpen = Math.max(
        documentXml.lastIndexOf('<w:r>', endPos),
        documentXml.lastIndexOf('<w:r ', endPos),
      );
      const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);

      if (
        startRunOpen === -1 || startRunCloseIdx === -1 ||
        endRunOpen === -1 || endRunCloseIdx === -1
      ) continue;

      const startRunClose = startRunCloseIdx + '</w:r>'.length;
      const endRunClose = endRunCloseIdx + '</w:r>'.length;

      const startRunFull = documentXml.slice(startRunOpen, startRunClose);
      const endRunFull = documentXml.slice(endRunOpen, endRunClose);

      let replacement = '';
      const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment.commentIdx);

      // Open the range for the parent and for every reply attached to it.
      const emitRangeStarts = () => {
        replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
        for (const reply of replies) {
          replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
        }
      };

      // Close the ranges and emit the reference runs; replies are recorded as
      // injected here because they have no marker pair of their own.
      const emitRangeEnds = () => {
        replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
        replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
        for (const reply of replies) {
          replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
          replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
          injectedIds.add(reply.id);
        }
      };

      if (startRunOpen === endRunOpen) {
        // Same-run path: both markers live inside one <w:t>.
        const startInfo = dissectRun(startRunFull, startMarker);
        if (!startInfo) continue;
        const endInTextRel = startInfo.textAfter.indexOf(endMarker);
        if (endInTextRel === -1) continue;

        let anchorText = startInfo.textAfter.slice(0, endInTextRel);
        let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
        const textBefore = startInfo.textBefore;

        // Empty anchor: borrow the next word so the comment has something
        // to anchor on.
        if (!anchorText && textAfter) {
          const wordMatch = textAfter.match(/^\s*(\S+)/);
          if (wordMatch) {
            anchorText = wordMatch[1] ?? '';
            textAfter = textAfter.slice(wordMatch[0].length);
          }
        }
        // Still no anchor: collapse the double space left behind by removing
        // the markers.
        if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
          textAfter = textAfter.slice(1);
        }

        if (textBefore) {
          replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
        }
        emitRangeStarts();
        if (anchorText) {
          replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
        }
        emitRangeEnds();
        if (textAfter) {
          replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
        }
        documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
        injectedIds.add(comment.id);
        continue;
      }

      // Multi-run path: markers sit in different <w:r> blocks because pandoc
      // applied mid-anchor styling. Split the start run at the start marker,
      // keep all middle runs verbatim (they carry the styled anchor portions),
      // split the end run at the end marker.
      const startInfo = dissectRun(startRunFull, startMarker);
      const endInfo = dissectRun(endRunFull, endMarker);
      if (!startInfo || !endInfo) continue;

      const middle = documentXml.slice(startRunClose, endRunOpen);

      if (startInfo.textBefore) {
        replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
      }
      emitRangeStarts();
      if (startInfo.textAfter) {
        replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
      }
      replacement += middle;
      if (endInfo.textBefore) {
        replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
      }
      emitRangeEnds();
      if (endInfo.textAfter) {
        replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
      }

      documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
      injectedIds.add(comment.id);
    }

    // Add required namespaces to document.xml for comment threading
    const requiredNs: Record<string, string> = {
      'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
      'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
      'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
      'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
      'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
    };

    // Find <w:document and add namespaces
    const docTagMatch = documentXml.match(/<w:document[^>]*>/);
    if (docTagMatch) {
      let docTag = docTagMatch[0];
      let modified = false;
      for (const [attr, val] of Object.entries(requiredNs)) {
        if (!docTag.includes(attr)) {
          // The matched tag contains exactly one '>', its terminator.
          docTag = docTag.replace('>', ` ${attr}="${val}">`);
          modified = true;
        }
      }
      // Add mc:Ignorable if any namespace was added and none is declared yet
      if (modified && !docTag.includes('mc:Ignorable')) {
        docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
      }
      // Function replacer: the new tag is inserted literally, so '$' sequences
      // in existing attribute values are never treated as replacement patterns.
      const finalDocTag = docTag;
      documentXml = documentXml.replace(docTagMatch[0], () => finalDocTag);
    }

    // Update document.xml
    zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));

    // All comments (parents + replies) go in the comment parts, but a reply
    // is only included if its parent was injected.
    const includedComments = commentsWithIds.filter(c => {
      if (!c.isReply) {
        return injectedIds.has(c.id);
      }
      const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
      return parent !== undefined && injectedIds.has(parent.id);
    });

    // Replace a part if the archive already has it, otherwise add it.
    const writePart = (partPath: string, xml: string): void => {
      const payload = Buffer.from(xml, 'utf-8');
      if (zip.getEntry(partPath)) {
        zip.updateFile(partPath, payload);
      } else {
        zip.addFile(partPath, payload);
      }
    };

    writePart('word/comments.xml', createCommentsXml(includedComments));
    // commentsExtended.xml carries reply threading
    writePart('word/commentsExtended.xml', createCommentsExtendedXml(includedComments));
    // commentsIds.xml (Word 2016+)
    writePart('word/commentsIds.xml', createCommentsIdsXml(includedComments));
    // commentsExtensible.xml (Word 2018+)
    writePart('word/commentsExtensible.xml', createCommentsExtensibleXml(includedComments));
    // people.xml (author definitions)
    writePart('word/people.xml', createPeopleXml(includedComments));

    // Update [Content_Types].xml
    const contentTypesEntry = zip.getEntry('[Content_Types].xml');
    if (contentTypesEntry) {
      let contentTypes = zip.readAsText(contentTypesEntry);

      const overrides: ReadonlyArray<readonly [string, string]> = [
        ['comments.xml', 'application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml'],
        ['commentsExtended.xml', 'application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml'],
        ['commentsIds.xml', 'application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml'],
        ['commentsExtensible.xml', 'application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml'],
        ['people.xml', 'application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml'],
      ];

      for (const [fileName, contentType] of overrides) {
        if (contentTypes.includes(fileName)) continue;
        const insertPoint = contentTypes.lastIndexOf('</Types>');
        // Without a closing tag there is no safe place to splice; leave as is.
        if (insertPoint === -1) continue;
        contentTypes = contentTypes.slice(0, insertPoint) +
          `<Override PartName="/word/${fileName}" ContentType="${contentType}"/>\n` +
          contentTypes.slice(insertPoint);
      }

      zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
    }

    // Update relationships
    const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
    if (relsEntry) {
      let rels = zip.readAsText(relsEntry);

      // New relationship IDs are allocated above the highest rIdN already present.
      const rIdMatches = rels.match(/rId(\d+)/g) || [];
      const maxId = rIdMatches.reduce(
        (max, r) => Math.max(max, parseInt(r.replace('rId', ''), 10)),
        0
      );

      // Each part has a fixed offset from maxId (its position in this list + 1),
      // regardless of which relationships already exist.
      const relationships: ReadonlyArray<readonly [string, string]> = [
        ['comments.xml', 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments'],
        ['commentsExtended.xml', 'http://schemas.microsoft.com/office/2011/relationships/commentsExtended'],
        ['commentsIds.xml', 'http://schemas.microsoft.com/office/2016/09/relationships/commentsIds'],
        ['commentsExtensible.xml', 'http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible'],
        ['people.xml', 'http://schemas.microsoft.com/office/2011/relationships/people'],
      ];

      relationships.forEach(([target, relType], position) => {
        if (rels.includes(target)) return;
        const insertPoint = rels.lastIndexOf('</Relationships>');
        // Without a closing tag there is no safe place to splice; leave as is.
        if (insertPoint === -1) return;
        rels = rels.slice(0, insertPoint) +
          `<Relationship Id="rId${maxId + position + 1}" Type="${relType}" Target="${target}"/>\n` +
          rels.slice(insertPoint);
      });

      zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
    }

    zip.writeZip(outputPath);

    const parentCount = includedComments.filter(c => !c.isReply).length;
    const replyCount = includedComments.filter(c => c.isReply).length;

    return {
      success: true,
      commentCount: parentCount,
      replyCount: replyCount,
      skippedComments: comments.length - includedComments.length,
    };

  } catch (err: unknown) {
    // Non-Error throwables are stringified so `error` is always populated.
    const message = err instanceof Error ? err.message : String(err);
    return { success: false, commentCount: 0, skippedComments: 0, error: message };
  }
}
|
|
477
|
+
|
|
478
|
+
const startMarker = `${MARKER_START_PREFIX}${idx}${MARKER_SUFFIX}`;
|
|
479
|
+
const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
|
|
480
|
+
|
|
481
|
+
const startPos = documentXml.indexOf(startMarker);
|
|
482
|
+
const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
|
|
483
|
+
|
|
484
|
+
if (startPos === -1 || endPos === -1) continue;
|
|
485
|
+
|
|
486
|
+
// Find the runs containing each marker. Pandoc may split a single
|
|
487
|
+
// markdown anchor across multiple <w:r> blocks when it applies styling
|
|
488
|
+
// mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
|
|
489
|
+
// The same-run path (current happy path) collapses into the multi-run
|
|
490
|
+
// path when start and end runs coincide.
|
|
491
|
+
const startRunOpen = Math.max(
|
|
492
|
+
documentXml.lastIndexOf('<w:r>', startPos),
|
|
493
|
+
documentXml.lastIndexOf('<w:r ', startPos),
|
|
494
|
+
);
|
|
495
|
+
const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
|
|
496
|
+
const endRunOpen = Math.max(
|
|
497
|
+
documentXml.lastIndexOf('<w:r>', endPos),
|
|
498
|
+
documentXml.lastIndexOf('<w:r ', endPos),
|
|
499
|
+
);
|
|
500
|
+
const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
|
|
501
|
+
|
|
502
|
+
if (
|
|
503
|
+
startRunOpen === -1 || startRunCloseIdx === -1 ||
|
|
504
|
+
endRunOpen === -1 || endRunCloseIdx === -1
|
|
505
|
+
) continue;
|
|
506
|
+
|
|
507
|
+
const startRunClose = startRunCloseIdx + '</w:r>'.length;
|
|
508
|
+
const endRunClose = endRunCloseIdx + '</w:r>'.length;
|
|
509
|
+
|
|
510
|
+
const startRunFull = documentXml.slice(startRunOpen, startRunClose);
|
|
511
|
+
const endRunFull = documentXml.slice(endRunOpen, endRunClose);
|
|
512
|
+
|
|
513
|
+
// Extract <w:rPr> and <w:t> element shape from each run. Both pieces
|
|
514
|
+
// are needed verbatim so a textBefore split keeps its original styling
|
|
515
|
+
// and so the post-anchor textAfter render keeps the end run's styling.
|
|
516
|
+
function dissectRun(runXml: string, marker: string): {
|
|
517
|
+
rPr: string;
|
|
518
|
+
tElement: string;
|
|
519
|
+
textBefore: string;
|
|
520
|
+
textAfter: string;
|
|
521
|
+
} | null {
|
|
522
|
+
const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
|
|
523
|
+
const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
|
|
524
|
+
if (!tMatch) return null;
|
|
525
|
+
const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
|
|
526
|
+
if (!tOpenMatch) return null;
|
|
527
|
+
const tContent = tMatch[1] ?? '';
|
|
528
|
+
const markerInT = tContent.indexOf(marker);
|
|
529
|
+
if (markerInT === -1) return null;
|
|
530
|
+
return {
|
|
531
|
+
rPr: rPrMatch ? rPrMatch[0] : '',
|
|
532
|
+
tElement: tOpenMatch[0],
|
|
533
|
+
textBefore: tContent.slice(0, markerInT),
|
|
534
|
+
textAfter: tContent.slice(markerInT + marker.length),
|
|
535
|
+
};
|
|
536
|
+
}
|
|
537
|
+
|
|
538
|
+
let replacement = '';
|
|
539
|
+
const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
|
|
540
|
+
|
|
541
|
+
const emitRangeStarts = () => {
|
|
542
|
+
replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
|
|
543
|
+
for (const reply of replies) {
|
|
544
|
+
replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
|
|
545
|
+
}
|
|
546
|
+
};
|
|
547
|
+
|
|
548
|
+
const emitRangeEnds = () => {
|
|
549
|
+
replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
|
|
550
|
+
replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
|
|
551
|
+
for (const reply of replies) {
|
|
552
|
+
replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
|
|
553
|
+
replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
|
|
554
|
+
injectedIds.add(reply.id);
|
|
555
|
+
}
|
|
556
|
+
};
|
|
557
|
+
|
|
558
|
+
if (startRunOpen === endRunOpen) {
|
|
559
|
+
// Same-run path: both markers live inside one <w:t>. Original logic.
|
|
560
|
+
const startInfo = dissectRun(startRunFull, startMarker);
|
|
561
|
+
if (!startInfo) continue;
|
|
562
|
+
const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
|
|
563
|
+
const endInTextRel = startInfo.textAfter.indexOf(endMarker);
|
|
564
|
+
if (endInTextRel === -1) continue;
|
|
565
|
+
const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
|
|
566
|
+
let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
|
|
567
|
+
let anchorText = anchorTextSame;
|
|
568
|
+
let textBefore = startInfo.textBefore;
|
|
569
|
+
|
|
570
|
+
// Empty anchor: borrow the next word so the comment has something
|
|
571
|
+
// to anchor on. Then normalize the trailing double space.
|
|
572
|
+
if (!anchorText && textAfter) {
|
|
573
|
+
const wordMatch = textAfter.match(/^\s*(\S+)/);
|
|
574
|
+
if (wordMatch) {
|
|
575
|
+
anchorText = wordMatch[1] ?? '';
|
|
576
|
+
textAfter = textAfter.slice(wordMatch[0].length);
|
|
577
|
+
}
|
|
578
|
+
}
|
|
579
|
+
if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
|
|
580
|
+
textAfter = textAfter.slice(1);
|
|
581
|
+
}
|
|
582
|
+
// Suppress unused warning for pre-empty-anchor fullText var
|
|
583
|
+
void fullText;
|
|
584
|
+
|
|
585
|
+
if (textBefore) {
|
|
586
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
|
|
587
|
+
}
|
|
588
|
+
emitRangeStarts();
|
|
589
|
+
if (anchorText) {
|
|
590
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
|
|
591
|
+
}
|
|
592
|
+
emitRangeEnds();
|
|
593
|
+
if (textAfter) {
|
|
594
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
|
|
595
|
+
}
|
|
596
|
+
documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
|
|
597
|
+
injectedIds.add(comment.id);
|
|
598
|
+
continue;
|
|
599
|
+
}
|
|
600
|
+
|
|
601
|
+
// Multi-run path: markers sit in different <w:r> blocks because pandoc
|
|
602
|
+
// applied mid-anchor styling. Split the start run at the start marker,
|
|
603
|
+
// keep all middle runs verbatim (they carry the styled anchor portions),
|
|
604
|
+
// split the end run at the end marker.
|
|
605
|
+
const startInfo = dissectRun(startRunFull, startMarker);
|
|
606
|
+
const endInfo = dissectRun(endRunFull, endMarker);
|
|
607
|
+
if (!startInfo || !endInfo) continue;
|
|
608
|
+
|
|
609
|
+
const middle = documentXml.slice(startRunClose, endRunOpen);
|
|
610
|
+
|
|
611
|
+
if (startInfo.textBefore) {
|
|
612
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
|
|
613
|
+
}
|
|
614
|
+
emitRangeStarts();
|
|
615
|
+
if (startInfo.textAfter) {
|
|
616
|
+
replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
|
|
617
|
+
}
|
|
618
|
+
replacement += middle;
|
|
619
|
+
if (endInfo.textBefore) {
|
|
620
|
+
replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
|
|
621
|
+
}
|
|
622
|
+
emitRangeEnds();
|
|
623
|
+
if (endInfo.textAfter) {
|
|
624
|
+
replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
|
|
625
|
+
}
|
|
626
|
+
|
|
627
|
+
documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
|
|
628
|
+
injectedIds.add(comment.id);
|
|
629
|
+
}
|
|
630
|
+
|
|
631
|
+
// Add required namespaces to document.xml for comment threading
|
|
632
|
+
const requiredNs: Record<string, string> = {
|
|
633
|
+
'xmlns:w14': 'http://schemas.microsoft.com/office/word/2010/wordml',
|
|
634
|
+
'xmlns:w15': 'http://schemas.microsoft.com/office/word/2012/wordml',
|
|
635
|
+
'xmlns:w16cid': 'http://schemas.microsoft.com/office/word/2016/wordml/cid',
|
|
636
|
+
'xmlns:w16cex': 'http://schemas.microsoft.com/office/word/2018/wordml/cex',
|
|
637
|
+
'xmlns:mc': 'http://schemas.openxmlformats.org/markup-compatibility/2006',
|
|
638
|
+
};
|
|
639
|
+
|
|
640
|
+
// Find <w:document and add namespaces
|
|
641
|
+
const docTagMatch = documentXml.match(/<w:document[^>]*>/);
|
|
642
|
+
if (docTagMatch) {
|
|
643
|
+
let docTag = docTagMatch[0];
|
|
644
|
+
let modified = false;
|
|
645
|
+
for (const [attr, val] of Object.entries(requiredNs)) {
|
|
646
|
+
if (!docTag.includes(attr)) {
|
|
647
|
+
docTag = docTag.replace('>', ` ${attr}="${val}">`);
|
|
648
|
+
modified = true;
|
|
649
|
+
}
|
|
650
|
+
}
|
|
651
|
+
// Add mc:Ignorable if mc namespace was added
|
|
652
|
+
if (modified && !docTag.includes('mc:Ignorable')) {
|
|
653
|
+
docTag = docTag.replace('>', ' mc:Ignorable="w14 w15 w16cid w16cex">');
|
|
654
|
+
}
|
|
655
|
+
documentXml = documentXml.replace(docTagMatch[0], docTag);
|
|
656
|
+
}
|
|
657
|
+
|
|
658
|
+
// Update document.xml
|
|
659
|
+
zip.updateFile('word/document.xml', Buffer.from(documentXml, 'utf-8'));
|
|
660
|
+
|
|
661
|
+
// All comments (parents + replies) go in comments.xml
|
|
662
|
+
// But only include if parent was injected
|
|
663
|
+
const includedComments = commentsWithIds.filter(c => {
|
|
664
|
+
if (!c.isReply) {
|
|
665
|
+
return injectedIds.has(c.id);
|
|
666
|
+
} else {
|
|
667
|
+
// Include reply if its parent was injected
|
|
668
|
+
const parent = c.parentIdx !== null ? commentsWithIds[c.parentIdx] : undefined;
|
|
669
|
+
return parent && injectedIds.has(parent.id);
|
|
670
|
+
}
|
|
671
|
+
});
|
|
672
|
+
|
|
673
|
+
// Create comments.xml
|
|
674
|
+
const commentsXml = createCommentsXml(includedComments);
|
|
675
|
+
if (zip.getEntry('word/comments.xml')) {
|
|
676
|
+
zip.updateFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
|
|
677
|
+
} else {
|
|
678
|
+
zip.addFile('word/comments.xml', Buffer.from(commentsXml, 'utf-8'));
|
|
679
|
+
}
|
|
680
|
+
|
|
681
|
+
// Create commentsExtended.xml with reply threading
|
|
682
|
+
const commentsExtXml = createCommentsExtendedXml(includedComments);
|
|
683
|
+
if (zip.getEntry('word/commentsExtended.xml')) {
|
|
684
|
+
zip.updateFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
|
|
685
|
+
} else {
|
|
686
|
+
zip.addFile('word/commentsExtended.xml', Buffer.from(commentsExtXml, 'utf-8'));
|
|
687
|
+
}
|
|
688
|
+
|
|
689
|
+
// Create commentsIds.xml (Word 2016+)
|
|
690
|
+
const commentsIdsXml = createCommentsIdsXml(includedComments);
|
|
691
|
+
if (zip.getEntry('word/commentsIds.xml')) {
|
|
692
|
+
zip.updateFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
|
|
693
|
+
} else {
|
|
694
|
+
zip.addFile('word/commentsIds.xml', Buffer.from(commentsIdsXml, 'utf-8'));
|
|
695
|
+
}
|
|
696
|
+
|
|
697
|
+
// Create commentsExtensible.xml (Word 2018+)
|
|
698
|
+
const commentsExtensibleXml = createCommentsExtensibleXml(includedComments);
|
|
699
|
+
if (zip.getEntry('word/commentsExtensible.xml')) {
|
|
700
|
+
zip.updateFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
|
|
701
|
+
} else {
|
|
702
|
+
zip.addFile('word/commentsExtensible.xml', Buffer.from(commentsExtensibleXml, 'utf-8'));
|
|
703
|
+
}
|
|
704
|
+
|
|
705
|
+
// Create people.xml (author definitions with Windows Live IDs)
|
|
706
|
+
const peopleXml = createPeopleXml(includedComments);
|
|
707
|
+
if (zip.getEntry('word/people.xml')) {
|
|
708
|
+
zip.updateFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
|
|
709
|
+
} else {
|
|
710
|
+
zip.addFile('word/people.xml', Buffer.from(peopleXml, 'utf-8'));
|
|
711
|
+
}
|
|
712
|
+
|
|
713
|
+
// Update [Content_Types].xml
|
|
714
|
+
const contentTypesEntry = zip.getEntry('[Content_Types].xml');
|
|
715
|
+
if (contentTypesEntry) {
|
|
716
|
+
let contentTypes = zip.readAsText(contentTypesEntry);
|
|
717
|
+
|
|
718
|
+
if (!contentTypes.includes('comments.xml')) {
|
|
719
|
+
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
720
|
+
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
721
|
+
'<Override PartName="/word/comments.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.comments+xml"/>\n' +
|
|
722
|
+
contentTypes.slice(insertPoint);
|
|
723
|
+
}
|
|
724
|
+
|
|
725
|
+
if (!contentTypes.includes('commentsExtended.xml')) {
|
|
726
|
+
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
727
|
+
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
728
|
+
'<Override PartName="/word/commentsExtended.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtended+xml"/>\n' +
|
|
729
|
+
contentTypes.slice(insertPoint);
|
|
730
|
+
}
|
|
731
|
+
|
|
732
|
+
if (!contentTypes.includes('commentsIds.xml')) {
|
|
733
|
+
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
734
|
+
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
735
|
+
'<Override PartName="/word/commentsIds.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsIds+xml"/>\n' +
|
|
736
|
+
contentTypes.slice(insertPoint);
|
|
737
|
+
}
|
|
738
|
+
|
|
739
|
+
if (!contentTypes.includes('commentsExtensible.xml')) {
|
|
740
|
+
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
741
|
+
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
742
|
+
'<Override PartName="/word/commentsExtensible.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.commentsExtensible+xml"/>\n' +
|
|
743
|
+
contentTypes.slice(insertPoint);
|
|
744
|
+
}
|
|
745
|
+
|
|
746
|
+
if (!contentTypes.includes('people.xml')) {
|
|
747
|
+
const insertPoint = contentTypes.lastIndexOf('</Types>');
|
|
748
|
+
contentTypes = contentTypes.slice(0, insertPoint) +
|
|
749
|
+
'<Override PartName="/word/people.xml" ContentType="application/vnd.openxmlformats-officedocument.wordprocessingml.people+xml"/>\n' +
|
|
750
|
+
contentTypes.slice(insertPoint);
|
|
751
|
+
}
|
|
752
|
+
|
|
753
|
+
zip.updateFile('[Content_Types].xml', Buffer.from(contentTypes, 'utf-8'));
|
|
754
|
+
}
|
|
755
|
+
|
|
756
|
+
// Update relationships
|
|
757
|
+
const relsEntry = zip.getEntry('word/_rels/document.xml.rels');
|
|
758
|
+
if (relsEntry) {
|
|
759
|
+
let rels = zip.readAsText(relsEntry);
|
|
760
|
+
|
|
761
|
+
const rIdMatches = rels.match(/rId(\d+)/g) || [];
|
|
762
|
+
const maxId = rIdMatches.reduce((max, r) => Math.max(max, parseInt(r.replace('rId', ''))), 0);
|
|
763
|
+
|
|
764
|
+
if (!rels.includes('comments.xml')) {
|
|
765
|
+
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
766
|
+
rels = rels.slice(0, insertPoint) +
|
|
767
|
+
`<Relationship Id="rId${maxId + 1}" Type="http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments" Target="comments.xml"/>\n` +
|
|
768
|
+
rels.slice(insertPoint);
|
|
769
|
+
}
|
|
770
|
+
|
|
771
|
+
if (!rels.includes('commentsExtended.xml')) {
|
|
772
|
+
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
773
|
+
rels = rels.slice(0, insertPoint) +
|
|
774
|
+
`<Relationship Id="rId${maxId + 2}" Type="http://schemas.microsoft.com/office/2011/relationships/commentsExtended" Target="commentsExtended.xml"/>\n` +
|
|
775
|
+
rels.slice(insertPoint);
|
|
776
|
+
}
|
|
777
|
+
|
|
778
|
+
if (!rels.includes('commentsIds.xml')) {
|
|
779
|
+
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
780
|
+
rels = rels.slice(0, insertPoint) +
|
|
781
|
+
`<Relationship Id="rId${maxId + 3}" Type="http://schemas.microsoft.com/office/2016/09/relationships/commentsIds" Target="commentsIds.xml"/>\n` +
|
|
782
|
+
rels.slice(insertPoint);
|
|
783
|
+
}
|
|
784
|
+
|
|
785
|
+
if (!rels.includes('commentsExtensible.xml')) {
|
|
786
|
+
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
787
|
+
rels = rels.slice(0, insertPoint) +
|
|
788
|
+
`<Relationship Id="rId${maxId + 4}" Type="http://schemas.microsoft.com/office/2018/08/relationships/commentsExtensible" Target="commentsExtensible.xml"/>\n` +
|
|
789
|
+
rels.slice(insertPoint);
|
|
790
|
+
}
|
|
791
|
+
|
|
792
|
+
if (!rels.includes('people.xml')) {
|
|
793
|
+
const insertPoint = rels.lastIndexOf('</Relationships>');
|
|
794
|
+
rels = rels.slice(0, insertPoint) +
|
|
795
|
+
`<Relationship Id="rId${maxId + 5}" Type="http://schemas.microsoft.com/office/2011/relationships/people" Target="people.xml"/>\n` +
|
|
796
|
+
rels.slice(insertPoint);
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
zip.updateFile('word/_rels/document.xml.rels', Buffer.from(rels, 'utf-8'));
|
|
800
|
+
}
|
|
801
|
+
|
|
802
|
+
zip.writeZip(outputPath);
|
|
803
|
+
|
|
804
|
+
const parentCount = includedComments.filter(c => !c.isReply).length;
|
|
805
|
+
const replyCount = includedComments.filter(c => c.isReply).length;
|
|
806
|
+
|
|
807
|
+
return {
|
|
808
|
+
success: true,
|
|
809
|
+
commentCount: parentCount,
|
|
810
|
+
replyCount: replyCount,
|
|
811
|
+
skippedComments: comments.length - includedComments.length,
|
|
812
|
+
};
|
|
813
|
+
|
|
814
|
+
} catch (err: any) {
|
|
815
|
+
return { success: false, commentCount: 0, skippedComments: 0, error: err.message };
|
|
816
|
+
}
|
|
817
|
+
}
|