docrev 0.9.7 → 0.9.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. package/CHANGELOG.md +21 -0
  2. package/dev_notes/stress2/adversarial.docx +0 -0
  3. package/dev_notes/stress2/build_adversarial.ts +186 -0
  4. package/dev_notes/stress2/drift_matcher.ts +62 -0
  5. package/dev_notes/stress2/probe_anchors.ts +35 -0
  6. package/dev_notes/stress2/project/adversarial.docx +0 -0
  7. package/dev_notes/stress2/project/discussion.before.md +3 -0
  8. package/dev_notes/stress2/project/discussion.md +3 -0
  9. package/dev_notes/stress2/project/methods.before.md +20 -0
  10. package/dev_notes/stress2/project/methods.md +20 -0
  11. package/dev_notes/stress2/project/rev.yaml +5 -0
  12. package/dev_notes/stress2/project/sections.yaml +4 -0
  13. package/dev_notes/stress2/sections.yaml +5 -0
  14. package/dev_notes/stress2/trace_placement.ts +50 -0
  15. package/dev_notes/stresstest_boundaries.ts +27 -0
  16. package/dev_notes/stresstest_drift_apply.ts +43 -0
  17. package/dev_notes/stresstest_drift_compare.ts +43 -0
  18. package/dev_notes/stresstest_drift_v2.ts +54 -0
  19. package/dev_notes/stresstest_inspect.ts +54 -0
  20. package/dev_notes/stresstest_pstyle.ts +55 -0
  21. package/dev_notes/stresstest_section_debug.ts +23 -0
  22. package/dev_notes/stresstest_split.ts +70 -0
  23. package/dev_notes/stresstest_trace.ts +19 -0
  24. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
  25. package/dist/lib/anchor-match.d.ts +10 -0
  26. package/dist/lib/anchor-match.d.ts.map +1 -1
  27. package/dist/lib/anchor-match.js +35 -0
  28. package/dist/lib/anchor-match.js.map +1 -1
  29. package/dist/lib/annotations.d.ts.map +1 -1
  30. package/dist/lib/annotations.js +16 -6
  31. package/dist/lib/annotations.js.map +1 -1
  32. package/dist/lib/build.d.ts +12 -0
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +12 -0
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/quality.js +1 -1
  37. package/dist/lib/commands/quality.js.map +1 -1
  38. package/dist/lib/commands/section-boundaries.d.ts +1 -1
  39. package/dist/lib/commands/section-boundaries.d.ts.map +1 -1
  40. package/dist/lib/commands/section-boundaries.js +12 -2
  41. package/dist/lib/commands/section-boundaries.js.map +1 -1
  42. package/dist/lib/commands/sync.js +19 -13
  43. package/dist/lib/commands/sync.js.map +1 -1
  44. package/dist/lib/commands/verify-anchors.d.ts.map +1 -1
  45. package/dist/lib/commands/verify-anchors.js +15 -4
  46. package/dist/lib/commands/verify-anchors.js.map +1 -1
  47. package/dist/lib/comment-realign.js +2 -2
  48. package/dist/lib/comment-realign.js.map +1 -1
  49. package/dist/lib/import.d.ts +12 -0
  50. package/dist/lib/import.d.ts.map +1 -1
  51. package/dist/lib/import.js +289 -60
  52. package/dist/lib/import.js.map +1 -1
  53. package/dist/lib/response.js +1 -1
  54. package/dist/lib/response.js.map +1 -1
  55. package/dist/lib/types.d.ts +20 -0
  56. package/dist/lib/types.d.ts.map +1 -1
  57. package/dist/lib/word-extraction.d.ts +6 -0
  58. package/dist/lib/word-extraction.d.ts.map +1 -1
  59. package/dist/lib/word-extraction.js +46 -3
  60. package/dist/lib/word-extraction.js.map +1 -1
  61. package/dist/lib/wordcomments.d.ts.map +1 -1
  62. package/dist/lib/wordcomments.js +188 -78
  63. package/dist/lib/wordcomments.js.map +1 -1
  64. package/lib/anchor-match.ts +38 -0
  65. package/lib/annotations.ts +16 -6
  66. package/lib/build.ts +24 -0
  67. package/lib/commands/quality.ts +1 -1
  68. package/lib/commands/section-boundaries.ts +11 -1
  69. package/lib/commands/sync.ts +21 -16
  70. package/lib/commands/verify-anchors.ts +15 -4
  71. package/lib/comment-realign.ts +2 -2
  72. package/lib/import.ts +304 -61
  73. package/lib/response.ts +1 -1
  74. package/lib/types.ts +20 -0
  75. package/lib/word-extraction.ts +50 -3
  76. package/lib/wordcomments.ts +205 -88
  77. package/package.json +1 -1
  78. package/dist/package.json +0 -137
package/lib/types.ts CHANGED
@@ -69,6 +69,22 @@ export interface PdfConfig {
69
69
  geometry?: string;
70
70
  linestretch?: number;
71
71
  toc?: boolean;
72
+ /**
73
+ * LaTeX engine to use for PDF output. One of `pdflatex` (default),
74
+ * `xelatex`, `lualatex`, `tectonic`, etc. xelatex/lualatex are required
75
+ * for native UTF-8 rendering of diacritics in author names, place
76
+ * names, and species epithets.
77
+ */
78
+ engine?: string;
79
+ /** Roman/serif main font (xelatex/lualatex only — uses fontspec). */
80
+ mainfont?: string;
81
+ /** Sans-serif font (xelatex/lualatex only). */
82
+ sansfont?: string;
83
+ /** Monospace font (xelatex/lualatex only). */
84
+ monofont?: string;
85
+ numbersections?: boolean;
86
+ template?: string;
87
+ headerIncludes?: string;
72
88
  }
73
89
 
74
90
  export interface DocxConfig {
@@ -338,6 +354,10 @@ export interface JournalFormatting {
338
354
  linestretch?: number;
339
355
  template?: string;
340
356
  numbersections?: boolean;
357
+ engine?: string;
358
+ mainfont?: string;
359
+ sansfont?: string;
360
+ monofont?: string;
341
361
  };
342
362
  docx?: {
343
363
  reference?: string;
package/lib/word-extraction.ts CHANGED
@@ -18,6 +18,12 @@ export interface WordComment {
18
18
  author: string;
19
19
  date: string;
20
20
  text: string;
21
+ /**
22
+ * Parent comment id when this is a reply in a Word comment thread.
23
+ * Resolved from `commentsExtended.xml`'s `w15:paraIdParent` field.
24
+ * `undefined` for top-level comments.
25
+ */
26
+ parentId?: string;
21
27
  }
22
28
 
23
29
  export interface TextNode {
@@ -126,7 +132,6 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
126
132
 
127
133
  const parsed = await parseStringPromise(commentsXml, { explicitArray: false });
128
134
 
129
- const ns = 'w:';
130
135
  const commentsRoot = parsed['w:comments'];
131
136
  if (!commentsRoot || !commentsRoot['w:comment']) {
132
137
  return comments;
@@ -137,12 +142,18 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
137
142
  ? commentsRoot['w:comment']
138
143
  : [commentsRoot['w:comment']];
139
144
 
145
+ // Map every paraId that lives inside a comment back to that comment's id.
146
+ // Word's commentsExtended.xml expresses threading via w15:paraIdParent,
147
+ // which references the parent's first <w:p>. Replies use a secondary
148
+ // (often-empty) <w:p>, so each comment may contribute multiple paraIds.
149
+ const paraIdToCommentId = new Map<string, string>();
150
+
140
151
  for (const comment of commentNodes) {
141
152
  const id = comment.$?.['w:id'] || '';
142
153
  const author = comment.$?.['w:author'] || 'Unknown';
143
154
  const date = comment.$?.['w:date'] || '';
144
155
 
145
- // Extract text from nested w:p/w:r/w:t elements
156
+ // Extract text from nested w:p/w:r/w:t elements and record paraIds.
146
157
  let text = '';
147
158
  const extractText = (node: any): void => {
148
159
  if (!node) return;
@@ -160,13 +171,49 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
160
171
  }
161
172
  if (node['w:p']) {
162
173
  const paras = Array.isArray(node['w:p']) ? node['w:p'] : [node['w:p']];
163
- paras.forEach(extractText);
174
+ for (const para of paras) {
175
+ const paraId = para?.$?.['w14:paraId'];
176
+ if (paraId && id) paraIdToCommentId.set(paraId, id);
177
+ extractText(para);
178
+ }
164
179
  }
165
180
  };
166
181
  extractText(comment);
167
182
 
168
183
  comments.push({ id, author, date: date.slice(0, 10), text: text.trim() });
169
184
  }
185
+
186
+ // Resolve parent links from commentsExtended.xml. Missing entry just
187
+ // means the docx has no threading metadata (e.g. legacy/non-Word source).
188
+ const extendedEntry = zip.getEntry('word/commentsExtended.xml');
189
+ if (extendedEntry && paraIdToCommentId.size > 0) {
190
+ let extendedXml = '';
191
+ try {
192
+ extendedXml = extendedEntry.getData().toString('utf8');
193
+ } catch {
194
+ // Unreadable threading metadata is non-fatal; skip parent linking.
195
+ }
196
+ if (extendedXml) {
197
+ const parentByCommentId = new Map<string, string>();
198
+ const exPattern = /<w15:commentEx\b([^>]*?)\/>/g;
199
+ let m: RegExpExecArray | null;
200
+ while ((m = exPattern.exec(extendedXml)) !== null) {
201
+ const attrs = m[1] ?? '';
202
+ const paraIdMatch = attrs.match(/w15:paraId="([^"]+)"/);
203
+ const parentMatch = attrs.match(/w15:paraIdParent="([^"]+)"/);
204
+ if (!paraIdMatch || !parentMatch) continue;
205
+ const childCommentId = paraIdToCommentId.get(paraIdMatch[1]);
206
+ const parentCommentId = paraIdToCommentId.get(parentMatch[1]);
207
+ if (childCommentId && parentCommentId && childCommentId !== parentCommentId) {
208
+ parentByCommentId.set(childCommentId, parentCommentId);
209
+ }
210
+ }
211
+ for (const c of comments) {
212
+ const parent = parentByCommentId.get(c.id);
213
+ if (parent) c.parentId = parent;
214
+ }
215
+ }
216
+ }
170
217
  } catch (err: any) {
171
218
  // Re-throw with more context if it's already an Error we created
172
219
  if (err.message.includes('Invalid Word document') || err.message.includes('File not found')) {
package/lib/wordcomments.ts CHANGED
@@ -72,10 +72,38 @@ function generateParaId(commentIdx: number, paraNum: number): string {
72
72
  * - comments: array with author, text, isReply, parentIdx
73
73
  */
74
74
  export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
75
- // Match all comments with optional anchor
76
- const commentPattern = /\{>>(.+?)<<\}(?:\s*\[([^\]]+)\]\{\.mark\})?/g;
75
+ // Match the comment block first; extend manually to capture an optional
76
+ // trailing `[anchor]{.mark}` span. A regex `[^\]]+` for the anchor would
77
+ // bail on the inner `]` of nested syntax (e.g. `[[0..9]]{.mark}` or
78
+ // `[*phrase*]{.mark}` after pandoc-rewriting), so we walk the brackets
79
+ // ourselves and verify a `{.mark}` suffix.
80
+ const commentPattern = /\{>>([\s\S]+?)<<\}/g;
81
+
82
+ function tryParseTrailingAnchor(
83
+ text: string,
84
+ fromIdx: number,
85
+ ): { anchor: string; endIdx: number } | null {
86
+ let i = fromIdx;
87
+ while (i < text.length && /\s/.test(text[i] ?? '')) i++;
88
+ if (text[i] !== '[') return null;
89
+ let depth = 1;
90
+ let j = i + 1;
91
+ while (j < text.length) {
92
+ const ch = text[j];
93
+ if (ch === '[') depth++;
94
+ else if (ch === ']') {
95
+ depth--;
96
+ if (depth === 0) break;
97
+ }
98
+ j++;
99
+ }
100
+ if (depth !== 0) return null;
101
+ if (text.slice(j + 1, j + 8) !== '{.mark}') return null;
102
+ return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
103
+ }
77
104
 
78
- const rawMatches: ParsedComment[] = [];
105
+ const REPLY_PREFIX = '↪ ';
106
+ const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
79
107
  let match: RegExpExecArray | null;
80
108
  while ((match = commentPattern.exec(markdown)) !== null) {
81
109
  const content = match[1] ?? '';
@@ -87,24 +115,52 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
87
115
  text = content.slice(colonIdx + 1).trim();
88
116
  }
89
117
 
118
+ // The `↪ ` prefix is the authoritative reply signal emitted by
119
+ // `insertCommentsIntoMarkdown`. Strip it from the author before injection
120
+ // so Word displays the real name.
121
+ let explicitReply = false;
122
+ if (author.startsWith(REPLY_PREFIX)) {
123
+ explicitReply = true;
124
+ author = author.slice(REPLY_PREFIX.length).trim();
125
+ }
126
+
127
+ const commentEnd = match.index + match[0].length;
128
+ const trailing = tryParseTrailingAnchor(markdown, commentEnd);
129
+
90
130
  rawMatches.push({
91
131
  author,
92
132
  text,
93
- anchor: match[2] || null,
133
+ anchor: trailing ? trailing.anchor : null,
94
134
  start: match.index,
95
- end: match.index + match[0].length,
96
- fullMatch: match[0]
135
+ end: trailing ? trailing.endIdx : commentEnd,
136
+ fullMatch: markdown.slice(match.index, trailing ? trailing.endIdx : commentEnd),
137
+ explicitReply,
97
138
  });
139
+
140
+ // Advance regex lastIndex past the consumed anchor so the next iteration
141
+ // doesn't re-scan inside it (e.g. `[*emphasis*]{.mark}` would otherwise
142
+ // tempt the matcher to look for another `{>>...<<}` in the body of the
143
+ // anchor span).
144
+ if (trailing) {
145
+ commentPattern.lastIndex = trailing.endIdx;
146
+ }
98
147
  }
99
148
 
100
149
  if (rawMatches.length === 0) {
101
150
  return { markedMarkdown: markdown, comments: [] };
102
151
  }
103
152
 
104
- // Detect reply relationships based on adjacency
105
- // First comment in a cluster = parent, all subsequent = replies to that parent
106
- // Comments are "adjacent" if there's minimal text between them (< 10 chars)
153
+ // Two-mode reply detection driven by the markdown itself:
154
+ // - If any comment carries the `↪ ` author prefix, the markdown came
155
+ // through `insertCommentsIntoMarkdown` and we use prefix-only mode.
156
+ // Distinct clusters that happen to land at gap=0 (a real failure
157
+ // mode on dense reviewer docs — 298-comment paper produced 9 such
158
+ // collisions) are not misthreaded.
159
+ // - If no comment carries the prefix, the markdown was hand-typed.
160
+ // Fall back to gap < 10 adjacency for backward compat with users
161
+ // who write CriticMarkup directly.
107
162
  const ADJACENT_THRESHOLD = 10;
163
+ const useExplicitMode = rawMatches.some(m => m.explicitReply);
108
164
  const comments: PreparedComment[] = [];
109
165
  let clusterParentIdx = -1; // Index of first comment in current cluster
110
166
  let lastCommentEnd = -1;
@@ -113,9 +169,10 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
113
169
  const m = rawMatches[i];
114
170
  if (!m) continue;
115
171
 
116
- // Check if this comment is adjacent to the previous one
117
172
  const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
118
- const isAdjacent = gap < ADJACENT_THRESHOLD;
173
+ const isAdjacent = useExplicitMode
174
+ ? m.explicitReply
175
+ : gap < ADJACENT_THRESHOLD;
119
176
 
120
177
  // Reset cluster if there's a gap (comments not in same cluster)
121
178
  if (!isAdjacent) {
@@ -179,10 +236,11 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
179
236
 
180
237
  if (c.isReply) {
181
238
  // Reply: remove from document entirely (will be in comments.xml only)
182
- // Also consume leading whitespace to avoid double spaces
239
+ // Also consume one preceding whitespace char to avoid double spaces.
240
+ // We deliberately consume at most one — walking arbitrarily backwards
241
+ // would shift positions that lower-index comments still depend on.
183
242
  let removeStart = c.start;
184
- const charBefore = markedMarkdown[removeStart - 1];
185
- while (removeStart > 0 && charBefore && /\s/.test(charBefore)) {
243
+ if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
186
244
  removeStart--;
187
245
  }
188
246
 
@@ -205,10 +263,10 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
205
263
  } else {
206
264
  // Parent comment
207
265
  if (c.anchorFromReply) {
208
- // Anchor markers are placed by the reply, just remove this comment
266
+ // Anchor markers are placed by the reply, just remove this comment.
267
+ // Consume one preceding whitespace char only (see reply branch above).
209
268
  let removeStart = c.start;
210
- const charBefore = markedMarkdown[removeStart - 1];
211
- while (removeStart > 0 && charBefore && /\s/.test(charBefore)) {
269
+ if (removeStart > 0 && /\s/.test(markedMarkdown[removeStart - 1] ?? '')) {
212
270
  removeStart--;
213
271
  }
214
272
  markedMarkdown = markedMarkdown.slice(0, removeStart) + markedMarkdown.slice(c.end);
@@ -421,93 +479,152 @@ export async function injectCommentsAtMarkers(
421
479
  const endMarker = `${MARKER_END_PREFIX}${idx}${MARKER_SUFFIX}`;
422
480
 
423
481
  const startPos = documentXml.indexOf(startMarker);
424
- const endPos = documentXml.indexOf(endMarker);
482
+ const endPos = documentXml.indexOf(endMarker, startPos + startMarker.length);
425
483
 
426
484
  if (startPos === -1 || endPos === -1) continue;
427
485
 
428
- // Find the <w:r> containing the markers
429
- const rStartBefore = documentXml.lastIndexOf('<w:r>', startPos);
430
- const rStartOpen = documentXml.lastIndexOf('<w:r ', startPos);
431
- const rStart = Math.max(rStartBefore, rStartOpen);
432
- const rEndPos = documentXml.indexOf('</w:r>', endPos);
433
-
434
- if (rStart === -1 || rEndPos === -1) continue;
435
-
436
- const rEnd = rEndPos + '</w:r>'.length;
437
- const runContent = documentXml.slice(rStart, rEnd);
438
-
439
- // Extract styling
440
- const rPrMatch = runContent.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
441
- const rPr = rPrMatch ? rPrMatch[0] : '';
442
-
443
- // Extract text
444
- const textMatch = runContent.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
445
- if (!textMatch) continue;
446
-
447
- const fullText = textMatch[1] ?? '';
448
- const tElementMatch = textMatch[0].match(/<w:t[^>]*>/);
449
- if (!tElementMatch) continue;
450
- const tElement = tElementMatch[0];
451
-
452
- const startInText = fullText.indexOf(startMarker);
453
- const endInText = fullText.indexOf(endMarker);
454
- if (startInText === -1 || endInText === -1) continue;
486
+ // Find the runs containing each marker. Pandoc may split a single
487
+ // markdown anchor across multiple <w:r> blocks when it applies styling
488
+ // mid-anchor (smart-quote substitution, *italic*, `code`, **bold**).
489
+ // The same-run path (current happy path) collapses into the multi-run
490
+ // path when start and end runs coincide.
491
+ const startRunOpen = Math.max(
492
+ documentXml.lastIndexOf('<w:r>', startPos),
493
+ documentXml.lastIndexOf('<w:r ', startPos),
494
+ );
495
+ const startRunCloseIdx = documentXml.indexOf('</w:r>', startPos);
496
+ const endRunOpen = Math.max(
497
+ documentXml.lastIndexOf('<w:r>', endPos),
498
+ documentXml.lastIndexOf('<w:r ', endPos),
499
+ );
500
+ const endRunCloseIdx = documentXml.indexOf('</w:r>', endPos);
501
+
502
+ if (
503
+ startRunOpen === -1 || startRunCloseIdx === -1 ||
504
+ endRunOpen === -1 || endRunCloseIdx === -1
505
+ ) continue;
506
+
507
+ const startRunClose = startRunCloseIdx + '</w:r>'.length;
508
+ const endRunClose = endRunCloseIdx + '</w:r>'.length;
509
+
510
+ const startRunFull = documentXml.slice(startRunOpen, startRunClose);
511
+ const endRunFull = documentXml.slice(endRunOpen, endRunClose);
512
+
513
+ // Extract <w:rPr> and <w:t> element shape from each run. Both pieces
514
+ // are needed verbatim so a textBefore split keeps its original styling
515
+ // and so the post-anchor textAfter render keeps the end run's styling.
516
+ function dissectRun(runXml: string, marker: string): {
517
+ rPr: string;
518
+ tElement: string;
519
+ textBefore: string;
520
+ textAfter: string;
521
+ } | null {
522
+ const rPrMatch = runXml.match(/<w:rPr>[\s\S]*?<\/w:rPr>/);
523
+ const tMatch = runXml.match(/<w:t[^>]*>([\s\S]*?)<\/w:t>/);
524
+ if (!tMatch) return null;
525
+ const tOpenMatch = tMatch[0].match(/<w:t[^>]*>/);
526
+ if (!tOpenMatch) return null;
527
+ const tContent = tMatch[1] ?? '';
528
+ const markerInT = tContent.indexOf(marker);
529
+ if (markerInT === -1) return null;
530
+ return {
531
+ rPr: rPrMatch ? rPrMatch[0] : '',
532
+ tElement: tOpenMatch[0],
533
+ textBefore: tContent.slice(0, markerInT),
534
+ textAfter: tContent.slice(markerInT + marker.length),
535
+ };
536
+ }
455
537
 
456
- let textBefore = fullText.slice(0, startInText);
457
- let anchorText = fullText.slice(startInText + startMarker.length, endInText);
458
- let textAfter = fullText.slice(endInText + endMarker.length);
538
+ let replacement = '';
539
+ const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
459
540
 
460
- // When anchor is empty, use the first word from textAfter as fallback
461
- if (!anchorText && textAfter) {
462
- const wordMatch = textAfter.match(/^\s*(\S+)/);
463
- if (wordMatch) {
464
- anchorText = wordMatch[1] ?? '';
465
- textAfter = textAfter.slice(wordMatch[0].length);
541
+ const emitRangeStarts = () => {
542
+ replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
543
+ for (const reply of replies) {
544
+ replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
466
545
  }
467
- }
546
+ };
547
+
548
+ const emitRangeEnds = () => {
549
+ replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
550
+ replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
551
+ for (const reply of replies) {
552
+ replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
553
+ replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
554
+ injectedIds.add(reply.id);
555
+ }
556
+ };
557
+
558
+ if (startRunOpen === endRunOpen) {
559
+ // Same-run path: both markers live inside one <w:t>. Original logic.
560
+ const startInfo = dissectRun(startRunFull, startMarker);
561
+ if (!startInfo) continue;
562
+ const fullText = startInfo.textBefore + startMarker + startInfo.textAfter;
563
+ const endInTextRel = startInfo.textAfter.indexOf(endMarker);
564
+ if (endInTextRel === -1) continue;
565
+ const anchorTextSame = startInfo.textAfter.slice(0, endInTextRel);
566
+ let textAfter = startInfo.textAfter.slice(endInTextRel + endMarker.length);
567
+ let anchorText = anchorTextSame;
568
+ let textBefore = startInfo.textBefore;
569
+
570
+ // Empty anchor: borrow the next word so the comment has something
571
+ // to anchor on. Then normalize the trailing double space.
572
+ if (!anchorText && textAfter) {
573
+ const wordMatch = textAfter.match(/^\s*(\S+)/);
574
+ if (wordMatch) {
575
+ anchorText = wordMatch[1] ?? '';
576
+ textAfter = textAfter.slice(wordMatch[0].length);
577
+ }
578
+ }
579
+ if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
580
+ textAfter = textAfter.slice(1);
581
+ }
582
+ // Suppress unused warning for pre-empty-anchor fullText var
583
+ void fullText;
468
584
 
469
- // When anchor is still empty, normalize double spaces to single space
470
- if (!anchorText && textBefore.endsWith(' ') && textAfter.startsWith(' ')) {
471
- textAfter = textAfter.slice(1); // Remove leading space from textAfter
585
+ if (textBefore) {
586
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textBefore}</w:t></w:r>`;
587
+ }
588
+ emitRangeStarts();
589
+ if (anchorText) {
590
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${anchorText}</w:t></w:r>`;
591
+ }
592
+ emitRangeEnds();
593
+ if (textAfter) {
594
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${textAfter}</w:t></w:r>`;
595
+ }
596
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(startRunClose);
597
+ injectedIds.add(comment.id);
598
+ continue;
472
599
  }
473
600
 
474
- // Build replacement
475
- let replacement = '';
476
-
477
- if (textBefore) {
478
- replacement += `<w:r>${rPr}${tElement}${textBefore}</w:t></w:r>`;
479
- }
601
+ // Multi-run path: markers sit in different <w:r> blocks because pandoc
602
+ // applied mid-anchor styling. Split the start run at the start marker,
603
+ // keep all middle runs verbatim (they carry the styled anchor portions),
604
+ // split the end run at the end marker.
605
+ const startInfo = dissectRun(startRunFull, startMarker);
606
+ const endInfo = dissectRun(endRunFull, endMarker);
607
+ if (!startInfo || !endInfo) continue;
480
608
 
481
- // Find replies to this comment
482
- const replies = commentsWithIds.filter(c => c.isReply && c.parentIdx === comment?.commentIdx);
609
+ const middle = documentXml.slice(startRunClose, endRunOpen);
483
610
 
484
- // Start ranges for parent AND all replies (nested)
485
- replacement += `<w:commentRangeStart w:id="${comment.id}"/>`;
486
- for (const reply of replies) {
487
- replacement += `<w:commentRangeStart w:id="${reply.id}"/>`;
611
+ if (startInfo.textBefore) {
612
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textBefore}</w:t></w:r>`;
488
613
  }
489
-
490
- // Anchor text
491
- if (anchorText) {
492
- replacement += `<w:r>${rPr}${tElement}${anchorText}</w:t></w:r>`;
614
+ emitRangeStarts();
615
+ if (startInfo.textAfter) {
616
+ replacement += `<w:r>${startInfo.rPr}${startInfo.tElement}${startInfo.textAfter}</w:t></w:r>`;
493
617
  }
494
-
495
- // End parent range and reference (NO rStyle wrapper - required for threading)
496
- replacement += `<w:commentRangeEnd w:id="${comment.id}"/>`;
497
- replacement += `<w:r><w:commentReference w:id="${comment.id}"/></w:r>`;
498
-
499
- // End reply ranges and references (same position as parent, NO rStyle wrapper)
500
- for (const reply of replies) {
501
- replacement += `<w:commentRangeEnd w:id="${reply.id}"/>`;
502
- replacement += `<w:r><w:commentReference w:id="${reply.id}"/></w:r>`;
503
- injectedIds.add(reply.id);
618
+ replacement += middle;
619
+ if (endInfo.textBefore) {
620
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textBefore}</w:t></w:r>`;
504
621
  }
505
-
506
- if (textAfter) {
507
- replacement += `<w:r>${rPr}${tElement}${textAfter}</w:t></w:r>`;
622
+ emitRangeEnds();
623
+ if (endInfo.textAfter) {
624
+ replacement += `<w:r>${endInfo.rPr}${endInfo.tElement}${endInfo.textAfter}</w:t></w:r>`;
508
625
  }
509
626
 
510
- documentXml = documentXml.slice(0, rStart) + replacement + documentXml.slice(rEnd);
627
+ documentXml = documentXml.slice(0, startRunOpen) + replacement + documentXml.slice(endRunClose);
511
628
  injectedIds.add(comment.id);
512
629
  }
513
630
 
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "docrev",
3
- "version": "0.9.7",
3
+ "version": "0.9.13",
4
4
  "description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
5
5
  "type": "module",
6
6
  "types": "dist/lib/types.d.ts",
package/dist/package.json DELETED
@@ -1,137 +0,0 @@
1
- {
2
- "name": "docrev",
3
- "version": "0.9.4",
4
- "description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
5
- "type": "module",
6
- "types": "dist/lib/types.d.ts",
7
- "exports": {
8
- ".": {
9
- "types": "./dist/lib/annotations.d.ts",
10
- "import": "./dist/lib/annotations.js"
11
- },
12
- "./annotations": {
13
- "types": "./dist/lib/annotations.d.ts",
14
- "import": "./dist/lib/annotations.js"
15
- },
16
- "./build": {
17
- "types": "./dist/lib/build.d.ts",
18
- "import": "./dist/lib/build.js"
19
- },
20
- "./citations": {
21
- "types": "./dist/lib/citations.d.ts",
22
- "import": "./dist/lib/citations.js"
23
- },
24
- "./crossref": {
25
- "types": "./dist/lib/crossref.d.ts",
26
- "import": "./dist/lib/crossref.js"
27
- },
28
- "./doi": {
29
- "types": "./dist/lib/doi.d.ts",
30
- "import": "./dist/lib/doi.js"
31
- },
32
- "./equations": {
33
- "types": "./dist/lib/equations.d.ts",
34
- "import": "./dist/lib/equations.js"
35
- },
36
- "./git": {
37
- "types": "./dist/lib/git.d.ts",
38
- "import": "./dist/lib/git.js"
39
- },
40
- "./journals": {
41
- "types": "./dist/lib/journals.d.ts",
42
- "import": "./dist/lib/journals.js"
43
- },
44
- "./merge": {
45
- "types": "./dist/lib/merge.d.ts",
46
- "import": "./dist/lib/merge.js"
47
- },
48
- "./sections": {
49
- "types": "./dist/lib/sections.d.ts",
50
- "import": "./dist/lib/sections.js"
51
- },
52
- "./word": {
53
- "types": "./dist/lib/word.d.ts",
54
- "import": "./dist/lib/word.js"
55
- },
56
- "./variables": {
57
- "types": "./dist/lib/variables.d.ts",
58
- "import": "./dist/lib/variables.js"
59
- },
60
- "./grammar": {
61
- "types": "./dist/lib/grammar.d.ts",
62
- "import": "./dist/lib/grammar.js"
63
- },
64
- "./trackchanges": {
65
- "types": "./dist/lib/trackchanges.d.ts",
66
- "import": "./dist/lib/trackchanges.js"
67
- },
68
- "./spelling": {
69
- "types": "./dist/lib/spelling.d.ts",
70
- "import": "./dist/lib/spelling.js"
71
- },
72
- "./wordcomments": {
73
- "types": "./dist/lib/wordcomments.d.ts",
74
- "import": "./dist/lib/wordcomments.js"
75
- }
76
- },
77
- "engines": {
78
- "node": ">=18.0.0"
79
- },
80
- "bin": {
81
- "rev": "bin/rev.js"
82
- },
83
- "scripts": {
84
- "build": "tsc && node scripts/postbuild.js",
85
- "build:watch": "tsc --watch",
86
- "dev": "tsx bin/rev.ts",
87
- "test": "tsx --test test/*.test.js",
88
- "test:ts": "tsx --test test/*.test.ts",
89
- "test:watch": "node --test --watch test/*.test.js",
90
- "test:coverage": "c8 --reporter=text --reporter=lcov node --test test/*.test.js",
91
- "typecheck": "tsc --noEmit",
92
- "prepublishOnly": "npm run build"
93
- },
94
- "repository": {
95
- "type": "git",
96
- "url": "git+https://github.com/gcol33/docrev.git"
97
- },
98
- "bugs": {
99
- "url": "https://github.com/gcol33/docrev/issues"
100
- },
101
- "homepage": "https://github.com/gcol33/docrev#readme",
102
- "keywords": [
103
- "markdown",
104
- "word",
105
- "docx",
106
- "track-changes",
107
- "comments",
108
- "academic",
109
- "writing",
110
- "pandoc",
111
- "criticmarkup"
112
- ],
113
- "author": "Gilles Colling",
114
- "license": "MIT",
115
- "dependencies": {
116
- "adm-zip": "^0.5.16",
117
- "chalk": "^5.3.0",
118
- "commander": "^12.0.0",
119
- "dictionary-en": "^4.0.0",
120
- "dictionary-en-gb": "^3.0.0",
121
- "diff": "^8.0.2",
122
- "mathml-to-latex": "^1.5.0",
123
- "nspell": "^2.1.5",
124
- "pdf-lib": "^1.17.1",
125
- "pdfjs-dist": "^5.4.530",
126
- "tsx": "^4.21.0",
127
- "xml2js": "^0.6.2",
128
- "yaml": "^2.8.2"
129
- },
130
- "devDependencies": {
131
- "@types/adm-zip": "^0.5.7",
132
- "@types/node": "^25.2.0",
133
- "@types/xml2js": "^0.4.14",
134
- "c8": "^10.1.2",
135
- "typescript": "^5.9.3"
136
- }
137
- }