docrev 0.9.11 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/lib/build.d.ts +12 -0
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +12 -0
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +146 -24
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/types.d.ts +20 -0
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/word-extraction.d.ts +6 -0
- package/dist/lib/word-extraction.d.ts.map +1 -1
- package/dist/lib/word-extraction.js +46 -3
- package/dist/lib/word-extraction.js.map +1 -1
- package/dist/lib/wordcomments.d.ts.map +1 -1
- package/dist/lib/wordcomments.js +23 -5
- package/dist/lib/wordcomments.js.map +1 -1
- package/lib/build.ts +24 -0
- package/lib/import.ts +143 -24
- package/lib/types.ts +20 -0
- package/lib/word-extraction.ts +50 -3
- package/lib/wordcomments.ts +25 -6
- package/package.json +1 -1
package/lib/import.ts
CHANGED
|
@@ -269,6 +269,43 @@ function pushPastSectionHeading(text: string, pos: number): number {
|
|
|
269
269
|
return after;
|
|
270
270
|
}
|
|
271
271
|
|
|
272
|
+
/**
 * Snap a position to the nearest whitespace boundary within ±50 chars so a
 * proportional fallback insertion never lands mid-word.
 *
 * The search widens one character at a time from `pos`; at equal distance the
 * forward candidate is checked before the backward one. The returned index is
 * that of the whitespace character itself, so text inserted there sits
 * directly after the preceding word.
 *
 * @param text - The text the position refers to.
 * @param pos - Candidate insertion offset. Fractional values are truncated
 *   toward zero and `NaN` is treated as 0, so the result is always a usable
 *   string index.
 * @returns An integer in `[0, text.length]`: 0 or `text.length` when `pos` is
 *   out of range, the nearest whitespace index when one exists within the
 *   search radius, otherwise the (truncated) input position unchanged.
 */
function snapToWordBoundary(text: string, pos: number): number {
  // Maximum distance, in UTF-16 code units, searched on either side of `pos`.
  const MAX_SNAP_DISTANCE = 50;
  // Hoisted so the loop does not build a fresh RegExp on every probe. The
  // pattern has no `g`/`y` flag, so `.test()` carries no state between calls.
  const whitespace = /\s/;
  const isWhitespaceAt = (index: number): boolean => whitespace.test(text.charAt(index));

  // NaN fails every comparison below and a fractional offset never indexes a
  // character, so either would otherwise fall straight through and be
  // returned as a non-index. Normalise to an integer first.
  const start = Number.isNaN(pos) ? 0 : Math.trunc(pos);

  if (start <= 0) return 0;
  if (start >= text.length) return text.length;
  if (isWhitespaceAt(start)) return start;

  for (let distance = 1; distance <= MAX_SNAP_DISTANCE; distance++) {
    const ahead = start + distance;
    if (ahead < text.length && isWhitespaceAt(ahead)) return ahead;
    const behind = start - distance;
    if (behind >= 0 && isWhitespaceAt(behind)) return behind;
  }

  // No whitespace within reach (e.g. a very long token): keep the position.
  return start;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Final-resort placement when every text-matching strategy failed. The docx
 * carries a real `<w:commentRangeStart w:id="N">` marker at a known offset
 * inside its body text — that's a structural anchor, even if the anchored
 * span itself is empty and the surrounding context drifted in the target.
 *
 * Map docPosition into the target markdown proportionally and snap to a word
 * boundary. This is approximate when the document was heavily restructured,
 * but it's strictly better than silently dropping a reviewer's comment: the
 * comment lands in roughly the right neighborhood and the reviewer can
 * relocate it during their next pass.
 *
 * @param anchorData - Anchor record for the comment; only `docPosition` and
 *   `docLength` are read here.
 * @param target - Markdown text the comment will be inserted into.
 * @returns An insertion offset into `target`, or `null` when no proportion
 *   can be computed (`docLength` is not positive, or either number is not
 *   finite). A `null` result lets the caller fall through to its regular
 *   "unmatched" handling.
 */
function proportionalFallback(
  anchorData: CommentAnchorData,
  target: string,
): number | null {
  const { docPosition, docLength } = anchorData;

  // A NaN/Infinity here would propagate into a NaN position. Callers classify
  // comments with `pos < 0` (unmatched) and `pos >= 0` (matched); NaN fails
  // both, so the comment would vanish without being placed or counted.
  if (!Number.isFinite(docPosition) || !Number.isFinite(docLength)) return null;
  if (docLength <= 0) return null;

  // Clamp to [0, 1]: a marker offset past the recorded length maps to the end
  // of the target, a negative one to the start.
  const proportion = Math.min(Math.max(docPosition / docLength, 0), 1);
  const rawPos = Math.floor(proportion * target.length);

  // Avoid landing mid-word, then let the section-heading helper adjust the
  // snapped offset.
  return pushPastSectionHeading(target, snapToWordBoundary(target, rawPos));
}
|
|
308
|
+
|
|
272
309
|
/**
|
|
273
310
|
* Insert comments into markdown text based on anchor texts with context
|
|
274
311
|
*/
|
|
@@ -285,11 +322,41 @@ export function insertCommentsIntoMarkdown(
|
|
|
285
322
|
const duplicateWarnings: string[] = [];
|
|
286
323
|
const usedPositions = new Set<number>(); // For tie-breaking: track used positions
|
|
287
324
|
|
|
325
|
+
// Resolve threading: replies share their parent's anchor in Word, so they
|
|
326
|
+
// must inherit the parent's position and ride alongside it as one cluster.
|
|
327
|
+
// Letting each reply run through anchor scoring scatters the cluster (the
|
|
328
|
+
// same docPosition forces `usedPositions` to push later replies onto a
|
|
329
|
+
// different occurrence), which on re-build looks like independent comments
|
|
330
|
+
// and loses the paraIdParent threading. See gcol33/docrev issue #2.
|
|
331
|
+
const inputById = new Map<string, WordComment>();
|
|
332
|
+
for (const c of comments) inputById.set(c.id, c);
|
|
333
|
+
/**
 * Follow `parentId` links upward from `c` and return the id of the top-most
 * comment reachable within the input set.
 *
 * The walk stops at the first comment that has no `parentId`, whose parent is
 * not present in `inputById`, that names itself as parent, or that has
 * already been visited (cyclic links). A comment with no resolvable parent
 * therefore returns its own id.
 */
function rootIdOf(c: WordComment): string {
  // Ids already walked; guards against cycles in the parent links.
  const visited = new Set<string>();
  let node: WordComment = c;
  for (;;) {
    if (!node.parentId || visited.has(node.id)) return node.id;
    visited.add(node.id);
    const next = inputById.get(node.parentId);
    if (next === undefined || next === node) return node.id;
    node = next;
  }
}
|
|
344
|
+
const replyRootId = new Map<string, string>();
|
|
345
|
+
for (const c of comments) {
|
|
346
|
+
const root = rootIdOf(c);
|
|
347
|
+
if (root !== c.id) replyRootId.set(c.id, root);
|
|
348
|
+
}
|
|
349
|
+
|
|
288
350
|
// Anchor matching primitives live in lib/anchor-match.ts so that
|
|
289
351
|
// `rev verify-anchors` can use the same strategies for drift reporting.
|
|
290
352
|
|
|
291
|
-
// Get all positions in order (for sequential tie-breaking)
|
|
353
|
+
// Get all positions in order (for sequential tie-breaking).
|
|
354
|
+
// Replies skip scoring entirely — they piggyback on their root's position
|
|
355
|
+
// in the emit pass below.
|
|
292
356
|
const commentsWithPositions = comments.map((c): CommentWithPos => {
|
|
357
|
+
if (replyRootId.has(c.id)) {
|
|
358
|
+
return { ...c, pos: -1, anchorText: null, strategy: 'reply' };
|
|
359
|
+
}
|
|
293
360
|
const anchorData = anchors.get(c.id);
|
|
294
361
|
if (!anchorData) {
|
|
295
362
|
unmatchedCount++;
|
|
@@ -396,6 +463,14 @@ export function insertCommentsIntoMarkdown(
|
|
|
396
463
|
return { ...c, pos: occurrences[0], anchorText: null, isEmpty: true };
|
|
397
464
|
}
|
|
398
465
|
}
|
|
466
|
+
// Last resort: docx carried a structural marker at docPosition; map
|
|
467
|
+
// it proportionally into the target so the comment isn't dropped.
|
|
468
|
+
if (typeof anchorData === 'object') {
|
|
469
|
+
const fallback = proportionalFallback(anchorData, result);
|
|
470
|
+
if (fallback !== null) {
|
|
471
|
+
return { ...c, pos: fallback, anchorText: null, isEmpty: true, strategy: 'proportional-fallback' };
|
|
472
|
+
}
|
|
473
|
+
}
|
|
399
474
|
unmatchedCount++;
|
|
400
475
|
return { ...c, pos: -1, anchorText: null, isEmpty: true };
|
|
401
476
|
}
|
|
@@ -404,6 +479,14 @@ export function insertCommentsIntoMarkdown(
|
|
|
404
479
|
const { occurrences, matchedAnchor, strategy, stripped } = findAnchorInText(anchor, result, before, after);
|
|
405
480
|
|
|
406
481
|
if (occurrences.length === 0) {
|
|
482
|
+
// Same last-resort as the empty-anchor path: anchor text is gone from
|
|
483
|
+
// the target, but the marker's text-offset survived extraction.
|
|
484
|
+
if (typeof anchorData === 'object') {
|
|
485
|
+
const fallback = proportionalFallback(anchorData, result);
|
|
486
|
+
if (fallback !== null) {
|
|
487
|
+
return { ...c, pos: fallback, anchorText: null, strategy: 'proportional-fallback' };
|
|
488
|
+
}
|
|
489
|
+
}
|
|
407
490
|
unmatchedCount++;
|
|
408
491
|
return { ...c, pos: -1, anchorText: null };
|
|
409
492
|
}
|
|
@@ -434,53 +517,89 @@ export function insertCommentsIntoMarkdown(
|
|
|
434
517
|
}
|
|
435
518
|
});
|
|
436
519
|
|
|
437
|
-
//
|
|
438
|
-
|
|
520
|
+
// Group comments into clusters (root + ordered replies). The root carries
|
|
521
|
+
// the resolved position; replies inherit it and ride along in input order
|
|
522
|
+
// so the rebuilt CriticMarkup looks like `{>>p<<}{>>r1<<}{>>r2<<}[anchor]`
|
|
523
|
+
// and adjacency-based reply detection picks the cluster up again.
|
|
524
|
+
const byId = new Map<string, CommentWithPos>();
|
|
525
|
+
for (const cwp of commentsWithPositions) byId.set(cwp.id, cwp);
|
|
526
|
+
const repliesByRoot = new Map<string, CommentWithPos[]>();
|
|
527
|
+
for (const c of comments) {
|
|
528
|
+
const rootId = replyRootId.get(c.id);
|
|
529
|
+
if (!rootId) continue;
|
|
530
|
+
const cwp = byId.get(c.id);
|
|
531
|
+
if (!cwp) continue;
|
|
532
|
+
const list = repliesByRoot.get(rootId);
|
|
533
|
+
if (list) list.push(cwp);
|
|
534
|
+
else repliesByRoot.set(rootId, [cwp]);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
// Replies whose root never resolved (parent missing from the input slice or
|
|
538
|
+
// parent unmatched) count as unmatched too — there's no position to attach
|
|
539
|
+
// them to.
|
|
540
|
+
for (const [rootId, replies] of repliesByRoot) {
|
|
541
|
+
const root = byId.get(rootId);
|
|
542
|
+
if (!root || root.pos < 0) {
|
|
543
|
+
unmatchedCount += replies.length;
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
// Roots only — replies attach during emission.
|
|
548
|
+
const rootsWithPos = commentsWithPositions.filter(
|
|
549
|
+
c => !replyRootId.has(c.id)
|
|
550
|
+
);
|
|
551
|
+
|
|
552
|
+
// Log any unmatched roots for debugging
|
|
553
|
+
const unmatched = rootsWithPos.filter((c) => c.pos < 0);
|
|
439
554
|
if (process.env.DEBUG) {
|
|
440
|
-
console.log(`[DEBUG] insertComments: ${comments.length} input, ${
|
|
555
|
+
console.log(`[DEBUG] insertComments: ${comments.length} input, ${rootsWithPos.length} roots, ${unmatched.length} unmatched roots, ${replyRootId.size} replies`);
|
|
441
556
|
if (unmatched.length > 0) {
|
|
442
557
|
unmatched.forEach(c => console.log(`[DEBUG] Unmatched ID=${c.id}: anchor="${(c.anchorText || 'none').slice(0,30)}"`));
|
|
443
558
|
}
|
|
444
559
|
}
|
|
445
560
|
|
|
446
|
-
const
|
|
561
|
+
const matchedRoots = rootsWithPos.filter((c) => c.pos >= 0);
|
|
447
562
|
|
|
448
563
|
// Sort by position descending (insert from end to avoid offset issues)
|
|
449
|
-
|
|
564
|
+
matchedRoots.sort((a, b) => b.pos - a.pos);
|
|
450
565
|
|
|
451
|
-
// Insert each
|
|
566
|
+
// Insert each cluster. With `wrapAnchor` (the default), the anchor text
|
|
452
567
|
// gets wrapped in `[anchor]{.mark}` so the rebuilt docx restores the
|
|
453
568
|
// original Word comment range. Without it, the comment block is inserted
|
|
454
569
|
// adjacent to the anchor and prose stays untouched — required for
|
|
455
570
|
// comments-only sync where multiple comments may share one anchor.
|
|
456
|
-
// Skip insertion when
|
|
457
|
-
//
|
|
458
|
-
//
|
|
459
|
-
//
|
|
460
|
-
//
|
|
461
|
-
// away.
|
|
571
|
+
// Skip insertion when the parent's CriticMarkup already lives near the
|
|
572
|
+
// target — re-running sync against the same docx would otherwise stack
|
|
573
|
+
// duplicates. A 200-char window catches both wrapped
|
|
574
|
+
// (`{>>...<<}[anchor]{.mark}`) and bare (`{>>...<<}anchor`) forms while
|
|
575
|
+
// ignoring incidental matches farther away.
|
|
462
576
|
let dedupedCount = 0;
|
|
463
|
-
for (const c of
|
|
464
|
-
const
|
|
577
|
+
for (const c of matchedRoots) {
|
|
578
|
+
const parentBlock = `{>>${c.author}: ${c.text}<<}`;
|
|
579
|
+
const replies = repliesByRoot.get(c.id) ?? [];
|
|
465
580
|
const windowStart = Math.max(0, c.pos - 200);
|
|
466
581
|
const windowEnd = Math.min(result.length, c.pos + 200);
|
|
467
|
-
if (result.slice(windowStart, windowEnd).includes(
|
|
468
|
-
|
|
582
|
+
if (result.slice(windowStart, windowEnd).includes(parentBlock)) {
|
|
583
|
+
// Cluster already synced; treat all members as deduped.
|
|
584
|
+
dedupedCount += 1 + replies.length;
|
|
469
585
|
continue;
|
|
470
586
|
}
|
|
587
|
+
// Replies carry an explicit `↪ ` author prefix so the round-trip does not
|
|
588
|
+
// depend on positional adjacency in the markdown. On dense reviewer docs
|
|
589
|
+
// distinct clusters frequently land at the same anchor position; without
|
|
590
|
+
// the prefix the re-parse would misthread them. The injection side strips
|
|
591
|
+
// `↪ ` back off the author so Word renders the original name.
|
|
592
|
+
const replyBlocks = replies.map(r => `{>>↪ ${r.author}: ${r.text}<<}`);
|
|
593
|
+
const combined = parentBlock + replyBlocks.join('');
|
|
471
594
|
if (wrapAnchor && c.anchorText && c.anchorEnd) {
|
|
472
595
|
const before = result.slice(0, c.pos);
|
|
473
596
|
const anchor = result.slice(c.pos, c.anchorEnd);
|
|
474
597
|
const after = result.slice(c.anchorEnd);
|
|
475
|
-
result = before +
|
|
598
|
+
result = before + combined + `[${anchor}]{.mark}` + after;
|
|
476
599
|
} else {
|
|
477
|
-
|
|
478
|
-
// tweaks; CriticMarkup blocks are invisible to readers, and adding a
|
|
479
|
-
// leading space would shift prose byte-for-byte (relevant when callers
|
|
480
|
-
// verify that --comments-only didn't touch the original).
|
|
481
|
-
result = result.slice(0, c.pos) + comment + result.slice(c.pos);
|
|
600
|
+
result = result.slice(0, c.pos) + combined + result.slice(c.pos);
|
|
482
601
|
}
|
|
483
|
-
placedCount
|
|
602
|
+
placedCount += 1 + replies.length;
|
|
484
603
|
}
|
|
485
604
|
|
|
486
605
|
if (outStats) {
|
package/lib/types.ts
CHANGED
|
@@ -69,6 +69,22 @@ export interface PdfConfig {
|
|
|
69
69
|
geometry?: string;
|
|
70
70
|
linestretch?: number;
|
|
71
71
|
toc?: boolean;
|
|
72
|
+
/**
|
|
73
|
+
* LaTeX engine to use for PDF output. One of `pdflatex` (default),
|
|
74
|
+
* `xelatex`, `lualatex`, `tectonic`, etc. xelatex/lualatex are required
|
|
75
|
+
* for native UTF-8 rendering of diacritics in author names, place
|
|
76
|
+
* names, and species epithets.
|
|
77
|
+
*/
|
|
78
|
+
engine?: string;
|
|
79
|
+
/** Roman/serif main font (xelatex/lualatex only — uses fontspec). */
|
|
80
|
+
mainfont?: string;
|
|
81
|
+
/** Sans-serif font (xelatex/lualatex only). */
|
|
82
|
+
sansfont?: string;
|
|
83
|
+
/** Monospace font (xelatex/lualatex only). */
|
|
84
|
+
monofont?: string;
|
|
85
|
+
numbersections?: boolean;
|
|
86
|
+
template?: string;
|
|
87
|
+
headerIncludes?: string;
|
|
72
88
|
}
|
|
73
89
|
|
|
74
90
|
export interface DocxConfig {
|
|
@@ -338,6 +354,10 @@ export interface JournalFormatting {
|
|
|
338
354
|
linestretch?: number;
|
|
339
355
|
template?: string;
|
|
340
356
|
numbersections?: boolean;
|
|
357
|
+
engine?: string;
|
|
358
|
+
mainfont?: string;
|
|
359
|
+
sansfont?: string;
|
|
360
|
+
monofont?: string;
|
|
341
361
|
};
|
|
342
362
|
docx?: {
|
|
343
363
|
reference?: string;
|
package/lib/word-extraction.ts
CHANGED
|
@@ -18,6 +18,12 @@ export interface WordComment {
|
|
|
18
18
|
author: string;
|
|
19
19
|
date: string;
|
|
20
20
|
text: string;
|
|
21
|
+
/**
|
|
22
|
+
* Parent comment id when this is a reply in a Word comment thread.
|
|
23
|
+
* Resolved from `commentsExtended.xml`'s `w15:paraIdParent` field.
|
|
24
|
+
* `undefined` for top-level comments.
|
|
25
|
+
*/
|
|
26
|
+
parentId?: string;
|
|
21
27
|
}
|
|
22
28
|
|
|
23
29
|
export interface TextNode {
|
|
@@ -126,7 +132,6 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
|
|
|
126
132
|
|
|
127
133
|
const parsed = await parseStringPromise(commentsXml, { explicitArray: false });
|
|
128
134
|
|
|
129
|
-
const ns = 'w:';
|
|
130
135
|
const commentsRoot = parsed['w:comments'];
|
|
131
136
|
if (!commentsRoot || !commentsRoot['w:comment']) {
|
|
132
137
|
return comments;
|
|
@@ -137,12 +142,18 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
|
|
|
137
142
|
? commentsRoot['w:comment']
|
|
138
143
|
: [commentsRoot['w:comment']];
|
|
139
144
|
|
|
145
|
+
// Map every paraId that lives inside a comment back to that comment's id.
|
|
146
|
+
// Word's commentsExtended.xml expresses threading via w15:paraIdParent,
|
|
147
|
+
// which references the parent's first <w:p>. Replies use a secondary
|
|
148
|
+
// (often-empty) <w:p>, so each comment may contribute multiple paraIds.
|
|
149
|
+
const paraIdToCommentId = new Map<string, string>();
|
|
150
|
+
|
|
140
151
|
for (const comment of commentNodes) {
|
|
141
152
|
const id = comment.$?.['w:id'] || '';
|
|
142
153
|
const author = comment.$?.['w:author'] || 'Unknown';
|
|
143
154
|
const date = comment.$?.['w:date'] || '';
|
|
144
155
|
|
|
145
|
-
// Extract text from nested w:p/w:r/w:t elements
|
|
156
|
+
// Extract text from nested w:p/w:r/w:t elements and record paraIds.
|
|
146
157
|
let text = '';
|
|
147
158
|
const extractText = (node: any): void => {
|
|
148
159
|
if (!node) return;
|
|
@@ -160,13 +171,49 @@ export async function extractWordComments(docxPath: string): Promise<WordComment
|
|
|
160
171
|
}
|
|
161
172
|
if (node['w:p']) {
|
|
162
173
|
const paras = Array.isArray(node['w:p']) ? node['w:p'] : [node['w:p']];
|
|
163
|
-
paras
|
|
174
|
+
for (const para of paras) {
|
|
175
|
+
const paraId = para?.$?.['w14:paraId'];
|
|
176
|
+
if (paraId && id) paraIdToCommentId.set(paraId, id);
|
|
177
|
+
extractText(para);
|
|
178
|
+
}
|
|
164
179
|
}
|
|
165
180
|
};
|
|
166
181
|
extractText(comment);
|
|
167
182
|
|
|
168
183
|
comments.push({ id, author, date: date.slice(0, 10), text: text.trim() });
|
|
169
184
|
}
|
|
185
|
+
|
|
186
|
+
// Resolve parent links from commentsExtended.xml. Missing entry just
|
|
187
|
+
// means the docx has no threading metadata (e.g. legacy/non-Word source).
|
|
188
|
+
const extendedEntry = zip.getEntry('word/commentsExtended.xml');
|
|
189
|
+
if (extendedEntry && paraIdToCommentId.size > 0) {
|
|
190
|
+
let extendedXml = '';
|
|
191
|
+
try {
|
|
192
|
+
extendedXml = extendedEntry.getData().toString('utf8');
|
|
193
|
+
} catch {
|
|
194
|
+
// Unreadable threading metadata is non-fatal; skip parent linking.
|
|
195
|
+
}
|
|
196
|
+
if (extendedXml) {
|
|
197
|
+
const parentByCommentId = new Map<string, string>();
|
|
198
|
+
const exPattern = /<w15:commentEx\b([^>]*?)\/>/g;
|
|
199
|
+
let m: RegExpExecArray | null;
|
|
200
|
+
while ((m = exPattern.exec(extendedXml)) !== null) {
|
|
201
|
+
const attrs = m[1] ?? '';
|
|
202
|
+
const paraIdMatch = attrs.match(/w15:paraId="([^"]+)"/);
|
|
203
|
+
const parentMatch = attrs.match(/w15:paraIdParent="([^"]+)"/);
|
|
204
|
+
if (!paraIdMatch || !parentMatch) continue;
|
|
205
|
+
const childCommentId = paraIdToCommentId.get(paraIdMatch[1]);
|
|
206
|
+
const parentCommentId = paraIdToCommentId.get(parentMatch[1]);
|
|
207
|
+
if (childCommentId && parentCommentId && childCommentId !== parentCommentId) {
|
|
208
|
+
parentByCommentId.set(childCommentId, parentCommentId);
|
|
209
|
+
}
|
|
210
|
+
}
|
|
211
|
+
for (const c of comments) {
|
|
212
|
+
const parent = parentByCommentId.get(c.id);
|
|
213
|
+
if (parent) c.parentId = parent;
|
|
214
|
+
}
|
|
215
|
+
}
|
|
216
|
+
}
|
|
170
217
|
} catch (err: any) {
|
|
171
218
|
// Re-throw with more context if it's already an Error we created
|
|
172
219
|
if (err.message.includes('Invalid Word document') || err.message.includes('File not found')) {
|
package/lib/wordcomments.ts
CHANGED
|
@@ -102,7 +102,8 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
102
102
|
return { anchor: text.slice(i + 1, j), endIdx: j + 8 };
|
|
103
103
|
}
|
|
104
104
|
|
|
105
|
-
const
|
|
105
|
+
const REPLY_PREFIX = '↪ ';
|
|
106
|
+
const rawMatches: (ParsedComment & { explicitReply: boolean })[] = [];
|
|
106
107
|
let match: RegExpExecArray | null;
|
|
107
108
|
while ((match = commentPattern.exec(markdown)) !== null) {
|
|
108
109
|
const content = match[1] ?? '';
|
|
@@ -114,6 +115,15 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
114
115
|
text = content.slice(colonIdx + 1).trim();
|
|
115
116
|
}
|
|
116
117
|
|
|
118
|
+
// The `↪ ` prefix is the authoritative reply signal emitted by
|
|
119
|
+
// `insertCommentsIntoMarkdown`. Strip it from the author before injection
|
|
120
|
+
// so Word displays the real name.
|
|
121
|
+
let explicitReply = false;
|
|
122
|
+
if (author.startsWith(REPLY_PREFIX)) {
|
|
123
|
+
explicitReply = true;
|
|
124
|
+
author = author.slice(REPLY_PREFIX.length).trim();
|
|
125
|
+
}
|
|
126
|
+
|
|
117
127
|
const commentEnd = match.index + match[0].length;
|
|
118
128
|
const trailing = tryParseTrailingAnchor(markdown, commentEnd);
|
|
119
129
|
|
|
@@ -124,6 +134,7 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
124
134
|
start: match.index,
|
|
125
135
|
end: trailing ? trailing.endIdx : commentEnd,
|
|
126
136
|
fullMatch: markdown.slice(match.index, trailing ? trailing.endIdx : commentEnd),
|
|
137
|
+
explicitReply,
|
|
127
138
|
});
|
|
128
139
|
|
|
129
140
|
// Advance regex lastIndex past the consumed anchor so the next iteration
|
|
@@ -139,10 +150,17 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
139
150
|
return { markedMarkdown: markdown, comments: [] };
|
|
140
151
|
}
|
|
141
152
|
|
|
142
|
-
//
|
|
143
|
-
//
|
|
144
|
-
//
|
|
153
|
+
// Two-mode reply detection driven by the markdown itself:
|
|
154
|
+
// - If any comment carries the `↪ ` author prefix, the markdown came
|
|
155
|
+
// through `insertCommentsIntoMarkdown` and we use prefix-only mode.
|
|
156
|
+
// Distinct clusters that happen to land at gap=0 (a real failure
|
|
157
|
+
// mode on dense reviewer docs — 298-comment paper produced 9 such
|
|
158
|
+
// collisions) are not misthreaded.
|
|
159
|
+
// - If no comment carries the prefix, the markdown was hand-typed.
|
|
160
|
+
// Fall back to gap < 10 adjacency for backward compat with users
|
|
161
|
+
// who write CriticMarkup directly.
|
|
145
162
|
const ADJACENT_THRESHOLD = 10;
|
|
163
|
+
const useExplicitMode = rawMatches.some(m => m.explicitReply);
|
|
146
164
|
const comments: PreparedComment[] = [];
|
|
147
165
|
let clusterParentIdx = -1; // Index of first comment in current cluster
|
|
148
166
|
let lastCommentEnd = -1;
|
|
@@ -151,9 +169,10 @@ export function prepareMarkdownWithMarkers(markdown: string): PrepareResult {
|
|
|
151
169
|
const m = rawMatches[i];
|
|
152
170
|
if (!m) continue;
|
|
153
171
|
|
|
154
|
-
// Check if this comment is adjacent to the previous one
|
|
155
172
|
const gap = lastCommentEnd >= 0 ? m.start - lastCommentEnd : Infinity;
|
|
156
|
-
const isAdjacent =
|
|
173
|
+
const isAdjacent = useExplicitMode
|
|
174
|
+
? m.explicitReply
|
|
175
|
+
: gap < ADJACENT_THRESHOLD;
|
|
157
176
|
|
|
158
177
|
// Reset cluster if there's a gap (comments not in same cluster)
|
|
159
178
|
if (!isAdjacent) {
|