docrev 0.9.7 → 0.9.13
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/CHANGELOG.md +21 -0
- package/dev_notes/stress2/adversarial.docx +0 -0
- package/dev_notes/stress2/build_adversarial.ts +186 -0
- package/dev_notes/stress2/drift_matcher.ts +62 -0
- package/dev_notes/stress2/probe_anchors.ts +35 -0
- package/dev_notes/stress2/project/adversarial.docx +0 -0
- package/dev_notes/stress2/project/discussion.before.md +3 -0
- package/dev_notes/stress2/project/discussion.md +3 -0
- package/dev_notes/stress2/project/methods.before.md +20 -0
- package/dev_notes/stress2/project/methods.md +20 -0
- package/dev_notes/stress2/project/rev.yaml +5 -0
- package/dev_notes/stress2/project/sections.yaml +4 -0
- package/dev_notes/stress2/sections.yaml +5 -0
- package/dev_notes/stress2/trace_placement.ts +50 -0
- package/dev_notes/stresstest_boundaries.ts +27 -0
- package/dev_notes/stresstest_drift_apply.ts +43 -0
- package/dev_notes/stresstest_drift_compare.ts +43 -0
- package/dev_notes/stresstest_drift_v2.ts +54 -0
- package/dev_notes/stresstest_inspect.ts +54 -0
- package/dev_notes/stresstest_pstyle.ts +55 -0
- package/dev_notes/stresstest_section_debug.ts +23 -0
- package/dev_notes/stresstest_split.ts +70 -0
- package/dev_notes/stresstest_trace.ts +19 -0
- package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
- package/dist/lib/anchor-match.d.ts +10 -0
- package/dist/lib/anchor-match.d.ts.map +1 -1
- package/dist/lib/anchor-match.js +35 -0
- package/dist/lib/anchor-match.js.map +1 -1
- package/dist/lib/annotations.d.ts.map +1 -1
- package/dist/lib/annotations.js +16 -6
- package/dist/lib/annotations.js.map +1 -1
- package/dist/lib/build.d.ts +12 -0
- package/dist/lib/build.d.ts.map +1 -1
- package/dist/lib/build.js +12 -0
- package/dist/lib/build.js.map +1 -1
- package/dist/lib/commands/quality.js +1 -1
- package/dist/lib/commands/quality.js.map +1 -1
- package/dist/lib/commands/section-boundaries.d.ts +1 -1
- package/dist/lib/commands/section-boundaries.d.ts.map +1 -1
- package/dist/lib/commands/section-boundaries.js +12 -2
- package/dist/lib/commands/section-boundaries.js.map +1 -1
- package/dist/lib/commands/sync.js +19 -13
- package/dist/lib/commands/sync.js.map +1 -1
- package/dist/lib/commands/verify-anchors.d.ts.map +1 -1
- package/dist/lib/commands/verify-anchors.js +15 -4
- package/dist/lib/commands/verify-anchors.js.map +1 -1
- package/dist/lib/comment-realign.js +2 -2
- package/dist/lib/comment-realign.js.map +1 -1
- package/dist/lib/import.d.ts +12 -0
- package/dist/lib/import.d.ts.map +1 -1
- package/dist/lib/import.js +289 -60
- package/dist/lib/import.js.map +1 -1
- package/dist/lib/response.js +1 -1
- package/dist/lib/response.js.map +1 -1
- package/dist/lib/types.d.ts +20 -0
- package/dist/lib/types.d.ts.map +1 -1
- package/dist/lib/word-extraction.d.ts +6 -0
- package/dist/lib/word-extraction.d.ts.map +1 -1
- package/dist/lib/word-extraction.js +46 -3
- package/dist/lib/word-extraction.js.map +1 -1
- package/dist/lib/wordcomments.d.ts.map +1 -1
- package/dist/lib/wordcomments.js +188 -78
- package/dist/lib/wordcomments.js.map +1 -1
- package/lib/anchor-match.ts +38 -0
- package/lib/annotations.ts +16 -6
- package/lib/build.ts +24 -0
- package/lib/commands/quality.ts +1 -1
- package/lib/commands/section-boundaries.ts +11 -1
- package/lib/commands/sync.ts +21 -16
- package/lib/commands/verify-anchors.ts +15 -4
- package/lib/comment-realign.ts +2 -2
- package/lib/import.ts +304 -61
- package/lib/response.ts +1 -1
- package/lib/types.ts +20 -0
- package/lib/word-extraction.ts +50 -3
- package/lib/wordcomments.ts +205 -88
- package/package.json +1 -1
- package/dist/package.json +0 -137
package/lib/commands/sync.ts
CHANGED
|
@@ -571,10 +571,12 @@ async function syncCommentsOnly(
|
|
|
571
571
|
let comments;
|
|
572
572
|
let anchors;
|
|
573
573
|
let headings;
|
|
574
|
+
let fullDocText = '';
|
|
574
575
|
try {
|
|
575
576
|
comments = await extractWordComments(docx);
|
|
576
577
|
const result = await extractCommentAnchors(docx);
|
|
577
578
|
anchors = result.anchors;
|
|
579
|
+
fullDocText = result.fullDocText;
|
|
578
580
|
headings = await extractHeadings(docx);
|
|
579
581
|
spin.stop();
|
|
580
582
|
} catch (err) {
|
|
@@ -592,7 +594,7 @@ async function syncCommentsOnly(
|
|
|
592
594
|
return;
|
|
593
595
|
}
|
|
594
596
|
|
|
595
|
-
const boundaries = computeSectionBoundaries(config.sections, headings);
|
|
597
|
+
const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
|
|
596
598
|
|
|
597
599
|
if (boundaries.length === 0) {
|
|
598
600
|
console.error(fmt.status('warning', 'No section headings detected in Word document.'));
|
|
@@ -615,12 +617,12 @@ async function syncCommentsOnly(
|
|
|
615
617
|
}
|
|
616
618
|
|
|
617
619
|
const firstBoundaryStart = boundaries[0].start;
|
|
618
|
-
const results: Array<{ file: string; placed: number; unmatched: number; skipped: boolean }> = [];
|
|
620
|
+
const results: Array<{ file: string; placed: number; deduped: number; unmatched: number; skipped: boolean }> = [];
|
|
619
621
|
|
|
620
622
|
for (const boundary of activeBoundaries) {
|
|
621
623
|
const sectionPath = path.join(options.dir, boundary.file);
|
|
622
624
|
if (!fs.existsSync(sectionPath)) {
|
|
623
|
-
results.push({ file: boundary.file, placed: 0, unmatched: 0, skipped: true });
|
|
625
|
+
results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: true });
|
|
624
626
|
continue;
|
|
625
627
|
}
|
|
626
628
|
|
|
@@ -635,55 +637,58 @@ async function syncCommentsOnly(
|
|
|
635
637
|
});
|
|
636
638
|
|
|
637
639
|
if (sectionComments.length === 0) {
|
|
638
|
-
results.push({ file: boundary.file, placed: 0, unmatched: 0, skipped: false });
|
|
640
|
+
results.push({ file: boundary.file, placed: 0, deduped: 0, unmatched: 0, skipped: false });
|
|
639
641
|
continue;
|
|
640
642
|
}
|
|
641
643
|
|
|
642
644
|
const original = fs.readFileSync(sectionPath, 'utf-8');
|
|
643
|
-
const commentPattern = /\{>>.*?<<\}/gs;
|
|
644
|
-
const beforeCount = (original.match(commentPattern) || []).length;
|
|
645
645
|
|
|
646
|
+
const stats = { placed: 0, deduped: 0, unmatched: 0 };
|
|
646
647
|
const annotated = insertCommentsIntoMarkdown(original, sectionComments, anchors, {
|
|
647
648
|
quiet: !process.env.DEBUG,
|
|
648
649
|
sectionBoundary: { start: boundary.start, end: boundary.end },
|
|
649
650
|
wrapAnchor: false,
|
|
651
|
+
outStats: stats,
|
|
650
652
|
});
|
|
651
653
|
|
|
652
|
-
|
|
653
|
-
const placed = afterCount - beforeCount;
|
|
654
|
-
const unmatched = sectionComments.length - placed;
|
|
655
|
-
|
|
656
|
-
if (!options.dryRun && placed > 0) {
|
|
654
|
+
if (!options.dryRun && stats.placed > 0) {
|
|
657
655
|
fs.writeFileSync(sectionPath, annotated, 'utf-8');
|
|
658
656
|
}
|
|
659
|
-
results.push({ file: boundary.file,
|
|
657
|
+
results.push({ file: boundary.file, ...stats, skipped: false });
|
|
660
658
|
}
|
|
661
659
|
|
|
662
660
|
const tableRows = results.map(r => {
|
|
663
661
|
if (r.skipped) {
|
|
664
|
-
return [chalk.dim(r.file), chalk.yellow('missing'), '', ''];
|
|
662
|
+
return [chalk.dim(r.file), chalk.yellow('missing'), '', '', ''];
|
|
665
663
|
}
|
|
666
664
|
return [
|
|
667
665
|
chalk.bold(r.file),
|
|
668
666
|
chalk.green(`${r.placed}`),
|
|
667
|
+
r.deduped > 0 ? chalk.cyan(`${r.deduped}`) : chalk.dim('-'),
|
|
669
668
|
r.unmatched > 0 ? chalk.yellow(`${r.unmatched}`) : chalk.dim('-'),
|
|
670
669
|
chalk.dim('comments only'),
|
|
671
670
|
];
|
|
672
671
|
});
|
|
673
672
|
|
|
674
673
|
console.log(fmt.table(
|
|
675
|
-
['File', 'Placed', 'Unmatched', 'Mode'],
|
|
674
|
+
['File', 'Placed', 'Already', 'Unmatched', 'Mode'],
|
|
676
675
|
tableRows,
|
|
677
|
-
{ align: ['left', 'right', 'right', 'left'] },
|
|
676
|
+
{ align: ['left', 'right', 'right', 'right', 'left'] },
|
|
678
677
|
));
|
|
679
678
|
console.log();
|
|
680
679
|
|
|
681
680
|
const totalPlaced = results.reduce((s, r) => s + r.placed, 0);
|
|
681
|
+
const totalDeduped = results.reduce((s, r) => s + r.deduped, 0);
|
|
682
682
|
const totalUnmatched = results.reduce((s, r) => s + r.unmatched, 0);
|
|
683
683
|
|
|
684
684
|
const lines: string[] = [];
|
|
685
685
|
lines.push(`${chalk.bold(comments.length)} comments in document`);
|
|
686
|
-
|
|
686
|
+
if (totalPlaced > 0) {
|
|
687
|
+
lines.push(`${chalk.bold(totalPlaced)} placed at anchors`);
|
|
688
|
+
}
|
|
689
|
+
if (totalDeduped > 0) {
|
|
690
|
+
lines.push(`${chalk.cyan(totalDeduped)} already present (skipped to avoid duplication)`);
|
|
691
|
+
}
|
|
687
692
|
if (totalUnmatched > 0) {
|
|
688
693
|
lines.push(`${chalk.yellow(totalUnmatched)} unmatched (no anchor in current prose)`);
|
|
689
694
|
}
|
|
@@ -23,7 +23,7 @@ import {
|
|
|
23
23
|
jsonOutput,
|
|
24
24
|
} from './context.js';
|
|
25
25
|
import type { Command } from 'commander';
|
|
26
|
-
import { findAnchorInText, classifyStrategy, type AnchorMatchQuality } from '../anchor-match.js';
|
|
26
|
+
import { findAnchorInText, classifyStrategy, scoreContextAt, type AnchorMatchQuality } from '../anchor-match.js';
|
|
27
27
|
import type { CommentAnchorData } from '../word-extraction.js';
|
|
28
28
|
import { computeSectionBoundaries } from './section-boundaries.js';
|
|
29
29
|
|
|
@@ -71,10 +71,12 @@ export function register(program: Command): void {
|
|
|
71
71
|
let comments;
|
|
72
72
|
let anchors;
|
|
73
73
|
let headings;
|
|
74
|
+
let fullDocText = '';
|
|
74
75
|
try {
|
|
75
76
|
comments = await extractWordComments(docxPath);
|
|
76
77
|
const result = await extractCommentAnchors(docxPath);
|
|
77
78
|
anchors = result.anchors;
|
|
79
|
+
fullDocText = result.fullDocText;
|
|
78
80
|
headings = await extractHeadings(docxPath);
|
|
79
81
|
} catch (err) {
|
|
80
82
|
const error = err as Error;
|
|
@@ -88,7 +90,7 @@ export function register(program: Command): void {
|
|
|
88
90
|
return;
|
|
89
91
|
}
|
|
90
92
|
|
|
91
|
-
const boundaries = computeSectionBoundaries(config.sections, headings);
|
|
93
|
+
const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);
|
|
92
94
|
|
|
93
95
|
// Cache section markdown contents on first read
|
|
94
96
|
const sectionCache = new Map<string, string>();
|
|
@@ -165,7 +167,16 @@ export function register(program: Command): void {
|
|
|
165
167
|
const search = findAnchorInText(anchor.anchor, md, anchor.before, anchor.after);
|
|
166
168
|
let quality: AnchorMatchQuality | 'ambiguous' = classifyStrategy(search.strategy, search.occurrences.length);
|
|
167
169
|
if (quality === 'clean' && search.occurrences.length > 1) {
|
|
168
|
-
|
|
170
|
+
// Multiple direct hits — only flag as ambiguous when before/after
|
|
171
|
+
// context can't pick a clear winner. If one candidate scores
|
|
172
|
+
// strictly higher than the others, sync will place it correctly.
|
|
173
|
+
const anchorLen = anchor.anchor.length;
|
|
174
|
+
const scores = search.occurrences.map(p => scoreContextAt(p, md, anchor.before, anchor.after, anchorLen));
|
|
175
|
+
const max = Math.max(...scores);
|
|
176
|
+
const winners = scores.filter(s => s === max).length;
|
|
177
|
+
if (max === 0 || winners > 1) {
|
|
178
|
+
quality = 'ambiguous';
|
|
179
|
+
}
|
|
169
180
|
}
|
|
170
181
|
|
|
171
182
|
reports.push({
|
|
@@ -239,7 +250,7 @@ function printReport(docxPath: string, reports: CommentReport[]): void {
|
|
|
239
250
|
if (totals.unmatched > 0 || totals.ambiguous > 0) {
|
|
240
251
|
console.log();
|
|
241
252
|
console.log(chalk.dim('Comments flagged "unmatched" or "ambiguous" need manual placement.'));
|
|
242
|
-
console.log(chalk.dim('Run "rev sync --
|
|
253
|
+
console.log(chalk.dim('Run "rev sync --comments-only" to import the matched ones without touching prose.'));
|
|
243
254
|
}
|
|
244
255
|
}
|
|
245
256
|
|
package/lib/comment-realign.ts
CHANGED
|
@@ -370,7 +370,7 @@ export async function realignComments(
|
|
|
370
370
|
|
|
371
371
|
// Strip ALL comments (both authors) from markdown to start fresh
|
|
372
372
|
let markdown = originalMarkdown;
|
|
373
|
-
markdown = markdown.replace(/\s*\{>>[
|
|
373
|
+
markdown = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
|
|
374
374
|
console.log(`Stripped all comments from markdown`);
|
|
375
375
|
|
|
376
376
|
// Parse markdown paragraphs
|
|
@@ -469,7 +469,7 @@ export async function realignMarkdown(
|
|
|
469
469
|
);
|
|
470
470
|
|
|
471
471
|
// Strip ALL comments from markdown
|
|
472
|
-
let result = markdown.replace(/\s*\{>>[
|
|
472
|
+
let result = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
|
|
473
473
|
|
|
474
474
|
// Parse markdown paragraphs
|
|
475
475
|
const mdParagraphs = parseMdParagraphs(result);
|
package/lib/import.ts
CHANGED
|
@@ -36,7 +36,65 @@ import {
|
|
|
36
36
|
parseVisibleComments,
|
|
37
37
|
convertVisibleComments,
|
|
38
38
|
} from './restore-references.js';
|
|
39
|
-
import { findAnchorInText } from './anchor-match.js';
|
|
39
|
+
import { findAnchorInText, findAllOccurrences } from './anchor-match.js';
|
|
40
|
+
|
|
41
|
+
/**
 * Pick the best position from candidate `occurrences` given the
 * surrounding `before` / `after` context from the docx, while
 * respecting `usedPositions` to avoid stacking distinct comments at
 * the same anchor instance.
 *
 * Scoring (case-insensitive), per candidate and per side:
 *   - +2 for every context word longer than 3 characters that appears in
 *     the window of text adjacent to the candidate (context length + 20
 *     characters of slack);
 *   - +5 when the 30 context characters closest to the anchor appear
 *     verbatim in that window.
 * The highest score wins; ties go to the lowest position.
 *
 * @param occurrences   Candidate start offsets of the anchor in `result`.
 * @param result        Text the offsets refer to.
 * @param before        Context preceding the anchor; empty string disables this side.
 * @param after         Context following the anchor; empty string disables this side.
 * @param anchorLen     Length of the matched anchor, used to locate the "after" window.
 * @param usedPositions Offsets already claimed; read here, never mutated.
 * @returns The chosen position, or -1 if every candidate is already used
 *          (or there are no candidates).
 */
function pickBestOccurrence(
  occurrences: number[],
  result: string,
  before: string,
  after: string,
  anchorLen: number,
  usedPositions: Set<number>,
): number {
  // Context preparation does not depend on the candidate, so do it once
  // instead of once per occurrence.
  const isSignificant = (word: string): boolean => word.length > 3;
  const beforeLower = before ? before.toLowerCase() : '';
  const afterLower = after ? after.toLowerCase() : '';
  const beforeWords = beforeLower.split(/\s+/).filter(isSignificant);
  const afterWords = afterLower.split(/\s+/).filter(isSignificant);
  const beforeTail = beforeLower.slice(-30);
  const afterHead = afterLower.slice(0, 30);

  // Score one side: shared words plus a bonus for the verbatim near-anchor slice.
  const scoreWindow = (window: string, words: string[], verbatim: string): number => {
    let score = 0;
    for (const word of words) {
      if (window.includes(word)) score += 2;
    }
    if (window.includes(verbatim)) score += 5;
    return score;
  };

  let bestPos = -1;
  // Starts below any reachable score so the first free candidate is always taken.
  let bestScore = -1;

  for (const pos of occurrences) {
    if (usedPositions.has(pos)) continue;

    let score = 0;
    if (before) {
      const windowBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
      score += scoreWindow(windowBefore, beforeWords, beforeTail);
    }
    if (after) {
      const afterStart = pos + anchorLen;
      const windowAfter = result.slice(afterStart, afterStart + after.length + 20).toLowerCase();
      score += scoreWindow(windowAfter, afterWords, afterHead);
    }

    if (score > bestScore || (score === bestScore && pos < bestPos)) {
      bestScore = score;
      bestPos = pos;
    }
  }

  return bestPos;
}
|
|
40
98
|
|
|
41
99
|
// Re-export everything so existing imports from './import.js' still work
|
|
42
100
|
export {
|
|
@@ -100,6 +158,14 @@ export interface InsertCommentsOptions {
|
|
|
100
158
|
* comments sharing one anchor don't produce nested broken markup.
|
|
101
159
|
*/
|
|
102
160
|
wrapAnchor?: boolean;
|
|
161
|
+
/**
|
|
162
|
+
* Mutable output: when provided, the function fills in counters so callers
|
|
163
|
+
* can distinguish placement outcomes in their summary. `placed` counts new
|
|
164
|
+
* insertions, `deduped` counts comments that were already present at their
|
|
165
|
+
* anchor (skipped to avoid duplication on re-sync), `unmatched` counts
|
|
166
|
+
* comments whose anchor couldn't be located.
|
|
167
|
+
*/
|
|
168
|
+
outStats?: { placed: number; deduped: number; unmatched: number };
|
|
103
169
|
}
|
|
104
170
|
|
|
105
171
|
export interface CommentWithPos {
|
|
@@ -175,6 +241,71 @@ export interface MoveExtractedMediaResult {
|
|
|
175
241
|
// Functions
|
|
176
242
|
// ============================================
|
|
177
243
|
|
|
244
|
+
/**
 * Keep an insertion position from landing before or inside a section
 * file's `# Heading` line.
 *
 * A comment authored at the very start of a Word section maps to
 * `pos === 0`, but inserting at column 0 of a markdown file that begins
 * with `# Heading` puts the `{>>...<<}` before the heading marker — Pandoc
 * then treats the line as ordinary paragraph text and the comment renders
 * in the previous section.
 *
 * The first ATX heading line (`#`–`######`, a space or tab, then text) in
 * `text` is located. If `pos` is at or past the end of that line it is
 * returned unchanged; otherwise the position just after the heading and
 * any following blank lines is returned.
 *
 * @param text Markdown content of the section file.
 * @param pos  Candidate insertion offset into `text`.
 * @returns `pos`, or the offset of the first body character after the
 *          heading when `pos` falls at or before the heading line.
 *          Returns `pos` untouched when `text` has no heading.
 */
function pushPastSectionHeading(text: string, pos: number): number {
  // `[ \t]` rather than `\s`: `\s` also matches a newline, which let a bare
  // `#` line swallow the following body line as if it were heading text.
  const heading = /^#{1,6}[ \t].+$/m.exec(text);
  if (!heading) return pos;

  const headingEnd = heading.index + heading[0].length;
  if (pos >= headingEnd) return pos;

  // pos is at-or-before the first heading line. Skip the line terminator
  // and any blank lines so we land at the start of the first body paragraph.
  let bodyStart = headingEnd;
  while (bodyStart < text.length && (text[bodyStart] === '\n' || text[bodyStart] === '\r')) {
    bodyStart++;
  }
  return bodyStart;
}
|
|
271
|
+
|
|
272
|
+
/**
 * Move `pos` onto the closest whitespace character of `text`, looking at
 * most 50 characters in either direction, so that a proportional fallback
 * insertion never lands mid-word.
 *
 * Positions at or below 0 clamp to 0 and positions at or beyond the end
 * clamp to `text.length`. At equal distance the later (forward) offset is
 * preferred. If no whitespace is found within reach, `pos` is returned
 * unchanged.
 *
 * @param text Text the position refers to.
 * @param pos  Candidate offset into `text`.
 * @returns The snapped offset.
 */
function snapToWordBoundary(text: string, pos: number): number {
  if (pos <= 0) return 0;
  if (pos >= text.length) return text.length;

  const maxReach = 50;
  const isWhitespaceAt = (index: number): boolean => /\s/.test(text[index] ?? '');

  let reach = 0;
  while (reach <= maxReach) {
    // Distance 0 is the position itself; afterwards probe forward, then backward.
    const candidates = reach === 0 ? [pos] : [pos + reach, pos - reach];
    for (const candidate of candidates) {
      const inBounds = candidate >= 0 && candidate < text.length;
      if (inBounds && isWhitespaceAt(candidate)) return candidate;
    }
    reach++;
  }

  return pos;
}
|
|
286
|
+
|
|
287
|
+
/**
 * Final-resort placement when every text-matching strategy failed. The docx
 * carries a real `<w:commentRangeStart w:id="N">` marker at a known offset
 * inside its body text — that's a structural anchor, even if the anchored
 * span itself is empty and the surrounding context drifted in the target.
 *
 * Map docPosition into the target markdown proportionally and snap to a word
 * boundary. This is approximate when the document was heavily restructured,
 * but it's strictly better than silently dropping a reviewer's comment: the
 * comment lands in roughly the right neighborhood and the reviewer can
 * relocate it during their next pass.
 *
 * @param anchorData Anchor record for the comment; only `docPosition` and
 *                   `docLength` are read here.
 * @param target     Markdown text the comment will be inserted into.
 * @returns An insertion offset into `target`, or `null` when the anchor
 *          carries no usable offset (non-positive or non-finite
 *          `docLength`, or non-finite `docPosition`).
 */
function proportionalFallback(
  anchorData: CommentAnchorData,
  target: string,
): number | null {
  const { docPosition, docLength } = anchorData;

  // A missing/NaN offset would otherwise produce a NaN position, which is
  // neither `< 0` nor `>= 0` — callers that split on those tests would lose
  // the comment without counting it as unmatched. Report "no fallback" instead.
  if (!Number.isFinite(docLength) || docLength <= 0) return null;
  if (!Number.isFinite(docPosition)) return null;

  // Clamp to [0, 1] so an out-of-range marker still maps inside the target.
  const proportion = Math.min(Math.max(docPosition / docLength, 0), 1.0);
  const rawPos = Math.floor(proportion * target.length);
  return pushPastSectionHeading(target, snapToWordBoundary(target, rawPos));
}
|
|
308
|
+
|
|
178
309
|
/**
|
|
179
310
|
* Insert comments into markdown text based on anchor texts with context
|
|
180
311
|
*/
|
|
@@ -184,17 +315,48 @@ export function insertCommentsIntoMarkdown(
|
|
|
184
315
|
anchors: Map<string, CommentAnchorData | string>,
|
|
185
316
|
options: InsertCommentsOptions = {}
|
|
186
317
|
): string {
|
|
187
|
-
const { quiet = false, sectionBoundary = null, wrapAnchor = true } = options;
|
|
318
|
+
const { quiet = false, sectionBoundary = null, wrapAnchor = true, outStats } = options;
|
|
188
319
|
let result = markdown;
|
|
189
320
|
let unmatchedCount = 0;
|
|
321
|
+
let placedCount = 0;
|
|
190
322
|
const duplicateWarnings: string[] = [];
|
|
191
323
|
const usedPositions = new Set<number>(); // For tie-breaking: track used positions
|
|
192
324
|
|
|
325
|
+
// Resolve threading: replies share their parent's anchor in Word, so they
|
|
326
|
+
// must inherit the parent's position and ride alongside it as one cluster.
|
|
327
|
+
// Letting each reply run through anchor scoring scatters the cluster (the
|
|
328
|
+
// same docPosition forces `usedPositions` to push later replies onto a
|
|
329
|
+
// different occurrence), which on re-build looks like independent comments
|
|
330
|
+
// and loses the paraIdParent threading. See gcol33/docrev issue #2.
|
|
331
|
+
const inputById = new Map<string, WordComment>();
|
|
332
|
+
for (const c of comments) inputById.set(c.id, c);
|
|
333
|
+
function rootIdOf(c: WordComment): string {
|
|
334
|
+
let cur: WordComment = c;
|
|
335
|
+
const seen = new Set<string>();
|
|
336
|
+
while (cur.parentId && !seen.has(cur.id)) {
|
|
337
|
+
seen.add(cur.id);
|
|
338
|
+
const parent = inputById.get(cur.parentId);
|
|
339
|
+
if (!parent || parent === cur) break;
|
|
340
|
+
cur = parent;
|
|
341
|
+
}
|
|
342
|
+
return cur.id;
|
|
343
|
+
}
|
|
344
|
+
const replyRootId = new Map<string, string>();
|
|
345
|
+
for (const c of comments) {
|
|
346
|
+
const root = rootIdOf(c);
|
|
347
|
+
if (root !== c.id) replyRootId.set(c.id, root);
|
|
348
|
+
}
|
|
349
|
+
|
|
193
350
|
// Anchor matching primitives live in lib/anchor-match.ts so that
|
|
194
351
|
// `rev verify-anchors` can use the same strategies for drift reporting.
|
|
195
352
|
|
|
196
|
-
// Get all positions in order (for sequential tie-breaking)
|
|
353
|
+
// Get all positions in order (for sequential tie-breaking).
|
|
354
|
+
// Replies skip scoring entirely — they piggyback on their root's position
|
|
355
|
+
// in the emit pass below.
|
|
197
356
|
const commentsWithPositions = comments.map((c): CommentWithPos => {
|
|
357
|
+
if (replyRootId.has(c.id)) {
|
|
358
|
+
return { ...c, pos: -1, anchorText: null, strategy: 'reply' };
|
|
359
|
+
}
|
|
198
360
|
const anchorData = anchors.get(c.id);
|
|
199
361
|
if (!anchorData) {
|
|
200
362
|
unmatchedCount++;
|
|
@@ -222,6 +384,18 @@ export function insertCommentsIntoMarkdown(
|
|
|
222
384
|
const proportion = Math.min(relativePos / sectionLength, 1.0);
|
|
223
385
|
const markdownPos = Math.floor(proportion * result.length);
|
|
224
386
|
|
|
387
|
+
// For empty anchors, before/after context is the only signal that
|
|
388
|
+
// pinpoints the original split — without it, proportional placement
|
|
389
|
+
// can land mid-word or split unrelated phrases. Try context match
|
|
390
|
+
// first; only fall through to proportional when context is gone.
|
|
391
|
+
if ((!anchor || isEmpty) && (before || after)) {
|
|
392
|
+
const ctx = findAnchorInText('', result, before, after);
|
|
393
|
+
if (ctx.occurrences.length > 0) {
|
|
394
|
+
const pos = pushPastSectionHeading(result, ctx.occurrences[0]);
|
|
395
|
+
return { ...c, pos, anchorText: null, isEmpty: true, strategy: `ctx:${ctx.strategy}` };
|
|
396
|
+
}
|
|
397
|
+
}
|
|
398
|
+
|
|
225
399
|
let insertPos = markdownPos;
|
|
226
400
|
|
|
227
401
|
// Look for nearby word boundary
|
|
@@ -231,26 +405,52 @@ export function insertCommentsIntoMarkdown(
|
|
|
231
405
|
insertPos = Math.max(0, markdownPos - 25) + spaceIdx;
|
|
232
406
|
}
|
|
233
407
|
|
|
234
|
-
// If we have anchor text, try to find it near this position
|
|
408
|
+
// If we have anchor text, try to find it near this position.
|
|
409
|
+
// Collect ALL occurrences in the local window, then disambiguate
|
|
410
|
+
// via before/after context + usedPositions — otherwise two
|
|
411
|
+
// comments sharing the same anchor word would both collide at
|
|
412
|
+
// the leftmost match. The context-scoring helper handles the
|
|
413
|
+
// "repeated formulaic prose" case using docx-side context, which
|
|
414
|
+
// is a stronger signal than raw distance to the proportional
|
|
415
|
+
// insertPos (insertPos is itself an approximation).
|
|
235
416
|
if (anchor && !isEmpty) {
|
|
236
417
|
const searchStart = Math.max(0, insertPos - 200);
|
|
237
418
|
const searchEnd = Math.min(result.length, insertPos + 200);
|
|
238
419
|
const localSearch = result.slice(searchStart, searchEnd).toLowerCase();
|
|
239
420
|
const anchorLower = anchor.toLowerCase();
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
421
|
+
|
|
422
|
+
const localHits = findAllOccurrences(localSearch, anchorLower).map(i => searchStart + i);
|
|
423
|
+
if (localHits.length > 0) {
|
|
424
|
+
const chosen = pickBestOccurrence(localHits, result, before, after, anchor.length, usedPositions);
|
|
425
|
+
if (chosen >= 0) {
|
|
426
|
+
if (localHits.length > 1) {
|
|
427
|
+
duplicateWarnings.push(`"${anchor.slice(0, 40)}${anchor.length > 40 ? '...' : ''}" appears ${localHits.length} times in section window`);
|
|
428
|
+
}
|
|
429
|
+
usedPositions.add(chosen);
|
|
430
|
+
return { ...c, pos: chosen, anchorText: anchor, anchorEnd: chosen + anchor.length, strategy: 'position+text' };
|
|
431
|
+
}
|
|
243
432
|
}
|
|
433
|
+
|
|
244
434
|
// Try first few words
|
|
245
435
|
const words = anchor.split(/\s+/).slice(0, 4).join(' ').toLowerCase();
|
|
246
436
|
if (words.length >= 10) {
|
|
247
|
-
const
|
|
248
|
-
if (
|
|
249
|
-
|
|
437
|
+
const partialHits = findAllOccurrences(localSearch, words).map(i => searchStart + i);
|
|
438
|
+
if (partialHits.length > 0) {
|
|
439
|
+
const chosen = pickBestOccurrence(partialHits, result, before, after, words.length, usedPositions);
|
|
440
|
+
if (chosen >= 0) {
|
|
441
|
+
usedPositions.add(chosen);
|
|
442
|
+
return { ...c, pos: chosen, anchorText: words, anchorEnd: chosen + words.length, strategy: 'position+partial' };
|
|
443
|
+
}
|
|
250
444
|
}
|
|
251
445
|
}
|
|
252
446
|
}
|
|
253
447
|
|
|
448
|
+
// A docPosition at the very start of a section maps to markdownPos=0,
|
|
449
|
+
// which sits before the file's `# Heading` line and gets rendered in
|
|
450
|
+
// the previous section. Push past the heading line so the comment
|
|
451
|
+
// stays inside the section it was authored in.
|
|
452
|
+
insertPos = pushPastSectionHeading(result, insertPos);
|
|
453
|
+
|
|
254
454
|
return { ...c, pos: insertPos, anchorText: null, strategy: 'position-only' };
|
|
255
455
|
}
|
|
256
456
|
}
|
|
@@ -263,6 +463,14 @@ export function insertCommentsIntoMarkdown(
|
|
|
263
463
|
return { ...c, pos: occurrences[0], anchorText: null, isEmpty: true };
|
|
264
464
|
}
|
|
265
465
|
}
|
|
466
|
+
// Last resort: docx carried a structural marker at docPosition; map
|
|
467
|
+
// it proportionally into the target so the comment isn't dropped.
|
|
468
|
+
if (typeof anchorData === 'object') {
|
|
469
|
+
const fallback = proportionalFallback(anchorData, result);
|
|
470
|
+
if (fallback !== null) {
|
|
471
|
+
return { ...c, pos: fallback, anchorText: null, isEmpty: true, strategy: 'proportional-fallback' };
|
|
472
|
+
}
|
|
473
|
+
}
|
|
266
474
|
unmatchedCount++;
|
|
267
475
|
return { ...c, pos: -1, anchorText: null, isEmpty: true };
|
|
268
476
|
}
|
|
@@ -271,6 +479,14 @@ export function insertCommentsIntoMarkdown(
|
|
|
271
479
|
const { occurrences, matchedAnchor, strategy, stripped } = findAnchorInText(anchor, result, before, after);
|
|
272
480
|
|
|
273
481
|
if (occurrences.length === 0) {
|
|
482
|
+
// Same last-resort as the empty-anchor path: anchor text is gone from
|
|
483
|
+
// the target, but the marker's text-offset survived extraction.
|
|
484
|
+
if (typeof anchorData === 'object') {
|
|
485
|
+
const fallback = proportionalFallback(anchorData, result);
|
|
486
|
+
if (fallback !== null) {
|
|
487
|
+
return { ...c, pos: fallback, anchorText: null, strategy: 'proportional-fallback' };
|
|
488
|
+
}
|
|
489
|
+
}
|
|
274
490
|
unmatchedCount++;
|
|
275
491
|
return { ...c, pos: -1, anchorText: null };
|
|
276
492
|
}
|
|
@@ -290,82 +506,106 @@ export function insertCommentsIntoMarkdown(
|
|
|
290
506
|
duplicateWarnings.push(`"${matchedAnchor.slice(0, 40)}${matchedAnchor.length > 40 ? '...' : ''}" appears ${occurrences.length} times`);
|
|
291
507
|
}
|
|
292
508
|
|
|
293
|
-
|
|
294
|
-
|
|
295
|
-
|
|
296
|
-
for (const pos of occurrences) {
|
|
297
|
-
if (usedPositions.has(pos)) continue;
|
|
298
|
-
|
|
299
|
-
let score = 0;
|
|
300
|
-
|
|
301
|
-
if (before) {
|
|
302
|
-
const contextBefore = result.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
|
|
303
|
-
const beforeLower = before.toLowerCase();
|
|
304
|
-
const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
|
|
305
|
-
for (const word of beforeWords) {
|
|
306
|
-
if (contextBefore.includes(word)) score += 2;
|
|
307
|
-
}
|
|
308
|
-
if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
|
|
309
|
-
}
|
|
310
|
-
|
|
311
|
-
if (after) {
|
|
312
|
-
const contextAfter = result.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
|
|
313
|
-
const afterLower = after.toLowerCase();
|
|
314
|
-
const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
|
|
315
|
-
for (const word of afterWords) {
|
|
316
|
-
if (contextAfter.includes(word)) score += 2;
|
|
317
|
-
}
|
|
318
|
-
if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
|
|
319
|
-
}
|
|
320
|
-
|
|
321
|
-
if (score > bestScore || (score === bestScore && pos < bestIdx)) {
|
|
322
|
-
bestScore = score;
|
|
323
|
-
bestIdx = pos;
|
|
324
|
-
}
|
|
325
|
-
}
|
|
326
|
-
|
|
327
|
-
usedPositions.add(bestIdx);
|
|
509
|
+
const bestIdx = pickBestOccurrence(occurrences, result, before, after, anchorLen, usedPositions);
|
|
510
|
+
const finalIdx = bestIdx >= 0 ? bestIdx : occurrences[0];
|
|
511
|
+
usedPositions.add(finalIdx);
|
|
328
512
|
|
|
329
513
|
if (matchedAnchor) {
|
|
330
|
-
return { ...c, pos:
|
|
514
|
+
return { ...c, pos: finalIdx, anchorText: matchedAnchor, anchorEnd: finalIdx + anchorLen };
|
|
331
515
|
} else {
|
|
332
|
-
return { ...c, pos:
|
|
516
|
+
return { ...c, pos: finalIdx, anchorText: null };
|
|
333
517
|
}
|
|
334
518
|
});
|
|
335
519
|
|
|
336
|
-
//
|
|
337
|
-
|
|
520
|
+
// Group comments into clusters (root + ordered replies). The root carries
|
|
521
|
+
// the resolved position; replies inherit it and ride along in input order
|
|
522
|
+
// so the rebuilt CriticMarkup looks like `{>>p<<}{>>r1<<}{>>r2<<}[anchor]`
|
|
523
|
+
// and adjacency-based reply detection picks the cluster up again.
|
|
524
|
+
const byId = new Map<string, CommentWithPos>();
|
|
525
|
+
for (const cwp of commentsWithPositions) byId.set(cwp.id, cwp);
|
|
526
|
+
const repliesByRoot = new Map<string, CommentWithPos[]>();
|
|
527
|
+
for (const c of comments) {
|
|
528
|
+
const rootId = replyRootId.get(c.id);
|
|
529
|
+
if (!rootId) continue;
|
|
530
|
+
const cwp = byId.get(c.id);
|
|
531
|
+
if (!cwp) continue;
|
|
532
|
+
const list = repliesByRoot.get(rootId);
|
|
533
|
+
if (list) list.push(cwp);
|
|
534
|
+
else repliesByRoot.set(rootId, [cwp]);
|
|
535
|
+
}
|
|
536
|
+
|
|
537
|
+
// Replies whose root never resolved (parent missing from the input slice or
|
|
538
|
+
// parent unmatched) count as unmatched too — there's no position to attach
|
|
539
|
+
// them to.
|
|
540
|
+
for (const [rootId, replies] of repliesByRoot) {
|
|
541
|
+
const root = byId.get(rootId);
|
|
542
|
+
if (!root || root.pos < 0) {
|
|
543
|
+
unmatchedCount += replies.length;
|
|
544
|
+
}
|
|
545
|
+
}
|
|
546
|
+
|
|
547
|
+
// Roots only — replies attach during emission.
|
|
548
|
+
const rootsWithPos = commentsWithPositions.filter(
|
|
549
|
+
c => !replyRootId.has(c.id)
|
|
550
|
+
);
|
|
551
|
+
|
|
552
|
+
// Log any unmatched roots for debugging
|
|
553
|
+
const unmatched = rootsWithPos.filter((c) => c.pos < 0);
|
|
338
554
|
if (process.env.DEBUG) {
|
|
339
|
-
console.log(`[DEBUG] insertComments: ${comments.length} input, ${
|
|
555
|
+
console.log(`[DEBUG] insertComments: ${comments.length} input, ${rootsWithPos.length} roots, ${unmatched.length} unmatched roots, ${replyRootId.size} replies`);
|
|
340
556
|
if (unmatched.length > 0) {
|
|
341
557
|
unmatched.forEach(c => console.log(`[DEBUG] Unmatched ID=${c.id}: anchor="${(c.anchorText || 'none').slice(0,30)}"`));
|
|
342
558
|
}
|
|
343
559
|
}
|
|
344
560
|
|
|
345
|
-
const
|
|
561
|
+
const matchedRoots = rootsWithPos.filter((c) => c.pos >= 0);
|
|
346
562
|
|
|
347
563
|
// Sort by position descending (insert from end to avoid offset issues)
|
|
348
|
-
|
|
564
|
+
matchedRoots.sort((a, b) => b.pos - a.pos);
|
|
349
565
|
|
|
350
|
-
// Insert each
|
|
566
|
+
// Insert each cluster. With `wrapAnchor` (the default), the anchor text
|
|
351
567
|
// gets wrapped in `[anchor]{.mark}` so the rebuilt docx restores the
|
|
352
568
|
// original Word comment range. Without it, the comment block is inserted
|
|
353
569
|
// adjacent to the anchor and prose stays untouched — required for
|
|
354
570
|
// comments-only sync where multiple comments may share one anchor.
|
|
355
|
-
|
|
356
|
-
|
|
571
|
+
// Skip insertion when the parent's CriticMarkup already lives near the
|
|
572
|
+
// target — re-running sync against the same docx would otherwise stack
|
|
573
|
+
// duplicates. A 200-char window catches both wrapped
|
|
574
|
+
// (`{>>...<<}[anchor]{.mark}`) and bare (`{>>...<<}anchor`) forms while
|
|
575
|
+
// ignoring incidental matches farther away.
|
|
576
|
+
let dedupedCount = 0;
|
|
577
|
+
for (const c of matchedRoots) {
|
|
578
|
+
const parentBlock = `{>>${c.author}: ${c.text}<<}`;
|
|
579
|
+
const replies = repliesByRoot.get(c.id) ?? [];
|
|
580
|
+
const windowStart = Math.max(0, c.pos - 200);
|
|
581
|
+
const windowEnd = Math.min(result.length, c.pos + 200);
|
|
582
|
+
if (result.slice(windowStart, windowEnd).includes(parentBlock)) {
|
|
583
|
+
// Cluster already synced; treat all members as deduped.
|
|
584
|
+
dedupedCount += 1 + replies.length;
|
|
585
|
+
continue;
|
|
586
|
+
}
|
|
587
|
+
// Replies carry an explicit `↪ ` author prefix so the round-trip does not
|
|
588
|
+
// depend on positional adjacency in the markdown. On dense reviewer docs
|
|
589
|
+
// distinct clusters frequently land at the same anchor position; without
|
|
590
|
+
// the prefix the re-parse would misthread them. The injection side strips
|
|
591
|
+
// `↪ ` back off the author so Word renders the original name.
|
|
592
|
+
const replyBlocks = replies.map(r => `{>>↪ ${r.author}: ${r.text}<<}`);
|
|
593
|
+
const combined = parentBlock + replyBlocks.join('');
|
|
357
594
|
if (wrapAnchor && c.anchorText && c.anchorEnd) {
|
|
358
595
|
const before = result.slice(0, c.pos);
|
|
359
596
|
const anchor = result.slice(c.pos, c.anchorEnd);
|
|
360
597
|
const after = result.slice(c.anchorEnd);
|
|
361
|
-
result = before +
|
|
598
|
+
result = before + combined + `[${anchor}]{.mark}` + after;
|
|
362
599
|
} else {
|
|
363
|
-
|
|
364
|
-
// tweaks; CriticMarkup blocks are invisible to readers, and adding a
|
|
365
|
-
// leading space would shift prose byte-for-byte (relevant when callers
|
|
366
|
-
// verify that --comments-only didn't touch the original).
|
|
367
|
-
result = result.slice(0, c.pos) + comment + result.slice(c.pos);
|
|
600
|
+
result = result.slice(0, c.pos) + combined + result.slice(c.pos);
|
|
368
601
|
}
|
|
602
|
+
placedCount += 1 + replies.length;
|
|
603
|
+
}
|
|
604
|
+
|
|
605
|
+
if (outStats) {
|
|
606
|
+
outStats.placed = placedCount;
|
|
607
|
+
outStats.deduped = dedupedCount;
|
|
608
|
+
outStats.unmatched = unmatchedCount;
|
|
369
609
|
}
|
|
370
610
|
|
|
371
611
|
// Log warnings unless quiet mode
|
|
@@ -373,6 +613,9 @@ export function insertCommentsIntoMarkdown(
|
|
|
373
613
|
if (unmatchedCount > 0) {
|
|
374
614
|
console.warn(`Warning: ${unmatchedCount} comment(s) could not be matched to anchor text`);
|
|
375
615
|
}
|
|
616
|
+
if (dedupedCount > 0) {
|
|
617
|
+
console.warn(`Note: ${dedupedCount} comment(s) already present at anchor — skipped to avoid duplication`);
|
|
618
|
+
}
|
|
376
619
|
if (duplicateWarnings.length > 0) {
|
|
377
620
|
console.warn(`Warning: Duplicate anchor text found (using context & tie-breaks for placement):`);
|
|
378
621
|
for (const w of duplicateWarnings) {
|
package/lib/response.ts
CHANGED
|
@@ -46,7 +46,7 @@ export function parseCommentsWithReplies(text: string, file: string = ''): Comme
|
|
|
46
46
|
if (matches.length === 0) continue;
|
|
47
47
|
|
|
48
48
|
// Get context (surrounding text without comments)
|
|
49
|
-
const contextLine = line.replace(/\{>>[
|
|
49
|
+
const contextLine = line.replace(/\{>>[\s\S]+?<<\}/g, '').trim();
|
|
50
50
|
const context = contextLine.slice(0, 100) + (contextLine.length > 100 ? '...' : '');
|
|
51
51
|
|
|
52
52
|
// First match is the original comment, rest are replies
|