docrev 0.9.6 → 0.9.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (80) hide show
  1. package/CHANGELOG.md +41 -0
  2. package/dev_notes/bug_repro_comment_parser.md +71 -0
  3. package/dev_notes/stress2/adversarial.docx +0 -0
  4. package/dev_notes/stress2/build_adversarial.ts +186 -0
  5. package/dev_notes/stress2/drift_matcher.ts +62 -0
  6. package/dev_notes/stress2/probe_anchors.ts +35 -0
  7. package/dev_notes/stress2/project/adversarial.docx +0 -0
  8. package/dev_notes/stress2/project/discussion.before.md +3 -0
  9. package/dev_notes/stress2/project/discussion.md +3 -0
  10. package/dev_notes/stress2/project/methods.before.md +20 -0
  11. package/dev_notes/stress2/project/methods.md +20 -0
  12. package/dev_notes/stress2/project/rev.yaml +5 -0
  13. package/dev_notes/stress2/project/sections.yaml +4 -0
  14. package/dev_notes/stress2/sections.yaml +5 -0
  15. package/dev_notes/stress2/trace_placement.ts +50 -0
  16. package/dev_notes/stresstest_boundaries.ts +27 -0
  17. package/dev_notes/stresstest_drift_apply.ts +43 -0
  18. package/dev_notes/stresstest_drift_compare.ts +43 -0
  19. package/dev_notes/stresstest_drift_v2.ts +54 -0
  20. package/dev_notes/stresstest_inspect.ts +54 -0
  21. package/dev_notes/stresstest_pstyle.ts +55 -0
  22. package/dev_notes/stresstest_section_debug.ts +23 -0
  23. package/dev_notes/stresstest_split.ts +70 -0
  24. package/dev_notes/stresstest_trace.ts +19 -0
  25. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -0
  26. package/dist/lib/anchor-match.d.ts +51 -0
  27. package/dist/lib/anchor-match.d.ts.map +1 -0
  28. package/dist/lib/anchor-match.js +227 -0
  29. package/dist/lib/anchor-match.js.map +1 -0
  30. package/dist/lib/annotations.d.ts.map +1 -1
  31. package/dist/lib/annotations.js +24 -11
  32. package/dist/lib/annotations.js.map +1 -1
  33. package/dist/lib/commands/index.d.ts +2 -1
  34. package/dist/lib/commands/index.d.ts.map +1 -1
  35. package/dist/lib/commands/index.js +3 -1
  36. package/dist/lib/commands/index.js.map +1 -1
  37. package/dist/lib/commands/quality.js +1 -1
  38. package/dist/lib/commands/quality.js.map +1 -1
  39. package/dist/lib/commands/section-boundaries.d.ts +22 -0
  40. package/dist/lib/commands/section-boundaries.d.ts.map +1 -0
  41. package/dist/lib/commands/section-boundaries.js +63 -0
  42. package/dist/lib/commands/section-boundaries.js.map +1 -0
  43. package/dist/lib/commands/sync.d.ts.map +1 -1
  44. package/dist/lib/commands/sync.js +141 -0
  45. package/dist/lib/commands/sync.js.map +1 -1
  46. package/dist/lib/commands/verify-anchors.d.ts +17 -0
  47. package/dist/lib/commands/verify-anchors.d.ts.map +1 -0
  48. package/dist/lib/commands/verify-anchors.js +226 -0
  49. package/dist/lib/commands/verify-anchors.js.map +1 -0
  50. package/dist/lib/comment-realign.js +2 -2
  51. package/dist/lib/comment-realign.js.map +1 -1
  52. package/dist/lib/import.d.ts +26 -8
  53. package/dist/lib/import.d.ts.map +1 -1
  54. package/dist/lib/import.js +166 -187
  55. package/dist/lib/import.js.map +1 -1
  56. package/dist/lib/response.js +1 -1
  57. package/dist/lib/response.js.map +1 -1
  58. package/dist/lib/word-extraction.d.ts +23 -0
  59. package/dist/lib/word-extraction.d.ts.map +1 -1
  60. package/dist/lib/word-extraction.js +79 -0
  61. package/dist/lib/word-extraction.js.map +1 -1
  62. package/dist/lib/wordcomments.d.ts.map +1 -1
  63. package/dist/lib/wordcomments.js +165 -73
  64. package/dist/lib/wordcomments.js.map +1 -1
  65. package/lib/anchor-match.ts +276 -0
  66. package/lib/annotations.ts +25 -11
  67. package/lib/commands/index.ts +3 -0
  68. package/lib/commands/quality.ts +1 -1
  69. package/lib/commands/section-boundaries.ts +82 -0
  70. package/lib/commands/sync.ts +170 -0
  71. package/lib/commands/verify-anchors.ts +272 -0
  72. package/lib/comment-realign.ts +2 -2
  73. package/lib/import.ts +197 -209
  74. package/lib/response.ts +1 -1
  75. package/lib/word-extraction.ts +93 -0
  76. package/lib/wordcomments.ts +180 -82
  77. package/package.json +1 -1
  78. package/skill/REFERENCE.md +29 -2
  79. package/skill/SKILL.md +12 -2
  80. package/dist/package.json +0 -137
@@ -0,0 +1,272 @@
1
+ /**
2
+ * VERIFY-ANCHORS command: report drift between Word comment anchors
3
+ * and the current markdown.
4
+ *
5
+ * Useful when prose has been revised between sending the docx out for
6
+ * review and receiving it back. Each comment is classified by how well
7
+ * its anchor still matches the current section prose:
8
+ *
9
+ * clean – exact or whitespace-normalized hit
10
+ * drift – anchor only matches via stripped/partial fallbacks
11
+ * context-only – anchor text is gone, only surrounding context survives
12
+ * ambiguous – multiple matches, can't pick one without context
13
+ * unmatched – nothing maps; user must place the comment manually
14
+ */
15
+
16
+ import {
17
+ chalk,
18
+ fs,
19
+ path,
20
+ fmt,
21
+ loadConfig,
22
+ jsonMode,
23
+ jsonOutput,
24
+ } from './context.js';
25
+ import type { Command } from 'commander';
26
+ import { findAnchorInText, classifyStrategy, scoreContextAt, type AnchorMatchQuality } from '../anchor-match.js';
27
+ import type { CommentAnchorData } from '../word-extraction.js';
28
+ import { computeSectionBoundaries } from './section-boundaries.js';
29
+
30
+ interface VerifyOptions { // Options parsed from the `verify-anchors` command line.
31
+ config: string; // Sections config file; resolved against `dir` before it is loaded.
32
+ dir: string; // Directory that holds the section files (and the config file).
33
+ json?: boolean; // When set, the report is emitted as JSON instead of the formatted table.
34
+ }
35
+
36
+ interface CommentReport { // One row of the verification report: the outcome for a single Word comment.
37
+ id: string; // Comment id, copied from the extracted Word comment.
38
+ author: string; // Comment author, copied from the extracted Word comment.
39
+ text: string; // Comment body, copied from the extracted Word comment.
40
+ section: string | null; // Section file the anchor position falls in, or null when none was determined.
41
+ quality: AnchorMatchQuality | 'ambiguous'; // Match classification; 'ambiguous' is added locally on top of AnchorMatchQuality.
42
+ strategy: string; // Strategy reported by the anchor search, or 'no-anchor' / 'no-section' / 'missing-file'.
43
+ anchor: string; // Anchor text taken from the Word document ('' when the comment has no anchor).
44
+ occurrences: number; // Number of candidate positions the search returned (0 when no search ran).
45
+ }
46
+
47
/**
 * Build the report entry for a comment that cannot be placed at all
 * (no anchor, no owning section, or the section file is missing on disk).
 *
 * Key order matches the other report entries so JSON output stays uniform.
 */
function unplacedReport(
  comment: { id: string; author: string; text: string },
  strategy: string,
  section: string | null,
  anchor: string,
): CommentReport {
  return {
    id: comment.id,
    author: comment.author,
    text: comment.text,
    section,
    quality: 'unmatched',
    strategy,
    anchor,
    occurrences: 0,
  };
}

/**
 * Register the `verify-anchors` command on the given commander program.
 *
 * The command reads the comments, comment anchors and headings from a .docx,
 * maps every anchor to a section file through the section boundaries, and
 * searches the current markdown of that section for the anchor text. The
 * outcome is printed as a table, or as JSON when `--json` / JSON mode is on.
 *
 * Exits the process with code 1 when the .docx or the sections config is
 * missing, or when the .docx cannot be read.
 *
 * @param program - commander program the command is attached to
 */
export function register(program: Command): void {
  program
    .command('verify-anchors')
    .description('Report drift between Word comment anchors and current markdown')
    .argument('<file>', 'Word document with reviewer comments (.docx)')
    .option('-c, --config <file>', 'Sections config file', 'sections.yaml')
    .option('-d, --dir <directory>', 'Directory with section files', '.')
    .option('--json', 'Output JSON report (for scripting)')
    .action(async (docxPath: string, options: VerifyOptions) => {
      if (!fs.existsSync(docxPath)) {
        console.error(fmt.status('error', `File not found: ${docxPath}`));
        process.exit(1);
      }

      const configPath = path.resolve(options.dir, options.config);
      if (!fs.existsSync(configPath)) {
        console.error(fmt.status('error', `Config not found: ${configPath}`));
        console.error(chalk.dim('  Run "rev init" first to generate sections.yaml'));
        process.exit(1);
      }

      const config = loadConfig(configPath);
      const { extractWordComments, extractCommentAnchors, extractHeadings } = await import('../import.js');

      let comments;
      let anchors;
      let headings;
      let fullDocText = '';
      try {
        comments = await extractWordComments(docxPath);
        const result = await extractCommentAnchors(docxPath);
        anchors = result.anchors;
        fullDocText = result.fullDocText;
        headings = await extractHeadings(docxPath);
      } catch (err) {
        const error = err as Error;
        console.error(fmt.status('error', `Failed to read ${path.basename(docxPath)}: ${error.message}`));
        if (process.env.DEBUG) console.error(error.stack);
        process.exit(1);
      }

      if (comments.length === 0) {
        if (options.json || jsonMode) {
          // Keep stdout machine-readable: emit an empty report with the same
          // shape as the normal one instead of a human-oriented status line.
          jsonOutput({
            file: docxPath,
            totalComments: 0,
            summary: tally([]),
            comments: [],
          });
        } else {
          console.log(fmt.status('info', 'No comments found in document.'));
        }
        return;
      }

      const boundaries = computeSectionBoundaries(config.sections, headings, fullDocText.length);

      // Cache section markdown contents on first read. Missing files are not
      // cached, so each lookup of a missing file re-checks the disk.
      const sectionCache = new Map<string, string>();
      function loadSection(file: string): string | null {
        if (sectionCache.has(file)) return sectionCache.get(file)!;
        const sectionPath = path.join(options.dir, file);
        if (!fs.existsSync(sectionPath)) return null;
        const content = fs.readFileSync(sectionPath, 'utf-8');
        sectionCache.set(file, content);
        return content;
      }

      const firstBoundaryStart = boundaries.length > 0 ? boundaries[0].start : 0;
      const reports: CommentReport[] = [];

      for (const c of comments) {
        const anchor: CommentAnchorData | undefined = anchors.get(c.id);
        if (!anchor) {
          reports.push(unplacedReport(c, 'no-anchor', null, ''));
          continue;
        }
        const anchorText = anchor.anchor || '';

        // Determine which section file this comment lives in
        let sectionFile: string | null = null;
        for (const b of boundaries) {
          if (anchor.docPosition >= b.start && anchor.docPosition < b.end) {
            sectionFile = b.file;
            break;
          }
        }
        // Anchors located before the first boundary are attributed to the first section.
        if (!sectionFile && boundaries.length > 0 && anchor.docPosition < firstBoundaryStart) {
          sectionFile = boundaries[0].file;
        }

        if (!sectionFile) {
          reports.push(unplacedReport(c, 'no-section', null, anchorText));
          continue;
        }

        const md = loadSection(sectionFile);
        if (md === null) {
          reports.push(unplacedReport(c, 'missing-file', sectionFile, anchorText));
          continue;
        }

        const search = findAnchorInText(anchor.anchor, md, anchor.before, anchor.after);
        let quality: AnchorMatchQuality | 'ambiguous' = classifyStrategy(search.strategy, search.occurrences.length);
        if (quality === 'clean' && search.occurrences.length > 1) {
          // Multiple direct hits — only flag as ambiguous when before/after
          // context can't pick a clear winner. If one candidate scores
          // strictly higher than the others, sync will place it correctly.
          const anchorLen = anchor.anchor.length;
          const scores = search.occurrences.map(p => scoreContextAt(p, md, anchor.before, anchor.after, anchorLen));
          const max = Math.max(...scores);
          const winners = scores.filter(s => s === max).length;
          if (max === 0 || winners > 1) {
            quality = 'ambiguous';
          }
        }

        reports.push({
          id: c.id,
          author: c.author,
          text: c.text,
          section: sectionFile,
          quality,
          strategy: search.strategy,
          anchor: anchorText,
          occurrences: search.occurrences.length,
        });
      }

      if (options.json || jsonMode) {
        jsonOutput({
          file: docxPath,
          totalComments: comments.length,
          summary: tally(reports),
          comments: reports,
        });
        return;
      }

      printReport(docxPath, reports);
    });
}
207
+
208
/**
 * Count reports per quality bucket.
 *
 * The five known buckets are always present in the result (zero when empty);
 * any other quality value encountered gets its own counter.
 *
 * @param reports - report entries to count
 * @returns map from quality name to number of reports
 */
function tally(reports: CommentReport[]): Record<string, number> {
  const seed: Record<string, number> = {
    clean: 0,
    drift: 0,
    'context-only': 0,
    ambiguous: 0,
    unmatched: 0,
  };
  return reports.reduce((counts, report) => {
    const seenSoFar = counts[report.quality] || 0;
    counts[report.quality] = seenSoFar + 1;
    return counts;
  }, seed);
}
213
+
214
/**
 * Print the human-readable verification report to stdout.
 *
 * Output consists of a header, a boxed summary (the "clean" count is always
 * listed, other buckets only when non-zero), then either a success line or a
 * table of every non-clean comment, followed by a hint when manual placement
 * is required.
 *
 * @param docxPath - path of the inspected document; only its basename is shown
 * @param reports - per-comment results to summarise
 */
function printReport(docxPath: string, reports: CommentReport[]): void {
  const title = `Anchor Verification: ${path.basename(docxPath)}`;
  console.log(fmt.header(title));
  console.log();

  const totals = tally(reports);

  // Buckets that are only mentioned when at least one comment landed in them.
  const optionalBuckets = [
    { count: totals.drift, render: () => `${chalk.cyan(totals.drift)} drifted (matched via fallback strategies)` },
    { count: totals['context-only'], render: () => `${chalk.yellow(totals['context-only'])} context-only (anchor text gone, neighbors survive)` },
    { count: totals.ambiguous, render: () => `${chalk.magenta(totals.ambiguous)} ambiguous (multiple candidate positions)` },
    { count: totals.unmatched, render: () => `${chalk.red(totals.unmatched)} unmatched (manual placement needed)` },
  ];
  const summaryLines: string[] = [`${chalk.green(totals.clean)} clean (anchor still matches)`];
  for (const bucket of optionalBuckets) {
    if (bucket.count) summaryLines.push(bucket.render());
  }
  console.log(fmt.box(summaryLines.join('\n'), { title: 'Summary', padding: 0 }));
  console.log();

  // Only comments that are not a clean direct hit make it into the table.
  const problems = reports.filter(report => report.quality !== 'clean');
  if (problems.length === 0) {
    console.log(fmt.status('success', 'All comment anchors match the current markdown.'));
    return;
  }

  const header = ['ID', 'Quality', 'Section', 'Strategy', 'Anchor (Word)', 'Comment'];
  const rows = problems.map(report => {
    const sectionCell = report.section ? chalk.bold(report.section) : chalk.dim('—');
    return [
      chalk.dim(`#${report.id}`),
      qualityColor(report.quality),
      sectionCell,
      chalk.dim(report.strategy),
      truncate(report.anchor, 35),
      truncate(report.text, 35),
    ];
  });
  console.log(fmt.table(
    header,
    rows,
    { align: ['right', 'left', 'left', 'left', 'left', 'left'] },
  ));

  const needsManualPlacement = totals.unmatched > 0 || totals.ambiguous > 0;
  if (!needsManualPlacement) return;

  console.log();
  console.log(chalk.dim('Comments flagged "unmatched" or "ambiguous" need manual placement.'));
  console.log(chalk.dim('Run "rev sync --comments-only" to import the matched ones without touching prose.'));
}
256
+
257
/**
 * Render a quality label in its report colour.
 *
 * Note that 'context-only' is displayed with the shorter label "context".
 * Unknown values are returned unchanged and uncoloured.
 *
 * @param q - quality name
 * @returns coloured label, or `q` itself when it is not a known quality
 */
function qualityColor(q: string): string {
  const painters = new Map<string, () => string>([
    ['clean', () => chalk.green('clean')],
    ['drift', () => chalk.cyan('drift')],
    ['context-only', () => chalk.yellow('context')],
    ['ambiguous', () => chalk.magenta('ambiguous')],
    ['unmatched', () => chalk.red('unmatched')],
  ]);
  const paint = painters.get(q);
  return paint === undefined ? q : paint();
}
267
+
268
/**
 * Flatten a string to a single line and shorten it for a table cell.
 *
 * Runs of whitespace collapse to one space and the ends are trimmed. When the
 * result is longer than `max` UTF-16 code units it is cut to `max - 1` units
 * plus an ellipsis. Empty, missing or whitespace-only input yields a dimmed
 * "—" placeholder so the cell is never blank.
 *
 * @param s - text to display
 * @param max - maximum length of the returned text, ellipsis included
 * @returns the flattened (and possibly shortened) text, or the placeholder
 */
function truncate(s: string, max: number): string {
  const flat = (s ?? '').replace(/\s+/g, ' ').trim();
  // Checked after flattening so whitespace-only input also gets the placeholder.
  if (flat.length === 0) return chalk.dim('—');
  if (flat.length <= max) return flat;

  // Never let a non-positive `max` turn into a negative slice index.
  let cut = Math.max(0, max - 1);
  if (cut > 0) {
    // Do not cut between the two halves of a surrogate pair: a dangling high
    // surrogate would render as a replacement character.
    const lastKept = flat.charCodeAt(cut - 1);
    if (lastKept >= 0xd800 && lastKept <= 0xdbff) cut -= 1;
  }
  return flat.slice(0, cut) + '…';
}
@@ -370,7 +370,7 @@ export async function realignComments(
370
370
 
371
371
  // Strip ALL comments (both authors) from markdown to start fresh
372
372
  let markdown = originalMarkdown;
373
- markdown = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
373
+ markdown = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
374
374
  console.log(`Stripped all comments from markdown`);
375
375
 
376
376
  // Parse markdown paragraphs
@@ -469,7 +469,7 @@ export async function realignMarkdown(
469
469
  );
470
470
 
471
471
  // Strip ALL comments from markdown
472
- let result = markdown.replace(/\s*\{>>[^<]+<<\}/g, '');
472
+ let result = markdown.replace(/\s*\{>>[\s\S]+?<<\}/g, '');
473
473
 
474
474
  // Parse markdown paragraphs
475
475
  const mdParagraphs = parseMdParagraphs(result);