docrev 0.10.0 → 0.10.1

This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -164
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -431
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/anchor-match.d.ts +1 -1
  11. package/dist/lib/anchor-match.d.ts.map +1 -1
  12. package/dist/lib/anchor-match.js +17 -47
  13. package/dist/lib/anchor-match.js.map +1 -1
  14. package/dist/lib/build.js +4 -4
  15. package/dist/lib/commands/context.d.ts +1 -1
  16. package/dist/lib/commands/context.d.ts.map +1 -1
  17. package/dist/lib/commands/context.js +1 -1
  18. package/dist/lib/commands/context.js.map +1 -1
  19. package/dist/lib/commands/sections.js +7 -7
  20. package/dist/lib/commands/sections.js.map +1 -1
  21. package/dist/lib/commands/sync.d.ts.map +1 -1
  22. package/dist/lib/commands/sync.js +15 -14
  23. package/dist/lib/commands/sync.js.map +1 -1
  24. package/dist/lib/commands/utilities.js +164 -164
  25. package/dist/lib/commands/verify-anchors.js +6 -6
  26. package/dist/lib/commands/verify-anchors.js.map +1 -1
  27. package/dist/lib/commands/word-tools.js +8 -8
  28. package/dist/lib/grammar.js +3 -3
  29. package/dist/lib/macro-filter.lua +201 -201
  30. package/dist/lib/pdf-comments.js +44 -44
  31. package/dist/lib/plugins.js +57 -57
  32. package/dist/lib/pptx-color-filter.lua +37 -37
  33. package/dist/lib/pptx-themes.js +115 -115
  34. package/dist/lib/sections.d.ts +35 -0
  35. package/dist/lib/sections.d.ts.map +1 -1
  36. package/dist/lib/sections.js +81 -0
  37. package/dist/lib/sections.js.map +1 -1
  38. package/dist/lib/spelling.js +2 -2
  39. package/dist/lib/templates.js +387 -387
  40. package/dist/lib/themes.js +51 -51
  41. package/docs-src/build.py +113 -113
  42. package/docs-src/extra.css +208 -208
  43. package/docs-src/md-to-html.lua +6 -6
  44. package/docs-src/template.html +116 -116
  45. package/eslint.config.js +27 -27
  46. package/lib/anchor-match.ts +276 -308
  47. package/lib/annotations.ts +644 -644
  48. package/lib/build.ts +1766 -1766
  49. package/lib/citations.ts +160 -160
  50. package/lib/commands/build.ts +855 -855
  51. package/lib/commands/citations.ts +515 -515
  52. package/lib/commands/comments.ts +1050 -1050
  53. package/lib/commands/context.ts +176 -174
  54. package/lib/commands/core.ts +309 -309
  55. package/lib/commands/doi.ts +435 -435
  56. package/lib/commands/file-ops.ts +372 -372
  57. package/lib/commands/history.ts +320 -320
  58. package/lib/commands/index.ts +87 -87
  59. package/lib/commands/init.ts +259 -259
  60. package/lib/commands/merge-resolve.ts +378 -378
  61. package/lib/commands/preview.ts +178 -178
  62. package/lib/commands/project-info.ts +244 -244
  63. package/lib/commands/quality.ts +517 -517
  64. package/lib/commands/response.ts +454 -454
  65. package/lib/commands/section-boundaries.ts +82 -82
  66. package/lib/commands/sections.ts +451 -451
  67. package/lib/commands/sync.ts +709 -706
  68. package/lib/commands/text-ops.ts +449 -449
  69. package/lib/commands/utilities.ts +448 -448
  70. package/lib/commands/verify-anchors.ts +272 -272
  71. package/lib/commands/word-tools.ts +340 -340
  72. package/lib/comment-realign.ts +517 -517
  73. package/lib/config.ts +84 -84
  74. package/lib/crossref.ts +781 -781
  75. package/lib/csl.ts +191 -191
  76. package/lib/dependencies.ts +98 -98
  77. package/lib/diff-engine.ts +465 -465
  78. package/lib/doi-cache.ts +115 -115
  79. package/lib/doi.ts +897 -897
  80. package/lib/equations.ts +506 -506
  81. package/lib/errors.ts +346 -346
  82. package/lib/format.ts +541 -541
  83. package/lib/git.ts +326 -326
  84. package/lib/grammar.ts +303 -303
  85. package/lib/image-registry.ts +180 -180
  86. package/lib/import.ts +911 -911
  87. package/lib/journals.ts +543 -543
  88. package/lib/macro-filter.lua +201 -201
  89. package/lib/macros.ts +273 -273
  90. package/lib/merge.ts +633 -633
  91. package/lib/orcid.ts +144 -144
  92. package/lib/pdf-comments.ts +263 -263
  93. package/lib/pdf-import.ts +524 -524
  94. package/lib/plugins.ts +362 -362
  95. package/lib/postprocess.ts +188 -188
  96. package/lib/pptx-color-filter.lua +37 -37
  97. package/lib/pptx-template.ts +469 -469
  98. package/lib/pptx-themes.ts +483 -483
  99. package/lib/protect-restore.ts +520 -520
  100. package/lib/rate-limiter.ts +94 -94
  101. package/lib/response.ts +197 -197
  102. package/lib/restore-references.ts +240 -240
  103. package/lib/review.ts +327 -327
  104. package/lib/schema.ts +488 -488
  105. package/lib/scientific-words.ts +73 -73
  106. package/lib/sections.ts +425 -335
  107. package/lib/slides.ts +756 -756
  108. package/lib/spelling.ts +334 -334
  109. package/lib/templates.ts +526 -526
  110. package/lib/themes.ts +742 -742
  111. package/lib/trackchanges.ts +247 -247
  112. package/lib/tui.ts +450 -450
  113. package/lib/types.ts +550 -550
  114. package/lib/undo.ts +250 -250
  115. package/lib/utils.ts +69 -69
  116. package/lib/variables.ts +179 -179
  117. package/lib/word-extraction.ts +806 -806
  118. package/lib/word.ts +643 -643
  119. package/lib/wordcomments.ts +840 -840
  120. package/mkdocs.yml +64 -64
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +47 -47
  123. package/skill/REFERENCE.md +539 -539
  124. package/skill/SKILL.md +295 -295
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
@@ -1,308 +1,276 @@
1
- /**
2
- * Anchor matching primitives shared between sync (insertion) and
3
- * verify-anchors (drift reporting). The functions are pure: given an
4
- * anchor string and surrounding context, locate candidate positions in
5
- * a target text using progressively looser strategies.
6
- */
7
-
8
- export type AnchorStrategy =
9
- | 'direct'
10
- | 'normalized'
11
- | 'stripped'
12
- | 'partial-start'
13
- | 'partial-start-stripped'
14
- | 'partial-window'
15
- | 'partial-window-stripped'
16
- | 'context-both'
17
- | 'context-before'
18
- | 'context-after'
19
- | 'split-match'
20
- | 'empty-anchor'
21
- | 'failed';
22
-
23
- export interface AnchorSearchResult {
24
- occurrences: number[];
25
- matchedAnchor: string | null;
26
- strategy: AnchorStrategy;
27
- stripped?: boolean;
28
- }
29
-
30
- /**
31
- * Strip CriticMarkup so the matcher sees plain prose instead of
32
- * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
33
- * underneath previously imported track changes.
34
- */
35
- export function stripCriticMarkup(text: string): string {
36
- return text
37
- .replace(/\{\+\+([^+]*)\+\+\}/g, '$1') // insertions: keep new text
38
- .replace(/\{--([^-]*)--\}/g, '') // deletions: remove old text
39
- .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2') // substitutions: keep new text
40
- .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (non-greedy; comment text may contain '<')
41
- .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
42
- }
43
-
44
- /**
45
- * Return every starting index where `needle` occurs in `haystack`.
46
- * Empty needles return no occurrences (empty matches are not useful
47
- * for anchor placement).
48
- */
49
- /**
50
- * Score how well the docx-side `before` / `after` context matches the
51
- * surroundings of a candidate position in the target text. Used by
52
- * `verify-anchors` to tell apart "multiple hits but context picks one
53
- * cleanly" (sync will place it correctly) from "multiple hits, context
54
- * doesn't help" (truly ambiguous, needs human placement).
55
- *
56
- * Returns 0 if no context was provided.
57
- */
58
- export function scoreContextAt(
59
- pos: number,
60
- text: string,
61
- before: string,
62
- after: string,
63
- anchorLen: number,
64
- ): number {
65
- let score = 0;
66
- if (before) {
67
- const contextBefore = text.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
68
- const beforeLower = before.toLowerCase();
69
- const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
70
- for (const word of beforeWords) {
71
- if (contextBefore.includes(word)) score += 2;
72
- }
73
- if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
74
- }
75
- if (after) {
76
- const contextAfter = text.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
77
- const afterLower = after.toLowerCase();
78
- const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
79
- for (const word of afterWords) {
80
- if (contextAfter.includes(word)) score += 2;
81
- }
82
- if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
83
- }
84
- return score;
85
- }
86
-
87
- export function findAllOccurrences(haystack: string, needle: string): number[] {
88
- if (!needle || needle.length === 0) return [];
89
- const occurrences: number[] = [];
90
- let idx = 0;
91
- while ((idx = haystack.indexOf(needle, idx)) !== -1) {
92
- occurrences.push(idx);
93
- idx += 1;
94
- }
95
- return occurrences;
96
- }
97
-
98
- /**
99
- * Find candidate positions for `anchor` in `text`, falling back through
100
- * progressively looser strategies (whitespace normalization, stripped
101
- * CriticMarkup, partial-prefix, surrounding context, word splitting).
102
- *
103
- * The returned `strategy` lets callers distinguish a clean direct hit
104
- * from a fuzzy approximation — useful for drift reporting.
105
- */
106
- export function findAnchorInText(
107
- anchor: string,
108
- text: string,
109
- before: string = '',
110
- after: string = ''
111
- ): AnchorSearchResult {
112
- // Empty anchor: skip directly to context-based matching
113
- if (!anchor || anchor.trim().length === 0) {
114
- if (before || after) {
115
- const beforeLower = (before || '').toLowerCase();
116
- const afterLower = (after || '').toLowerCase();
117
- const textLower = text.toLowerCase();
118
-
119
- if (before && after) {
120
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
121
- if (beforeIdx !== -1) {
122
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
123
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
124
- if (afterIdx !== -1 && afterIdx - searchStart < 500) {
125
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
126
- }
127
- }
128
- }
129
-
130
- if (before) {
131
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
132
- if (beforeIdx !== -1) {
133
- return {
134
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
135
- matchedAnchor: null,
136
- strategy: 'context-before',
137
- };
138
- }
139
- }
140
-
141
- if (after) {
142
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
143
- if (afterIdx !== -1) {
144
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
145
- }
146
- }
147
- }
148
- return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
149
- }
150
-
151
- const anchorLower = anchor.toLowerCase();
152
- const textLower = text.toLowerCase();
153
-
154
- // Strategy 1: direct match
155
- let occurrences = findAllOccurrences(textLower, anchorLower);
156
- if (occurrences.length > 0) {
157
- return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
158
- }
159
-
160
- // Strategy 2: normalized whitespace
161
- const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
162
- const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
163
- const idx = normalizedText.indexOf(normalizedAnchor);
164
- if (idx !== -1) {
165
- return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
166
- }
167
-
168
- // Strategy 3: match in stripped CriticMarkup version
169
- const strippedText = stripCriticMarkup(text);
170
- const strippedLower = strippedText.toLowerCase();
171
- occurrences = findAllOccurrences(strippedLower, anchorLower);
172
- if (occurrences.length > 0) {
173
- return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
174
- }
175
-
176
- // Strategy 4: word window from anchor (prefix or interior).
177
- // Sliding the window across the anchor catches the case where the
178
- // anchor's prefix has been edited but a chunk in the middle/end
179
- // survived intact (e.g. "Sensitivity analyses were performed by
180
- // perturbing the prior variance" → drifted "Sensitivity analyses
181
- // perturbed the prior variance" still contains "the prior variance").
182
- const words = anchor.split(/\s+/);
183
- if (words.length > 3) {
184
- for (let n = Math.min(6, words.length); n >= 3; n--) {
185
- for (let start = 0; start + n <= words.length; start++) {
186
- const window = words.slice(start, start + n).join(' ');
187
- const windowLower = window.toLowerCase();
188
- if (windowLower.length < 15) continue;
189
-
190
- let occ = findAllOccurrences(textLower, windowLower);
191
- if (occ.length > 0) {
192
- const strategy: AnchorStrategy = start === 0 ? 'partial-start' : 'partial-window';
193
- return { occurrences: occ, matchedAnchor: window, strategy };
194
- }
195
- occ = findAllOccurrences(strippedLower, windowLower);
196
- if (occ.length > 0) {
197
- const strategy: AnchorStrategy = start === 0 ? 'partial-start-stripped' : 'partial-window-stripped';
198
- return { occurrences: occ, matchedAnchor: window, strategy, stripped: true };
199
- }
200
- }
201
- }
202
- }
203
-
204
- // Strategy 5: context (before/after) only.
205
- //
206
- // For a non-empty anchor that already failed every text-based strategy
207
- // above, we treat context as a degraded placement: classify it
208
- // 'context-only' so callers can warn the user. We also reject
209
- // implausible brackets — if both contexts match but the gap between
210
- // them is far too small to contain the anchor (e.g. the anchored
211
- // sentence was deleted), do not silently land the comment between
212
- // the surviving sentences. Return 'failed' so the user is told to
213
- // place it manually.
214
- if (before || after) {
215
- const beforeLower = before.toLowerCase();
216
- const afterLower = after.toLowerCase();
217
- const anchorLen = anchor.length;
218
-
219
- if (before && after) {
220
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
221
- if (beforeIdx !== -1) {
222
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
223
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
224
- if (afterIdx !== -1) {
225
- const gap = afterIdx - searchStart;
226
- // Require the bracket to plausibly contain a remnant of the anchor.
227
- // Below 30% of anchor length: anchor was deleted — refuse to place.
228
- // Above 2× anchor length + slack: brackets are too far apart, the
229
- // matcher has latched onto unrelated repeats of common context.
230
- const minGap = Math.floor(anchorLen * 0.3);
231
- const maxGap = Math.min(500, anchorLen * 2 + 50);
232
- if (gap >= minGap && gap <= maxGap) {
233
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
234
- }
235
- // Both brackets found but gap implausible: anchor likely deleted.
236
- // Don't fall back to single-side context — that would silently
237
- // place the comment in the wrong location.
238
- return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
239
- }
240
- }
241
- }
242
-
243
- if (before) {
244
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
245
- if (beforeIdx !== -1) {
246
- return {
247
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
248
- matchedAnchor: null,
249
- strategy: 'context-before',
250
- };
251
- }
252
- }
253
-
254
- if (after) {
255
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
256
- if (afterIdx !== -1) {
257
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
258
- }
259
- }
260
- }
261
-
262
- // Strategy 6: split anchor on transition characters
263
- const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
264
- for (const sep of splitPatterns) {
265
- if (anchor.includes(sep)) {
266
- const parts = anchor.split(sep).filter(p => p.length >= 4);
267
- for (const part of parts) {
268
- const partLower = part.toLowerCase();
269
- occurrences = findAllOccurrences(textLower, partLower);
270
- if (occurrences.length > 0 && occurrences.length < 5) {
271
- return { occurrences, matchedAnchor: part, strategy: 'split-match' };
272
- }
273
- }
274
- }
275
- }
276
-
277
- return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
278
- }
279
-
280
- /**
281
- * Classify a strategy as a clean hit, a fuzzy/drifted hit, or no hit.
282
- * Used by `verify-anchors` to summarize per-comment match quality.
283
- */
284
- export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';
285
-
286
- export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
287
- if (occurrences === 0) return 'unmatched';
288
- switch (strategy) {
289
- case 'direct':
290
- case 'normalized':
291
- return 'clean';
292
- case 'stripped':
293
- case 'partial-start':
294
- case 'partial-start-stripped':
295
- case 'partial-window':
296
- case 'partial-window-stripped':
297
- case 'split-match':
298
- return 'drift';
299
- case 'context-both':
300
- case 'context-before':
301
- case 'context-after':
302
- return 'context-only';
303
- case 'empty-anchor':
304
- case 'failed':
305
- default:
306
- return 'unmatched';
307
- }
308
- }
1
+ /**
2
+ * Anchor matching primitives shared between sync (insertion) and
3
+ * verify-anchors (drift reporting). The functions are pure: given an
4
+ * anchor string and surrounding context, locate candidate positions in
5
+ * a target text using progressively looser strategies.
6
+ */
7
+
/**
 * Names the matching strategy that produced an anchor search result.
 * `findAnchorInText` tries the text-based strategies from strictest to
 * loosest and reports the first one that yields a hit.
 */
export type AnchorStrategy =
  // Case-insensitive substring match of the whole anchor.
  | 'direct'
  // Whole anchor matched once whitespace runs are collapsed.
  | 'normalized'
  // Whole anchor matched against the CriticMarkup-stripped text.
  | 'stripped'
  // Leading words of a long anchor matched in the text.
  | 'partial-start'
  // Leading words of a long anchor matched in the CriticMarkup-stripped text.
  | 'partial-start-stripped'
  // Position derived from both the preceding and the following context.
  | 'context-both'
  // Position derived from the preceding context only.
  | 'context-before'
  // Position derived from the following context only.
  | 'context-after'
  // A fragment of the anchor (split on a separator) matched.
  | 'split-match'
  // Anchor was blank and context could not place it.
  | 'empty-anchor'
  // Every strategy was exhausted without a hit.
  | 'failed';

/**
 * Outcome of an anchor search.
 */
export interface AnchorSearchResult {
  // Candidate start offsets of the match; empty when nothing was found.
  occurrences: number[];
  // The anchor text (or anchor fragment) that matched; null for context-based placements and failures.
  matchedAnchor: string | null;
  // Which strategy produced `occurrences`.
  strategy: AnchorStrategy;
  // Set to true when `occurrences` were found in the CriticMarkup-stripped text rather than the raw text.
  stripped?: boolean;
}
27
+
/**
 * Strip CriticMarkup so the matcher sees plain prose instead of
 * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
 * underneath previously imported track changes.
 *
 * Every span is matched non-greedily up to its closing delimiter, so the
 * marked text may itself contain `+`, `-` or `~` (hyphenated words,
 * formulas, "~5") and may span multiple lines.
 *
 * @param text - Text that may contain CriticMarkup.
 * @returns The text with insertions and the new side of substitutions kept,
 *   deletions and comments removed, and `[text]{.mark}` highlights unwrapped.
 */
export function stripCriticMarkup(text: string): string {
  return text
    .replace(/\{\+\+([\s\S]*?)\+\+\}/g, '$1') // insertions: keep new text
    .replace(/\{--[\s\S]*?--\}/g, '') // deletions: remove old text
    .replace(/\{~~[\s\S]*?~>([\s\S]*?)~~\}/g, '$1') // substitutions: keep new text
    .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (comment text may contain '<')
    .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
}
41
+
/**
 * Rate how closely the docx-side `before` / `after` context agrees with
 * the text around a candidate position. `verify-anchors` uses the score
 * to separate "several hits, but context singles one out" from "several
 * hits and context does not help".
 *
 * Scoring, applied independently to each side that was supplied:
 *  - +2 for every context word longer than three characters that appears
 *    in the window next to the candidate (case-insensitive);
 *  - +5 when the 30 characters of context nearest the anchor appear
 *    verbatim in that window.
 *
 * @param pos - Start offset of the candidate match in `text`.
 * @param text - Target text being searched.
 * @param before - Context expected before the anchor ('' for none).
 * @param after - Context expected after the anchor ('' for none).
 * @param anchorLen - Length of the matched anchor at `pos`.
 * @returns The combined score; 0 if no context was provided.
 */
export function scoreContextAt(
  pos: number,
  text: string,
  before: string,
  after: string,
  anchorLen: number,
): number {
  // Shared scoring rule for one side: word hits plus a verbatim-edge bonus.
  const tally = (windowLower: string, contextLower: string, edgeSnippet: string): number => {
    const wordPoints = contextLower
      .split(/\s+/)
      .filter(token => token.length > 3)
      .reduce((sum, token) => (windowLower.includes(token) ? sum + 2 : sum), 0);
    return windowLower.includes(edgeSnippet) ? wordPoints + 5 : wordPoints;
  };

  let total = 0;

  if (before) {
    // Window reaches 20 characters further back than the context itself.
    const windowStart = Math.max(0, pos - before.length - 20);
    const lowered = before.toLowerCase();
    total += tally(text.slice(windowStart, pos).toLowerCase(), lowered, lowered.slice(-30));
  }

  if (after) {
    // Window starts right after the anchor and runs 20 characters past the context length.
    const windowStart = pos + anchorLen;
    const lowered = after.toLowerCase();
    total += tally(
      text.slice(windowStart, windowStart + after.length + 20).toLowerCase(),
      lowered,
      lowered.slice(0, 30),
    );
  }

  return total;
}

/**
 * Return every starting index where `needle` occurs in `haystack`,
 * including overlapping occurrences. Empty needles return no occurrences
 * (empty matches are not useful for anchor placement).
 */
export function findAllOccurrences(haystack: string, needle: string): number[] {
  if (!needle) return [];
  const hits: number[] = [];
  // Advance by one character after each hit so overlapping matches are reported too.
  for (let at = haystack.indexOf(needle); at !== -1; at = haystack.indexOf(needle, at + 1)) {
    hits.push(at);
  }
  return hits;
}
95
+
/** Escape every regular-expression metacharacter in `literal`. */
function escapeRegExp(literal: string): string {
  return literal.replace(/[.*+?^${}()|[\]\\]/g, '\\$&');
}

/**
 * Place an anchor purely from its surrounding context.
 *
 * Tries, in order: both sides (last 50 chars of `before`, then the first 50
 * chars of `after` within 500 characters), `before` alone (last occurrence of
 * its final 30 chars), `after` alone (first occurrence of its first 30 chars).
 *
 * @param textLower - Lower-cased target text.
 * @returns A context-based result, or null when context cannot place the anchor.
 */
function locateByContext(textLower: string, before: string, after: string): AnchorSearchResult | null {
  const beforeLower = (before || '').toLowerCase();
  const afterLower = (after || '').toLowerCase();

  if (before && after) {
    const beforeTail = beforeLower.slice(-50);
    const beforeIdx = textLower.indexOf(beforeTail);
    if (beforeIdx !== -1) {
      const searchStart = beforeIdx + beforeTail.length;
      const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
      if (afterIdx !== -1 && afterIdx - searchStart < 500) {
        return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
      }
    }
  }

  if (before) {
    const beforeTail = beforeLower.slice(-30);
    const beforeIdx = textLower.lastIndexOf(beforeTail);
    if (beforeIdx !== -1) {
      return {
        occurrences: [beforeIdx + beforeTail.length],
        matchedAnchor: null,
        strategy: 'context-before',
      };
    }
  }

  if (after) {
    const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
    if (afterIdx !== -1) {
      return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
    }
  }

  return null;
}

/**
 * Find candidate positions for `anchor` in `text`, falling back through
 * progressively looser strategies (whitespace normalization, stripped
 * CriticMarkup, partial-prefix, surrounding context, word splitting).
 * All comparisons are case-insensitive.
 *
 * The returned `strategy` lets callers distinguish a clean direct hit
 * from a fuzzy approximation — useful for drift reporting.
 *
 * Offsets in `occurrences` index `text`, except when the result carries
 * `stripped: true`: those index the CriticMarkup-stripped text.
 *
 * @param anchor - Text the comment was attached to; may be blank.
 * @param text - Target text to search.
 * @param before - Context preceding the anchor in the source document.
 * @param after - Context following the anchor in the source document.
 * @returns The first strategy's hits, or an empty `occurrences` list with
 *   strategy `'empty-anchor'` / `'failed'` when nothing matched.
 */
export function findAnchorInText(
  anchor: string,
  text: string,
  before: string = '',
  after: string = ''
): AnchorSearchResult {
  const textLower = text.toLowerCase();

  // Empty anchor: skip directly to context-based matching
  if (!anchor || anchor.trim().length === 0) {
    const placed = locateByContext(textLower, before, after);
    if (placed !== null) return placed;
    return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
  }

  const anchorLower = anchor.toLowerCase();

  // Strategy 1: direct match
  let occurrences = findAllOccurrences(textLower, anchorLower);
  if (occurrences.length > 0) {
    return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
  }

  // Strategy 2: normalized whitespace.
  // Each whitespace run in the anchor may match any whitespace run in the
  // text. Searching the un-collapsed text keeps the reported offset valid
  // for `text`; an index into a whitespace-collapsed copy would drift by
  // however many characters were collapsed before the match.
  const flexiblePattern = anchor
    .replace(/\s+/g, ' ')
    .toLowerCase()
    .split(' ')
    .map(escapeRegExp)
    .join('\\s+');
  const flexibleHit = new RegExp(flexiblePattern).exec(textLower);
  if (flexibleHit !== null) {
    return { occurrences: [flexibleHit.index], matchedAnchor: anchor, strategy: 'normalized' };
  }

  // Strategy 3: match in stripped CriticMarkup version
  const strippedText = stripCriticMarkup(text);
  const strippedLower = strippedText.toLowerCase();
  occurrences = findAllOccurrences(strippedLower, anchorLower);
  if (occurrences.length > 0) {
    return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
  }

  // Strategy 4: first N words of anchor (long anchors), longest prefix first.
  const words = anchor.split(/\s+/);
  if (words.length > 3) {
    for (let n = Math.min(6, words.length); n >= 3; n--) {
      const prefix = words.slice(0, n).join(' ');
      const prefixLower = prefix.toLowerCase();
      // Prefixes shorter than 15 characters are skipped.
      if (prefixLower.length < 15) continue;

      occurrences = findAllOccurrences(textLower, prefixLower);
      if (occurrences.length > 0) {
        return { occurrences, matchedAnchor: prefix, strategy: 'partial-start' };
      }
      occurrences = findAllOccurrences(strippedLower, prefixLower);
      if (occurrences.length > 0) {
        return {
          occurrences,
          matchedAnchor: prefix,
          strategy: 'partial-start-stripped',
          stripped: true,
        };
      }
    }
  }

  // Strategy 5: context (before/after) only
  const placedByContext = locateByContext(textLower, before, after);
  if (placedByContext !== null) return placedByContext;

  // Strategy 6: split anchor on transition characters
  const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
  for (const sep of splitPatterns) {
    if (anchor.includes(sep)) {
      const parts = anchor.split(sep).filter(p => p.length >= 4);
      for (const part of parts) {
        const partLower = part.toLowerCase();
        occurrences = findAllOccurrences(textLower, partLower);
        // A fragment hitting five or more places is rejected.
        if (occurrences.length > 0 && occurrences.length < 5) {
          return { occurrences, matchedAnchor: part, strategy: 'split-match' };
        }
      }
    }
  }

  return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
}
249
+
/**
 * Coarse quality buckets for an anchor match, as reported per comment by
 * `verify-anchors`.
 */
export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';

/**
 * Classify a strategy as a clean hit, a fuzzy/drifted hit, a context-only
 * placement, or no hit.
 *
 * @param strategy - Strategy reported by the anchor search.
 * @param occurrences - Number of candidate positions the search returned.
 * @returns `'unmatched'` whenever there are zero occurrences, regardless of
 *   strategy; otherwise the bucket the strategy belongs to.
 */
export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
  if (occurrences === 0) return 'unmatched';

  // Whole-anchor hits in the raw text.
  if (strategy === 'direct' || strategy === 'normalized') return 'clean';

  // Placement derived from the surrounding context rather than the anchor.
  if (strategy === 'context-both' || strategy === 'context-before' || strategy === 'context-after') {
    return 'context-only';
  }

  // Anchor found only after stripping markup or by matching part of it.
  const isDrift =
    strategy === 'stripped' ||
    strategy === 'partial-start' ||
    strategy === 'partial-start-stripped' ||
    strategy === 'split-match';

  // 'empty-anchor', 'failed' and anything unrecognized fall through to 'unmatched'.
  return isDrift ? 'drift' : 'unmatched';
}