docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
@@ -1,276 +1,276 @@
1
- /**
2
- * Anchor matching primitives shared between sync (insertion) and
3
- * verify-anchors (drift reporting). The functions are pure: given an
4
- * anchor string and surrounding context, locate candidate positions in
5
- * a target text using progressively looser strategies.
6
- */
7
-
8
- export type AnchorStrategy =
9
- | 'direct'
10
- | 'normalized'
11
- | 'stripped'
12
- | 'partial-start'
13
- | 'partial-start-stripped'
14
- | 'context-both'
15
- | 'context-before'
16
- | 'context-after'
17
- | 'split-match'
18
- | 'empty-anchor'
19
- | 'failed';
20
-
21
- export interface AnchorSearchResult {
22
- occurrences: number[];
23
- matchedAnchor: string | null;
24
- strategy: AnchorStrategy;
25
- stripped?: boolean;
26
- }
27
-
28
- /**
29
- * Strip CriticMarkup so the matcher sees plain prose instead of
30
- * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
31
- * underneath previously imported track changes.
32
- */
33
- export function stripCriticMarkup(text: string): string {
34
- return text
35
- .replace(/\{\+\+([^+]*)\+\+\}/g, '$1') // insertions: keep new text
36
- .replace(/\{--([^-]*)--\}/g, '') // deletions: remove old text
37
- .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2') // substitutions: keep new text
38
- .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (non-greedy; comment text may contain '<')
39
- .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
40
- }
41
-
42
- /**
43
- * Return every starting index where `needle` occurs in `haystack`.
44
- * Empty needles return no occurrences (empty matches are not useful
45
- * for anchor placement).
46
- */
47
- /**
48
- * Score how well the docx-side `before` / `after` context matches the
49
- * surroundings of a candidate position in the target text. Used by
50
- * `verify-anchors` to tell apart "multiple hits but context picks one
51
- * cleanly" (sync will place it correctly) from "multiple hits, context
52
- * doesn't help" (truly ambiguous, needs human placement).
53
- *
54
- * Returns 0 if no context was provided.
55
- */
56
- export function scoreContextAt(
57
- pos: number,
58
- text: string,
59
- before: string,
60
- after: string,
61
- anchorLen: number,
62
- ): number {
63
- let score = 0;
64
- if (before) {
65
- const contextBefore = text.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
66
- const beforeLower = before.toLowerCase();
67
- const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
68
- for (const word of beforeWords) {
69
- if (contextBefore.includes(word)) score += 2;
70
- }
71
- if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
72
- }
73
- if (after) {
74
- const contextAfter = text.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
75
- const afterLower = after.toLowerCase();
76
- const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
77
- for (const word of afterWords) {
78
- if (contextAfter.includes(word)) score += 2;
79
- }
80
- if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
81
- }
82
- return score;
83
- }
84
-
85
- export function findAllOccurrences(haystack: string, needle: string): number[] {
86
- if (!needle || needle.length === 0) return [];
87
- const occurrences: number[] = [];
88
- let idx = 0;
89
- while ((idx = haystack.indexOf(needle, idx)) !== -1) {
90
- occurrences.push(idx);
91
- idx += 1;
92
- }
93
- return occurrences;
94
- }
95
-
96
- /**
97
- * Find candidate positions for `anchor` in `text`, falling back through
98
- * progressively looser strategies (whitespace normalization, stripped
99
- * CriticMarkup, partial-prefix, surrounding context, word splitting).
100
- *
101
- * The returned `strategy` lets callers distinguish a clean direct hit
102
- * from a fuzzy approximation — useful for drift reporting.
103
- */
104
- export function findAnchorInText(
105
- anchor: string,
106
- text: string,
107
- before: string = '',
108
- after: string = ''
109
- ): AnchorSearchResult {
110
- // Empty anchor: skip directly to context-based matching
111
- if (!anchor || anchor.trim().length === 0) {
112
- if (before || after) {
113
- const beforeLower = (before || '').toLowerCase();
114
- const afterLower = (after || '').toLowerCase();
115
- const textLower = text.toLowerCase();
116
-
117
- if (before && after) {
118
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
119
- if (beforeIdx !== -1) {
120
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
121
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
122
- if (afterIdx !== -1 && afterIdx - searchStart < 500) {
123
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
124
- }
125
- }
126
- }
127
-
128
- if (before) {
129
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
130
- if (beforeIdx !== -1) {
131
- return {
132
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
133
- matchedAnchor: null,
134
- strategy: 'context-before',
135
- };
136
- }
137
- }
138
-
139
- if (after) {
140
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
141
- if (afterIdx !== -1) {
142
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
143
- }
144
- }
145
- }
146
- return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
147
- }
148
-
149
- const anchorLower = anchor.toLowerCase();
150
- const textLower = text.toLowerCase();
151
-
152
- // Strategy 1: direct match
153
- let occurrences = findAllOccurrences(textLower, anchorLower);
154
- if (occurrences.length > 0) {
155
- return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
156
- }
157
-
158
- // Strategy 2: normalized whitespace
159
- const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
160
- const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
161
- const idx = normalizedText.indexOf(normalizedAnchor);
162
- if (idx !== -1) {
163
- return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
164
- }
165
-
166
- // Strategy 3: match in stripped CriticMarkup version
167
- const strippedText = stripCriticMarkup(text);
168
- const strippedLower = strippedText.toLowerCase();
169
- occurrences = findAllOccurrences(strippedLower, anchorLower);
170
- if (occurrences.length > 0) {
171
- return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
172
- }
173
-
174
- // Strategy 4: first N words of anchor (long anchors)
175
- const words = anchor.split(/\s+/);
176
- if (words.length > 3) {
177
- for (let n = Math.min(6, words.length); n >= 3; n--) {
178
- const partialAnchor = words.slice(0, n).join(' ').toLowerCase();
179
- if (partialAnchor.length >= 15) {
180
- occurrences = findAllOccurrences(textLower, partialAnchor);
181
- if (occurrences.length > 0) {
182
- return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start' };
183
- }
184
- occurrences = findAllOccurrences(strippedLower, partialAnchor);
185
- if (occurrences.length > 0) {
186
- return {
187
- occurrences,
188
- matchedAnchor: words.slice(0, n).join(' '),
189
- strategy: 'partial-start-stripped',
190
- stripped: true,
191
- };
192
- }
193
- }
194
- }
195
- }
196
-
197
- // Strategy 5: context (before/after) only
198
- if (before || after) {
199
- const beforeLower = before.toLowerCase();
200
- const afterLower = after.toLowerCase();
201
-
202
- if (before && after) {
203
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
204
- if (beforeIdx !== -1) {
205
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
206
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
207
- if (afterIdx !== -1 && afterIdx - searchStart < 500) {
208
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
209
- }
210
- }
211
- }
212
-
213
- if (before) {
214
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
215
- if (beforeIdx !== -1) {
216
- return {
217
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
218
- matchedAnchor: null,
219
- strategy: 'context-before',
220
- };
221
- }
222
- }
223
-
224
- if (after) {
225
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
226
- if (afterIdx !== -1) {
227
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
228
- }
229
- }
230
- }
231
-
232
- // Strategy 6: split anchor on transition characters
233
- const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
234
- for (const sep of splitPatterns) {
235
- if (anchor.includes(sep)) {
236
- const parts = anchor.split(sep).filter(p => p.length >= 4);
237
- for (const part of parts) {
238
- const partLower = part.toLowerCase();
239
- occurrences = findAllOccurrences(textLower, partLower);
240
- if (occurrences.length > 0 && occurrences.length < 5) {
241
- return { occurrences, matchedAnchor: part, strategy: 'split-match' };
242
- }
243
- }
244
- }
245
- }
246
-
247
- return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
248
- }
249
-
250
- /**
251
- * Classify a strategy as a clean hit, a fuzzy/drifted hit, or no hit.
252
- * Used by `verify-anchors` to summarize per-comment match quality.
253
- */
254
- export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';
255
-
256
- export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
257
- if (occurrences === 0) return 'unmatched';
258
- switch (strategy) {
259
- case 'direct':
260
- case 'normalized':
261
- return 'clean';
262
- case 'stripped':
263
- case 'partial-start':
264
- case 'partial-start-stripped':
265
- case 'split-match':
266
- return 'drift';
267
- case 'context-both':
268
- case 'context-before':
269
- case 'context-after':
270
- return 'context-only';
271
- case 'empty-anchor':
272
- case 'failed':
273
- default:
274
- return 'unmatched';
275
- }
276
- }
1
+ /**
2
+ * Anchor matching primitives shared between sync (insertion) and
3
+ * verify-anchors (drift reporting). The functions are pure: given an
4
+ * anchor string and surrounding context, locate candidate positions in
5
+ * a target text using progressively looser strategies.
6
+ */
7
+
/**
 * Label identifying which search strategy produced an anchor search result.
 * `findAnchorInText` tries them in roughly the order listed here and reports
 * the first one that yields a position.
 */
export type AnchorStrategy =
  // The anchor text itself was found.
  | 'direct'
  | 'normalized'
  | 'stripped'
  // Only the leading words of the anchor were found.
  | 'partial-start'
  | 'partial-start-stripped'
  // The anchor was not found; the position comes from surrounding context.
  | 'context-both'
  | 'context-before'
  | 'context-after'
  // A single fragment of the anchor was found.
  | 'split-match'
  // Nothing was located.
  | 'empty-anchor'
  | 'failed';
20
+
/** Outcome of an anchor search, as returned by `findAnchorInText`. */
export interface AnchorSearchResult {
  /** Candidate start indices; empty when nothing was located. */
  occurrences: number[];
  /**
   * The anchor text (or the fragment of it) that was actually matched, or
   * `null` when the position was derived from context alone or not found.
   */
  matchedAnchor: string | null;
  /** Which strategy produced `occurrences`. */
  strategy: AnchorStrategy;
  /**
   * Set to `true` when the match was made against the CriticMarkup-stripped
   * text rather than the text as given.
   */
  stripped?: boolean;
}
27
+
/**
 * Strip CriticMarkup so the matcher sees plain prose instead of
 * `{++inserted++}` / `{--deleted--}` / etc. Used when an anchor lives
 * underneath previously imported track changes.
 *
 * Every span body is matched lazily with `[\s\S]*?` so that it may contain
 * the delimiter character itself (a hyphen inside a deletion, a `+` inside an
 * insertion, a `~` inside a substitution) as well as newlines. A negated
 * character class such as `[^-]*` would refuse to match `{--well-known--}`
 * and leave the markup in place.
 *
 * @param text - Markdown that may contain CriticMarkup annotations.
 * @returns The text with insertions and substitutions resolved to their new
 *   wording, deletions and comments removed, and `[..]{.mark}` highlights
 *   unwrapped.
 */
export function stripCriticMarkup(text: string): string {
  return text
    .replace(/\{\+\+([\s\S]*?)\+\+\}/g, '$1') // insertions: keep new text
    .replace(/\{--([\s\S]*?)--\}/g, '') // deletions: remove old text
    .replace(/\{~~([\s\S]*?)~>([\s\S]*?)~~\}/g, '$2') // substitutions: keep new text
    .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (comment text may contain '<')
    .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
}
41
+
42
+ /**
43
+ * Return every starting index where `needle` occurs in `haystack`.
44
+ * Empty needles return no occurrences (empty matches are not useful
45
+ * for anchor placement).
46
+ */
/**
 * Rate how closely the surroundings of a candidate position resemble the
 * docx-side `before` / `after` context. `verify-anchors` uses the score to
 * separate "several hits, but context singles one out" from "several hits and
 * context does not help".
 *
 * Each side is scored independently, case-insensitively, and the two results
 * are added together:
 * - +2 for every whitespace-separated context word longer than three
 *   characters that occurs in the window beside the candidate;
 * - +5 when the 30 characters of context closest to the anchor (the tail of
 *   `before`, the head of `after`) occur verbatim in that window.
 *
 * The window on each side spans the context length plus 20 characters.
 *
 * @param pos - Start index of the candidate in `text`.
 * @param text - Text being searched.
 * @param before - Context expected ahead of the anchor; empty to skip.
 * @param after - Context expected after the anchor; empty to skip.
 * @param anchorLen - Length of the matched anchor, i.e. how far past `pos`
 *   the trailing window starts.
 * @returns The combined score; 0 if no context was provided.
 */
export function scoreContextAt(
  pos: number,
  text: string,
  before: string,
  after: string,
  anchorLen: number,
): number {
  const longWords = (phrase: string): string[] =>
    phrase.split(/\s+/).filter(token => token.length > 3);

  // Points for one side: 2 per long word present, plus 5 for the verbatim edge.
  const rate = (nearby: string, phrase: string, edge: string): number => {
    const wordPoints = longWords(phrase).filter(token => nearby.includes(token)).length * 2;
    return nearby.includes(edge) ? wordPoints + 5 : wordPoints;
  };

  let total = 0;

  if (before) {
    const wanted = before.toLowerCase();
    const windowStart = Math.max(0, pos - before.length - 20);
    const nearby = text.slice(windowStart, pos).toLowerCase();
    total += rate(nearby, wanted, wanted.slice(-30));
  }

  if (after) {
    const wanted = after.toLowerCase();
    const windowStart = pos + anchorLen;
    const nearby = text.slice(windowStart, windowStart + after.length + 20).toLowerCase();
    total += rate(nearby, wanted, wanted.slice(0, 30));
  }

  return total;
}
84
+
/**
 * Collect every index at which `needle` starts inside `haystack`, in
 * ascending order. The scan resumes one character after each hit, so
 * overlapping matches are all reported.
 *
 * @param haystack - Text to scan.
 * @param needle - Substring to look for.
 * @returns Start indices of all matches; empty when `needle` is empty or
 *   absent (an empty match is of no use for placing an anchor).
 */
export function findAllOccurrences(haystack: string, needle: string): number[] {
  const hits: number[] = [];
  if (!needle) return hits;

  for (
    let at = haystack.indexOf(needle, 0);
    at !== -1;
    at = haystack.indexOf(needle, at + 1)
  ) {
    hits.push(at);
  }
  return hits;
}
95
+
/** Matches one whitespace character; non-global so `test` keeps no state. */
const ANCHOR_WHITESPACE_CHAR = /\s/;

/**
 * Translate an index in the whitespace-collapsed form of `source` (every run
 * of whitespace replaced by one space) back to the corresponding index in
 * `source` itself. An index that lands on a collapsed space maps to the first
 * character of the original whitespace run.
 */
function toOriginalOffset(source: string, collapsedIndex: number): number {
  let sourcePos = 0;
  let collapsedPos = 0;
  while (sourcePos < source.length && collapsedPos < collapsedIndex) {
    if (ANCHOR_WHITESPACE_CHAR.test(source.charAt(sourcePos))) {
      // A whole run of whitespace counts as a single collapsed character.
      do {
        sourcePos++;
      } while (sourcePos < source.length && ANCHOR_WHITESPACE_CHAR.test(source.charAt(sourcePos)));
    } else {
      sourcePos++;
    }
    collapsedPos++;
  }
  return sourcePos;
}

/**
 * Locate an insertion point from surrounding context alone. Tried in order:
 * the tail of `before` followed within 500 characters by the head of `after`;
 * the last occurrence of the tail of `before`; the first occurrence of the
 * head of `after`. Returns `null` when none of them hits.
 */
function locateByContext(textLower: string, before: string, after: string): AnchorSearchResult | null {
  const beforeLower = (before || '').toLowerCase();
  const afterLower = (after || '').toLowerCase();

  if (before && after) {
    const beforeTail = beforeLower.slice(-50);
    const beforeIdx = textLower.indexOf(beforeTail);
    if (beforeIdx !== -1) {
      const searchStart = beforeIdx + beforeTail.length;
      const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
      if (afterIdx !== -1 && afterIdx - searchStart < 500) {
        return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
      }
    }
  }

  if (before) {
    const beforeTail = beforeLower.slice(-30);
    const beforeIdx = textLower.lastIndexOf(beforeTail);
    if (beforeIdx !== -1) {
      return {
        occurrences: [beforeIdx + beforeTail.length],
        matchedAnchor: null,
        strategy: 'context-before',
      };
    }
  }

  if (after) {
    const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
    if (afterIdx !== -1) {
      return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
    }
  }

  return null;
}

/**
 * Find candidate positions for `anchor` in `text`, falling back through
 * progressively looser strategies. All comparisons are case-insensitive.
 *
 * 1. `direct` - literal match; every occurrence is returned.
 * 2. `normalized` - match after collapsing whitespace runs on both sides;
 *    the first hit is returned, mapped back to its index in `text`.
 * 3. `stripped` - literal match against `stripCriticMarkup(text)`.
 * 4. `partial-start` / `partial-start-stripped` - the first 6 down to 3 words
 *    of the anchor (prefixes of at least 15 characters only).
 * 5. `context-both` / `context-before` / `context-after` - position derived
 *    from `before` / `after` alone.
 * 6. `split-match` - a fragment of the anchor (at least 4 characters) that
 *    occurs fewer than 5 times.
 *
 * An empty or whitespace-only anchor skips straight to the context search and
 * reports `empty-anchor` if that fails too.
 *
 * Positions reported with `stripped: true` index into the CriticMarkup-stripped
 * text, not into `text`; all other positions index into `text`.
 *
 * The returned `strategy` lets callers distinguish a clean direct hit
 * from a fuzzy approximation - useful for drift reporting.
 *
 * @param anchor - Text the comment was attached to.
 * @param text - Document text to search.
 * @param before - Text that preceded the anchor in the source document.
 * @param after - Text that followed the anchor in the source document.
 * @returns The candidate positions together with the strategy that found
 *   them; `occurrences` is empty for `empty-anchor` and `failed`.
 */
export function findAnchorInText(
  anchor: string,
  text: string,
  before: string = '',
  after: string = ''
): AnchorSearchResult {
  // Empty anchor: skip directly to context-based matching
  if (!anchor || anchor.trim().length === 0) {
    if (before || after) {
      const contextHit = locateByContext(text.toLowerCase(), before, after);
      if (contextHit) return contextHit;
    }
    return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
  }

  const anchorLower = anchor.toLowerCase();
  const textLower = text.toLowerCase();

  // Strategy 1: direct match
  let occurrences = findAllOccurrences(textLower, anchorLower);
  if (occurrences.length > 0) {
    return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
  }

  // Strategy 2: normalized whitespace
  const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
  const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
  const normalizedIdx = normalizedText.indexOf(normalizedAnchor);
  if (normalizedIdx !== -1) {
    // The hit is an index into the collapsed text; translate it so callers
    // receive a position that is valid in `text`.
    return {
      occurrences: [toOriginalOffset(text, normalizedIdx)],
      matchedAnchor: anchor,
      strategy: 'normalized',
    };
  }

  // Strategy 3: match in stripped CriticMarkup version
  const strippedLower = stripCriticMarkup(text).toLowerCase();
  occurrences = findAllOccurrences(strippedLower, anchorLower);
  if (occurrences.length > 0) {
    return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
  }

  // Strategy 4: first N words of anchor (long anchors), longest prefix first
  const words = anchor.split(/\s+/);
  if (words.length > 3) {
    for (let n = Math.min(6, words.length); n >= 3; n--) {
      const prefix = words.slice(0, n).join(' ');
      const prefixLower = prefix.toLowerCase();
      if (prefixLower.length < 15) continue; // too short to be distinctive

      occurrences = findAllOccurrences(textLower, prefixLower);
      if (occurrences.length > 0) {
        return { occurrences, matchedAnchor: prefix, strategy: 'partial-start' };
      }
      occurrences = findAllOccurrences(strippedLower, prefixLower);
      if (occurrences.length > 0) {
        return {
          occurrences,
          matchedAnchor: prefix,
          strategy: 'partial-start-stripped',
          stripped: true,
        };
      }
    }
  }

  // Strategy 5: context (before/after) only
  if (before || after) {
    const contextHit = locateByContext(textLower, before, after);
    if (contextHit) return contextHit;
  }

  // Strategy 6: split anchor on transition characters
  const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
  for (const sep of splitPatterns) {
    if (!anchor.includes(sep)) continue;
    const parts = anchor.split(sep).filter(p => p.length >= 4);
    for (const part of parts) {
      occurrences = findAllOccurrences(textLower, part.toLowerCase());
      // Accept only fragments rare enough to be a meaningful hint.
      if (occurrences.length > 0 && occurrences.length < 5) {
        return { occurrences, matchedAnchor: part, strategy: 'split-match' };
      }
    }
  }

  return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
}
249
+
/**
 * Coarse match quality derived from an anchor search, used by
 * `verify-anchors` to summarize per-comment results.
 */
export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';

/**
 * Reduce a search strategy to a match quality.
 *
 * @param strategy - Strategy reported by the anchor search.
 * @param occurrences - Number of candidate positions that search returned.
 * @returns `'unmatched'` when there are no occurrences or the strategy is a
 *   miss (`empty-anchor`, `failed`, or anything unrecognized); `'clean'` for
 *   `direct` / `normalized`; `'drift'` for stripped, partial and split
 *   matches; `'context-only'` for the three context strategies.
 */
export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
  if (occurrences === 0) return 'unmatched';

  if (strategy === 'direct' || strategy === 'normalized') {
    return 'clean';
  }

  if (
    strategy === 'stripped' ||
    strategy === 'partial-start' ||
    strategy === 'partial-start-stripped' ||
    strategy === 'split-match'
  ) {
    return 'drift';
  }

  if (
    strategy === 'context-both' ||
    strategy === 'context-before' ||
    strategy === 'context-after'
  ) {
    return 'context-only';
  }

  // 'empty-anchor', 'failed', and any value outside the union.
  return 'unmatched';
}