docrev 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +411 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +38 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +68 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/pdf-comments.js +44 -44
  43. package/dist/lib/plugins.js +57 -57
  44. package/dist/lib/pptx-themes.js +115 -115
  45. package/dist/lib/spelling.js +2 -2
  46. package/dist/lib/templates.js +387 -387
  47. package/dist/lib/themes.js +51 -51
  48. package/eslint.config.js +27 -27
  49. package/lib/anchor-match.ts +276 -276
  50. package/lib/annotations.ts +644 -644
  51. package/lib/build.ts +1300 -1251
  52. package/lib/citations.ts +160 -160
  53. package/lib/commands/build.ts +833 -801
  54. package/lib/commands/citations.ts +515 -515
  55. package/lib/commands/comments.ts +1050 -1050
  56. package/lib/commands/context.ts +174 -174
  57. package/lib/commands/core.ts +309 -309
  58. package/lib/commands/doi.ts +435 -435
  59. package/lib/commands/file-ops.ts +372 -372
  60. package/lib/commands/history.ts +320 -320
  61. package/lib/commands/index.ts +87 -87
  62. package/lib/commands/init.ts +259 -259
  63. package/lib/commands/merge-resolve.ts +378 -378
  64. package/lib/commands/preview.ts +178 -178
  65. package/lib/commands/project-info.ts +244 -244
  66. package/lib/commands/quality.ts +517 -517
  67. package/lib/commands/response.ts +454 -454
  68. package/lib/commands/section-boundaries.ts +82 -82
  69. package/lib/commands/sections.ts +451 -451
  70. package/lib/commands/sync.ts +706 -706
  71. package/lib/commands/text-ops.ts +449 -449
  72. package/lib/commands/utilities.ts +448 -448
  73. package/lib/commands/verify-anchors.ts +272 -272
  74. package/lib/commands/word-tools.ts +340 -340
  75. package/lib/comment-realign.ts +517 -517
  76. package/lib/config.ts +84 -84
  77. package/lib/crossref.ts +781 -781
  78. package/lib/csl.ts +191 -191
  79. package/lib/dependencies.ts +98 -98
  80. package/lib/diff-engine.ts +465 -465
  81. package/lib/doi-cache.ts +115 -115
  82. package/lib/doi.ts +897 -897
  83. package/lib/equations.ts +506 -506
  84. package/lib/errors.ts +346 -346
  85. package/lib/format.ts +541 -541
  86. package/lib/git.ts +326 -326
  87. package/lib/grammar.ts +303 -303
  88. package/lib/image-registry.ts +180 -180
  89. package/lib/import.ts +911 -911
  90. package/lib/journals.ts +543 -543
  91. package/lib/merge.ts +633 -633
  92. package/lib/orcid.ts +144 -144
  93. package/lib/pdf-comments.ts +263 -263
  94. package/lib/pdf-import.ts +524 -524
  95. package/lib/plugins.ts +362 -362
  96. package/lib/postprocess.ts +188 -188
  97. package/lib/pptx-color-filter.lua +37 -37
  98. package/lib/pptx-template.ts +469 -469
  99. package/lib/pptx-themes.ts +483 -483
  100. package/lib/protect-restore.ts +520 -520
  101. package/lib/rate-limiter.ts +94 -94
  102. package/lib/response.ts +197 -197
  103. package/lib/restore-references.ts +240 -240
  104. package/lib/review.ts +327 -327
  105. package/lib/schema.ts +417 -417
  106. package/lib/scientific-words.ts +73 -73
  107. package/lib/sections.ts +335 -335
  108. package/lib/slides.ts +756 -756
  109. package/lib/spelling.ts +334 -334
  110. package/lib/templates.ts +526 -526
  111. package/lib/themes.ts +742 -742
  112. package/lib/trackchanges.ts +247 -247
  113. package/lib/tui.ts +450 -450
  114. package/lib/types.ts +550 -550
  115. package/lib/undo.ts +250 -250
  116. package/lib/utils.ts +69 -69
  117. package/lib/variables.ts +179 -179
  118. package/lib/word-extraction.ts +806 -806
  119. package/lib/word.ts +643 -643
  120. package/lib/wordcomments.ts +817 -817
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +28 -28
  123. package/skill/REFERENCE.md +473 -431
  124. package/skill/SKILL.md +274 -258
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
@@ -1,276 +1,276 @@
1
- /**
2
- * Anchor matching primitives shared between sync (insertion) and
3
- * verify-anchors (drift reporting). The functions are pure: given an
4
- * anchor string and surrounding context, locate candidate positions in
5
- * a target text using progressively looser strategies.
6
- */
7
-
8
- export type AnchorStrategy =
9
- | 'direct'
10
- | 'normalized'
11
- | 'stripped'
12
- | 'partial-start'
13
- | 'partial-start-stripped'
14
- | 'context-both'
15
- | 'context-before'
16
- | 'context-after'
17
- | 'split-match'
18
- | 'empty-anchor'
19
- | 'failed';
20
-
21
- export interface AnchorSearchResult {
22
- occurrences: number[];
23
- matchedAnchor: string | null;
24
- strategy: AnchorStrategy;
25
- stripped?: boolean;
26
- }
27
-
28
- /**
29
- * Strip CriticMarkup so the matcher sees plain prose instead of
30
- * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
31
- * underneath previously imported track changes.
32
- */
33
- export function stripCriticMarkup(text: string): string {
34
- return text
35
- .replace(/\{\+\+([^+]*)\+\+\}/g, '$1') // insertions: keep new text
36
- .replace(/\{--([^-]*)--\}/g, '') // deletions: remove old text
37
- .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2') // substitutions: keep new text
38
- .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (non-greedy; comment text may contain '<')
39
- .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
40
- }
41
-
42
- /**
43
- * Return every starting index where `needle` occurs in `haystack`.
44
- * Empty needles return no occurrences (empty matches are not useful
45
- * for anchor placement).
46
- */
47
- /**
48
- * Score how well the docx-side `before` / `after` context matches the
49
- * surroundings of a candidate position in the target text. Used by
50
- * `verify-anchors` to tell apart "multiple hits but context picks one
51
- * cleanly" (sync will place it correctly) from "multiple hits, context
52
- * doesn't help" (truly ambiguous, needs human placement).
53
- *
54
- * Returns 0 if no context was provided.
55
- */
56
- export function scoreContextAt(
57
- pos: number,
58
- text: string,
59
- before: string,
60
- after: string,
61
- anchorLen: number,
62
- ): number {
63
- let score = 0;
64
- if (before) {
65
- const contextBefore = text.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
66
- const beforeLower = before.toLowerCase();
67
- const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
68
- for (const word of beforeWords) {
69
- if (contextBefore.includes(word)) score += 2;
70
- }
71
- if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
72
- }
73
- if (after) {
74
- const contextAfter = text.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
75
- const afterLower = after.toLowerCase();
76
- const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
77
- for (const word of afterWords) {
78
- if (contextAfter.includes(word)) score += 2;
79
- }
80
- if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
81
- }
82
- return score;
83
- }
84
-
85
- export function findAllOccurrences(haystack: string, needle: string): number[] {
86
- if (!needle || needle.length === 0) return [];
87
- const occurrences: number[] = [];
88
- let idx = 0;
89
- while ((idx = haystack.indexOf(needle, idx)) !== -1) {
90
- occurrences.push(idx);
91
- idx += 1;
92
- }
93
- return occurrences;
94
- }
95
-
96
- /**
97
- * Find candidate positions for `anchor` in `text`, falling back through
98
- * progressively looser strategies (whitespace normalization, stripped
99
- * CriticMarkup, partial-prefix, surrounding context, word splitting).
100
- *
101
- * The returned `strategy` lets callers distinguish a clean direct hit
102
- * from a fuzzy approximation — useful for drift reporting.
103
- */
104
- export function findAnchorInText(
105
- anchor: string,
106
- text: string,
107
- before: string = '',
108
- after: string = ''
109
- ): AnchorSearchResult {
110
- // Empty anchor: skip directly to context-based matching
111
- if (!anchor || anchor.trim().length === 0) {
112
- if (before || after) {
113
- const beforeLower = (before || '').toLowerCase();
114
- const afterLower = (after || '').toLowerCase();
115
- const textLower = text.toLowerCase();
116
-
117
- if (before && after) {
118
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
119
- if (beforeIdx !== -1) {
120
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
121
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
122
- if (afterIdx !== -1 && afterIdx - searchStart < 500) {
123
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
124
- }
125
- }
126
- }
127
-
128
- if (before) {
129
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
130
- if (beforeIdx !== -1) {
131
- return {
132
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
133
- matchedAnchor: null,
134
- strategy: 'context-before',
135
- };
136
- }
137
- }
138
-
139
- if (after) {
140
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
141
- if (afterIdx !== -1) {
142
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
143
- }
144
- }
145
- }
146
- return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
147
- }
148
-
149
- const anchorLower = anchor.toLowerCase();
150
- const textLower = text.toLowerCase();
151
-
152
- // Strategy 1: direct match
153
- let occurrences = findAllOccurrences(textLower, anchorLower);
154
- if (occurrences.length > 0) {
155
- return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
156
- }
157
-
158
- // Strategy 2: normalized whitespace
159
- const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
160
- const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
161
- const idx = normalizedText.indexOf(normalizedAnchor);
162
- if (idx !== -1) {
163
- return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
164
- }
165
-
166
- // Strategy 3: match in stripped CriticMarkup version
167
- const strippedText = stripCriticMarkup(text);
168
- const strippedLower = strippedText.toLowerCase();
169
- occurrences = findAllOccurrences(strippedLower, anchorLower);
170
- if (occurrences.length > 0) {
171
- return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
172
- }
173
-
174
- // Strategy 4: first N words of anchor (long anchors)
175
- const words = anchor.split(/\s+/);
176
- if (words.length > 3) {
177
- for (let n = Math.min(6, words.length); n >= 3; n--) {
178
- const partialAnchor = words.slice(0, n).join(' ').toLowerCase();
179
- if (partialAnchor.length >= 15) {
180
- occurrences = findAllOccurrences(textLower, partialAnchor);
181
- if (occurrences.length > 0) {
182
- return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start' };
183
- }
184
- occurrences = findAllOccurrences(strippedLower, partialAnchor);
185
- if (occurrences.length > 0) {
186
- return {
187
- occurrences,
188
- matchedAnchor: words.slice(0, n).join(' '),
189
- strategy: 'partial-start-stripped',
190
- stripped: true,
191
- };
192
- }
193
- }
194
- }
195
- }
196
-
197
- // Strategy 5: context (before/after) only
198
- if (before || after) {
199
- const beforeLower = before.toLowerCase();
200
- const afterLower = after.toLowerCase();
201
-
202
- if (before && after) {
203
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
204
- if (beforeIdx !== -1) {
205
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
206
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
207
- if (afterIdx !== -1 && afterIdx - searchStart < 500) {
208
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
209
- }
210
- }
211
- }
212
-
213
- if (before) {
214
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
215
- if (beforeIdx !== -1) {
216
- return {
217
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
218
- matchedAnchor: null,
219
- strategy: 'context-before',
220
- };
221
- }
222
- }
223
-
224
- if (after) {
225
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
226
- if (afterIdx !== -1) {
227
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
228
- }
229
- }
230
- }
231
-
232
- // Strategy 6: split anchor on transition characters
233
- const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
234
- for (const sep of splitPatterns) {
235
- if (anchor.includes(sep)) {
236
- const parts = anchor.split(sep).filter(p => p.length >= 4);
237
- for (const part of parts) {
238
- const partLower = part.toLowerCase();
239
- occurrences = findAllOccurrences(textLower, partLower);
240
- if (occurrences.length > 0 && occurrences.length < 5) {
241
- return { occurrences, matchedAnchor: part, strategy: 'split-match' };
242
- }
243
- }
244
- }
245
- }
246
-
247
- return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
248
- }
249
-
250
- /**
251
- * Classify a strategy as a clean hit, a fuzzy/drifted hit, or no hit.
252
- * Used by `verify-anchors` to summarize per-comment match quality.
253
- */
254
- export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';
255
-
256
- export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
257
- if (occurrences === 0) return 'unmatched';
258
- switch (strategy) {
259
- case 'direct':
260
- case 'normalized':
261
- return 'clean';
262
- case 'stripped':
263
- case 'partial-start':
264
- case 'partial-start-stripped':
265
- case 'split-match':
266
- return 'drift';
267
- case 'context-both':
268
- case 'context-before':
269
- case 'context-after':
270
- return 'context-only';
271
- case 'empty-anchor':
272
- case 'failed':
273
- default:
274
- return 'unmatched';
275
- }
276
- }
1
+ /**
2
+ * Anchor matching primitives shared between sync (insertion) and
3
+ * verify-anchors (drift reporting). The functions are pure: given an
4
+ * anchor string and surrounding context, locate candidate positions in
5
+ * a target text using progressively looser strategies.
6
+ */
7
+
8
+ export type AnchorStrategy =
9
+ | 'direct'
10
+ | 'normalized'
11
+ | 'stripped'
12
+ | 'partial-start'
13
+ | 'partial-start-stripped'
14
+ | 'context-both'
15
+ | 'context-before'
16
+ | 'context-after'
17
+ | 'split-match'
18
+ | 'empty-anchor'
19
+ | 'failed';
20
+
21
+ export interface AnchorSearchResult {
22
+ occurrences: number[];
23
+ matchedAnchor: string | null;
24
+ strategy: AnchorStrategy;
25
+ stripped?: boolean;
26
+ }
27
+
28
+ /**
29
+ * Strip CriticMarkup so the matcher sees plain prose instead of
30
+ * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
31
+ * underneath previously imported track changes.
32
+ */
33
+ export function stripCriticMarkup(text: string): string {
34
+ return text
35
+ .replace(/\{\+\+([^+]*)\+\+\}/g, '$1') // insertions: keep new text
36
+ .replace(/\{--([^-]*)--\}/g, '') // deletions: remove old text
37
+ .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2') // substitutions: keep new text
38
+ .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (non-greedy; comment text may contain '<')
39
+ .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
40
+ }
41
+
42
+ /**
43
+ * Return every starting index where `needle` occurs in `haystack`.
44
+ * Empty needles return no occurrences (empty matches are not useful
45
+ * for anchor placement).
46
+ */
47
+ /**
48
+ * Score how well the docx-side `before` / `after` context matches the
49
+ * surroundings of a candidate position in the target text. Used by
50
+ * `verify-anchors` to tell apart "multiple hits but context picks one
51
+ * cleanly" (sync will place it correctly) from "multiple hits, context
52
+ * doesn't help" (truly ambiguous, needs human placement).
53
+ *
54
+ * Returns 0 if no context was provided.
55
+ */
56
+ export function scoreContextAt(
57
+ pos: number,
58
+ text: string,
59
+ before: string,
60
+ after: string,
61
+ anchorLen: number,
62
+ ): number {
63
+ let score = 0;
64
+ if (before) {
65
+ const contextBefore = text.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
66
+ const beforeLower = before.toLowerCase();
67
+ const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
68
+ for (const word of beforeWords) {
69
+ if (contextBefore.includes(word)) score += 2;
70
+ }
71
+ if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
72
+ }
73
+ if (after) {
74
+ const contextAfter = text.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
75
+ const afterLower = after.toLowerCase();
76
+ const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
77
+ for (const word of afterWords) {
78
+ if (contextAfter.includes(word)) score += 2;
79
+ }
80
+ if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
81
+ }
82
+ return score;
83
+ }
84
+
85
+ export function findAllOccurrences(haystack: string, needle: string): number[] {
86
+ if (!needle || needle.length === 0) return [];
87
+ const occurrences: number[] = [];
88
+ let idx = 0;
89
+ while ((idx = haystack.indexOf(needle, idx)) !== -1) {
90
+ occurrences.push(idx);
91
+ idx += 1;
92
+ }
93
+ return occurrences;
94
+ }
95
+
96
+ /**
97
+ * Find candidate positions for `anchor` in `text`, falling back through
98
+ * progressively looser strategies (whitespace normalization, stripped
99
+ * CriticMarkup, partial-prefix, surrounding context, word splitting).
100
+ *
101
+ * The returned `strategy` lets callers distinguish a clean direct hit
102
+ * from a fuzzy approximation — useful for drift reporting.
103
+ */
104
+ export function findAnchorInText(
105
+ anchor: string,
106
+ text: string,
107
+ before: string = '',
108
+ after: string = ''
109
+ ): AnchorSearchResult {
110
+ // Empty anchor: skip directly to context-based matching
111
+ if (!anchor || anchor.trim().length === 0) {
112
+ if (before || after) {
113
+ const beforeLower = (before || '').toLowerCase();
114
+ const afterLower = (after || '').toLowerCase();
115
+ const textLower = text.toLowerCase();
116
+
117
+ if (before && after) {
118
+ const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
119
+ if (beforeIdx !== -1) {
120
+ const searchStart = beforeIdx + beforeLower.slice(-50).length;
121
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
122
+ if (afterIdx !== -1 && afterIdx - searchStart < 500) {
123
+ return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
124
+ }
125
+ }
126
+ }
127
+
128
+ if (before) {
129
+ const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
130
+ if (beforeIdx !== -1) {
131
+ return {
132
+ occurrences: [beforeIdx + beforeLower.slice(-30).length],
133
+ matchedAnchor: null,
134
+ strategy: 'context-before',
135
+ };
136
+ }
137
+ }
138
+
139
+ if (after) {
140
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
141
+ if (afterIdx !== -1) {
142
+ return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
143
+ }
144
+ }
145
+ }
146
+ return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
147
+ }
148
+
149
+ const anchorLower = anchor.toLowerCase();
150
+ const textLower = text.toLowerCase();
151
+
152
+ // Strategy 1: direct match
153
+ let occurrences = findAllOccurrences(textLower, anchorLower);
154
+ if (occurrences.length > 0) {
155
+ return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
156
+ }
157
+
158
+ // Strategy 2: normalized whitespace
159
+ const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
160
+ const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
161
+ const idx = normalizedText.indexOf(normalizedAnchor);
162
+ if (idx !== -1) {
163
+ return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
164
+ }
165
+
166
+ // Strategy 3: match in stripped CriticMarkup version
167
+ const strippedText = stripCriticMarkup(text);
168
+ const strippedLower = strippedText.toLowerCase();
169
+ occurrences = findAllOccurrences(strippedLower, anchorLower);
170
+ if (occurrences.length > 0) {
171
+ return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
172
+ }
173
+
174
+ // Strategy 4: first N words of anchor (long anchors)
175
+ const words = anchor.split(/\s+/);
176
+ if (words.length > 3) {
177
+ for (let n = Math.min(6, words.length); n >= 3; n--) {
178
+ const partialAnchor = words.slice(0, n).join(' ').toLowerCase();
179
+ if (partialAnchor.length >= 15) {
180
+ occurrences = findAllOccurrences(textLower, partialAnchor);
181
+ if (occurrences.length > 0) {
182
+ return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start' };
183
+ }
184
+ occurrences = findAllOccurrences(strippedLower, partialAnchor);
185
+ if (occurrences.length > 0) {
186
+ return {
187
+ occurrences,
188
+ matchedAnchor: words.slice(0, n).join(' '),
189
+ strategy: 'partial-start-stripped',
190
+ stripped: true,
191
+ };
192
+ }
193
+ }
194
+ }
195
+ }
196
+
197
+ // Strategy 5: context (before/after) only
198
+ if (before || after) {
199
+ const beforeLower = before.toLowerCase();
200
+ const afterLower = after.toLowerCase();
201
+
202
+ if (before && after) {
203
+ const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
204
+ if (beforeIdx !== -1) {
205
+ const searchStart = beforeIdx + beforeLower.slice(-50).length;
206
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
207
+ if (afterIdx !== -1 && afterIdx - searchStart < 500) {
208
+ return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
209
+ }
210
+ }
211
+ }
212
+
213
+ if (before) {
214
+ const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
215
+ if (beforeIdx !== -1) {
216
+ return {
217
+ occurrences: [beforeIdx + beforeLower.slice(-30).length],
218
+ matchedAnchor: null,
219
+ strategy: 'context-before',
220
+ };
221
+ }
222
+ }
223
+
224
+ if (after) {
225
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
226
+ if (afterIdx !== -1) {
227
+ return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
228
+ }
229
+ }
230
+ }
231
+
232
+ // Strategy 6: split anchor on transition characters
233
+ const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
234
+ for (const sep of splitPatterns) {
235
+ if (anchor.includes(sep)) {
236
+ const parts = anchor.split(sep).filter(p => p.length >= 4);
237
+ for (const part of parts) {
238
+ const partLower = part.toLowerCase();
239
+ occurrences = findAllOccurrences(textLower, partLower);
240
+ if (occurrences.length > 0 && occurrences.length < 5) {
241
+ return { occurrences, matchedAnchor: part, strategy: 'split-match' };
242
+ }
243
+ }
244
+ }
245
+ }
246
+
247
+ return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
248
+ }
249
+
250
+ /**
251
+ * Classify a strategy as a clean hit, a fuzzy/drifted hit, or no hit.
252
+ * Used by `verify-anchors` to summarize per-comment match quality.
253
+ */
254
+ export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';
255
+
256
+ export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
257
+ if (occurrences === 0) return 'unmatched';
258
+ switch (strategy) {
259
+ case 'direct':
260
+ case 'normalized':
261
+ return 'clean';
262
+ case 'stripped':
263
+ case 'partial-start':
264
+ case 'partial-start-stripped':
265
+ case 'split-match':
266
+ return 'drift';
267
+ case 'context-both':
268
+ case 'context-before':
269
+ case 'context-after':
270
+ return 'context-only';
271
+ case 'empty-anchor':
272
+ case 'failed':
273
+ default:
274
+ return 'unmatched';
275
+ }
276
+ }