docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
package/eslint.config.js CHANGED
@@ -1,27 +1,27 @@
1
- export default [
2
- {
3
- languageOptions: {
4
- ecmaVersion: 2022,
5
- sourceType: 'module',
6
- globals: {
7
- console: 'readonly',
8
- process: 'readonly',
9
- Buffer: 'readonly',
10
- URL: 'readonly',
11
- setTimeout: 'readonly',
12
- clearTimeout: 'readonly',
13
- setInterval: 'readonly',
14
- clearInterval: 'readonly',
15
- },
16
- },
17
- rules: {
18
- 'no-unused-vars': ['warn', { argsIgnorePattern: '^_', varsIgnorePattern: '^_' }],
19
- 'no-constant-condition': 'warn',
20
- 'no-empty': ['warn', { allowEmptyCatch: true }],
21
- 'prefer-const': 'warn',
22
- 'no-var': 'error',
23
- 'eqeqeq': ['warn', 'smart'],
24
- },
25
- ignores: ['node_modules/**', 'test/**'],
26
- },
27
- ];
1
+ export default [
2
+ {
3
+ languageOptions: {
4
+ ecmaVersion: 2022,
5
+ sourceType: 'module',
6
+ globals: {
7
+ console: 'readonly',
8
+ process: 'readonly',
9
+ Buffer: 'readonly',
10
+ URL: 'readonly',
11
+ setTimeout: 'readonly',
12
+ clearTimeout: 'readonly',
13
+ setInterval: 'readonly',
14
+ clearInterval: 'readonly',
15
+ },
16
+ },
17
+ rules: {
18
+ 'no-unused-vars': ['warn', { argsIgnorePattern: '^_', varsIgnorePattern: '^_' }],
19
+ 'no-constant-condition': 'warn',
20
+ 'no-empty': ['warn', { allowEmptyCatch: true }],
21
+ 'prefer-const': 'warn',
22
+ 'no-var': 'error',
23
+ 'eqeqeq': ['warn', 'smart'],
24
+ },
25
+ ignores: ['node_modules/**', 'test/**'],
26
+ },
27
+ ];
package/lib/anchor-match.ts CHANGED
@@ -1,276 +1,276 @@
1
- /**
2
- * Anchor matching primitives shared between sync (insertion) and
3
- * verify-anchors (drift reporting). The functions are pure: given an
4
- * anchor string and surrounding context, locate candidate positions in
5
- * a target text using progressively looser strategies.
6
- */
7
-
8
- export type AnchorStrategy =
9
- | 'direct'
10
- | 'normalized'
11
- | 'stripped'
12
- | 'partial-start'
13
- | 'partial-start-stripped'
14
- | 'context-both'
15
- | 'context-before'
16
- | 'context-after'
17
- | 'split-match'
18
- | 'empty-anchor'
19
- | 'failed';
20
-
21
- export interface AnchorSearchResult {
22
- occurrences: number[];
23
- matchedAnchor: string | null;
24
- strategy: AnchorStrategy;
25
- stripped?: boolean;
26
- }
27
-
28
- /**
29
- * Strip CriticMarkup so the matcher sees plain prose instead of
30
- * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
31
- * underneath previously imported track changes.
32
- */
33
- export function stripCriticMarkup(text: string): string {
34
- return text
35
- .replace(/\{\+\+([^+]*)\+\+\}/g, '$1') // insertions: keep new text
36
- .replace(/\{--([^-]*)--\}/g, '') // deletions: remove old text
37
- .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2') // substitutions: keep new text
38
- .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (non-greedy; comment text may contain '<')
39
- .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
40
- }
41
-
42
- /**
43
- * Return every starting index where `needle` occurs in `haystack`.
44
- * Empty needles return no occurrences (empty matches are not useful
45
- * for anchor placement).
46
- */
47
- /**
48
- * Score how well the docx-side `before` / `after` context matches the
49
- * surroundings of a candidate position in the target text. Used by
50
- * `verify-anchors` to tell apart "multiple hits but context picks one
51
- * cleanly" (sync will place it correctly) from "multiple hits, context
52
- * doesn't help" (truly ambiguous, needs human placement).
53
- *
54
- * Returns 0 if no context was provided.
55
- */
56
- export function scoreContextAt(
57
- pos: number,
58
- text: string,
59
- before: string,
60
- after: string,
61
- anchorLen: number,
62
- ): number {
63
- let score = 0;
64
- if (before) {
65
- const contextBefore = text.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
66
- const beforeLower = before.toLowerCase();
67
- const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
68
- for (const word of beforeWords) {
69
- if (contextBefore.includes(word)) score += 2;
70
- }
71
- if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
72
- }
73
- if (after) {
74
- const contextAfter = text.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
75
- const afterLower = after.toLowerCase();
76
- const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
77
- for (const word of afterWords) {
78
- if (contextAfter.includes(word)) score += 2;
79
- }
80
- if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
81
- }
82
- return score;
83
- }
84
-
85
- export function findAllOccurrences(haystack: string, needle: string): number[] {
86
- if (!needle || needle.length === 0) return [];
87
- const occurrences: number[] = [];
88
- let idx = 0;
89
- while ((idx = haystack.indexOf(needle, idx)) !== -1) {
90
- occurrences.push(idx);
91
- idx += 1;
92
- }
93
- return occurrences;
94
- }
95
-
96
- /**
97
- * Find candidate positions for `anchor` in `text`, falling back through
98
- * progressively looser strategies (whitespace normalization, stripped
99
- * CriticMarkup, partial-prefix, surrounding context, word splitting).
100
- *
101
- * The returned `strategy` lets callers distinguish a clean direct hit
102
- * from a fuzzy approximation — useful for drift reporting.
103
- */
104
- export function findAnchorInText(
105
- anchor: string,
106
- text: string,
107
- before: string = '',
108
- after: string = ''
109
- ): AnchorSearchResult {
110
- // Empty anchor: skip directly to context-based matching
111
- if (!anchor || anchor.trim().length === 0) {
112
- if (before || after) {
113
- const beforeLower = (before || '').toLowerCase();
114
- const afterLower = (after || '').toLowerCase();
115
- const textLower = text.toLowerCase();
116
-
117
- if (before && after) {
118
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
119
- if (beforeIdx !== -1) {
120
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
121
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
122
- if (afterIdx !== -1 && afterIdx - searchStart < 500) {
123
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
124
- }
125
- }
126
- }
127
-
128
- if (before) {
129
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
130
- if (beforeIdx !== -1) {
131
- return {
132
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
133
- matchedAnchor: null,
134
- strategy: 'context-before',
135
- };
136
- }
137
- }
138
-
139
- if (after) {
140
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
141
- if (afterIdx !== -1) {
142
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
143
- }
144
- }
145
- }
146
- return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
147
- }
148
-
149
- const anchorLower = anchor.toLowerCase();
150
- const textLower = text.toLowerCase();
151
-
152
- // Strategy 1: direct match
153
- let occurrences = findAllOccurrences(textLower, anchorLower);
154
- if (occurrences.length > 0) {
155
- return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
156
- }
157
-
158
- // Strategy 2: normalized whitespace
159
- const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
160
- const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
161
- const idx = normalizedText.indexOf(normalizedAnchor);
162
- if (idx !== -1) {
163
- return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
164
- }
165
-
166
- // Strategy 3: match in stripped CriticMarkup version
167
- const strippedText = stripCriticMarkup(text);
168
- const strippedLower = strippedText.toLowerCase();
169
- occurrences = findAllOccurrences(strippedLower, anchorLower);
170
- if (occurrences.length > 0) {
171
- return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
172
- }
173
-
174
- // Strategy 4: first N words of anchor (long anchors)
175
- const words = anchor.split(/\s+/);
176
- if (words.length > 3) {
177
- for (let n = Math.min(6, words.length); n >= 3; n--) {
178
- const partialAnchor = words.slice(0, n).join(' ').toLowerCase();
179
- if (partialAnchor.length >= 15) {
180
- occurrences = findAllOccurrences(textLower, partialAnchor);
181
- if (occurrences.length > 0) {
182
- return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start' };
183
- }
184
- occurrences = findAllOccurrences(strippedLower, partialAnchor);
185
- if (occurrences.length > 0) {
186
- return {
187
- occurrences,
188
- matchedAnchor: words.slice(0, n).join(' '),
189
- strategy: 'partial-start-stripped',
190
- stripped: true,
191
- };
192
- }
193
- }
194
- }
195
- }
196
-
197
- // Strategy 5: context (before/after) only
198
- if (before || after) {
199
- const beforeLower = before.toLowerCase();
200
- const afterLower = after.toLowerCase();
201
-
202
- if (before && after) {
203
- const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
204
- if (beforeIdx !== -1) {
205
- const searchStart = beforeIdx + beforeLower.slice(-50).length;
206
- const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
207
- if (afterIdx !== -1 && afterIdx - searchStart < 500) {
208
- return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
209
- }
210
- }
211
- }
212
-
213
- if (before) {
214
- const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
215
- if (beforeIdx !== -1) {
216
- return {
217
- occurrences: [beforeIdx + beforeLower.slice(-30).length],
218
- matchedAnchor: null,
219
- strategy: 'context-before',
220
- };
221
- }
222
- }
223
-
224
- if (after) {
225
- const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
226
- if (afterIdx !== -1) {
227
- return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
228
- }
229
- }
230
- }
231
-
232
- // Strategy 6: split anchor on transition characters
233
- const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
234
- for (const sep of splitPatterns) {
235
- if (anchor.includes(sep)) {
236
- const parts = anchor.split(sep).filter(p => p.length >= 4);
237
- for (const part of parts) {
238
- const partLower = part.toLowerCase();
239
- occurrences = findAllOccurrences(textLower, partLower);
240
- if (occurrences.length > 0 && occurrences.length < 5) {
241
- return { occurrences, matchedAnchor: part, strategy: 'split-match' };
242
- }
243
- }
244
- }
245
- }
246
-
247
- return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
248
- }
249
-
250
- /**
251
- * Classify a strategy as a clean hit, a fuzzy/drifted hit, or no hit.
252
- * Used by `verify-anchors` to summarize per-comment match quality.
253
- */
254
- export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';
255
-
256
- export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
257
- if (occurrences === 0) return 'unmatched';
258
- switch (strategy) {
259
- case 'direct':
260
- case 'normalized':
261
- return 'clean';
262
- case 'stripped':
263
- case 'partial-start':
264
- case 'partial-start-stripped':
265
- case 'split-match':
266
- return 'drift';
267
- case 'context-both':
268
- case 'context-before':
269
- case 'context-after':
270
- return 'context-only';
271
- case 'empty-anchor':
272
- case 'failed':
273
- default:
274
- return 'unmatched';
275
- }
276
- }
1
+ /**
2
+ * Anchor matching primitives shared between sync (insertion) and
3
+ * verify-anchors (drift reporting). The functions are pure: given an
4
+ * anchor string and surrounding context, locate candidate positions in
5
+ * a target text using progressively looser strategies.
6
+ */
7
+
8
+ export type AnchorStrategy =
9
+ | 'direct'
10
+ | 'normalized'
11
+ | 'stripped'
12
+ | 'partial-start'
13
+ | 'partial-start-stripped'
14
+ | 'context-both'
15
+ | 'context-before'
16
+ | 'context-after'
17
+ | 'split-match'
18
+ | 'empty-anchor'
19
+ | 'failed';
20
+
21
+ export interface AnchorSearchResult {
22
+ occurrences: number[];
23
+ matchedAnchor: string | null;
24
+ strategy: AnchorStrategy;
25
+ stripped?: boolean;
26
+ }
27
+
28
+ /**
29
+ * Strip CriticMarkup so the matcher sees plain prose instead of
30
+ * `{++inserted++}`/`{--deleted--}`/etc. Used when an anchor lives
31
+ * underneath previously imported track changes.
32
+ */
33
+ export function stripCriticMarkup(text: string): string {
34
+ return text
35
+ .replace(/\{\+\+([^+]*)\+\+\}/g, '$1') // insertions: keep new text
36
+ .replace(/\{--([^-]*)--\}/g, '') // deletions: remove old text
37
+ .replace(/\{~~([^~]*)~>([^~]*)~~\}/g, '$2') // substitutions: keep new text
38
+ .replace(/\{>>[\s\S]*?<<\}/g, '') // comments: remove (non-greedy; comment text may contain '<')
39
+ .replace(/\[([^\]]*)\]\{\.mark\}/g, '$1'); // marked text: keep text
40
+ }
41
+
42
+ /**
43
+ * Return every starting index where `needle` occurs in `haystack`.
44
+ * Empty needles return no occurrences (empty matches are not useful
45
+ * for anchor placement).
46
+ */
47
+ /**
48
+ * Score how well the docx-side `before` / `after` context matches the
49
+ * surroundings of a candidate position in the target text. Used by
50
+ * `verify-anchors` to tell apart "multiple hits but context picks one
51
+ * cleanly" (sync will place it correctly) from "multiple hits, context
52
+ * doesn't help" (truly ambiguous, needs human placement).
53
+ *
54
+ * Returns 0 if no context was provided.
55
+ */
56
+ export function scoreContextAt(
57
+ pos: number,
58
+ text: string,
59
+ before: string,
60
+ after: string,
61
+ anchorLen: number,
62
+ ): number {
63
+ let score = 0;
64
+ if (before) {
65
+ const contextBefore = text.slice(Math.max(0, pos - before.length - 20), pos).toLowerCase();
66
+ const beforeLower = before.toLowerCase();
67
+ const beforeWords = beforeLower.split(/\s+/).filter(w => w.length > 3);
68
+ for (const word of beforeWords) {
69
+ if (contextBefore.includes(word)) score += 2;
70
+ }
71
+ if (contextBefore.includes(beforeLower.slice(-30))) score += 5;
72
+ }
73
+ if (after) {
74
+ const contextAfter = text.slice(pos + anchorLen, pos + anchorLen + after.length + 20).toLowerCase();
75
+ const afterLower = after.toLowerCase();
76
+ const afterWords = afterLower.split(/\s+/).filter(w => w.length > 3);
77
+ for (const word of afterWords) {
78
+ if (contextAfter.includes(word)) score += 2;
79
+ }
80
+ if (contextAfter.includes(afterLower.slice(0, 30))) score += 5;
81
+ }
82
+ return score;
83
+ }
84
+
85
+ export function findAllOccurrences(haystack: string, needle: string): number[] {
86
+ if (!needle || needle.length === 0) return [];
87
+ const occurrences: number[] = [];
88
+ let idx = 0;
89
+ while ((idx = haystack.indexOf(needle, idx)) !== -1) {
90
+ occurrences.push(idx);
91
+ idx += 1;
92
+ }
93
+ return occurrences;
94
+ }
95
+
96
+ /**
97
+ * Find candidate positions for `anchor` in `text`, falling back through
98
+ * progressively looser strategies (whitespace normalization, stripped
99
+ * CriticMarkup, partial-prefix, surrounding context, word splitting).
100
+ *
101
+ * The returned `strategy` lets callers distinguish a clean direct hit
102
+ * from a fuzzy approximation — useful for drift reporting.
103
+ */
104
+ export function findAnchorInText(
105
+ anchor: string,
106
+ text: string,
107
+ before: string = '',
108
+ after: string = ''
109
+ ): AnchorSearchResult {
110
+ // Empty anchor: skip directly to context-based matching
111
+ if (!anchor || anchor.trim().length === 0) {
112
+ if (before || after) {
113
+ const beforeLower = (before || '').toLowerCase();
114
+ const afterLower = (after || '').toLowerCase();
115
+ const textLower = text.toLowerCase();
116
+
117
+ if (before && after) {
118
+ const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
119
+ if (beforeIdx !== -1) {
120
+ const searchStart = beforeIdx + beforeLower.slice(-50).length;
121
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
122
+ if (afterIdx !== -1 && afterIdx - searchStart < 500) {
123
+ return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
124
+ }
125
+ }
126
+ }
127
+
128
+ if (before) {
129
+ const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
130
+ if (beforeIdx !== -1) {
131
+ return {
132
+ occurrences: [beforeIdx + beforeLower.slice(-30).length],
133
+ matchedAnchor: null,
134
+ strategy: 'context-before',
135
+ };
136
+ }
137
+ }
138
+
139
+ if (after) {
140
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
141
+ if (afterIdx !== -1) {
142
+ return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
143
+ }
144
+ }
145
+ }
146
+ return { occurrences: [], matchedAnchor: null, strategy: 'empty-anchor' };
147
+ }
148
+
149
+ const anchorLower = anchor.toLowerCase();
150
+ const textLower = text.toLowerCase();
151
+
152
+ // Strategy 1: direct match
153
+ let occurrences = findAllOccurrences(textLower, anchorLower);
154
+ if (occurrences.length > 0) {
155
+ return { occurrences, matchedAnchor: anchor, strategy: 'direct' };
156
+ }
157
+
158
+ // Strategy 2: normalized whitespace
159
+ const normalizedAnchor = anchor.replace(/\s+/g, ' ').toLowerCase();
160
+ const normalizedText = text.replace(/\s+/g, ' ').toLowerCase();
161
+ const idx = normalizedText.indexOf(normalizedAnchor);
162
+ if (idx !== -1) {
163
+ return { occurrences: [idx], matchedAnchor: anchor, strategy: 'normalized' };
164
+ }
165
+
166
+ // Strategy 3: match in stripped CriticMarkup version
167
+ const strippedText = stripCriticMarkup(text);
168
+ const strippedLower = strippedText.toLowerCase();
169
+ occurrences = findAllOccurrences(strippedLower, anchorLower);
170
+ if (occurrences.length > 0) {
171
+ return { occurrences, matchedAnchor: anchor, strategy: 'stripped', stripped: true };
172
+ }
173
+
174
+ // Strategy 4: first N words of anchor (long anchors)
175
+ const words = anchor.split(/\s+/);
176
+ if (words.length > 3) {
177
+ for (let n = Math.min(6, words.length); n >= 3; n--) {
178
+ const partialAnchor = words.slice(0, n).join(' ').toLowerCase();
179
+ if (partialAnchor.length >= 15) {
180
+ occurrences = findAllOccurrences(textLower, partialAnchor);
181
+ if (occurrences.length > 0) {
182
+ return { occurrences, matchedAnchor: words.slice(0, n).join(' '), strategy: 'partial-start' };
183
+ }
184
+ occurrences = findAllOccurrences(strippedLower, partialAnchor);
185
+ if (occurrences.length > 0) {
186
+ return {
187
+ occurrences,
188
+ matchedAnchor: words.slice(0, n).join(' '),
189
+ strategy: 'partial-start-stripped',
190
+ stripped: true,
191
+ };
192
+ }
193
+ }
194
+ }
195
+ }
196
+
197
+ // Strategy 5: context (before/after) only
198
+ if (before || after) {
199
+ const beforeLower = before.toLowerCase();
200
+ const afterLower = after.toLowerCase();
201
+
202
+ if (before && after) {
203
+ const beforeIdx = textLower.indexOf(beforeLower.slice(-50));
204
+ if (beforeIdx !== -1) {
205
+ const searchStart = beforeIdx + beforeLower.slice(-50).length;
206
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 50), searchStart);
207
+ if (afterIdx !== -1 && afterIdx - searchStart < 500) {
208
+ return { occurrences: [searchStart], matchedAnchor: null, strategy: 'context-both' };
209
+ }
210
+ }
211
+ }
212
+
213
+ if (before) {
214
+ const beforeIdx = textLower.lastIndexOf(beforeLower.slice(-30));
215
+ if (beforeIdx !== -1) {
216
+ return {
217
+ occurrences: [beforeIdx + beforeLower.slice(-30).length],
218
+ matchedAnchor: null,
219
+ strategy: 'context-before',
220
+ };
221
+ }
222
+ }
223
+
224
+ if (after) {
225
+ const afterIdx = textLower.indexOf(afterLower.slice(0, 30));
226
+ if (afterIdx !== -1) {
227
+ return { occurrences: [afterIdx], matchedAnchor: null, strategy: 'context-after' };
228
+ }
229
+ }
230
+ }
231
+
232
+ // Strategy 6: split anchor on transition characters
233
+ const splitPatterns = [' ', ', ', '. ', ' - ', ' – '];
234
+ for (const sep of splitPatterns) {
235
+ if (anchor.includes(sep)) {
236
+ const parts = anchor.split(sep).filter(p => p.length >= 4);
237
+ for (const part of parts) {
238
+ const partLower = part.toLowerCase();
239
+ occurrences = findAllOccurrences(textLower, partLower);
240
+ if (occurrences.length > 0 && occurrences.length < 5) {
241
+ return { occurrences, matchedAnchor: part, strategy: 'split-match' };
242
+ }
243
+ }
244
+ }
245
+ }
246
+
247
+ return { occurrences: [], matchedAnchor: null, strategy: 'failed' };
248
+ }
249
+
250
+ /**
251
+ * Classify a strategy as a clean hit, a fuzzy/drifted hit, or no hit.
252
+ * Used by `verify-anchors` to summarize per-comment match quality.
253
+ */
254
+ export type AnchorMatchQuality = 'clean' | 'drift' | 'context-only' | 'unmatched';
255
+
256
+ export function classifyStrategy(strategy: AnchorStrategy, occurrences: number): AnchorMatchQuality {
257
+ if (occurrences === 0) return 'unmatched';
258
+ switch (strategy) {
259
+ case 'direct':
260
+ case 'normalized':
261
+ return 'clean';
262
+ case 'stripped':
263
+ case 'partial-start':
264
+ case 'partial-start-stripped':
265
+ case 'split-match':
266
+ return 'drift';
267
+ case 'context-both':
268
+ case 'context-before':
269
+ case 'context-after':
270
+ return 'context-only';
271
+ case 'empty-anchor':
272
+ case 'failed':
273
+ default:
274
+ return 'unmatched';
275
+ }
276
+ }