docrev 0.9.13 → 0.9.15

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126)
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +411 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +38 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +68 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/pdf-comments.js +44 -44
  43. package/dist/lib/plugins.js +57 -57
  44. package/dist/lib/pptx-themes.js +115 -115
  45. package/dist/lib/spelling.js +2 -2
  46. package/dist/lib/templates.js +387 -387
  47. package/dist/lib/themes.js +51 -51
  48. package/eslint.config.js +27 -27
  49. package/lib/anchor-match.ts +276 -276
  50. package/lib/annotations.ts +644 -644
  51. package/lib/build.ts +1300 -1251
  52. package/lib/citations.ts +160 -160
  53. package/lib/commands/build.ts +833 -801
  54. package/lib/commands/citations.ts +515 -515
  55. package/lib/commands/comments.ts +1050 -1050
  56. package/lib/commands/context.ts +174 -174
  57. package/lib/commands/core.ts +309 -309
  58. package/lib/commands/doi.ts +435 -435
  59. package/lib/commands/file-ops.ts +372 -372
  60. package/lib/commands/history.ts +320 -320
  61. package/lib/commands/index.ts +87 -87
  62. package/lib/commands/init.ts +259 -259
  63. package/lib/commands/merge-resolve.ts +378 -378
  64. package/lib/commands/preview.ts +178 -178
  65. package/lib/commands/project-info.ts +244 -244
  66. package/lib/commands/quality.ts +517 -517
  67. package/lib/commands/response.ts +454 -454
  68. package/lib/commands/section-boundaries.ts +82 -82
  69. package/lib/commands/sections.ts +451 -451
  70. package/lib/commands/sync.ts +706 -706
  71. package/lib/commands/text-ops.ts +449 -449
  72. package/lib/commands/utilities.ts +448 -448
  73. package/lib/commands/verify-anchors.ts +272 -272
  74. package/lib/commands/word-tools.ts +340 -340
  75. package/lib/comment-realign.ts +517 -517
  76. package/lib/config.ts +84 -84
  77. package/lib/crossref.ts +781 -781
  78. package/lib/csl.ts +191 -191
  79. package/lib/dependencies.ts +98 -98
  80. package/lib/diff-engine.ts +465 -465
  81. package/lib/doi-cache.ts +115 -115
  82. package/lib/doi.ts +897 -897
  83. package/lib/equations.ts +506 -506
  84. package/lib/errors.ts +346 -346
  85. package/lib/format.ts +541 -541
  86. package/lib/git.ts +326 -326
  87. package/lib/grammar.ts +303 -303
  88. package/lib/image-registry.ts +180 -180
  89. package/lib/import.ts +911 -911
  90. package/lib/journals.ts +543 -543
  91. package/lib/merge.ts +633 -633
  92. package/lib/orcid.ts +144 -144
  93. package/lib/pdf-comments.ts +263 -263
  94. package/lib/pdf-import.ts +524 -524
  95. package/lib/plugins.ts +362 -362
  96. package/lib/postprocess.ts +188 -188
  97. package/lib/pptx-color-filter.lua +37 -37
  98. package/lib/pptx-template.ts +469 -469
  99. package/lib/pptx-themes.ts +483 -483
  100. package/lib/protect-restore.ts +520 -520
  101. package/lib/rate-limiter.ts +94 -94
  102. package/lib/response.ts +197 -197
  103. package/lib/restore-references.ts +240 -240
  104. package/lib/review.ts +327 -327
  105. package/lib/schema.ts +417 -417
  106. package/lib/scientific-words.ts +73 -73
  107. package/lib/sections.ts +335 -335
  108. package/lib/slides.ts +756 -756
  109. package/lib/spelling.ts +334 -334
  110. package/lib/templates.ts +526 -526
  111. package/lib/themes.ts +742 -742
  112. package/lib/trackchanges.ts +247 -247
  113. package/lib/tui.ts +450 -450
  114. package/lib/types.ts +550 -550
  115. package/lib/undo.ts +250 -250
  116. package/lib/utils.ts +69 -69
  117. package/lib/variables.ts +179 -179
  118. package/lib/word-extraction.ts +806 -806
  119. package/lib/word.ts +643 -643
  120. package/lib/wordcomments.ts +817 -817
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +28 -28
  123. package/skill/REFERENCE.md +473 -431
  124. package/skill/SKILL.md +274 -258
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
package/lib/spelling.ts CHANGED
@@ -1,334 +1,334 @@
1
- /**
2
- * Spelling checker module with global and project dictionaries
3
- *
4
- * Uses nspell (Hunspell-compatible) for English spellchecking.
5
- * Custom words stored in:
6
- * - ~/.rev-dictionary (global)
7
- * - .rev-dictionary (project-local)
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- // @ts-ignore - nspell has no types
13
- import nspell from 'nspell';
14
- // @ts-ignore - dictionary-en has no types
15
- import dictionaryEn from 'dictionary-en';
16
- // @ts-ignore - dictionary-en-gb has no types
17
- import dictionaryEnGb from 'dictionary-en-gb';
18
- import { scientificWords } from './scientific-words.js';
19
- import type { SpellingIssue, SpellingResult } from './types.js';
20
-
21
- const DICT_NAME = '.rev-dictionary';
22
-
23
- // Cache for the spellchecker instances (one per language)
24
- const spellcheckerCache: Record<string, any> = {
25
- en: null,
26
- 'en-gb': null,
27
- };
28
-
29
- interface WordLocation {
30
- word: string;
31
- line: number;
32
- column: number;
33
- }
34
-
35
- interface CheckSpellingOptions {
36
- projectDir?: string;
37
- lang?: 'en' | 'en-gb';
38
- }
39
-
40
- interface CheckFileOptions {
41
- projectDir?: string;
42
- lang?: 'en' | 'en-gb';
43
- }
44
-
45
- /**
46
- * Get the global dictionary path
47
- */
48
- export function getGlobalDictPath(): string {
49
- const home = process.env.HOME || process.env.USERPROFILE;
50
- return path.join(home!, DICT_NAME);
51
- }
52
-
53
- /**
54
- * Get the project dictionary path
55
- */
56
- export function getProjectDictPath(directory: string = '.'): string {
57
- return path.join(directory, DICT_NAME);
58
- }
59
-
60
- /**
61
- * Load custom words from a dictionary file
62
- */
63
- export function loadDictionaryFile(dictPath: string): Set<string> {
64
- const words = new Set<string>();
65
-
66
- if (fs.existsSync(dictPath)) {
67
- const content = fs.readFileSync(dictPath, 'utf-8');
68
- for (const line of content.split('\n')) {
69
- const word = line.trim();
70
- if (word && !word.startsWith('#')) {
71
- words.add(word.toLowerCase());
72
- }
73
- }
74
- }
75
-
76
- return words;
77
- }
78
-
79
- /**
80
- * Save words to a dictionary file
81
- */
82
- export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
83
- const header = `# Custom dictionary for docrev
84
- # One word per line, lines starting with # are comments
85
- `;
86
- const content = header + [...words].sort().join('\n') + '\n';
87
-
88
- // Ensure directory exists
89
- const dir = path.dirname(dictPath);
90
- if (!fs.existsSync(dir)) {
91
- fs.mkdirSync(dir, { recursive: true });
92
- }
93
-
94
- fs.writeFileSync(dictPath, content, 'utf-8');
95
- }
96
-
97
- /**
98
- * Load all custom words (global + project)
99
- */
100
- export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
101
- const globalWords = loadDictionaryFile(getGlobalDictPath());
102
- const projectWords = loadDictionaryFile(getProjectDictPath(projectDir));
103
-
104
- return new Set([...globalWords, ...projectWords]);
105
- }
106
-
107
- /**
108
- * Add word to dictionary
109
- */
110
- export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
111
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
112
- const words = loadDictionaryFile(dictPath);
113
- const normalizedWord = word.trim().toLowerCase();
114
-
115
- if (words.has(normalizedWord)) {
116
- return false;
117
- }
118
-
119
- words.add(normalizedWord);
120
- saveDictionaryFile(words, dictPath);
121
-
122
- // Clear cache so new word is picked up
123
- clearCache();
124
-
125
- return true;
126
- }
127
-
128
- /**
129
- * Remove word from dictionary
130
- */
131
- export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
132
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
133
- const words = loadDictionaryFile(dictPath);
134
- const normalizedWord = word.trim().toLowerCase();
135
-
136
- if (!words.has(normalizedWord)) {
137
- return false;
138
- }
139
-
140
- words.delete(normalizedWord);
141
- saveDictionaryFile(words, dictPath);
142
-
143
- // Clear cache
144
- clearCache();
145
-
146
- return true;
147
- }
148
-
149
- /**
150
- * List words in dictionary
151
- */
152
- export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
153
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
154
- const words = loadDictionaryFile(dictPath);
155
- return [...words].sort();
156
- }
157
-
158
- /**
159
- * Initialize the spellchecker with custom words
160
- */
161
- export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
162
- if (spellcheckerCache[lang]) {
163
- return spellcheckerCache[lang];
164
- }
165
-
166
- // Select dictionary based on language
167
- const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
168
- const spell = nspell(dictionary);
169
-
170
- // Add scientific/academic words
171
- for (const word of scientificWords) {
172
- spell.add(word);
173
- }
174
-
175
- // Add custom words
176
- const customWords = loadAllCustomWords(projectDir);
177
- for (const word of customWords) {
178
- spell.add(word);
179
- }
180
-
181
- spellcheckerCache[lang] = spell;
182
- return spell;
183
- }
184
-
185
- /**
186
- * Clear spellchecker cache (call after modifying dictionaries)
187
- */
188
- export function clearCache(): void {
189
- spellcheckerCache.en = null;
190
- spellcheckerCache['en-gb'] = null;
191
- }
192
-
193
- /**
194
- * Extract words from text, filtering out non-words
195
- */
196
- export function extractWords(text: string): WordLocation[] {
197
- const words: WordLocation[] = [];
198
- const lines = text.split('\n');
199
- let inCodeBlock = false;
200
- let inFrontmatter = false;
201
-
202
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
203
- const line = lines[lineNum];
204
- if (!line) continue;
205
- const trimmed = line.trim();
206
-
207
- // Track YAML frontmatter (only at start of file)
208
- if (lineNum === 0 && trimmed === '---') {
209
- inFrontmatter = true;
210
- continue;
211
- }
212
- if (inFrontmatter) {
213
- if (trimmed === '---') {
214
- inFrontmatter = false;
215
- }
216
- continue;
217
- }
218
-
219
- // Track code blocks
220
- if (trimmed.startsWith('```')) {
221
- inCodeBlock = !inCodeBlock;
222
- continue;
223
- }
224
- if (inCodeBlock) {
225
- continue;
226
- }
227
-
228
- // Skip URLs and paths
229
- if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
230
- continue;
231
- }
232
-
233
- // Remove markdown syntax, URLs, code spans, LaTeX, etc.
234
- let cleanLine = line
235
- .replace(/`[^`]+`/g, '') // inline code
236
- .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links (keep text)
237
- .replace(/!\[[^\]]*\]\([^)]+\)/g, '') // images
238
- .replace(/@(fig|tbl|eq):\w+/g, '') // cross-refs
239
- .replace(/\{[^}]+\}/g, '') // CriticMarkup/templates
240
- .replace(/https?:\/\/\S+/g, '') // URLs
241
- .replace(/\$[^$]+\$/g, '') // inline LaTeX math
242
- .replace(/\\\w+/g, '') // LaTeX commands like \frac
243
- .replace(/[#*_~`>|]/g, ' '); // markdown chars
244
-
245
- // Extract words (letters and apostrophes only)
246
- const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
247
- let match;
248
-
249
- while ((match = wordPattern.exec(cleanLine)) !== null) {
250
- const word = match[0];
251
-
252
- // Skip:
253
- // - Very short words (1-2 chars)
254
- // - All caps (acronyms like NASA)
255
- // - File extensions (.md, .tex, .png)
256
- // - CamelCase (likely code or citations like vanKleunen)
257
- // - Words starting with capital in middle of sentence (proper nouns/names)
258
- if (word.length < 3 ||
259
- /^[A-Z]+$/.test(word) ||
260
- /^\w{2,4}$/.test(word) && /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
261
- /[a-z][A-Z]/.test(word)) {
262
- continue;
263
- }
264
-
265
- words.push({
266
- word,
267
- line: lineNum + 1,
268
- column: match.index + 1,
269
- });
270
- }
271
- }
272
-
273
- return words;
274
- }
275
-
276
- /**
277
- * Check if a word looks like a proper noun (name)
278
- */
279
- function looksLikeName(word: string): boolean {
280
- // Capitalized, not all caps, reasonable length for a name
281
- return /^[A-Z][a-z]{2,}$/.test(word);
282
- }
283
-
284
- /**
285
- * Check spelling in text
286
- */
287
- export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
288
- const { projectDir = '.', lang = 'en' } = options;
289
- const spell = await getSpellchecker(projectDir, lang);
290
- const words = extractWords(text);
291
- const misspelled: SpellingIssue[] = [];
292
- const possibleNames: SpellingIssue[] = [];
293
- const seen = new Set<string>();
294
- const seenNames = new Set<string>();
295
-
296
- for (const { word, line, column } of words) {
297
- // Skip if already reported this word
298
- const key = word.toLowerCase();
299
- if (seen.has(key) || seenNames.has(key)) {
300
- continue;
301
- }
302
-
303
- if (!spell.correct(word)) {
304
- // Check if it looks like a proper noun/name
305
- if (looksLikeName(word)) {
306
- seenNames.add(key);
307
- possibleNames.push({ word, line, column });
308
- } else {
309
- seen.add(key);
310
- misspelled.push({
311
- word,
312
- line,
313
- column,
314
- suggestions: spell.suggest(word).slice(0, 5),
315
- });
316
- }
317
- }
318
- }
319
-
320
- return { misspelled, possibleNames };
321
- }
322
-
323
- /**
324
- * Check spelling in a file
325
- */
326
- export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
327
- const text = fs.readFileSync(filePath, 'utf-8');
328
- const result = await checkSpelling(text, options);
329
-
330
- return {
331
- misspelled: result.misspelled.map(issue => ({ ...issue, file: filePath })),
332
- possibleNames: result.possibleNames.map(issue => ({ ...issue, file: filePath })),
333
- };
334
- }
1
+ /**
2
+ * Spelling checker module with global and project dictionaries
3
+ *
4
+ * Uses nspell (Hunspell-compatible) for English spellchecking.
5
+ * Custom words stored in:
6
+ * - ~/.rev-dictionary (global)
7
+ * - .rev-dictionary (project-local)
8
+ */
9
+
10
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
// @ts-ignore - nspell has no types
import nspell from 'nspell';
// @ts-ignore - dictionary-en has no types
import dictionaryEn from 'dictionary-en';
// @ts-ignore - dictionary-en-gb has no types
import dictionaryEnGb from 'dictionary-en-gb';
import { scientificWords } from './scientific-words.js';
import type { SpellingIssue, SpellingResult } from './types.js';
20
+
21
/** File name used for both the global and the project-local custom dictionary. */
const DICT_NAME = '.rev-dictionary';

/**
 * Spellchecker instances that have already been built, one slot per language.
 * A `null` slot means nothing is cached for that language.
 */
const spellcheckerCache: Record<string, any> = {
  en: null,
  'en-gb': null,
};

/** A word pulled out of the text together with the position reported for it. */
interface WordLocation {
  word: string;
  line: number;
  column: number;
}

/** Options accepted by `checkSpelling`. */
interface CheckSpellingOptions {
  projectDir?: string;
  lang?: 'en' | 'en-gb';
}

/** Options accepted by `checkFile`; same shape as the `checkSpelling` options. */
type CheckFileOptions = CheckSpellingOptions;
44
+
45
/**
 * Get the global dictionary path.
 *
 * The home directory is read from `HOME`, then `USERPROFILE`. When neither
 * variable is set the function falls back to `os.homedir()` rather than
 * handing `undefined` to `path.join`, which would throw a TypeError.
 *
 * @returns Path of the dictionary file inside the user's home directory.
 */
export function getGlobalDictPath(): string {
  const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
  return path.join(home, DICT_NAME);
}
52
+
53
/**
 * Locate the project-local dictionary file.
 *
 * @param directory Project directory; defaults to the current directory.
 * @returns `directory` joined with the dictionary file name.
 */
export function getProjectDictPath(directory: string = '.'): string {
  const dictionaryPath = path.join(directory, DICT_NAME);
  return dictionaryPath;
}
59
+
60
/**
 * Read the custom words stored in a dictionary file.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are ignored,
 * and every remaining entry is lower-cased.
 *
 * @param dictPath Path of the dictionary file.
 * @returns The set of words, or an empty set when the file does not exist.
 */
export function loadDictionaryFile(dictPath: string): Set<string> {
  if (!fs.existsSync(dictPath)) {
    return new Set<string>();
  }

  const entries = fs
    .readFileSync(dictPath, 'utf-8')
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#'))
    .map((entry) => entry.toLowerCase());

  return new Set<string>(entries);
}
78
+
79
/**
 * Write a word set to a dictionary file.
 *
 * The file starts with a two-line comment header, followed by the words in
 * sorted order, one per line. The parent directory is created when missing.
 *
 * @param words Words to persist.
 * @param dictPath Destination file path.
 */
export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
  const header = `# Custom dictionary for docrev
# One word per line, lines starting with # are comments
`;
  const sortedWords = Array.from(words).sort();
  const content = `${header}${sortedWords.join('\n')}\n`;

  // Make sure the target directory is there before writing.
  const parentDir = path.dirname(dictPath);
  const parentExists = fs.existsSync(parentDir);
  if (!parentExists) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  fs.writeFileSync(dictPath, content, 'utf-8');
}
96
+
97
/**
 * Collect the custom words from the global dictionary and the project
 * dictionary into one set.
 *
 * @param projectDir Project directory whose dictionary is merged in.
 * @returns Union of both word sets.
 */
export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
  const merged = loadDictionaryFile(getGlobalDictPath());
  for (const projectWord of loadDictionaryFile(getProjectDictPath(projectDir))) {
    merged.add(projectWord);
  }
  return merged;
}
106
+
107
/**
 * Add a word to the global or the project dictionary.
 *
 * The word is trimmed and lower-cased before it is stored. Blank words and
 * words starting with `#` are rejected: the dictionary loader skips blank
 * lines and treats `#` lines as comments, so such an entry would be written
 * to disk and reported as added, yet never be read back.
 *
 * @param word Word to add.
 * @param global When true (default) the global dictionary is used, otherwise the project one.
 * @param projectDir Project directory, used only when `global` is false.
 * @returns `true` when the word was added; `false` when it was rejected or already present.
 */
export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  const normalizedWord = word.trim().toLowerCase();

  // Nothing storable: bail out before touching the file or the cache.
  if (normalizedWord === '' || normalizedWord.startsWith('#')) {
    return false;
  }

  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
  const words = loadDictionaryFile(dictPath);

  if (words.has(normalizedWord)) {
    return false;
  }

  words.add(normalizedWord);
  saveDictionaryFile(words, dictPath);

  // Clear cache so the new word is picked up by the next spellchecker build.
  clearCache();

  return true;
}
127
+
128
/**
 * Delete a word from the global or the project dictionary.
 *
 * @param word Word to remove; compared after trimming and lower-casing.
 * @param global When true (default) the global dictionary is used, otherwise the project one.
 * @param projectDir Project directory, used only when `global` is false.
 * @returns `true` when the word was present and has been removed, `false` otherwise.
 */
export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  let dictPath: string;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }
  const entries = loadDictionaryFile(dictPath);
  const target = word.trim().toLowerCase();

  // Set.delete reports whether the entry existed, so no separate lookup is needed.
  const wasPresent = entries.delete(target);
  if (!wasPresent) {
    return false;
  }

  saveDictionaryFile(entries, dictPath);
  clearCache(); // cached spellcheckers still know the removed word
  return true;
}
148
+
149
/**
 * Return the words of one dictionary in sorted order.
 *
 * @param global When true (default) the global dictionary is listed, otherwise the project one.
 * @param projectDir Project directory, used only when `global` is false.
 * @returns Sorted array of the stored words.
 */
export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
  let dictPath: string;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }
  return Array.from(loadDictionaryFile(dictPath)).sort();
}
157
+
158
/**
 * Resolved project directory that each cached spellchecker was built for,
 * keyed by language. A cached instance is reused only when it was built for
 * the same project, because the project dictionary is baked into it.
 */
const spellcheckerProjectDirs = new Map<string, string>();

/**
 * Build, or reuse, the spellchecker for a language.
 *
 * A new instance is created from the language's base dictionary, then the
 * scientific word list and the custom words (global + project) are added.
 * The instance is cached per language and reused on later calls, but only
 * for the same resolved `projectDir`: a call for a different project
 * rebuilds the checker so that one project's custom words are not applied
 * to another.
 *
 * @param projectDir Project directory whose dictionary is loaded.
 * @param lang Language variant, `'en'` (default) or `'en-gb'`.
 * @returns The spellchecker instance.
 */
export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
  const resolvedDir = path.resolve(projectDir);
  const cached = spellcheckerCache[lang];

  // The slot is null after the cache has been cleared, which forces a rebuild.
  if (cached && spellcheckerProjectDirs.get(lang) === resolvedDir) {
    return cached;
  }

  // Select dictionary based on language
  const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
  const spell = nspell(dictionary);

  // Add scientific/academic words
  for (const word of scientificWords) {
    spell.add(word);
  }

  // Add custom words
  for (const word of loadAllCustomWords(projectDir)) {
    spell.add(word);
  }

  spellcheckerCache[lang] = spell;
  spellcheckerProjectDirs.set(lang, resolvedDir);
  return spell;
}
184
+
185
/**
 * Drop the cached spellchecker of every supported language.
 * Call this after a dictionary has been modified.
 */
export function clearCache(): void {
  for (const lang of ['en', 'en-gb']) {
    spellcheckerCache[lang] = null;
  }
}
192
+
193
/** Lower/upper-case file-extension tokens that are never spellchecked. */
const FILE_EXTENSION_WORD = /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i;

/** A run of letters with optional interior apostrophes, or a single letter. */
const WORD_PATTERN = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;

/** Replace a matched span with spaces of the same length. */
const blankOut = (span: string): string => ' '.repeat(span.length);

/**
 * Mask markdown syntax, URLs, code spans and LaTeX in one line.
 *
 * Every removed span is overwritten with spaces instead of being deleted, so
 * the result has the same length as the input and character offsets in it
 * are offsets in the original line.
 */
function maskMarkup(line: string): string {
  return line
    .replace(/`[^`]+`/g, blankOut) // inline code
    // Images must be handled before links: the link pattern also matches the
    // `[alt](src)` part of an image and would keep the alt text.
    .replace(/!\[[^\]]*\]\([^)]+\)/g, blankOut) // images
    .replace(
      /\[([^\]]+)\]\([^)]+\)/g, // links (keep text, at its original offset)
      (whole: string, label: string) => ' ' + label + ' '.repeat(whole.length - label.length - 1),
    )
    .replace(/@(fig|tbl|eq):\w+/g, blankOut) // cross-refs
    .replace(/\{[^}]+\}/g, blankOut) // CriticMarkup/templates
    .replace(/https?:\/\/\S+/g, blankOut) // URLs
    .replace(/\$[^$]+\$/g, blankOut) // inline LaTeX math
    .replace(/\\\w+/g, blankOut) // LaTeX commands like \frac
    .replace(/[#*_~`>|]/g, ' '); // markdown chars
}

/**
 * Decide whether an extracted token should be spellchecked.
 *
 * Skipped: tokens shorter than 3 characters, all-caps tokens (acronyms like
 * NASA), file extensions (md, tex, png, ...) and camelCase tokens (likely
 * code or citation keys like vanKleunen).
 */
function isCheckableWord(word: string): boolean {
  if (word.length < 3) return false;
  if (/^[A-Z]+$/.test(word)) return false;
  if (FILE_EXTENSION_WORD.test(word)) return false;
  if (/[a-z][A-Z]/.test(word)) return false;
  return true;
}

/**
 * Extract words from text, filtering out non-words.
 *
 * Skips YAML frontmatter (only when the text starts with `---`), fenced code
 * blocks, and whole lines that start with `http` or `/`. On the remaining
 * lines markup is masked and each word that passes the skip rules is reported.
 *
 * @param text Markdown text to scan.
 * @returns One entry per word occurrence with its 1-based line and its
 *          1-based column in the original (unmasked) line.
 */
export function extractWords(text: string): WordLocation[] {
  const words: WordLocation[] = [];
  const lines = text.split('\n');
  let inCodeBlock = false;
  let inFrontmatter = false;

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    if (!line) continue;
    const trimmed = line.trim();

    // Track YAML frontmatter (only at start of file)
    if (lineNum === 0 && trimmed === '---') {
      inFrontmatter = true;
      continue;
    }
    if (inFrontmatter) {
      if (trimmed === '---') {
        inFrontmatter = false;
      }
      continue;
    }

    // Track code blocks
    if (trimmed.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) {
      continue;
    }

    // Skip URLs and paths
    if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
      continue;
    }

    const cleanLine = maskMarkup(line);

    // matchAll works on a copy of the pattern, so the shared global regex
    // carries no lastIndex state from one line to the next.
    for (const match of cleanLine.matchAll(WORD_PATTERN)) {
      const word = match[0];
      if (!isCheckableWord(word)) {
        continue;
      }

      words.push({
        word,
        line: lineNum + 1,
        column: (match.index ?? 0) + 1,
      });
    }
  }

  return words;
}
275
+
276
/**
 * Tell whether a word has the shape of a proper noun: one capital letter
 * followed by at least two lower-case letters and nothing else.
 */
function looksLikeName(word: string): boolean {
  const nameShape = /^[A-Z][a-z]{2,}$/;
  const isNameShaped = nameShape.test(word);
  return isNameShaped;
}
283
+
284
/**
 * Spellcheck a piece of text.
 *
 * Every word is reported at most once (compared case-insensitively), at the
 * position of its first flagged occurrence. A flagged word that has the shape
 * of a proper noun goes to `possibleNames`; any other flagged word goes to
 * `misspelled` together with up to five suggestions.
 *
 * @param text Text to check.
 * @param options Project directory and language; default to `'.'` and `'en'`.
 * @returns The misspelled words and the possible names.
 */
export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
  const { projectDir = '.', lang = 'en' } = options;
  const checker = await getSpellchecker(projectDir, lang);

  const misspelled: SpellingIssue[] = [];
  const possibleNames: SpellingIssue[] = [];
  // Lower-cased words already placed in either result list.
  const reported = new Set<string>();

  for (const location of extractWords(text)) {
    const { word, line, column } = location;
    const key = word.toLowerCase();

    if (reported.has(key)) continue;
    if (checker.correct(word)) continue;

    reported.add(key);
    if (looksLikeName(word)) {
      possibleNames.push({ word, line, column });
      continue;
    }

    const suggestions = checker.suggest(word).slice(0, 5);
    misspelled.push({ word, line, column, suggestions });
  }

  return { misspelled, possibleNames };
}
322
+
323
/**
 * Spellcheck the contents of a file.
 *
 * The file is read as UTF-8 and checked with `checkSpelling`; every returned
 * issue is then tagged with the file path.
 *
 * @param filePath File to read and check.
 * @param options Options forwarded to `checkSpelling`.
 * @returns The spelling result with `file` set on each issue.
 */
export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
  const contents = fs.readFileSync(filePath, 'utf-8');
  const { misspelled, possibleNames } = await checkSpelling(contents, options);

  const withFile = (issues: SpellingIssue[]) => issues.map((issue) => ({ ...issue, file: filePath }));

  return {
    misspelled: withFile(misspelled),
    possibleNames: withFile(possibleNames),
  };
}