docrev 0.9.11 → 0.9.14

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (138)
  1. package/.claude/settings.local.json +9 -9
  2. package/.gitattributes +1 -1
  3. package/CHANGELOG.md +149 -149
  4. package/PLAN-tables-and-postprocess.md +850 -850
  5. package/README.md +391 -391
  6. package/bin/rev.js +11 -11
  7. package/bin/rev.ts +145 -145
  8. package/completions/rev.bash +127 -127
  9. package/completions/rev.ps1 +210 -210
  10. package/completions/rev.zsh +207 -207
  11. package/dev_notes/stress2/build_adversarial.ts +186 -186
  12. package/dev_notes/stress2/drift_matcher.ts +62 -62
  13. package/dev_notes/stress2/probe_anchors.ts +35 -35
  14. package/dev_notes/stress2/project/discussion.before.md +3 -3
  15. package/dev_notes/stress2/project/discussion.md +3 -3
  16. package/dev_notes/stress2/project/methods.before.md +20 -20
  17. package/dev_notes/stress2/project/methods.md +20 -20
  18. package/dev_notes/stress2/project/rev.yaml +5 -5
  19. package/dev_notes/stress2/project/sections.yaml +4 -4
  20. package/dev_notes/stress2/sections.yaml +5 -5
  21. package/dev_notes/stress2/trace_placement.ts +50 -50
  22. package/dev_notes/stresstest_boundaries.ts +27 -27
  23. package/dev_notes/stresstest_drift_apply.ts +43 -43
  24. package/dev_notes/stresstest_drift_compare.ts +43 -43
  25. package/dev_notes/stresstest_drift_v2.ts +54 -54
  26. package/dev_notes/stresstest_inspect.ts +54 -54
  27. package/dev_notes/stresstest_pstyle.ts +55 -55
  28. package/dev_notes/stresstest_section_debug.ts +23 -23
  29. package/dev_notes/stresstest_split.ts +70 -70
  30. package/dev_notes/stresstest_trace.ts +19 -19
  31. package/dev_notes/stresstest_verify_no_overwrite.ts +40 -40
  32. package/dist/lib/build.d.ts +50 -1
  33. package/dist/lib/build.d.ts.map +1 -1
  34. package/dist/lib/build.js +80 -30
  35. package/dist/lib/build.js.map +1 -1
  36. package/dist/lib/commands/build.d.ts.map +1 -1
  37. package/dist/lib/commands/build.js +38 -5
  38. package/dist/lib/commands/build.js.map +1 -1
  39. package/dist/lib/commands/utilities.js +164 -164
  40. package/dist/lib/commands/word-tools.js +8 -8
  41. package/dist/lib/grammar.js +3 -3
  42. package/dist/lib/import.d.ts.map +1 -1
  43. package/dist/lib/import.js +146 -24
  44. package/dist/lib/import.js.map +1 -1
  45. package/dist/lib/pdf-comments.js +44 -44
  46. package/dist/lib/plugins.js +57 -57
  47. package/dist/lib/pptx-themes.js +115 -115
  48. package/dist/lib/spelling.js +2 -2
  49. package/dist/lib/templates.js +387 -387
  50. package/dist/lib/themes.js +51 -51
  51. package/dist/lib/types.d.ts +20 -0
  52. package/dist/lib/types.d.ts.map +1 -1
  53. package/dist/lib/word-extraction.d.ts +6 -0
  54. package/dist/lib/word-extraction.d.ts.map +1 -1
  55. package/dist/lib/word-extraction.js +46 -3
  56. package/dist/lib/word-extraction.js.map +1 -1
  57. package/dist/lib/wordcomments.d.ts.map +1 -1
  58. package/dist/lib/wordcomments.js +23 -5
  59. package/dist/lib/wordcomments.js.map +1 -1
  60. package/eslint.config.js +27 -27
  61. package/lib/anchor-match.ts +276 -276
  62. package/lib/annotations.ts +644 -644
  63. package/lib/build.ts +1300 -1227
  64. package/lib/citations.ts +160 -160
  65. package/lib/commands/build.ts +833 -801
  66. package/lib/commands/citations.ts +515 -515
  67. package/lib/commands/comments.ts +1050 -1050
  68. package/lib/commands/context.ts +174 -174
  69. package/lib/commands/core.ts +309 -309
  70. package/lib/commands/doi.ts +435 -435
  71. package/lib/commands/file-ops.ts +372 -372
  72. package/lib/commands/history.ts +320 -320
  73. package/lib/commands/index.ts +87 -87
  74. package/lib/commands/init.ts +259 -259
  75. package/lib/commands/merge-resolve.ts +378 -378
  76. package/lib/commands/preview.ts +178 -178
  77. package/lib/commands/project-info.ts +244 -244
  78. package/lib/commands/quality.ts +517 -517
  79. package/lib/commands/response.ts +454 -454
  80. package/lib/commands/section-boundaries.ts +82 -82
  81. package/lib/commands/sections.ts +451 -451
  82. package/lib/commands/sync.ts +706 -706
  83. package/lib/commands/text-ops.ts +449 -449
  84. package/lib/commands/utilities.ts +448 -448
  85. package/lib/commands/verify-anchors.ts +272 -272
  86. package/lib/commands/word-tools.ts +340 -340
  87. package/lib/comment-realign.ts +517 -517
  88. package/lib/config.ts +84 -84
  89. package/lib/crossref.ts +781 -781
  90. package/lib/csl.ts +191 -191
  91. package/lib/dependencies.ts +98 -98
  92. package/lib/diff-engine.ts +465 -465
  93. package/lib/doi-cache.ts +115 -115
  94. package/lib/doi.ts +897 -897
  95. package/lib/equations.ts +506 -506
  96. package/lib/errors.ts +346 -346
  97. package/lib/format.ts +541 -541
  98. package/lib/git.ts +326 -326
  99. package/lib/grammar.ts +303 -303
  100. package/lib/image-registry.ts +180 -180
  101. package/lib/import.ts +911 -792
  102. package/lib/journals.ts +543 -543
  103. package/lib/merge.ts +633 -633
  104. package/lib/orcid.ts +144 -144
  105. package/lib/pdf-comments.ts +263 -263
  106. package/lib/pdf-import.ts +524 -524
  107. package/lib/plugins.ts +362 -362
  108. package/lib/postprocess.ts +188 -188
  109. package/lib/pptx-color-filter.lua +37 -37
  110. package/lib/pptx-template.ts +469 -469
  111. package/lib/pptx-themes.ts +483 -483
  112. package/lib/protect-restore.ts +520 -520
  113. package/lib/rate-limiter.ts +94 -94
  114. package/lib/response.ts +197 -197
  115. package/lib/restore-references.ts +240 -240
  116. package/lib/review.ts +327 -327
  117. package/lib/schema.ts +417 -417
  118. package/lib/scientific-words.ts +73 -73
  119. package/lib/sections.ts +335 -335
  120. package/lib/slides.ts +756 -756
  121. package/lib/spelling.ts +334 -334
  122. package/lib/templates.ts +526 -526
  123. package/lib/themes.ts +742 -742
  124. package/lib/trackchanges.ts +247 -247
  125. package/lib/tui.ts +450 -450
  126. package/lib/types.ts +550 -530
  127. package/lib/undo.ts +250 -250
  128. package/lib/utils.ts +69 -69
  129. package/lib/variables.ts +179 -179
  130. package/lib/word-extraction.ts +806 -759
  131. package/lib/word.ts +643 -643
  132. package/lib/wordcomments.ts +817 -798
  133. package/package.json +137 -137
  134. package/scripts/postbuild.js +28 -28
  135. package/skill/REFERENCE.md +431 -431
  136. package/skill/SKILL.md +258 -258
  137. package/tsconfig.json +26 -26
  138. package/types/index.d.ts +525 -525
package/lib/spelling.ts CHANGED
@@ -1,334 +1,334 @@
1
- /**
2
- * Spelling checker module with global and project dictionaries
3
- *
4
- * Uses nspell (Hunspell-compatible) for English spellchecking.
5
- * Custom words stored in:
6
- * - ~/.rev-dictionary (global)
7
- * - .rev-dictionary (project-local)
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- // @ts-ignore - nspell has no types
13
- import nspell from 'nspell';
14
- // @ts-ignore - dictionary-en has no types
15
- import dictionaryEn from 'dictionary-en';
16
- // @ts-ignore - dictionary-en-gb has no types
17
- import dictionaryEnGb from 'dictionary-en-gb';
18
- import { scientificWords } from './scientific-words.js';
19
- import type { SpellingIssue, SpellingResult } from './types.js';
20
-
21
- const DICT_NAME = '.rev-dictionary';
22
-
23
- // Cache for the spellchecker instances (one per language)
24
- const spellcheckerCache: Record<string, any> = {
25
- en: null,
26
- 'en-gb': null,
27
- };
28
-
29
- interface WordLocation {
30
- word: string;
31
- line: number;
32
- column: number;
33
- }
34
-
35
- interface CheckSpellingOptions {
36
- projectDir?: string;
37
- lang?: 'en' | 'en-gb';
38
- }
39
-
40
- interface CheckFileOptions {
41
- projectDir?: string;
42
- lang?: 'en' | 'en-gb';
43
- }
44
-
45
- /**
46
- * Get the global dictionary path
47
- */
48
- export function getGlobalDictPath(): string {
49
- const home = process.env.HOME || process.env.USERPROFILE;
50
- return path.join(home!, DICT_NAME);
51
- }
52
-
53
- /**
54
- * Get the project dictionary path
55
- */
56
- export function getProjectDictPath(directory: string = '.'): string {
57
- return path.join(directory, DICT_NAME);
58
- }
59
-
60
- /**
61
- * Load custom words from a dictionary file
62
- */
63
- export function loadDictionaryFile(dictPath: string): Set<string> {
64
- const words = new Set<string>();
65
-
66
- if (fs.existsSync(dictPath)) {
67
- const content = fs.readFileSync(dictPath, 'utf-8');
68
- for (const line of content.split('\n')) {
69
- const word = line.trim();
70
- if (word && !word.startsWith('#')) {
71
- words.add(word.toLowerCase());
72
- }
73
- }
74
- }
75
-
76
- return words;
77
- }
78
-
79
- /**
80
- * Save words to a dictionary file
81
- */
82
- export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
83
- const header = `# Custom dictionary for docrev
84
- # One word per line, lines starting with # are comments
85
- `;
86
- const content = header + [...words].sort().join('\n') + '\n';
87
-
88
- // Ensure directory exists
89
- const dir = path.dirname(dictPath);
90
- if (!fs.existsSync(dir)) {
91
- fs.mkdirSync(dir, { recursive: true });
92
- }
93
-
94
- fs.writeFileSync(dictPath, content, 'utf-8');
95
- }
96
-
97
- /**
98
- * Load all custom words (global + project)
99
- */
100
- export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
101
- const globalWords = loadDictionaryFile(getGlobalDictPath());
102
- const projectWords = loadDictionaryFile(getProjectDictPath(projectDir));
103
-
104
- return new Set([...globalWords, ...projectWords]);
105
- }
106
-
107
- /**
108
- * Add word to dictionary
109
- */
110
- export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
111
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
112
- const words = loadDictionaryFile(dictPath);
113
- const normalizedWord = word.trim().toLowerCase();
114
-
115
- if (words.has(normalizedWord)) {
116
- return false;
117
- }
118
-
119
- words.add(normalizedWord);
120
- saveDictionaryFile(words, dictPath);
121
-
122
- // Clear cache so new word is picked up
123
- clearCache();
124
-
125
- return true;
126
- }
127
-
128
- /**
129
- * Remove word from dictionary
130
- */
131
- export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
132
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
133
- const words = loadDictionaryFile(dictPath);
134
- const normalizedWord = word.trim().toLowerCase();
135
-
136
- if (!words.has(normalizedWord)) {
137
- return false;
138
- }
139
-
140
- words.delete(normalizedWord);
141
- saveDictionaryFile(words, dictPath);
142
-
143
- // Clear cache
144
- clearCache();
145
-
146
- return true;
147
- }
148
-
149
- /**
150
- * List words in dictionary
151
- */
152
- export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
153
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
154
- const words = loadDictionaryFile(dictPath);
155
- return [...words].sort();
156
- }
157
-
158
- /**
159
- * Initialize the spellchecker with custom words
160
- */
161
- export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
162
- if (spellcheckerCache[lang]) {
163
- return spellcheckerCache[lang];
164
- }
165
-
166
- // Select dictionary based on language
167
- const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
168
- const spell = nspell(dictionary);
169
-
170
- // Add scientific/academic words
171
- for (const word of scientificWords) {
172
- spell.add(word);
173
- }
174
-
175
- // Add custom words
176
- const customWords = loadAllCustomWords(projectDir);
177
- for (const word of customWords) {
178
- spell.add(word);
179
- }
180
-
181
- spellcheckerCache[lang] = spell;
182
- return spell;
183
- }
184
-
185
- /**
186
- * Clear spellchecker cache (call after modifying dictionaries)
187
- */
188
- export function clearCache(): void {
189
- spellcheckerCache.en = null;
190
- spellcheckerCache['en-gb'] = null;
191
- }
192
-
193
- /**
194
- * Extract words from text, filtering out non-words
195
- */
196
- export function extractWords(text: string): WordLocation[] {
197
- const words: WordLocation[] = [];
198
- const lines = text.split('\n');
199
- let inCodeBlock = false;
200
- let inFrontmatter = false;
201
-
202
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
203
- const line = lines[lineNum];
204
- if (!line) continue;
205
- const trimmed = line.trim();
206
-
207
- // Track YAML frontmatter (only at start of file)
208
- if (lineNum === 0 && trimmed === '---') {
209
- inFrontmatter = true;
210
- continue;
211
- }
212
- if (inFrontmatter) {
213
- if (trimmed === '---') {
214
- inFrontmatter = false;
215
- }
216
- continue;
217
- }
218
-
219
- // Track code blocks
220
- if (trimmed.startsWith('```')) {
221
- inCodeBlock = !inCodeBlock;
222
- continue;
223
- }
224
- if (inCodeBlock) {
225
- continue;
226
- }
227
-
228
- // Skip URLs and paths
229
- if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
230
- continue;
231
- }
232
-
233
- // Remove markdown syntax, URLs, code spans, LaTeX, etc.
234
- let cleanLine = line
235
- .replace(/`[^`]+`/g, '') // inline code
236
- .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links (keep text)
237
- .replace(/!\[[^\]]*\]\([^)]+\)/g, '') // images
238
- .replace(/@(fig|tbl|eq):\w+/g, '') // cross-refs
239
- .replace(/\{[^}]+\}/g, '') // CriticMarkup/templates
240
- .replace(/https?:\/\/\S+/g, '') // URLs
241
- .replace(/\$[^$]+\$/g, '') // inline LaTeX math
242
- .replace(/\\\w+/g, '') // LaTeX commands like \frac
243
- .replace(/[#*_~`>|]/g, ' '); // markdown chars
244
-
245
- // Extract words (letters and apostrophes only)
246
- const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
247
- let match;
248
-
249
- while ((match = wordPattern.exec(cleanLine)) !== null) {
250
- const word = match[0];
251
-
252
- // Skip:
253
- // - Very short words (1-2 chars)
254
- // - All caps (acronyms like NASA)
255
- // - File extensions (.md, .tex, .png)
256
- // - CamelCase (likely code or citations like vanKleunen)
257
- // - Words starting with capital in middle of sentence (proper nouns/names)
258
- if (word.length < 3 ||
259
- /^[A-Z]+$/.test(word) ||
260
- /^\w{2,4}$/.test(word) && /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
261
- /[a-z][A-Z]/.test(word)) {
262
- continue;
263
- }
264
-
265
- words.push({
266
- word,
267
- line: lineNum + 1,
268
- column: match.index + 1,
269
- });
270
- }
271
- }
272
-
273
- return words;
274
- }
275
-
276
- /**
277
- * Check if a word looks like a proper noun (name)
278
- */
279
- function looksLikeName(word: string): boolean {
280
- // Capitalized, not all caps, reasonable length for a name
281
- return /^[A-Z][a-z]{2,}$/.test(word);
282
- }
283
-
284
- /**
285
- * Check spelling in text
286
- */
287
- export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
288
- const { projectDir = '.', lang = 'en' } = options;
289
- const spell = await getSpellchecker(projectDir, lang);
290
- const words = extractWords(text);
291
- const misspelled: SpellingIssue[] = [];
292
- const possibleNames: SpellingIssue[] = [];
293
- const seen = new Set<string>();
294
- const seenNames = new Set<string>();
295
-
296
- for (const { word, line, column } of words) {
297
- // Skip if already reported this word
298
- const key = word.toLowerCase();
299
- if (seen.has(key) || seenNames.has(key)) {
300
- continue;
301
- }
302
-
303
- if (!spell.correct(word)) {
304
- // Check if it looks like a proper noun/name
305
- if (looksLikeName(word)) {
306
- seenNames.add(key);
307
- possibleNames.push({ word, line, column });
308
- } else {
309
- seen.add(key);
310
- misspelled.push({
311
- word,
312
- line,
313
- column,
314
- suggestions: spell.suggest(word).slice(0, 5),
315
- });
316
- }
317
- }
318
- }
319
-
320
- return { misspelled, possibleNames };
321
- }
322
-
323
- /**
324
- * Check spelling in a file
325
- */
326
- export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
327
- const text = fs.readFileSync(filePath, 'utf-8');
328
- const result = await checkSpelling(text, options);
329
-
330
- return {
331
- misspelled: result.misspelled.map(issue => ({ ...issue, file: filePath })),
332
- possibleNames: result.possibleNames.map(issue => ({ ...issue, file: filePath })),
333
- };
334
- }
1
+ /**
2
+ * Spelling checker module with global and project dictionaries
3
+ *
4
+ * Uses nspell (Hunspell-compatible) for English spellchecking.
5
+ * Custom words stored in:
6
+ * - ~/.rev-dictionary (global)
7
+ * - .rev-dictionary (project-local)
8
+ */
9
+
10
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
// @ts-ignore - nspell has no types
import nspell from 'nspell';
// @ts-ignore - dictionary-en has no types
import dictionaryEn from 'dictionary-en';
// @ts-ignore - dictionary-en-gb has no types
import dictionaryEnGb from 'dictionary-en-gb';
import { scientificWords } from './scientific-words.js';
import type { SpellingIssue, SpellingResult } from './types.js';
20
+
21
/** File name shared by the global and the project-local custom dictionary. */
const DICT_NAME = '.rev-dictionary';

/** Dictionary variants the spellchecker can be built for. */
type SpellingLang = 'en' | 'en-gb';

// Cache for the spellchecker instances (one slot per language).
// Typed loosely because nspell ships no type definitions.
const spellcheckerCache: Record<string, any> = {
  en: null,
  'en-gb': null,
};

/** A word found in the checked text together with its 1-based position. */
interface WordLocation {
  word: string;
  line: number;
  column: number;
}

/** Options accepted by checkSpelling. */
interface CheckSpellingOptions {
  /** Directory whose project dictionary is loaded (callers default it to '.'). */
  projectDir?: string;
  /** Dictionary variant to check against (callers default it to 'en'). */
  lang?: SpellingLang;
}

/**
 * Options accepted by checkFile.
 * Identical to the checkSpelling options, so it is an alias rather than a
 * second copy of the same fields.
 */
type CheckFileOptions = CheckSpellingOptions;
44
+
45
/**
 * Get the global dictionary path.
 *
 * The home directory is taken from the HOME environment variable, then from
 * USERPROFILE. When neither is set (or both are empty) the function falls
 * back to os.homedir() instead of passing `undefined` to path.join, which
 * would throw.
 *
 * @returns Path of the dictionary file inside the user's home directory.
 */
export function getGlobalDictPath(): string {
  const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
  return path.join(home, DICT_NAME);
}
52
+
53
/**
 * Build the path of the project-local dictionary file.
 *
 * @param directory Project directory; the current directory when omitted.
 * @returns The dictionary file name joined onto `directory`.
 */
export function getProjectDictPath(directory: string = '.'): string {
  const projectDictPath = path.join(directory, DICT_NAME);
  return projectDictPath;
}
59
+
60
/**
 * Read a dictionary file into a set of lower-cased words.
 *
 * Each line is trimmed; empty lines and lines starting with `#` are ignored.
 *
 * @param dictPath Path of the dictionary file.
 * @returns The words found, or an empty set when the file does not exist.
 */
export function loadDictionaryFile(dictPath: string): Set<string> {
  if (!fs.existsSync(dictPath)) {
    return new Set<string>();
  }

  const entries = fs
    .readFileSync(dictPath, 'utf-8')
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#'))
    .map((entry) => entry.toLowerCase());

  return new Set<string>(entries);
}
78
+
79
/**
 * Write a set of words to a dictionary file.
 *
 * The file starts with a two-line comment header, followed by the words in
 * sorted order, one per line. The parent directory is created if it is
 * missing.
 *
 * @param words Words to store.
 * @param dictPath Destination file path.
 */
export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
  const header = `# Custom dictionary for docrev
# One word per line, lines starting with # are comments
`;
  const sortedWords = Array.from(words);
  sortedWords.sort();
  const body = sortedWords.join('\n');

  // Make sure the target directory is there before writing
  const parentDir = path.dirname(dictPath);
  const parentMissing = !fs.existsSync(parentDir);
  if (parentMissing) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  fs.writeFileSync(dictPath, `${header}${body}\n`, 'utf-8');
}
96
+
97
/**
 * Collect the custom words of the global dictionary and of the project
 * dictionary into a single set.
 *
 * @param projectDir Directory holding the project dictionary.
 * @returns The union of both dictionaries.
 */
export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
  const merged = new Set<string>(loadDictionaryFile(getGlobalDictPath()));
  for (const entry of loadDictionaryFile(getProjectDictPath(projectDir))) {
    merged.add(entry);
  }
  return merged;
}
106
+
107
/**
 * Add a word to the global or the project dictionary.
 *
 * The word is trimmed and lower-cased before it is stored. Input that cannot
 * round-trip through the one-word-per-line file format is rejected without
 * touching the file: blank input, input starting with `#` (read back as a
 * comment) and input containing a line break (read back as several words).
 *
 * @param word Word to add.
 * @param global Write to the global dictionary when true, otherwise to the
 *   project dictionary.
 * @param projectDir Project directory, used only when `global` is false.
 * @returns true when the word was added; false when it was rejected or was
 *   already present.
 */
export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  const normalizedWord = word.trim().toLowerCase();

  // These values would be written to disk but never loaded back as this word.
  if (!normalizedWord || normalizedWord.startsWith('#') || /[\r\n]/.test(normalizedWord)) {
    return false;
  }

  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
  const words = loadDictionaryFile(dictPath);

  if (words.has(normalizedWord)) {
    return false;
  }

  words.add(normalizedWord);
  saveDictionaryFile(words, dictPath);

  // Clear cache so new word is picked up
  clearCache();

  return true;
}
127
+
128
/**
 * Delete a word from the global or the project dictionary.
 *
 * The word is trimmed and lower-cased before the lookup.
 *
 * @param word Word to remove.
 * @param global Use the global dictionary when true, otherwise the project
 *   dictionary.
 * @param projectDir Project directory, used only when `global` is false.
 * @returns true when the word was present and has been removed, false when
 *   it was not in the dictionary.
 */
export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  const targetPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
  const entries = loadDictionaryFile(targetPath);

  // Set.delete reports whether the entry existed
  const wasPresent = entries.delete(word.trim().toLowerCase());
  if (!wasPresent) {
    return false;
  }

  saveDictionaryFile(entries, targetPath);

  // Drop cached checkers so the removal takes effect
  clearCache();

  return true;
}
148
+
149
/**
 * Return the words of one dictionary in sorted order.
 *
 * @param global Read the global dictionary when true, otherwise the project
 *   dictionary.
 * @param projectDir Project directory, used only when `global` is false.
 * @returns Sorted list of the stored words.
 */
export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
  let dictPath: string;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }

  const sorted = Array.from(loadDictionaryFile(dictPath));
  sorted.sort();
  return sorted;
}
157
+
158
/**
 * Initialize the spellchecker with custom words.
 *
 * One checker is cached per language. The cache entry also records the
 * resolved project directory it was built for, so a call for a different
 * project rebuilds the checker instead of reusing custom words loaded from
 * another project's dictionary.
 *
 * @param projectDir Directory whose project dictionary is loaded.
 * @param lang Dictionary variant: 'en' or 'en-gb'.
 * @returns The nspell instance (untyped, as nspell ships no types).
 */
export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
  const resolvedDir = path.resolve(projectDir);

  // Reuse the cached checker only when it was built for this same project.
  const cached = spellcheckerCache[lang];
  if (cached && cached.projectDir === resolvedDir) {
    return cached.spell;
  }

  // Select dictionary based on language
  const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
  const spell = nspell(dictionary);

  // Add scientific/academic words
  for (const word of scientificWords) {
    spell.add(word);
  }

  // Add custom words (global + project)
  const customWords = loadAllCustomWords(projectDir);
  for (const word of customWords) {
    spell.add(word);
  }

  // Store the checker with the directory it belongs to; a slot that has been
  // reset to null is falsy and simply triggers a rebuild on the next call.
  spellcheckerCache[lang] = { projectDir: resolvedDir, spell };
  return spell;
}
184
+
185
/**
 * Reset the cached spellcheckers of both languages so the next request
 * rebuilds them (call after modifying dictionaries).
 */
export function clearCache(): void {
  for (const lang of ['en', 'en-gb']) {
    spellcheckerCache[lang] = null;
  }
}
192
+
193
/**
 * Extract words from text, filtering out non-words.
 *
 * Skipped entirely: YAML frontmatter (only when the very first line is
 * `---`), fenced code blocks, and lines that start with `http` or `/`.
 * Within the remaining lines, inline code, images, cross-references,
 * brace-delimited spans, URLs, inline math and LaTeX commands are ignored;
 * link text is kept.
 *
 * Ignored spans are overwritten with spaces of the same length rather than
 * deleted, so `column` is the 1-based position of the word in the original
 * line. As a consequence, text on either side of an ignored span is never
 * joined into one word.
 *
 * @param text Markdown text to scan.
 * @returns Candidate words with their 1-based line and column.
 */
export function extractWords(text: string): WordLocation[] {
  const words: WordLocation[] = [];
  const lines = text.split('\n');
  let inCodeBlock = false;
  let inFrontmatter = false;

  // Same-length blank replacement keeps later match offsets aligned with the
  // original line.
  const blank = (matched: string): string => ' '.repeat(matched.length);

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    if (!line) continue;
    const trimmed = line.trim();

    // Track YAML frontmatter (only at start of file)
    if (lineNum === 0 && trimmed === '---') {
      inFrontmatter = true;
      continue;
    }
    if (inFrontmatter) {
      if (trimmed === '---') {
        inFrontmatter = false;
      }
      continue;
    }

    // Track code blocks
    if (trimmed.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) {
      continue;
    }

    // Skip URLs and paths
    if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
      continue;
    }

    // Blank out markdown syntax, URLs, code spans, LaTeX, etc.
    const cleanLine = line
      .replace(/`[^`]+`/g, blank) // inline code
      // Images must go before links: the link pattern would otherwise match
      // the `[alt](src)` part of an image and keep its alt text.
      .replace(/!\[[^\]]*\]\([^)]+\)/g, blank) // images
      .replace(
        /\[([^\]]+)\]\([^)]+\)/g,
        (matched: string, label: string) =>
          ' ' + label + ' '.repeat(matched.length - label.length - 1),
      ) // links (keep text in place)
      .replace(/@(fig|tbl|eq):\w+/g, blank) // cross-refs
      .replace(/\{[^}]+\}/g, blank) // CriticMarkup/templates
      .replace(/https?:\/\/\S+/g, blank) // URLs
      .replace(/\$[^$]+\$/g, blank) // inline LaTeX math
      .replace(/\\\w+/g, blank) // LaTeX commands like \frac
      .replace(/[#*_~`>|]/g, ' '); // markdown chars

    // Extract words (letters and apostrophes only)
    const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
    let match: RegExpExecArray | null;

    while ((match = wordPattern.exec(cleanLine)) !== null) {
      const word = match[0];

      // Skip:
      // - Very short words (1-2 chars)
      // - All caps (acronyms like NASA)
      // - File extensions (tex, png, pdf, ...)
      // - CamelCase (likely code or citations like vanKleunen)
      if (word.length < 3 ||
          /^[A-Z]+$/.test(word) ||
          /^\w{2,4}$/.test(word) && /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
          /[a-z][A-Z]/.test(word)) {
        continue;
      }

      words.push({
        word,
        line: lineNum + 1,
        column: match.index + 1,
      });
    }
  }

  return words;
}
275
+
276
/**
 * Decide whether a word has the shape of a proper noun: one ASCII capital
 * letter followed by at least two ASCII lower-case letters and nothing else.
 */
function looksLikeName(word: string): boolean {
  const initial = word.charAt(0);
  const remainder = word.slice(1);

  const startsWithCapital = initial >= 'A' && initial <= 'Z';
  const restIsLowercase = remainder.length >= 2 && /^[a-z]+$/.test(remainder);

  return startsWithCapital && restIsLowercase;
}
283
+
284
/**
 * Spellcheck a piece of text.
 *
 * Every word is reported at most once (compared case-insensitively), at the
 * position of its first flagged occurrence. Unknown words that look like a
 * proper noun go to `possibleNames`; all other unknown words go to
 * `misspelled` together with up to five suggestions.
 *
 * @param text Text to check.
 * @param options Project directory (default '.') and language (default 'en').
 * @returns The misspelled words and the possible names.
 */
export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
  const projectDir = options.projectDir === undefined ? '.' : options.projectDir;
  const lang = options.lang === undefined ? 'en' : options.lang;
  const checker = await getSpellchecker(projectDir, lang);

  const misspelled: SpellingIssue[] = [];
  const possibleNames: SpellingIssue[] = [];
  const reportedTypos = new Set<string>();
  const reportedNames = new Set<string>();

  for (const location of extractWords(text)) {
    const { word, line, column } = location;
    const key = word.toLowerCase();

    // Already reported, or spelled correctly: nothing to record
    const alreadyReported = reportedTypos.has(key) || reportedNames.has(key);
    if (alreadyReported || checker.correct(word)) {
      continue;
    }

    if (looksLikeName(word)) {
      // Probably a proper noun rather than a typo
      reportedNames.add(key);
      possibleNames.push({ word, line, column });
      continue;
    }

    reportedTypos.add(key);
    const suggestions = checker.suggest(word).slice(0, 5);
    misspelled.push({ word, line, column, suggestions });
  }

  return { misspelled, possibleNames };
}
322
+
323
/**
 * Spellcheck the contents of a file.
 *
 * The file is read as UTF-8 and passed to checkSpelling; every reported
 * issue is then tagged with the file path.
 *
 * @param filePath File to check.
 * @param options Passed through to checkSpelling.
 * @returns The spelling result with `file` set on each issue.
 */
export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
  const contents = fs.readFileSync(filePath, 'utf-8');
  const { misspelled, possibleNames } = await checkSpelling(contents, options);

  // Copy an issue and attach the path it came from
  const tagWithFile = <T extends object>(issue: T) => ({ ...issue, file: filePath });

  return {
    misspelled: misspelled.map((issue) => tagWithFile(issue)),
    possibleNames: possibleNames.map((issue) => tagWithFile(issue)),
  };
}