docrev 0.9.18 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (134)
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -149
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -406
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/build.d.ts +8 -0
  11. package/dist/lib/build.d.ts.map +1 -1
  12. package/dist/lib/build.js +62 -6
  13. package/dist/lib/build.js.map +1 -1
  14. package/dist/lib/commands/context.d.ts +1 -1
  15. package/dist/lib/commands/context.d.ts.map +1 -1
  16. package/dist/lib/commands/context.js +1 -1
  17. package/dist/lib/commands/context.js.map +1 -1
  18. package/dist/lib/commands/sections.js +7 -7
  19. package/dist/lib/commands/sections.js.map +1 -1
  20. package/dist/lib/commands/sync.d.ts.map +1 -1
  21. package/dist/lib/commands/sync.js +15 -14
  22. package/dist/lib/commands/sync.js.map +1 -1
  23. package/dist/lib/commands/utilities.js +164 -164
  24. package/dist/lib/commands/verify-anchors.js +6 -6
  25. package/dist/lib/commands/verify-anchors.js.map +1 -1
  26. package/dist/lib/commands/word-tools.js +8 -8
  27. package/dist/lib/grammar.js +3 -3
  28. package/dist/lib/macro-filter.lua +201 -0
  29. package/dist/lib/macros.d.ts +102 -0
  30. package/dist/lib/macros.d.ts.map +1 -0
  31. package/dist/lib/macros.js +218 -0
  32. package/dist/lib/macros.js.map +1 -0
  33. package/dist/lib/pdf-comments.js +44 -44
  34. package/dist/lib/plugins.js +57 -57
  35. package/dist/lib/pptx-color-filter.lua +37 -0
  36. package/dist/lib/pptx-themes.js +115 -115
  37. package/dist/lib/schema.d.ts.map +1 -1
  38. package/dist/lib/schema.js +34 -0
  39. package/dist/lib/schema.js.map +1 -1
  40. package/dist/lib/sections.d.ts +35 -0
  41. package/dist/lib/sections.d.ts.map +1 -1
  42. package/dist/lib/sections.js +81 -0
  43. package/dist/lib/sections.js.map +1 -1
  44. package/dist/lib/spelling.js +2 -2
  45. package/dist/lib/templates.js +387 -387
  46. package/dist/lib/themes.js +51 -51
  47. package/eslint.config.js +27 -27
  48. package/lib/anchor-match.ts +276 -276
  49. package/lib/annotations.ts +644 -644
  50. package/lib/build.ts +1766 -1694
  51. package/lib/citations.ts +160 -160
  52. package/lib/commands/build.ts +855 -855
  53. package/lib/commands/citations.ts +515 -515
  54. package/lib/commands/comments.ts +1050 -1050
  55. package/lib/commands/context.ts +176 -174
  56. package/lib/commands/core.ts +309 -309
  57. package/lib/commands/doi.ts +435 -435
  58. package/lib/commands/file-ops.ts +372 -372
  59. package/lib/commands/history.ts +320 -320
  60. package/lib/commands/index.ts +87 -87
  61. package/lib/commands/init.ts +259 -259
  62. package/lib/commands/merge-resolve.ts +378 -378
  63. package/lib/commands/preview.ts +178 -178
  64. package/lib/commands/project-info.ts +244 -244
  65. package/lib/commands/quality.ts +517 -517
  66. package/lib/commands/response.ts +454 -454
  67. package/lib/commands/section-boundaries.ts +82 -82
  68. package/lib/commands/sections.ts +451 -451
  69. package/lib/commands/sync.ts +709 -706
  70. package/lib/commands/text-ops.ts +449 -449
  71. package/lib/commands/utilities.ts +448 -448
  72. package/lib/commands/verify-anchors.ts +272 -272
  73. package/lib/commands/word-tools.ts +340 -340
  74. package/lib/comment-realign.ts +517 -517
  75. package/lib/config.ts +84 -84
  76. package/lib/crossref.ts +781 -781
  77. package/lib/csl.ts +191 -191
  78. package/lib/dependencies.ts +98 -98
  79. package/lib/diff-engine.ts +465 -465
  80. package/lib/doi-cache.ts +115 -115
  81. package/lib/doi.ts +897 -897
  82. package/lib/equations.ts +506 -506
  83. package/lib/errors.ts +346 -346
  84. package/lib/format.ts +541 -541
  85. package/lib/git.ts +326 -326
  86. package/lib/grammar.ts +303 -303
  87. package/lib/image-registry.ts +180 -180
  88. package/lib/import.ts +911 -911
  89. package/lib/journals.ts +543 -543
  90. package/lib/macro-filter.lua +201 -0
  91. package/lib/macros.ts +273 -0
  92. package/lib/merge.ts +633 -633
  93. package/lib/orcid.ts +144 -144
  94. package/lib/pdf-comments.ts +263 -263
  95. package/lib/pdf-import.ts +524 -524
  96. package/lib/plugins.ts +362 -362
  97. package/lib/postprocess.ts +188 -188
  98. package/lib/pptx-color-filter.lua +37 -37
  99. package/lib/pptx-template.ts +469 -469
  100. package/lib/pptx-themes.ts +483 -483
  101. package/lib/protect-restore.ts +520 -520
  102. package/lib/rate-limiter.ts +94 -94
  103. package/lib/response.ts +197 -197
  104. package/lib/restore-references.ts +240 -240
  105. package/lib/review.ts +327 -327
  106. package/lib/schema.ts +488 -454
  107. package/lib/scientific-words.ts +73 -73
  108. package/lib/sections.ts +425 -335
  109. package/lib/slides.ts +756 -756
  110. package/lib/spelling.ts +334 -334
  111. package/lib/templates.ts +526 -526
  112. package/lib/themes.ts +742 -742
  113. package/lib/trackchanges.ts +247 -247
  114. package/lib/tui.ts +450 -450
  115. package/lib/types.ts +550 -550
  116. package/lib/undo.ts +250 -250
  117. package/lib/utils.ts +69 -69
  118. package/lib/variables.ts +179 -179
  119. package/lib/word-extraction.ts +806 -806
  120. package/lib/word.ts +643 -643
  121. package/lib/wordcomments.ts +840 -840
  122. package/package.json +137 -137
  123. package/scripts/postbuild.js +47 -28
  124. package/skill/REFERENCE.md +539 -539
  125. package/skill/SKILL.md +295 -295
  126. package/tsconfig.json +26 -26
  127. package/types/index.d.ts +525 -525
  128. package/issues.md +0 -180
  129. package/site/assets/extra.css +0 -208
  130. package/site/commands.html +0 -926
  131. package/site/configuration.html +0 -469
  132. package/site/index.html +0 -288
  133. package/site/troubleshooting.html +0 -461
  134. package/site/workflow.html +0 -518
package/lib/spelling.ts CHANGED
@@ -1,334 +1,334 @@
1
- /**
2
- * Spelling checker module with global and project dictionaries
3
- *
4
- * Uses nspell (Hunspell-compatible) for English spellchecking.
5
- * Custom words stored in:
6
- * - ~/.rev-dictionary (global)
7
- * - .rev-dictionary (project-local)
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- // @ts-ignore - nspell has no types
13
- import nspell from 'nspell';
14
- // @ts-ignore - dictionary-en has no types
15
- import dictionaryEn from 'dictionary-en';
16
- // @ts-ignore - dictionary-en-gb has no types
17
- import dictionaryEnGb from 'dictionary-en-gb';
18
- import { scientificWords } from './scientific-words.js';
19
- import type { SpellingIssue, SpellingResult } from './types.js';
20
-
21
- const DICT_NAME = '.rev-dictionary';
22
-
23
- // Cache for the spellchecker instances (one per language)
24
- const spellcheckerCache: Record<string, any> = {
25
- en: null,
26
- 'en-gb': null,
27
- };
28
-
29
- interface WordLocation {
30
- word: string;
31
- line: number;
32
- column: number;
33
- }
34
-
35
- interface CheckSpellingOptions {
36
- projectDir?: string;
37
- lang?: 'en' | 'en-gb';
38
- }
39
-
40
- interface CheckFileOptions {
41
- projectDir?: string;
42
- lang?: 'en' | 'en-gb';
43
- }
44
-
45
- /**
46
- * Get the global dictionary path
47
- */
48
- export function getGlobalDictPath(): string {
49
- const home = process.env.HOME || process.env.USERPROFILE;
50
- return path.join(home!, DICT_NAME);
51
- }
52
-
53
- /**
54
- * Get the project dictionary path
55
- */
56
- export function getProjectDictPath(directory: string = '.'): string {
57
- return path.join(directory, DICT_NAME);
58
- }
59
-
60
- /**
61
- * Load custom words from a dictionary file
62
- */
63
- export function loadDictionaryFile(dictPath: string): Set<string> {
64
- const words = new Set<string>();
65
-
66
- if (fs.existsSync(dictPath)) {
67
- const content = fs.readFileSync(dictPath, 'utf-8');
68
- for (const line of content.split('\n')) {
69
- const word = line.trim();
70
- if (word && !word.startsWith('#')) {
71
- words.add(word.toLowerCase());
72
- }
73
- }
74
- }
75
-
76
- return words;
77
- }
78
-
79
- /**
80
- * Save words to a dictionary file
81
- */
82
- export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
83
- const header = `# Custom dictionary for docrev
84
- # One word per line, lines starting with # are comments
85
- `;
86
- const content = header + [...words].sort().join('\n') + '\n';
87
-
88
- // Ensure directory exists
89
- const dir = path.dirname(dictPath);
90
- if (!fs.existsSync(dir)) {
91
- fs.mkdirSync(dir, { recursive: true });
92
- }
93
-
94
- fs.writeFileSync(dictPath, content, 'utf-8');
95
- }
96
-
97
- /**
98
- * Load all custom words (global + project)
99
- */
100
- export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
101
- const globalWords = loadDictionaryFile(getGlobalDictPath());
102
- const projectWords = loadDictionaryFile(getProjectDictPath(projectDir));
103
-
104
- return new Set([...globalWords, ...projectWords]);
105
- }
106
-
107
- /**
108
- * Add word to dictionary
109
- */
110
- export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
111
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
112
- const words = loadDictionaryFile(dictPath);
113
- const normalizedWord = word.trim().toLowerCase();
114
-
115
- if (words.has(normalizedWord)) {
116
- return false;
117
- }
118
-
119
- words.add(normalizedWord);
120
- saveDictionaryFile(words, dictPath);
121
-
122
- // Clear cache so new word is picked up
123
- clearCache();
124
-
125
- return true;
126
- }
127
-
128
- /**
129
- * Remove word from dictionary
130
- */
131
- export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
132
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
133
- const words = loadDictionaryFile(dictPath);
134
- const normalizedWord = word.trim().toLowerCase();
135
-
136
- if (!words.has(normalizedWord)) {
137
- return false;
138
- }
139
-
140
- words.delete(normalizedWord);
141
- saveDictionaryFile(words, dictPath);
142
-
143
- // Clear cache
144
- clearCache();
145
-
146
- return true;
147
- }
148
-
149
- /**
150
- * List words in dictionary
151
- */
152
- export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
153
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
154
- const words = loadDictionaryFile(dictPath);
155
- return [...words].sort();
156
- }
157
-
158
- /**
159
- * Initialize the spellchecker with custom words
160
- */
161
- export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
162
- if (spellcheckerCache[lang]) {
163
- return spellcheckerCache[lang];
164
- }
165
-
166
- // Select dictionary based on language
167
- const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
168
- const spell = nspell(dictionary);
169
-
170
- // Add scientific/academic words
171
- for (const word of scientificWords) {
172
- spell.add(word);
173
- }
174
-
175
- // Add custom words
176
- const customWords = loadAllCustomWords(projectDir);
177
- for (const word of customWords) {
178
- spell.add(word);
179
- }
180
-
181
- spellcheckerCache[lang] = spell;
182
- return spell;
183
- }
184
-
185
- /**
186
- * Clear spellchecker cache (call after modifying dictionaries)
187
- */
188
- export function clearCache(): void {
189
- spellcheckerCache.en = null;
190
- spellcheckerCache['en-gb'] = null;
191
- }
192
-
193
- /**
194
- * Extract words from text, filtering out non-words
195
- */
196
- export function extractWords(text: string): WordLocation[] {
197
- const words: WordLocation[] = [];
198
- const lines = text.split('\n');
199
- let inCodeBlock = false;
200
- let inFrontmatter = false;
201
-
202
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
203
- const line = lines[lineNum];
204
- if (!line) continue;
205
- const trimmed = line.trim();
206
-
207
- // Track YAML frontmatter (only at start of file)
208
- if (lineNum === 0 && trimmed === '---') {
209
- inFrontmatter = true;
210
- continue;
211
- }
212
- if (inFrontmatter) {
213
- if (trimmed === '---') {
214
- inFrontmatter = false;
215
- }
216
- continue;
217
- }
218
-
219
- // Track code blocks
220
- if (trimmed.startsWith('```')) {
221
- inCodeBlock = !inCodeBlock;
222
- continue;
223
- }
224
- if (inCodeBlock) {
225
- continue;
226
- }
227
-
228
- // Skip URLs and paths
229
- if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
230
- continue;
231
- }
232
-
233
- // Remove markdown syntax, URLs, code spans, LaTeX, etc.
234
- let cleanLine = line
235
- .replace(/`[^`]+`/g, '') // inline code
236
- .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links (keep text)
237
- .replace(/!\[[^\]]*\]\([^)]+\)/g, '') // images
238
- .replace(/@(fig|tbl|eq):\w+/g, '') // cross-refs
239
- .replace(/\{[^}]+\}/g, '') // CriticMarkup/templates
240
- .replace(/https?:\/\/\S+/g, '') // URLs
241
- .replace(/\$[^$]+\$/g, '') // inline LaTeX math
242
- .replace(/\\\w+/g, '') // LaTeX commands like \frac
243
- .replace(/[#*_~`>|]/g, ' '); // markdown chars
244
-
245
- // Extract words (letters and apostrophes only)
246
- const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
247
- let match;
248
-
249
- while ((match = wordPattern.exec(cleanLine)) !== null) {
250
- const word = match[0];
251
-
252
- // Skip:
253
- // - Very short words (1-2 chars)
254
- // - All caps (acronyms like NASA)
255
- // - File extensions (.md, .tex, .png)
256
- // - CamelCase (likely code or citations like vanKleunen)
257
- // - Words starting with capital in middle of sentence (proper nouns/names)
258
- if (word.length < 3 ||
259
- /^[A-Z]+$/.test(word) ||
260
- /^\w{2,4}$/.test(word) && /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
261
- /[a-z][A-Z]/.test(word)) {
262
- continue;
263
- }
264
-
265
- words.push({
266
- word,
267
- line: lineNum + 1,
268
- column: match.index + 1,
269
- });
270
- }
271
- }
272
-
273
- return words;
274
- }
275
-
276
- /**
277
- * Check if a word looks like a proper noun (name)
278
- */
279
- function looksLikeName(word: string): boolean {
280
- // Capitalized, not all caps, reasonable length for a name
281
- return /^[A-Z][a-z]{2,}$/.test(word);
282
- }
283
-
284
- /**
285
- * Check spelling in text
286
- */
287
- export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
288
- const { projectDir = '.', lang = 'en' } = options;
289
- const spell = await getSpellchecker(projectDir, lang);
290
- const words = extractWords(text);
291
- const misspelled: SpellingIssue[] = [];
292
- const possibleNames: SpellingIssue[] = [];
293
- const seen = new Set<string>();
294
- const seenNames = new Set<string>();
295
-
296
- for (const { word, line, column } of words) {
297
- // Skip if already reported this word
298
- const key = word.toLowerCase();
299
- if (seen.has(key) || seenNames.has(key)) {
300
- continue;
301
- }
302
-
303
- if (!spell.correct(word)) {
304
- // Check if it looks like a proper noun/name
305
- if (looksLikeName(word)) {
306
- seenNames.add(key);
307
- possibleNames.push({ word, line, column });
308
- } else {
309
- seen.add(key);
310
- misspelled.push({
311
- word,
312
- line,
313
- column,
314
- suggestions: spell.suggest(word).slice(0, 5),
315
- });
316
- }
317
- }
318
- }
319
-
320
- return { misspelled, possibleNames };
321
- }
322
-
323
- /**
324
- * Check spelling in a file
325
- */
326
- export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
327
- const text = fs.readFileSync(filePath, 'utf-8');
328
- const result = await checkSpelling(text, options);
329
-
330
- return {
331
- misspelled: result.misspelled.map(issue => ({ ...issue, file: filePath })),
332
- possibleNames: result.possibleNames.map(issue => ({ ...issue, file: filePath })),
333
- };
334
- }
1
+ /**
2
+ * Spelling checker module with global and project dictionaries
3
+ *
4
+ * Uses nspell (Hunspell-compatible) for English spellchecking.
5
+ * Custom words stored in:
6
+ * - ~/.rev-dictionary (global)
7
+ * - .rev-dictionary (project-local)
8
+ */
9
+
10
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
// @ts-ignore - nspell has no types
import nspell from 'nspell';
// @ts-ignore - dictionary-en has no types
import dictionaryEn from 'dictionary-en';
// @ts-ignore - dictionary-en-gb has no types
import dictionaryEnGb from 'dictionary-en-gb';
import { scientificWords } from './scientific-words.js';
import type { SpellingIssue, SpellingResult } from './types.js';
20
+
21
/** File name used for both the global and the project-local custom dictionary. */
const DICT_NAME = '.rev-dictionary';

// Per-language cache filled by getSpellchecker(); null means nothing is cached.
const spellcheckerCache: Record<string, any> = { en: null, 'en-gb': null };

/** A word picked out of the text together with its 1-based line and column. */
interface WordLocation {
  word: string;
  line: number;
  column: number;
}

/** Options accepted by checkSpelling(). */
interface CheckSpellingOptions {
  /** Directory whose project dictionary is loaded (defaults to '.'). */
  projectDir?: string;
  /** Dictionary variant to check against (defaults to 'en'). */
  lang?: 'en' | 'en-gb';
}

/** Options accepted by checkFile(); same shape as CheckSpellingOptions. */
type CheckFileOptions = CheckSpellingOptions;
44
+
45
/**
 * Get the global dictionary path.
 *
 * The home directory is taken from HOME, then USERPROFILE, and finally from
 * os.homedir() when neither variable holds a value, so path.join is never
 * handed `undefined`.
 *
 * @returns Path of the dictionary file (DICT_NAME) inside the user's home directory.
 */
export function getGlobalDictPath(): string {
  // `||` rather than `??`: an empty HOME is no more usable than an unset one.
  const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
  return path.join(home, DICT_NAME);
}
52
+
53
/**
 * Build the location of the project-local dictionary.
 *
 * @param directory Project directory; defaults to the current directory.
 * @returns `directory` joined with the dictionary file name.
 */
export function getProjectDictPath(directory: string = '.'): string {
  const segments = [directory, DICT_NAME];
  return path.join(...segments);
}
59
+
60
/**
 * Load custom words from a dictionary file.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are ignored,
 * and every remaining entry is lower-cased.
 *
 * @param dictPath Path of the dictionary file.
 * @returns The set of words in the file; empty when the file does not exist.
 * @throws Any read error other than "file/path does not exist".
 */
export function loadDictionaryFile(dictPath: string): Set<string> {
  const words = new Set<string>();

  let content: string;
  try {
    // Read directly instead of existsSync() + read: the file may disappear
    // between the check and the read.
    content = fs.readFileSync(dictPath, 'utf-8');
  } catch (err: unknown) {
    const code =
      typeof err === 'object' && err !== null && 'code' in err
        ? (err as { code?: unknown }).code
        : undefined;
    // A missing file (or missing parent path) simply means "no custom words".
    if (code === 'ENOENT' || code === 'ENOTDIR') {
      return words;
    }
    throw err;
  }

  for (const line of content.split('\n')) {
    const word = line.trim(); // also strips the '\r' of CRLF files
    if (word && !word.startsWith('#')) {
      words.add(word.toLowerCase());
    }
  }

  return words;
}
78
+
79
/**
 * Write a word set to a dictionary file.
 *
 * The file starts with a two-line comment header, followed by the words in
 * sorted order, one per line. The parent directory is created when missing.
 *
 * @param words Words to persist.
 * @param dictPath Destination file.
 */
export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
  const headerLines = [
    '# Custom dictionary for docrev',
    '# One word per line, lines starting with # are comments',
  ];
  const sortedWords = Array.from(words).sort();
  const content = `${headerLines.join('\n')}\n${sortedWords.join('\n')}\n`;

  // Make sure the target directory is there before writing
  const parentDir = path.dirname(dictPath);
  const parentExists = fs.existsSync(parentDir);
  if (!parentExists) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  fs.writeFileSync(dictPath, content, 'utf-8');
}
96
+
97
/**
 * Collect the custom words of the global dictionary and of the project
 * dictionary into one set.
 *
 * @param projectDir Directory holding the project dictionary (default '.').
 * @returns Union of both word lists.
 */
export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
  const merged = loadDictionaryFile(getGlobalDictPath());
  const projectWords = loadDictionaryFile(getProjectDictPath(projectDir));

  for (const entry of projectWords) {
    merged.add(entry);
  }
  return merged;
}
106
+
107
/**
 * Add word to dictionary.
 *
 * The word is trimmed and lower-cased before it is stored. Entries that
 * cannot survive a save/load round trip are rejected: an empty word, a word
 * starting with `#` (read back as a comment) and a word containing a line
 * break (read back as several entries).
 *
 * @param word Word to add.
 * @param global When true (default) the global dictionary is used, otherwise
 *   the project dictionary in `projectDir`.
 * @param projectDir Project directory used when `global` is false.
 * @returns true when the word was added; false when it was already present
 *   or is not a storable entry.
 */
export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
  const normalizedWord = word.trim().toLowerCase();

  // Nothing to store, or the entry would be dropped/split on the next load.
  if (!normalizedWord || normalizedWord.startsWith('#') || /[\r\n]/.test(normalizedWord)) {
    return false;
  }

  const words = loadDictionaryFile(dictPath);
  if (words.has(normalizedWord)) {
    return false;
  }

  words.add(normalizedWord);
  saveDictionaryFile(words, dictPath);

  // Clear cache so new word is picked up
  clearCache();

  return true;
}
127
+
128
/**
 * Delete a word from the global or the project dictionary.
 *
 * @param word Word to delete; compared after trimming and lower-casing.
 * @param global Use the global dictionary (default) instead of the project one.
 * @param projectDir Project directory used when `global` is false.
 * @returns true when the word was present and has been removed, else false.
 */
export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  let dictPath: string;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }
  const entries = loadDictionaryFile(dictPath);
  const target = word.trim().toLowerCase();

  // Set.delete() reports whether the entry existed
  const wasPresent = entries.delete(target);
  if (!wasPresent) {
    return false;
  }

  saveDictionaryFile(entries, dictPath);
  // Drop cached spellcheckers so the removal takes effect
  clearCache();
  return true;
}
148
+
149
/**
 * Return the entries of one dictionary in sorted order.
 *
 * @param global Read the global dictionary (default) instead of the project one.
 * @param projectDir Project directory used when `global` is false.
 * @returns Sorted array of the stored words.
 */
export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
  let dictPath: string;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }
  return Array.from(loadDictionaryFile(dictPath)).sort();
}
157
+
158
/**
 * Initialize the spellchecker with custom words.
 *
 * A checker is built from the dictionary for `lang`, then extended with the
 * scientific word list and with the global + project custom words. The result
 * is cached per language together with the project directory it was built
 * for, so a call for a different project does not get another project's
 * custom words.
 *
 * @param projectDir Directory whose project dictionary is loaded (default '.').
 * @param lang Dictionary variant, 'en' (default) or 'en-gb'.
 * @returns The nspell instance for this language and project.
 */
export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
  // Resolve so that '.' and an equivalent absolute path share one cache entry.
  const resolvedDir = path.resolve(projectDir);
  const cached = spellcheckerCache[lang];
  if (cached && cached.projectDir === resolvedDir) {
    return cached.spell;
  }

  // Select dictionary based on language
  const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
  const spell = nspell(dictionary);

  // Add scientific/academic words
  for (const word of scientificWords) {
    spell.add(word);
  }

  // Add custom words
  const customWords = loadAllCustomWords(projectDir);
  for (const word of customWords) {
    spell.add(word);
  }

  // Remember which project this instance belongs to; clearCache() still resets
  // the slot by assigning null.
  spellcheckerCache[lang] = { projectDir: resolvedDir, spell };
  return spell;
}
184
+
185
/**
 * Reset the cached spellchecker of every supported language.
 * Call this after a dictionary file has been modified.
 */
export function clearCache(): void {
  for (const lang of ['en', 'en-gb']) {
    spellcheckerCache[lang] = null;
  }
}
192
+
193
/** Text of a (partly stripped) line plus, per character, its index in the original line. */
interface TrackedLine {
  text: string;
  origin: number[];
}

/**
 * Markup removed from a line before words are extracted, in application order.
 * `keepGroupAt` marks rules that keep capture group 1, giving its offset
 * inside the match. Images must come before links: the link pattern also
 * matches the `[alt](url)` tail of an image and would keep the alt text.
 */
const EXTRACT_STRIP_STEPS: ReadonlyArray<{ pattern: RegExp; keepGroupAt?: number }> = [
  { pattern: /`[^`]+`/g },                               // inline code
  { pattern: /!\[[^\]]*\]\([^)]+\)/g },                  // images
  { pattern: /\[([^\]]+)\]\([^)]+\)/g, keepGroupAt: 1 }, // links (keep text)
  { pattern: /@(fig|tbl|eq):\w+/g },                     // cross-refs
  { pattern: /\{[^}]+\}/g },                             // CriticMarkup/templates
  { pattern: /https?:\/\/\S+/g },                        // URLs
  { pattern: /\$[^$]+\$/g },                             // inline LaTeX math
  { pattern: /\\\w+/g },                                 // LaTeX commands like \frac
];

/** Words: letters, with apostrophes allowed in the interior. */
const EXTRACT_WORD_PATTERN = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;

/** Bare file extensions that should not be spell-checked. */
const EXTRACT_FILE_EXTENSION = /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i;

/** Remove every match of `pattern` (optionally keeping group 1) while carrying the original indices along. */
function stripTracked(source: TrackedLine, pattern: RegExp, keepGroupAt?: number): TrackedLine {
  let text = '';
  const origin: number[] = [];
  const keep = (from: number, to: number): void => {
    text += source.text.slice(from, to);
    for (let i = from; i < to; i++) {
      origin.push(source.origin[i] ?? i);
    }
  };

  let cursor = 0;
  for (const match of source.text.matchAll(pattern)) {
    const start = match.index ?? 0;
    keep(cursor, start);
    const group = match[1];
    if (keepGroupAt !== undefined && group !== undefined) {
      keep(start + keepGroupAt, start + keepGroupAt + group.length);
    }
    cursor = start + match[0].length;
  }
  keep(cursor, source.text.length);

  return { text, origin };
}

/**
 * Extract words from text, filtering out non-words.
 *
 * Skipped entirely: YAML frontmatter at the start of the text, fenced code
 * blocks (```), and lines that start with `http` or `/`. Within the remaining
 * lines inline code, images, link targets, cross-references, `{...}` spans,
 * URLs, inline math and LaTeX commands are removed before matching.
 *
 * @param text Document text.
 * @returns One entry per accepted word, with 1-based `line` and `column`.
 *   `column` refers to the position in the original line, even when markup
 *   earlier on the line was stripped.
 */
export function extractWords(text: string): WordLocation[] {
  const words: WordLocation[] = [];
  const lines = text.split('\n');
  let inCodeBlock = false;
  let inFrontmatter = false;

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    if (!line) continue;
    const trimmed = line.trim();

    // Track YAML frontmatter (only at start of file)
    if (lineNum === 0 && trimmed === '---') {
      inFrontmatter = true;
      continue;
    }
    if (inFrontmatter) {
      if (trimmed === '---') {
        inFrontmatter = false;
      }
      continue;
    }

    // Track code blocks
    if (trimmed.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) {
      continue;
    }

    // Skip URLs and paths
    if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
      continue;
    }

    // Remove markdown syntax, URLs, code spans, LaTeX, etc. Text on both sides
    // of a removed span is joined, and the index map remembers where each
    // surviving character came from.
    let tracked: TrackedLine = {
      text: line,
      origin: Array.from({ length: line.length }, (_, i) => i),
    };
    for (const step of EXTRACT_STRIP_STEPS) {
      tracked = stripTracked(tracked, step.pattern, step.keepGroupAt);
    }
    // Markdown punctuation becomes a space: length-preserving, so the index
    // map stays valid.
    const cleanLine = tracked.text.replace(/[#*_~`>|]/g, ' ');

    // Extract words (letters and apostrophes only)
    for (const match of cleanLine.matchAll(EXTRACT_WORD_PATTERN)) {
      const word = match[0];

      // Skip:
      // - Very short words (1-2 chars)
      // - All caps (acronyms like NASA)
      // - File extensions (md, tex, png, ...)
      // - CamelCase (likely code or citations like vanKleunen)
      if (word.length < 3 ||
          /^[A-Z]+$/.test(word) ||
          EXTRACT_FILE_EXTENSION.test(word) ||
          /[a-z][A-Z]/.test(word)) {
        continue;
      }

      const cleanIndex = match.index ?? 0;
      words.push({
        word,
        line: lineNum + 1,
        column: (tracked.origin[cleanIndex] ?? cleanIndex) + 1,
      });
    }
  }

  return words;
}
275
+
276
/**
 * Heuristic for proper nouns: one upper-case ASCII letter followed by at
 * least two lower-case ASCII letters and nothing else.
 */
function looksLikeName(word: string): boolean {
  const initial = word.charAt(0);
  const remainder = word.slice(1);
  const startsUpper = initial >= 'A' && initial <= 'Z';
  return startsUpper && /^[a-z]{2,}$/.test(remainder);
}
283
+
284
/**
 * Check spelling in text.
 *
 * Every word returned by extractWords() is tested once per distinct spelling.
 * An unknown word that looks like a proper noun is reported under
 * `possibleNames`; any other unknown word is reported under `misspelled` with
 * up to five suggestions. Each word (case-insensitively) is reported at most
 * once. When a word was first seen capitalised and filed as a possible name,
 * and later occurs in a form that does not look like a name, it is moved to
 * `misspelled` instead of being silently skipped.
 *
 * @param text Text to check.
 * @param options Project directory (default '.') and language (default 'en').
 * @returns The misspelled words and the possible names.
 */
export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
  const { projectDir = '.', lang = 'en' } = options;
  const spell = await getSpellchecker(projectDir, lang);
  const words = extractWords(text);
  const misspelled: SpellingIssue[] = [];
  const possibleNames: SpellingIssue[] = [];
  const seen = new Set<string>();
  const seenNames = new Set<string>();
  // Exact spellings already confirmed correct; avoids re-checking common words.
  const knownCorrect = new Set<string>();

  for (const { word, line, column } of words) {
    const key = word.toLowerCase();

    // Already reported as misspelled
    if (seen.has(key)) {
      continue;
    }

    const nameLike = looksLikeName(word);
    // Already reported as a possible name, and this occurrence is name-like too
    if (nameLike && seenNames.has(key)) {
      continue;
    }

    if (knownCorrect.has(word)) {
      continue;
    }
    if (spell.correct(word)) {
      knownCorrect.add(word);
      continue;
    }

    if (nameLike) {
      seenNames.add(key);
      possibleNames.push({ word, line, column });
      continue;
    }

    // A non-name occurrence is unknown to the dictionary: a capitalised
    // occurrence recorded earlier was a typo at a sentence start, not a name.
    if (seenNames.delete(key)) {
      const stale = possibleNames.findIndex(issue => issue.word.toLowerCase() === key);
      if (stale !== -1) {
        possibleNames.splice(stale, 1);
      }
    }

    seen.add(key);
    misspelled.push({
      word,
      line,
      column,
      suggestions: spell.suggest(word).slice(0, 5),
    });
  }

  return { misspelled, possibleNames };
}
322
+
323
/**
 * Spell-check the contents of a file.
 *
 * @param filePath File to read (as UTF-8) and check.
 * @param options Passed through to checkSpelling().
 * @returns The checkSpelling() result with `file: filePath` added to every issue.
 */
export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
  const contents = fs.readFileSync(filePath, 'utf-8');
  const { misspelled, possibleNames } = await checkSpelling(contents, options);

  const tagWithFile = (issue: SpellingIssue) => ({ ...issue, file: filePath });

  return {
    misspelled: misspelled.map(tagWithFile),
    possibleNames: possibleNames.map(tagWithFile),
  };
}