docrev 0.10.0 → 0.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (126) hide show
  1. package/.gitattributes +1 -1
  2. package/CHANGELOG.md +173 -164
  3. package/PLAN-tables-and-postprocess.md +850 -850
  4. package/README.md +431 -431
  5. package/bin/rev.js +11 -11
  6. package/bin/rev.ts +145 -145
  7. package/completions/rev.bash +127 -127
  8. package/completions/rev.ps1 +210 -210
  9. package/completions/rev.zsh +207 -207
  10. package/dist/lib/anchor-match.d.ts +1 -1
  11. package/dist/lib/anchor-match.d.ts.map +1 -1
  12. package/dist/lib/anchor-match.js +17 -47
  13. package/dist/lib/anchor-match.js.map +1 -1
  14. package/dist/lib/build.js +4 -4
  15. package/dist/lib/commands/context.d.ts +1 -1
  16. package/dist/lib/commands/context.d.ts.map +1 -1
  17. package/dist/lib/commands/context.js +1 -1
  18. package/dist/lib/commands/context.js.map +1 -1
  19. package/dist/lib/commands/sections.js +7 -7
  20. package/dist/lib/commands/sections.js.map +1 -1
  21. package/dist/lib/commands/sync.d.ts.map +1 -1
  22. package/dist/lib/commands/sync.js +15 -14
  23. package/dist/lib/commands/sync.js.map +1 -1
  24. package/dist/lib/commands/utilities.js +164 -164
  25. package/dist/lib/commands/verify-anchors.js +6 -6
  26. package/dist/lib/commands/verify-anchors.js.map +1 -1
  27. package/dist/lib/commands/word-tools.js +8 -8
  28. package/dist/lib/grammar.js +3 -3
  29. package/dist/lib/macro-filter.lua +201 -201
  30. package/dist/lib/pdf-comments.js +44 -44
  31. package/dist/lib/plugins.js +57 -57
  32. package/dist/lib/pptx-color-filter.lua +37 -37
  33. package/dist/lib/pptx-themes.js +115 -115
  34. package/dist/lib/sections.d.ts +35 -0
  35. package/dist/lib/sections.d.ts.map +1 -1
  36. package/dist/lib/sections.js +81 -0
  37. package/dist/lib/sections.js.map +1 -1
  38. package/dist/lib/spelling.js +2 -2
  39. package/dist/lib/templates.js +387 -387
  40. package/dist/lib/themes.js +51 -51
  41. package/docs-src/build.py +113 -113
  42. package/docs-src/extra.css +208 -208
  43. package/docs-src/md-to-html.lua +6 -6
  44. package/docs-src/template.html +116 -116
  45. package/eslint.config.js +27 -27
  46. package/lib/anchor-match.ts +276 -308
  47. package/lib/annotations.ts +644 -644
  48. package/lib/build.ts +1766 -1766
  49. package/lib/citations.ts +160 -160
  50. package/lib/commands/build.ts +855 -855
  51. package/lib/commands/citations.ts +515 -515
  52. package/lib/commands/comments.ts +1050 -1050
  53. package/lib/commands/context.ts +176 -174
  54. package/lib/commands/core.ts +309 -309
  55. package/lib/commands/doi.ts +435 -435
  56. package/lib/commands/file-ops.ts +372 -372
  57. package/lib/commands/history.ts +320 -320
  58. package/lib/commands/index.ts +87 -87
  59. package/lib/commands/init.ts +259 -259
  60. package/lib/commands/merge-resolve.ts +378 -378
  61. package/lib/commands/preview.ts +178 -178
  62. package/lib/commands/project-info.ts +244 -244
  63. package/lib/commands/quality.ts +517 -517
  64. package/lib/commands/response.ts +454 -454
  65. package/lib/commands/section-boundaries.ts +82 -82
  66. package/lib/commands/sections.ts +451 -451
  67. package/lib/commands/sync.ts +709 -706
  68. package/lib/commands/text-ops.ts +449 -449
  69. package/lib/commands/utilities.ts +448 -448
  70. package/lib/commands/verify-anchors.ts +272 -272
  71. package/lib/commands/word-tools.ts +340 -340
  72. package/lib/comment-realign.ts +517 -517
  73. package/lib/config.ts +84 -84
  74. package/lib/crossref.ts +781 -781
  75. package/lib/csl.ts +191 -191
  76. package/lib/dependencies.ts +98 -98
  77. package/lib/diff-engine.ts +465 -465
  78. package/lib/doi-cache.ts +115 -115
  79. package/lib/doi.ts +897 -897
  80. package/lib/equations.ts +506 -506
  81. package/lib/errors.ts +346 -346
  82. package/lib/format.ts +541 -541
  83. package/lib/git.ts +326 -326
  84. package/lib/grammar.ts +303 -303
  85. package/lib/image-registry.ts +180 -180
  86. package/lib/import.ts +911 -911
  87. package/lib/journals.ts +543 -543
  88. package/lib/macro-filter.lua +201 -201
  89. package/lib/macros.ts +273 -273
  90. package/lib/merge.ts +633 -633
  91. package/lib/orcid.ts +144 -144
  92. package/lib/pdf-comments.ts +263 -263
  93. package/lib/pdf-import.ts +524 -524
  94. package/lib/plugins.ts +362 -362
  95. package/lib/postprocess.ts +188 -188
  96. package/lib/pptx-color-filter.lua +37 -37
  97. package/lib/pptx-template.ts +469 -469
  98. package/lib/pptx-themes.ts +483 -483
  99. package/lib/protect-restore.ts +520 -520
  100. package/lib/rate-limiter.ts +94 -94
  101. package/lib/response.ts +197 -197
  102. package/lib/restore-references.ts +240 -240
  103. package/lib/review.ts +327 -327
  104. package/lib/schema.ts +488 -488
  105. package/lib/scientific-words.ts +73 -73
  106. package/lib/sections.ts +425 -335
  107. package/lib/slides.ts +756 -756
  108. package/lib/spelling.ts +334 -334
  109. package/lib/templates.ts +526 -526
  110. package/lib/themes.ts +742 -742
  111. package/lib/trackchanges.ts +247 -247
  112. package/lib/tui.ts +450 -450
  113. package/lib/types.ts +550 -550
  114. package/lib/undo.ts +250 -250
  115. package/lib/utils.ts +69 -69
  116. package/lib/variables.ts +179 -179
  117. package/lib/word-extraction.ts +806 -806
  118. package/lib/word.ts +643 -643
  119. package/lib/wordcomments.ts +840 -840
  120. package/mkdocs.yml +64 -64
  121. package/package.json +137 -137
  122. package/scripts/postbuild.js +47 -47
  123. package/skill/REFERENCE.md +539 -539
  124. package/skill/SKILL.md +295 -295
  125. package/tsconfig.json +26 -26
  126. package/types/index.d.ts +525 -525
package/lib/spelling.ts CHANGED
@@ -1,334 +1,334 @@
1
- /**
2
- * Spelling checker module with global and project dictionaries
3
- *
4
- * Uses nspell (Hunspell-compatible) for English spellchecking.
5
- * Custom words stored in:
6
- * - ~/.rev-dictionary (global)
7
- * - .rev-dictionary (project-local)
8
- */
9
-
10
- import * as fs from 'fs';
11
- import * as path from 'path';
12
- // @ts-ignore - nspell has no types
13
- import nspell from 'nspell';
14
- // @ts-ignore - dictionary-en has no types
15
- import dictionaryEn from 'dictionary-en';
16
- // @ts-ignore - dictionary-en-gb has no types
17
- import dictionaryEnGb from 'dictionary-en-gb';
18
- import { scientificWords } from './scientific-words.js';
19
- import type { SpellingIssue, SpellingResult } from './types.js';
20
-
21
- const DICT_NAME = '.rev-dictionary';
22
-
23
- // Cache for the spellchecker instances (one per language)
24
- const spellcheckerCache: Record<string, any> = {
25
- en: null,
26
- 'en-gb': null,
27
- };
28
-
29
- interface WordLocation {
30
- word: string;
31
- line: number;
32
- column: number;
33
- }
34
-
35
- interface CheckSpellingOptions {
36
- projectDir?: string;
37
- lang?: 'en' | 'en-gb';
38
- }
39
-
40
- interface CheckFileOptions {
41
- projectDir?: string;
42
- lang?: 'en' | 'en-gb';
43
- }
44
-
45
- /**
46
- * Get the global dictionary path
47
- */
48
- export function getGlobalDictPath(): string {
49
- const home = process.env.HOME || process.env.USERPROFILE;
50
- return path.join(home!, DICT_NAME);
51
- }
52
-
53
- /**
54
- * Get the project dictionary path
55
- */
56
- export function getProjectDictPath(directory: string = '.'): string {
57
- return path.join(directory, DICT_NAME);
58
- }
59
-
60
- /**
61
- * Load custom words from a dictionary file
62
- */
63
- export function loadDictionaryFile(dictPath: string): Set<string> {
64
- const words = new Set<string>();
65
-
66
- if (fs.existsSync(dictPath)) {
67
- const content = fs.readFileSync(dictPath, 'utf-8');
68
- for (const line of content.split('\n')) {
69
- const word = line.trim();
70
- if (word && !word.startsWith('#')) {
71
- words.add(word.toLowerCase());
72
- }
73
- }
74
- }
75
-
76
- return words;
77
- }
78
-
79
- /**
80
- * Save words to a dictionary file
81
- */
82
- export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
83
- const header = `# Custom dictionary for docrev
84
- # One word per line, lines starting with # are comments
85
- `;
86
- const content = header + [...words].sort().join('\n') + '\n';
87
-
88
- // Ensure directory exists
89
- const dir = path.dirname(dictPath);
90
- if (!fs.existsSync(dir)) {
91
- fs.mkdirSync(dir, { recursive: true });
92
- }
93
-
94
- fs.writeFileSync(dictPath, content, 'utf-8');
95
- }
96
-
97
- /**
98
- * Load all custom words (global + project)
99
- */
100
- export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
101
- const globalWords = loadDictionaryFile(getGlobalDictPath());
102
- const projectWords = loadDictionaryFile(getProjectDictPath(projectDir));
103
-
104
- return new Set([...globalWords, ...projectWords]);
105
- }
106
-
107
- /**
108
- * Add word to dictionary
109
- */
110
- export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
111
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
112
- const words = loadDictionaryFile(dictPath);
113
- const normalizedWord = word.trim().toLowerCase();
114
-
115
- if (words.has(normalizedWord)) {
116
- return false;
117
- }
118
-
119
- words.add(normalizedWord);
120
- saveDictionaryFile(words, dictPath);
121
-
122
- // Clear cache so new word is picked up
123
- clearCache();
124
-
125
- return true;
126
- }
127
-
128
- /**
129
- * Remove word from dictionary
130
- */
131
- export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
132
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
133
- const words = loadDictionaryFile(dictPath);
134
- const normalizedWord = word.trim().toLowerCase();
135
-
136
- if (!words.has(normalizedWord)) {
137
- return false;
138
- }
139
-
140
- words.delete(normalizedWord);
141
- saveDictionaryFile(words, dictPath);
142
-
143
- // Clear cache
144
- clearCache();
145
-
146
- return true;
147
- }
148
-
149
- /**
150
- * List words in dictionary
151
- */
152
- export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
153
- const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
154
- const words = loadDictionaryFile(dictPath);
155
- return [...words].sort();
156
- }
157
-
158
- /**
159
- * Initialize the spellchecker with custom words
160
- */
161
- export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
162
- if (spellcheckerCache[lang]) {
163
- return spellcheckerCache[lang];
164
- }
165
-
166
- // Select dictionary based on language
167
- const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
168
- const spell = nspell(dictionary);
169
-
170
- // Add scientific/academic words
171
- for (const word of scientificWords) {
172
- spell.add(word);
173
- }
174
-
175
- // Add custom words
176
- const customWords = loadAllCustomWords(projectDir);
177
- for (const word of customWords) {
178
- spell.add(word);
179
- }
180
-
181
- spellcheckerCache[lang] = spell;
182
- return spell;
183
- }
184
-
185
- /**
186
- * Clear spellchecker cache (call after modifying dictionaries)
187
- */
188
- export function clearCache(): void {
189
- spellcheckerCache.en = null;
190
- spellcheckerCache['en-gb'] = null;
191
- }
192
-
193
- /**
194
- * Extract words from text, filtering out non-words
195
- */
196
- export function extractWords(text: string): WordLocation[] {
197
- const words: WordLocation[] = [];
198
- const lines = text.split('\n');
199
- let inCodeBlock = false;
200
- let inFrontmatter = false;
201
-
202
- for (let lineNum = 0; lineNum < lines.length; lineNum++) {
203
- const line = lines[lineNum];
204
- if (!line) continue;
205
- const trimmed = line.trim();
206
-
207
- // Track YAML frontmatter (only at start of file)
208
- if (lineNum === 0 && trimmed === '---') {
209
- inFrontmatter = true;
210
- continue;
211
- }
212
- if (inFrontmatter) {
213
- if (trimmed === '---') {
214
- inFrontmatter = false;
215
- }
216
- continue;
217
- }
218
-
219
- // Track code blocks
220
- if (trimmed.startsWith('```')) {
221
- inCodeBlock = !inCodeBlock;
222
- continue;
223
- }
224
- if (inCodeBlock) {
225
- continue;
226
- }
227
-
228
- // Skip URLs and paths
229
- if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
230
- continue;
231
- }
232
-
233
- // Remove markdown syntax, URLs, code spans, LaTeX, etc.
234
- let cleanLine = line
235
- .replace(/`[^`]+`/g, '') // inline code
236
- .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // links (keep text)
237
- .replace(/!\[[^\]]*\]\([^)]+\)/g, '') // images
238
- .replace(/@(fig|tbl|eq):\w+/g, '') // cross-refs
239
- .replace(/\{[^}]+\}/g, '') // CriticMarkup/templates
240
- .replace(/https?:\/\/\S+/g, '') // URLs
241
- .replace(/\$[^$]+\$/g, '') // inline LaTeX math
242
- .replace(/\\\w+/g, '') // LaTeX commands like \frac
243
- .replace(/[#*_~`>|]/g, ' '); // markdown chars
244
-
245
- // Extract words (letters and apostrophes only)
246
- const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
247
- let match;
248
-
249
- while ((match = wordPattern.exec(cleanLine)) !== null) {
250
- const word = match[0];
251
-
252
- // Skip:
253
- // - Very short words (1-2 chars)
254
- // - All caps (acronyms like NASA)
255
- // - File extensions (.md, .tex, .png)
256
- // - CamelCase (likely code or citations like vanKleunen)
257
- // - Words starting with capital in middle of sentence (proper nouns/names)
258
- if (word.length < 3 ||
259
- /^[A-Z]+$/.test(word) ||
260
- /^\w{2,4}$/.test(word) && /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
261
- /[a-z][A-Z]/.test(word)) {
262
- continue;
263
- }
264
-
265
- words.push({
266
- word,
267
- line: lineNum + 1,
268
- column: match.index + 1,
269
- });
270
- }
271
- }
272
-
273
- return words;
274
- }
275
-
276
- /**
277
- * Check if a word looks like a proper noun (name)
278
- */
279
- function looksLikeName(word: string): boolean {
280
- // Capitalized, not all caps, reasonable length for a name
281
- return /^[A-Z][a-z]{2,}$/.test(word);
282
- }
283
-
284
- /**
285
- * Check spelling in text
286
- */
287
- export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
288
- const { projectDir = '.', lang = 'en' } = options;
289
- const spell = await getSpellchecker(projectDir, lang);
290
- const words = extractWords(text);
291
- const misspelled: SpellingIssue[] = [];
292
- const possibleNames: SpellingIssue[] = [];
293
- const seen = new Set<string>();
294
- const seenNames = new Set<string>();
295
-
296
- for (const { word, line, column } of words) {
297
- // Skip if already reported this word
298
- const key = word.toLowerCase();
299
- if (seen.has(key) || seenNames.has(key)) {
300
- continue;
301
- }
302
-
303
- if (!spell.correct(word)) {
304
- // Check if it looks like a proper noun/name
305
- if (looksLikeName(word)) {
306
- seenNames.add(key);
307
- possibleNames.push({ word, line, column });
308
- } else {
309
- seen.add(key);
310
- misspelled.push({
311
- word,
312
- line,
313
- column,
314
- suggestions: spell.suggest(word).slice(0, 5),
315
- });
316
- }
317
- }
318
- }
319
-
320
- return { misspelled, possibleNames };
321
- }
322
-
323
- /**
324
- * Check spelling in a file
325
- */
326
- export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
327
- const text = fs.readFileSync(filePath, 'utf-8');
328
- const result = await checkSpelling(text, options);
329
-
330
- return {
331
- misspelled: result.misspelled.map(issue => ({ ...issue, file: filePath })),
332
- possibleNames: result.possibleNames.map(issue => ({ ...issue, file: filePath })),
333
- };
334
- }
1
+ /**
2
+ * Spelling checker module with global and project dictionaries
3
+ *
4
+ * Uses nspell (Hunspell-compatible) for English spellchecking.
5
+ * Custom words stored in:
6
+ * - ~/.rev-dictionary (global)
7
+ * - .rev-dictionary (project-local)
8
+ */
9
+
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
// @ts-ignore - nspell has no types
import nspell from 'nspell';
// @ts-ignore - dictionary-en has no types
import dictionaryEn from 'dictionary-en';
// @ts-ignore - dictionary-en-gb has no types
import dictionaryEnGb from 'dictionary-en-gb';
import { scientificWords } from './scientific-words.js';
import type { SpellingIssue, SpellingResult } from './types.js';
20
+
/** File name shared by the global and the project-local custom dictionary. */
const DICT_NAME = '.rev-dictionary';

/** Dictionary variants a spellchecker can be built for. */
type SpellingLanguage = 'en' | 'en-gb';

/**
 * Cache for the spellchecker instances (one slot per language).
 * A slot holds `null` until a checker for that language has been built.
 * Values stay `any` because nspell ships no type declarations.
 */
// eslint-disable-next-line @typescript-eslint/no-explicit-any
const spellcheckerCache: Record<SpellingLanguage, any> = {
  en: null,
  'en-gb': null,
};

/** One occurrence of a word in the checked text; `line` and `column` are 1-based. */
interface WordLocation {
  word: string;
  line: number;
  column: number;
}

/** Options for checking a string of text. */
interface CheckSpellingOptions {
  /** Project directory whose dictionary is merged with the global one (default '.'). */
  projectDir?: string;
  /** Dictionary variant to check against (default 'en'). */
  lang?: SpellingLanguage;
}

/**
 * Options for checking a file. They are forwarded unchanged to the text-level
 * check, so this is an alias rather than a second copy of the same fields.
 */
type CheckFileOptions = CheckSpellingOptions;
44
+
/**
 * Get the global dictionary path.
 *
 * The home directory is taken from HOME, then USERPROFILE, and finally from
 * os.homedir(). Without the last fallback an environment that sets neither
 * variable would pass `undefined` to path.join, which throws a TypeError.
 *
 * @returns Path of the dictionary file inside the user's home directory.
 */
export function getGlobalDictPath(): string {
  // `||` (not `??`) on purpose: an empty HOME should fall through as well.
  const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
  return path.join(home, DICT_NAME);
}
52
+
/**
 * Build the location of the project-local dictionary.
 *
 * @param directory Project directory; defaults to the current directory.
 * @returns `directory` joined with the dictionary file name.
 */
export function getProjectDictPath(directory: string = '.'): string {
  const projectDictionary = path.join(directory, DICT_NAME);
  return projectDictionary;
}
59
+
/**
 * Read the custom words stored in a dictionary file.
 *
 * Each line is trimmed; blank lines and lines starting with `#` are ignored,
 * and every remaining entry is lower-cased.
 *
 * @param dictPath Path of the dictionary file.
 * @returns The set of words found, or an empty set when the file does not exist.
 */
export function loadDictionaryFile(dictPath: string): Set<string> {
  if (!fs.existsSync(dictPath)) {
    return new Set<string>();
  }

  const entries = fs
    .readFileSync(dictPath, 'utf-8')
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#'))
    .map((entry) => entry.toLowerCase());

  return new Set<string>(entries);
}
78
+
/**
 * Write a set of words to a dictionary file.
 *
 * The file starts with a two-line comment header, followed by the words in
 * sorted order, one per line, and ends with a newline. The parent directory
 * is created first when it does not exist yet.
 *
 * @param words Words to store.
 * @param dictPath Destination file; an existing file is overwritten.
 */
export function saveDictionaryFile(words: Set<string>, dictPath: string): void {
  const header = `# Custom dictionary for docrev
# One word per line, lines starting with # are comments
`;
  const sortedEntries = Array.from(words).sort();
  const fileBody = `${header}${sortedEntries.join('\n')}\n`;

  // Make sure the target directory is there before writing
  const parentDir = path.dirname(dictPath);
  const parentMissing = !fs.existsSync(parentDir);
  if (parentMissing) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  fs.writeFileSync(dictPath, fileBody, 'utf-8');
}
96
+
/**
 * Collect every custom word that applies to a project.
 *
 * @param projectDir Project directory; defaults to the current directory.
 * @returns The union of the global dictionary and the project dictionary.
 */
export function loadAllCustomWords(projectDir: string = '.'): Set<string> {
  // Start from the global entries, then layer the project entries on top.
  const merged = new Set<string>(loadDictionaryFile(getGlobalDictPath()));
  for (const entry of loadDictionaryFile(getProjectDictPath(projectDir))) {
    merged.add(entry);
  }
  return merged;
}
106
+
/**
 * Add word to dictionary.
 *
 * The word is trimmed and lower-cased before it is stored.
 *
 * @param word Word to add.
 * @param global When true (default) the global dictionary is used, otherwise
 *   the dictionary inside `projectDir`.
 * @param projectDir Project directory used when `global` is false.
 * @returns true when the word was written; false when it was already present
 *   or cannot be stored (blank, or starting with `#`).
 */
export function addWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  const normalizedWord = word.trim().toLowerCase();

  // loadDictionaryFile skips blank lines and lines starting with '#', so such
  // an entry would be written to the file but never read back as a word.
  if (normalizedWord === '' || normalizedWord.startsWith('#')) {
    return false;
  }

  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
  const words = loadDictionaryFile(dictPath);

  if (words.has(normalizedWord)) {
    return false;
  }

  words.add(normalizedWord);
  saveDictionaryFile(words, dictPath);

  // Clear cache so new word is picked up
  clearCache();

  return true;
}
127
+
/**
 * Delete a word from a dictionary.
 *
 * The word is trimmed and lower-cased before the lookup.
 *
 * @param word Word to remove.
 * @param global When true (default) the global dictionary is used, otherwise
 *   the dictionary inside `projectDir`.
 * @param projectDir Project directory used when `global` is false.
 * @returns true when the word was present and has been removed, false otherwise.
 */
export function removeWord(word: string, global: boolean = true, projectDir: string = '.'): boolean {
  let dictPath: string;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }

  const entries = loadDictionaryFile(dictPath);
  const target = word.trim().toLowerCase();

  // Set.delete reports whether the entry existed, which is exactly the
  // "was there anything to remove" answer needed here.
  if (!entries.delete(target)) {
    return false;
  }

  saveDictionaryFile(entries, dictPath);

  // Cached checkers still contain the removed word
  clearCache();

  return true;
}
148
+
/**
 * Return the words of one dictionary in sorted order.
 *
 * @param global When true (default) the global dictionary is listed, otherwise
 *   the dictionary inside `projectDir`.
 * @param projectDir Project directory used when `global` is false.
 * @returns Sorted array of the stored words.
 */
export function listWords(global: boolean = true, projectDir: string = '.'): string[] {
  let dictPath: string;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }

  return Array.from(loadDictionaryFile(dictPath)).sort();
}
157
+
/**
 * Remembers which project directory each cached checker was built for.
 * Keyed by the checker instance, so a cache slot reset to `null` elsewhere
 * simply leaves the entry unreachable.
 */
const spellcheckerProjectDirs = new WeakMap<object, string>();

/**
 * Initialize the spellchecker with custom words.
 *
 * The checker combines the base dictionary for `lang`, the bundled scientific
 * word list, and the custom words of the global and project dictionaries.
 * One checker is cached per language. The cached instance is reused only when
 * it was built for the same project directory: the project dictionary is part
 * of the checker, so reusing it across projects would apply the wrong custom
 * words.
 *
 * @param projectDir Project directory whose dictionary is included (default '.').
 * @param lang Dictionary variant (default 'en').
 * @returns The nspell checker instance.
 */
export async function getSpellchecker(projectDir: string = '.', lang: 'en' | 'en-gb' = 'en'): Promise<any> {
  // Resolve so that '.' and an absolute path to the same directory compare equal.
  const resolvedDir = path.resolve(projectDir);

  const cached = spellcheckerCache[lang];
  if (cached && spellcheckerProjectDirs.get(cached) === resolvedDir) {
    return cached;
  }

  // Select dictionary based on language
  const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
  const spell = nspell(dictionary);

  // Add scientific/academic words
  for (const word of scientificWords) {
    spell.add(word);
  }

  // Add custom words
  const customWords = loadAllCustomWords(projectDir);
  for (const word of customWords) {
    spell.add(word);
  }

  spellcheckerProjectDirs.set(spell, resolvedDir);
  spellcheckerCache[lang] = spell;
  return spell;
}
184
+
/**
 * Drop the cached spellcheckers for both languages so the next lookup
 * rebuilds them (call after modifying dictionaries).
 */
export function clearCache(): void {
  Object.assign(spellcheckerCache, { en: null, 'en-gb': null });
}
192
+
/**
 * Extract words from text, filtering out non-words.
 *
 * Skipped entirely: a YAML frontmatter block at the very start of the text,
 * fenced code blocks, and lines that start with `http` or `/`.
 * Within the remaining lines, inline code, images, link targets, cross-refs,
 * `{...}` spans, URLs, inline math and LaTeX commands are masked out.
 *
 * Masking replaces the unwanted characters with spaces instead of deleting
 * them, so every reported column refers to the position in the original line.
 *
 * @param text Markdown text to scan.
 * @returns Word occurrences with 1-based line and column numbers.
 */
export function extractWords(text: string): WordLocation[] {
  // Same-length run of spaces: hides a span without shifting what follows it.
  const blank = (span: string): string => ' '.repeat(span.length);

  const words: WordLocation[] = [];
  const lines = text.split('\n');
  let inCodeBlock = false;
  let inFrontmatter = false;

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    if (!line) continue;
    const trimmed = line.trim();

    // Track YAML frontmatter (only at start of file)
    if (lineNum === 0 && trimmed === '---') {
      inFrontmatter = true;
      continue;
    }
    if (inFrontmatter) {
      if (trimmed === '---') {
        inFrontmatter = false;
      }
      continue;
    }

    // Track code blocks
    if (trimmed.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) {
      continue;
    }

    // Skip URLs and paths
    if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
      continue;
    }

    // Mask markdown syntax, URLs, code spans, LaTeX, etc.
    const cleanLine = line
      .replace(/`[^`]+`/g, blank) // inline code
      // Images must be handled before links: the link pattern also matches the
      // `[alt](src)` part of an image and would keep the alt text.
      .replace(/!\[[^\]]*\]\([^)]+\)/g, blank) // images
      .replace(
        /\[([^\]]+)\]\([^)]+\)/g,
        // Keep the link text where it was; blank the bracket and the target.
        (span: string, label: string) => ` ${label}`.padEnd(span.length),
      ) // links (keep text)
      .replace(/@(fig|tbl|eq):\w+/g, blank) // cross-refs
      .replace(/\{[^}]+\}/g, blank) // CriticMarkup/templates
      .replace(/https?:\/\/\S+/g, blank) // URLs
      .replace(/\$[^$]+\$/g, blank) // inline LaTeX math
      .replace(/\\\w+/g, blank) // LaTeX commands like \frac
      .replace(/[#*_~`>|]/g, ' '); // markdown chars

    // Extract words (letters and apostrophes only)
    const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
    let match: RegExpExecArray | null;

    while ((match = wordPattern.exec(cleanLine)) !== null) {
      const word = match[0];

      // Skip:
      // - Very short words (1-2 chars)
      // - All caps (acronyms like NASA)
      // - File extensions (md, tex, png, ...)
      // - CamelCase (likely code or citations like vanKleunen)
      // Capitalised words are kept; this function does not filter proper nouns.
      if (
        word.length < 3 ||
        /^[A-Z]+$/.test(word) ||
        /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
        /[a-z][A-Z]/.test(word)
      ) {
        continue;
      }

      words.push({
        word,
        line: lineNum + 1,
        column: match.index + 1,
      });
    }
  }

  return words;
}
275
+
/**
 * Decide whether a word has the shape of a proper noun: one ASCII capital
 * followed by at least two ASCII lowercase letters and nothing else.
 */
function looksLikeName(word: string): boolean {
  const capitalisedWord = /^[A-Z][a-z]{2,}$/;
  return capitalisedWord.test(word);
}
283
+
/**
 * Spell-check a piece of text.
 *
 * Every distinct word (compared case-insensitively) is reported at most once,
 * at its first flagged occurrence. Words the checker rejects are split into
 * two lists: those shaped like a proper noun go to `possibleNames`, the rest
 * go to `misspelled` together with up to five suggestions.
 *
 * @param text Text to check.
 * @param options Project directory and language; both optional.
 * @returns The misspelled words and the possible names.
 */
export async function checkSpelling(text: string, options: CheckSpellingOptions = {}): Promise<SpellingResult> {
  const { projectDir = '.', lang = 'en' } = options;
  const checker = await getSpellchecker(projectDir, lang);

  const misspelled: SpellingIssue[] = [];
  const possibleNames: SpellingIssue[] = [];
  // Lower-cased words already placed in either result list.
  const reported = new Set<string>();

  for (const occurrence of extractWords(text)) {
    const { word, line, column } = occurrence;
    const key = word.toLowerCase();

    if (reported.has(key)) {
      continue;
    }
    if (checker.correct(word)) {
      continue;
    }

    reported.add(key);

    if (looksLikeName(word)) {
      possibleNames.push({ word, line, column });
      continue;
    }

    const suggestions = checker.suggest(word).slice(0, 5);
    misspelled.push({ word, line, column, suggestions });
  }

  return { misspelled, possibleNames };
}
322
+
/**
 * Spell-check the contents of a file.
 *
 * The file is read as UTF-8 and checked like any other text; every reported
 * issue is then tagged with the path it came from.
 *
 * @param filePath File to read.
 * @param options Project directory and language; both optional.
 * @returns The same result as the text-level check, with `file` set on each issue.
 */
export async function checkFile(filePath: string, options: CheckFileOptions = {}): Promise<SpellingResult> {
  const contents = fs.readFileSync(filePath, 'utf-8');
  const { misspelled, possibleNames } = await checkSpelling(contents, options);

  const tagWithFile = (issue: SpellingIssue) => ({ ...issue, file: filePath });

  return {
    misspelled: misspelled.map((issue) => tagWithFile(issue)),
    possibleNames: possibleNames.map((issue) => tagWithFile(issue)),
  };
}