docrev 0.3.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,350 @@
1
+ /**
2
+ * Spelling checker module with global and project dictionaries
3
+ *
4
+ * Uses nspell (Hunspell-compatible) for English spellchecking.
5
+ * Custom words stored in:
6
+ * - ~/.rev-dictionary (global)
7
+ * - .rev-dictionary (project-local)
8
+ */
9
+
10
import * as fs from 'fs';
import * as os from 'os';
import * as path from 'path';
import nspell from 'nspell';
import dictionaryEn from 'dictionary-en';
import dictionaryEnGb from 'dictionary-en-gb';
import { scientificWords } from './scientific-words.js';
16
+
17
// File name used for both the global and the project-local custom dictionary.
const DICT_NAME = '.rev-dictionary';

// Memoized spellcheckers, one slot per supported language code.
// A slot stays null until a checker for that language has been built.
const spellcheckerCache = { en: null, 'en-gb': null };
24
+
25
/**
 * Get the global dictionary path.
 *
 * The home directory is taken from the HOME environment variable, then
 * USERPROFILE, and finally os.homedir(), so the lookup still works when
 * neither variable is set (path.join would otherwise throw on undefined).
 * @returns {string} Path of the dictionary file inside the home directory
 */
export function getGlobalDictPath() {
  const home = process.env.HOME || process.env.USERPROFILE || os.homedir();
  return path.join(home, DICT_NAME);
}
33
+
34
/**
 * Build the path of the project-local dictionary file.
 * @param {string} directory - Project directory (defaults to the current one)
 * @returns {string} `directory` joined with the dictionary file name
 */
export function getProjectDictPath(directory = '.') {
  const projectDictFile = path.join(directory, DICT_NAME);
  return projectDictFile;
}
42
+
43
/**
 * Read the custom words stored in a dictionary file.
 *
 * Blank lines and lines starting with `#` are ignored; every remaining line
 * is trimmed and lower-cased. A path that does not exist yields an empty set.
 * @param {string} dictPath - Dictionary file to read
 * @returns {Set<string>} Lower-cased words found in the file
 */
export function loadDictionaryFile(dictPath) {
  if (!fs.existsSync(dictPath)) {
    return new Set();
  }

  const entries = fs
    .readFileSync(dictPath, 'utf-8')
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#'))
    .map((entry) => entry.toLowerCase());

  return new Set(entries);
}
63
+
64
/**
 * Write a set of words to a dictionary file.
 *
 * The file starts with a two-line comment header, followed by the words in
 * sorted order, one per line. The parent directory is created when missing.
 * @param {Set<string>} words - Words to persist
 * @param {string} dictPath - Destination file
 */
export function saveDictionaryFile(words, dictPath) {
  const header = `# Custom dictionary for docrev
# One word per line, lines starting with # are comments
`;
  const sortedWords = [...words];
  sortedWords.sort();
  const fileBody = `${header}${sortedWords.join('\n')}\n`;

  // Make sure the target directory is there before writing.
  const parentDir = path.dirname(dictPath);
  const parentMissing = !fs.existsSync(parentDir);
  if (parentMissing) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  fs.writeFileSync(dictPath, fileBody, 'utf-8');
}
83
+
84
/**
 * Collect the custom words from the global and the project dictionary.
 * @param {string} projectDir - Directory holding the project dictionary
 * @returns {Set<string>} Union of both dictionaries (global words first)
 */
export function loadAllCustomWords(projectDir = '.') {
  const combined = new Set(loadDictionaryFile(getGlobalDictPath()));
  for (const projectWord of loadDictionaryFile(getProjectDictPath(projectDir))) {
    combined.add(projectWord);
  }
  return combined;
}
95
+
96
/**
 * Add word to dictionary.
 *
 * The word is trimmed and lower-cased before it is stored. Empty words and
 * words starting with `#` are rejected: the dictionary loader skips blank
 * lines and treats `#` lines as comments, so such an entry would be written
 * to disk but never read back.
 * @param {string} word
 * @param {boolean} global - Add to global dictionary (otherwise project-local)
 * @param {string} projectDir - Project directory, used when `global` is false
 * @returns {boolean} True if word was added
 */
export function addWord(word, global = true, projectDir = '.') {
  const normalizedWord = word.trim().toLowerCase();
  if (normalizedWord === '' || normalizedWord.startsWith('#')) {
    return false;
  }

  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
  const words = loadDictionaryFile(dictPath);

  if (words.has(normalizedWord)) {
    return false;
  }

  words.add(normalizedWord);
  saveDictionaryFile(words, dictPath);

  // Clear cache so new word is picked up
  clearCache();

  return true;
}
120
+
121
/**
 * Delete a word from the global or the project dictionary.
 *
 * The word is trimmed and lower-cased before the lookup.
 * @param {string} word
 * @param {boolean} global - Use the global dictionary (otherwise project-local)
 * @param {string} projectDir - Project directory, used when `global` is false
 * @returns {boolean} True if word was removed
 */
export function removeWord(word, global = true, projectDir = '.') {
  let dictPath;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }

  const stored = loadDictionaryFile(dictPath);
  const target = word.trim().toLowerCase();

  // Set#delete reports whether the entry existed, which replaces a separate has() check.
  if (!stored.delete(target)) {
    return false;
  }

  saveDictionaryFile(stored, dictPath);
  // Cached spellcheckers still contain the removed word.
  clearCache();
  return true;
}
145
+
146
/**
 * Return the words of one dictionary in sorted order.
 * @param {boolean} global - Use the global dictionary (otherwise project-local)
 * @param {string} projectDir - Project directory, used when `global` is false
 * @returns {string[]}
 */
export function listWords(global = true, projectDir = '.') {
  let dictPath;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }

  const sorted = [...loadDictionaryFile(dictPath)];
  sorted.sort();
  return sorted;
}
157
+
158
/**
 * Initialize the spellchecker with custom words.
 *
 * Instances are cached per language. A cached instance is reused only when
 * it was built for the same (resolved) project directory, because the
 * project dictionary is part of the checker's word list.
 * @param {string} projectDir - Directory holding the project dictionary
 * @param {string} lang - Language: 'en' (US) or 'en-gb' (British); any other
 *   value is treated as 'en'
 * @returns {Promise<object>} The nspell instance
 */
export async function getSpellchecker(projectDir = '.', lang = 'en') {
  // Only two dictionaries exist and every other value falls back to US
  // English, so it shares the 'en' slot rather than adding ad-hoc cache keys.
  const cacheKey = lang === 'en-gb' ? 'en-gb' : 'en';
  const resolvedDir = path.resolve(projectDir);

  const cached = spellcheckerCache[cacheKey];
  if (cached && cached.projectDir === resolvedDir) {
    return cached.spell;
  }

  // Select dictionary based on language
  const dictionary = cacheKey === 'en-gb' ? dictionaryEnGb : dictionaryEn;
  const spell = nspell(dictionary);

  // Add scientific/academic words
  for (const word of scientificWords) {
    spell.add(word);
  }

  // Add custom words (global + project)
  for (const word of loadAllCustomWords(projectDir)) {
    spell.add(word);
  }

  // Remember which project the instance belongs to so that a call for a
  // different project rebuilds it instead of reusing the wrong word list.
  spellcheckerCache[cacheKey] = { projectDir: resolvedDir, spell };
  return spell;
}
187
+
188
/**
 * Clear spellchecker cache (call after modifying dictionaries).
 *
 * Every slot currently present in the cache is reset, not only the two
 * predefined language codes, so no stale instance survives a dictionary edit.
 */
export function clearCache() {
  for (const key of Object.keys(spellcheckerCache)) {
    spellcheckerCache[key] = null;
  }
}
195
+
196
/**
 * Extract words from text, filtering out non-words.
 *
 * Skipped entirely: YAML frontmatter at the start of the text, fenced code
 * blocks, and lines that start with `http` or `/`. Within the remaining
 * lines, inline code, images, link targets, cross-references, `{...}` spans,
 * URLs, inline math and LaTeX commands are blanked out with spaces of the
 * same length rather than deleted, so every reported column refers to the
 * position in the original line.
 * @param {string} text
 * @returns {Array<{word: string, line: number, column: number}>} Words with
 *   their 1-based line and column
 */
export function extractWords(text) {
  // Same-length replacement keeps match indices aligned with the source line.
  const blank = (span) => ' '.repeat(span.length);
  // `[label](target)` -> label stays at its original position, rest is blanked.
  const keepLinkText = (span, label) => ` ${label}`.padEnd(span.length);

  // Letters, with apostrophes allowed only inside the word.
  const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
  const allCaps = /^[A-Z]+$/;
  const fileExtension = /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i;
  const camelCase = /[a-z][A-Z]/;

  const words = [];
  let inCodeBlock = false;
  let inFrontmatter = false;

  for (const [lineIndex, line] of text.split('\n').entries()) {
    const trimmed = line.trim();

    // Track YAML frontmatter (only at start of file)
    if (lineIndex === 0 && trimmed === '---') {
      inFrontmatter = true;
      continue;
    }
    if (inFrontmatter) {
      if (trimmed === '---') {
        inFrontmatter = false;
      }
      continue;
    }

    // Track code blocks
    if (trimmed.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) {
      continue;
    }

    // Skip URLs and paths
    if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
      continue;
    }

    // Images must be handled before links: the link pattern also matches the
    // `[alt](url)` part of an image and would keep the alt text.
    const cleanLine = line
      .replace(/`[^`]+`/g, blank) // inline code
      .replace(/!\[[^\]]*\]\([^)]+\)/g, blank) // images
      .replace(/\[([^\]]+)\]\([^)]+\)/g, keepLinkText) // links (keep text)
      .replace(/@(fig|tbl|eq):\w+/g, blank) // cross-refs
      .replace(/\{[^}]+\}/g, blank) // CriticMarkup/templates
      .replace(/https?:\/\/\S+/g, blank) // URLs
      .replace(/\$[^$]+\$/g, blank) // inline LaTeX math
      .replace(/\\\w+/g, blank) // LaTeX commands like \frac
      .replace(/[#*_~`>|]/g, ' '); // markdown chars

    for (const match of cleanLine.matchAll(wordPattern)) {
      const word = match[0];

      // Skip:
      // - Very short words (1-2 chars)
      // - All caps (acronyms like NASA)
      // - File extensions (md, tex, png, ...)
      // - CamelCase (likely code or citations like vanKleunen)
      if (
        word.length < 3 ||
        allCaps.test(word) ||
        fileExtension.test(word) ||
        camelCase.test(word)
      ) {
        continue;
      }

      words.push({
        word,
        line: lineIndex + 1,
        column: match.index + 1,
      });
    }
  }

  return words;
}
279
+
280
/**
 * Heuristic for proper nouns: one upper-case ASCII letter followed by at
 * least two lower-case ASCII letters and nothing else.
 * @param {string} word
 * @returns {boolean}
 */
function looksLikeName(word) {
  const nameShape = /^[A-Z][a-z]{2,}$/;
  return nameShape.test(word);
}
289
+
290
/**
 * Spellcheck a text and report each unknown word once.
 *
 * Unknown words that look like a proper noun go to `possibleNames`; all
 * other unknown words go to `misspelled` together with up to five
 * suggestions. Repeats are detected case-insensitively, so only the first
 * occurrence of a word is reported.
 * @param {string} text
 * @param {object} options
 * @param {string} options.projectDir
 * @param {string} options.lang - 'en' (US) or 'en-gb' (British)
 * @returns {Promise<{misspelled: Array, possibleNames: Array}>}
 */
export async function checkSpelling(text, options = {}) {
  const { projectDir = '.', lang = 'en' } = options;
  const spell = await getSpellchecker(projectDir, lang);

  const misspelled = [];
  const possibleNames = [];
  // Lower-cased words already reported in either list.
  const reported = new Set();

  for (const { word, line, column } of extractWords(text)) {
    const key = word.toLowerCase();
    if (reported.has(key) || spell.correct(word)) {
      continue;
    }
    reported.add(key);

    if (looksLikeName(word)) {
      possibleNames.push({ word, line, column });
      continue;
    }

    const suggestions = spell.suggest(word).slice(0, 5);
    misspelled.push({ word, line, column, suggestions });
  }

  return { misspelled, possibleNames };
}
333
+
334
/**
 * Spellcheck the contents of a file.
 *
 * The file is read as UTF-8 and every reported issue is tagged with the
 * path it came from.
 * @param {string} filePath
 * @param {object} options
 * @param {string} options.projectDir
 * @param {string} options.lang
 * @returns {Promise<{misspelled: Array, possibleNames: Array}>}
 */
export async function checkFile(filePath, options = {}) {
  const text = fs.readFileSync(filePath, 'utf-8');
  const { misspelled, possibleNames } = await checkSpelling(text, options);

  const withFile = (issue) => ({ ...issue, file: filePath });

  return {
    misspelled: misspelled.map(withFile),
    possibleNames: possibleNames.map(withFile),
  };
}