docrev 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,350 @@
1
+ /**
2
+ * Spelling checker module with global and project dictionaries
3
+ *
4
+ * Uses nspell (Hunspell-compatible) for English spellchecking.
5
+ * Custom words stored in:
6
+ * - ~/.rev-dictionary (global)
7
+ * - .rev-dictionary (project-local)
8
+ */
9
+
10
+ import * as fs from 'fs';
11
+ import * as path from 'path';
12
+ import nspell from 'nspell';
13
+ import dictionaryEn from 'dictionary-en';
14
+ import dictionaryEnGb from 'dictionary-en-gb';
15
+ import { scientificWords } from './scientific-words.js';
16
+
17
// File name used for both the global (~/) and the project-local word list.
const DICT_NAME = '.rev-dictionary';

// Lazily built spellchecker instances, one slot per supported language.
// A slot holds null until the checker for that language is first requested.
const spellcheckerCache = { en: null, 'en-gb': null };
24
+
25
/**
 * Get the path of the global (per-user) dictionary file.
 *
 * The home directory is taken from the HOME environment variable, falling
 * back to USERPROFILE.
 *
 * @returns {string} Path to the dictionary file inside the home directory
 * @throws {Error} If neither HOME nor USERPROFILE is set
 */
export function getGlobalDictPath() {
  const home = process.env.HOME || process.env.USERPROFILE;
  if (!home) {
    // Fail with a readable message instead of letting path.join() reject
    // an undefined argument with an opaque TypeError.
    throw new Error(
      'Cannot locate the global dictionary: neither HOME nor USERPROFILE is set'
    );
  }
  return path.join(home, DICT_NAME);
}
33
+
34
/**
 * Build the path of the project-local dictionary file.
 *
 * @param {string} directory - Project directory (defaults to the current one)
 * @returns {string} The dictionary file name joined onto `directory`
 */
export function getProjectDictPath(directory = '.') {
  const projectDictPath = path.join(directory, DICT_NAME);
  return projectDictPath;
}
42
+
43
/**
 * Read a dictionary file into a set of lower-cased words.
 *
 * Blank lines and lines starting with `#` are ignored. A missing file
 * yields an empty set.
 *
 * @param {string} dictPath - Path of the dictionary file
 * @returns {Set<string>} Words in file order, trimmed and lower-cased
 */
export function loadDictionaryFile(dictPath) {
  if (!fs.existsSync(dictPath)) {
    return new Set();
  }

  const entries = fs
    .readFileSync(dictPath, 'utf-8')
    .split('\n')
    .map((rawLine) => rawLine.trim())
    .filter((entry) => entry !== '' && !entry.startsWith('#'))
    .map((entry) => entry.toLowerCase());

  return new Set(entries);
}
63
+
64
/**
 * Write a set of words to a dictionary file.
 *
 * The file starts with a two-line comment header, followed by the words in
 * sorted order, one per line. Missing parent directories are created.
 *
 * @param {Set<string>} words - Words to store
 * @param {string} dictPath - Destination file
 */
export function saveDictionaryFile(words, dictPath) {
  const parentDir = path.dirname(dictPath);
  if (!fs.existsSync(parentDir)) {
    fs.mkdirSync(parentDir, { recursive: true });
  }

  const headerLines = [
    '# Custom dictionary for docrev',
    '# One word per line, lines starting with # are comments',
  ];
  const sortedWords = Array.from(words).sort();
  const content = `${headerLines.join('\n')}\n${sortedWords.join('\n')}\n`;

  fs.writeFileSync(dictPath, content, 'utf-8');
}
83
+
84
/**
 * Collect the custom words of the global and the project dictionary.
 *
 * @param {string} projectDir - Directory holding the project dictionary
 * @returns {Set<string>} Union of both word lists (global words first)
 */
export function loadAllCustomWords(projectDir = '.') {
  const merged = loadDictionaryFile(getGlobalDictPath());
  const projectWords = loadDictionaryFile(getProjectDictPath(projectDir));

  for (const word of projectWords) {
    merged.add(word);
  }

  return merged;
}
95
+
96
/**
 * Add a word to the global or the project dictionary.
 *
 * The word is trimmed and lower-cased before it is stored. The spellchecker
 * cache is cleared afterwards so the new word takes effect.
 *
 * @param {string} word - Word to add
 * @param {boolean} global - Add to the global dictionary (otherwise project)
 * @param {string} projectDir - Project directory, used when `global` is false
 * @returns {boolean} True if the word was added; false if it was already
 *   present or is empty after trimming
 */
export function addWord(word, global = true, projectDir = '.') {
  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
  const words = loadDictionaryFile(dictPath);
  const normalizedWord = word.trim().toLowerCase();

  // An empty entry would only produce a blank line that the loader skips,
  // so nothing would really be added; report that honestly.
  if (normalizedWord === '' || words.has(normalizedWord)) {
    return false;
  }

  words.add(normalizedWord);
  saveDictionaryFile(words, dictPath);

  // Drop cached spellcheckers so the new word is picked up
  clearCache();

  return true;
}
120
+
121
/**
 * Delete a word from the global or the project dictionary.
 *
 * The word is trimmed and lower-cased before the lookup. On success the
 * dictionary file is rewritten and the spellchecker cache is cleared.
 *
 * @param {string} word - Word to remove
 * @param {boolean} global - Use the global dictionary (otherwise project)
 * @param {string} projectDir - Project directory, used when `global` is false
 * @returns {boolean} True if the word was present and has been removed
 */
export function removeWord(word, global = true, projectDir = '.') {
  let dictPath;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }

  const entries = loadDictionaryFile(dictPath);
  const target = word.trim().toLowerCase();

  // Set#delete returns false when the entry was not in the set
  const wasPresent = entries.delete(target);
  if (!wasPresent) {
    return false;
  }

  saveDictionaryFile(entries, dictPath);
  clearCache();
  return true;
}
145
+
146
/**
 * Return the words of one dictionary in sorted order.
 *
 * @param {boolean} global - Read the global dictionary (otherwise project)
 * @param {string} projectDir - Project directory, used when `global` is false
 * @returns {string[]} Sorted list of stored words
 */
export function listWords(global = true, projectDir = '.') {
  let dictPath;
  if (global) {
    dictPath = getGlobalDictPath();
  } else {
    dictPath = getProjectDictPath(projectDir);
  }

  return Array.from(loadDictionaryFile(dictPath)).sort();
}
157
+
158
/**
 * Get a spellchecker preloaded with the scientific and custom word lists.
 *
 * Instances are cached per language. A cached instance is reused only when
 * it was built for the same project directory, because the project
 * dictionary is part of its word list.
 *
 * @param {string} projectDir - Directory holding the project dictionary
 * @param {string} lang - 'en-gb' for British English; any other value
 *   selects the 'en' (US) dictionary
 * @returns {Promise<object>} The nspell instance
 */
export async function getSpellchecker(projectDir = '.', lang = 'en') {
  // Only two dictionaries exist. Normalising the key keeps unknown language
  // codes from creating cache slots that clearCache() does not reset.
  const langKey = lang === 'en-gb' ? 'en-gb' : 'en';
  const resolvedDir = path.resolve(projectDir);

  const cached = spellcheckerCache[langKey];
  if (cached && cached.projectDir === resolvedDir) {
    return cached.spell;
  }

  // Select dictionary based on language
  const dictionary = langKey === 'en-gb' ? dictionaryEnGb : dictionaryEn;
  const spell = nspell(dictionary);

  // Add scientific/academic words
  for (const word of scientificWords) {
    spell.add(word);
  }

  // Add custom words (global + project)
  for (const word of loadAllCustomWords(projectDir)) {
    spell.add(word);
  }

  // Remember which project the instance belongs to, so a later call for a
  // different project rebuilds it instead of reusing the wrong word list.
  spellcheckerCache[langKey] = { projectDir: resolvedDir, spell };
  return spell;
}
187
+
188
/**
 * Clear the spellchecker cache (call after modifying dictionaries).
 *
 * Every slot currently present in the cache is reset to null, not only the
 * two built-in language keys, so no stale instance can survive.
 */
export function clearCache() {
  for (const lang of Object.keys(spellcheckerCache)) {
    spellcheckerCache[lang] = null;
  }
}
195
+
196
/**
 * Extract the checkable words of a markdown text together with their
 * position in the original text.
 *
 * Skipped entirely: YAML frontmatter (only when the file starts with `---`),
 * fenced code blocks, and lines that start with `http` or `/`.
 * Markup inside a line (inline code, images, link targets, cross-refs,
 * brace groups, URLs, inline math, LaTeX commands) is blanked out with
 * spaces rather than deleted, so that `column` always refers to the
 * original line and neighbouring words are never glued together.
 *
 * @param {string} text - Markdown source
 * @returns {Array<{word: string, line: number, column: number}>} Words with
 *   1-based line and column numbers
 */
export function extractWords(text) {
  const words = [];
  const lines = text.split('\n');
  let inCodeBlock = false;
  let inFrontmatter = false;

  // Replace a match by the same number of spaces (keeps columns stable).
  const blank = (match) => ' '.repeat(match.length);
  // Keep a link's label at its original offset and blank everything else.
  const keepLabel = (match, label) => ` ${label}`.padEnd(match.length);

  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
    const line = lines[lineNum];
    const trimmed = line.trim();

    // Track YAML frontmatter (only at start of file)
    if (lineNum === 0 && trimmed === '---') {
      inFrontmatter = true;
      continue;
    }
    if (inFrontmatter) {
      if (trimmed === '---') {
        inFrontmatter = false;
      }
      continue;
    }

    // Track fenced code blocks
    if (trimmed.startsWith('```')) {
      inCodeBlock = !inCodeBlock;
      continue;
    }
    if (inCodeBlock) {
      continue;
    }

    // Skip lines that are just a URL or a path
    if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
      continue;
    }

    const cleanLine = line
      .replace(/`[^`]+`/g, blank) // inline code
      // Images must go before links: the link pattern also matches the
      // `[alt](src)` part of an image and would keep the alt text.
      .replace(/!\[[^\]]*\]\([^)]+\)/g, blank) // images
      .replace(/\[([^\]]+)\]\([^)]+\)/g, keepLabel) // links (keep text)
      .replace(/@(fig|tbl|eq):\w+/g, blank) // cross-refs
      .replace(/\{[^}]+\}/g, blank) // CriticMarkup/templates
      .replace(/https?:\/\/\S+/g, blank) // URLs
      .replace(/\$[^$]+\$/g, blank) // inline LaTeX math
      .replace(/\\\w+/g, blank) // LaTeX commands like \frac
      .replace(/[#*_~`>|]/g, ' '); // markdown chars

    // Words consist of letters, with apostrophes allowed inside
    const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
    let match;

    while ((match = wordPattern.exec(cleanLine)) !== null) {
      const word = match[0];

      // Skip:
      // - Very short words (1-2 chars)
      // - All caps (acronyms like NASA)
      // - File extensions (md, tex, png, ...)
      // - camelCase (likely code or citations like vanKleunen)
      if (
        word.length < 3 ||
        /^[A-Z]+$/.test(word) ||
        /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
        /[a-z][A-Z]/.test(word)
      ) {
        continue;
      }

      words.push({
        word,
        line: lineNum + 1,
        column: match.index + 1,
      });
    }
  }

  return words;
}
279
+
280
/**
 * Decide whether a word has the shape of a proper noun: one upper-case
 * letter followed by at least two lower-case letters and nothing else.
 *
 * @param {string} word
 * @returns {boolean}
 */
function looksLikeName(word) {
  const capitalizedWord = /^[A-Z][a-z]{2,}$/;
  return capitalizedWord.test(word);
}
289
+
290
/**
 * Spell-check a text.
 *
 * Each unknown word is reported once (case-insensitively), at its first
 * occurrence. Unknown words shaped like a proper noun go to
 * `possibleNames`; all others go to `misspelled` with up to five
 * suggestions.
 *
 * @param {string} text - Text to check
 * @param {object} options
 * @param {string} options.projectDir - Directory of the project dictionary
 * @param {string} options.lang - 'en' (US) or 'en-gb' (British)
 * @returns {Promise<{misspelled: Array, possibleNames: Array}>}
 */
export async function checkSpelling(text, options = {}) {
  const { projectDir = '.', lang = 'en' } = options;
  const spell = await getSpellchecker(projectDir, lang);

  const misspelled = [];
  const possibleNames = [];
  // Lower-cased words that were already reported in either list
  const reported = new Set();

  for (const { word, line, column } of extractWords(text)) {
    const key = word.toLowerCase();
    if (reported.has(key) || spell.correct(word)) {
      continue;
    }
    reported.add(key);

    if (looksLikeName(word)) {
      possibleNames.push({ word, line, column });
      continue;
    }

    const suggestions = spell.suggest(word).slice(0, 5);
    misspelled.push({ word, line, column, suggestions });
  }

  return { misspelled, possibleNames };
}
333
+
334
/**
 * Spell-check a file and tag every finding with the file path.
 *
 * @param {string} filePath - File to read (as UTF-8)
 * @param {object} options
 * @param {string} options.projectDir - Directory of the project dictionary
 * @param {string} options.lang - 'en' (US) or 'en-gb' (British)
 * @returns {Promise<{misspelled: Array, possibleNames: Array}>}
 */
export async function checkFile(filePath, options = {}) {
  const tagWithFile = (issue) => ({ ...issue, file: filePath });

  const text = fs.readFileSync(filePath, 'utf-8');
  const { misspelled, possibleNames } = await checkSpelling(text, options);

  return {
    misspelled: misspelled.map(tagWithFile),
    possibleNames: possibleNames.map(tagWithFile),
  };
}
@@ -0,0 +1,229 @@
1
+ /**
2
+ * Track changes module - Apply markdown annotations as Word track changes
3
+ *
4
+ * Converts CriticMarkup annotations to Word OOXML track changes format.
5
+ */
6
+
7
import * as fs from 'fs';
import * as path from 'path';
import { execFileSync, execSync } from 'child_process';
import AdmZip from 'adm-zip';
11
+
12
/**
 * Replace the five XML special characters (& < > " ') by their entities.
 *
 * @param {string} str - Raw text
 * @returns {string} Text that is safe inside XML content and attributes
 */
function escapeXml(str) {
  const entities = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&apos;',
  };
  return str.replace(/[&<>"']/g, (ch) => entities[ch]);
}
23
+
24
/**
 * Swap every CriticMarkup annotation for a `{{TC_<id>}}` placeholder and
 * record what the placeholder stands for.
 *
 * Annotations are collected in four passes (insertions, deletions,
 * substitutions, comments), so ids follow that pass order rather than the
 * document order. A comment of the form "Name: text", with the colon inside
 * the first 30 characters, uses "Name" as its author.
 *
 * @param {string} text - Text with CriticMarkup annotations
 * @param {object} options - Options
 * @param {string} options.author - Default author for track changes
 * @returns {{text: string, markers: Array}} Processed text and marker info
 */
export function prepareForTrackChanges(text, options = {}) {
  const { author = 'Reviewer' } = options;
  const markers = [];

  // Store one marker and hand back the placeholder that replaces it.
  // Ids are consecutive, so the next id equals the number stored so far.
  const register = (fields) => {
    const id = markers.length;
    markers.push({ id, ...fields });
    return `{{TC_${id}}}`;
  };

  // Insertions: {++text++}
  const afterInsertions = text.replace(/\{\+\+(.+?)\+\+\}/gs, (_match, content) =>
    register({ type: 'insert', content, author })
  );

  // Deletions: {--text--}
  const afterDeletions = afterInsertions.replace(/\{--(.+?)--\}/gs, (_match, content) =>
    register({ type: 'delete', content, author })
  );

  // Substitutions: {~~old~>new~~}
  const afterSubstitutions = afterDeletions.replace(
    /\{~~(.+?)~>(.+?)~~\}/gs,
    (_match, old, replacement) =>
      register({ type: 'substitute', content: old, replacement, author })
  );

  // Comments: {>>Author: comment<<}
  const afterComments = afterSubstitutions.replace(/\{>>(.+?)<<\}/gs, (_match, content) => {
    const colonIdx = content.indexOf(':');
    const hasAuthorPrefix = colonIdx > 0 && colonIdx < 30;
    if (!hasAuthorPrefix) {
      return register({ type: 'comment', content, author });
    }
    return register({
      type: 'comment',
      content: content.slice(colonIdx + 1).trim(),
      author: content.slice(0, colonIdx).trim(),
    });
  });

  return { text: afterComments, markers };
}
99
+
100
/**
 * Replace `{{TC_<id>}}` placeholders in a Word document by track-change XML.
 *
 * Insert, delete and substitute markers become `<w:ins>` / `<w:del>`
 * elements. Markers of any other type (for example 'comment') have their
 * placeholder removed without emitting XML. If the document has a
 * `word/settings.xml` without `w:trackRevisions`, that setting is added.
 *
 * NOTE(review): the placeholder is replaced textually, so the generated
 * elements land wherever the placeholder text sits in document.xml
 * (presumably inside an existing run's `<w:t>`) - confirm Word accepts the
 * result for the documents this is used on.
 *
 * @param {string} docxPath - Path to input DOCX file
 * @param {Array} markers - Markers from prepareForTrackChanges
 * @param {string} outputPath - Path for output DOCX file
 * @returns {Promise<{success: boolean, message: string}>}
 */
export async function applyTrackChangesToDocx(docxPath, markers, outputPath) {
  if (!fs.existsSync(docxPath)) {
    return { success: false, message: `File not found: ${docxPath}` };
  }

  let zip;
  try {
    zip = new AdmZip(docxPath);
  } catch (err) {
    return { success: false, message: `Invalid DOCX file: ${err.message}` };
  }

  const docEntry = zip.getEntry('word/document.xml');
  if (!docEntry) {
    return { success: false, message: 'Invalid DOCX: no document.xml' };
  }

  let documentXml = zip.readAsText(docEntry);

  // One timestamp shared by every revision of this run
  const now = new Date().toISOString();

  // A substitution needs a second revision id. Hand those out above the
  // highest marker id so they can never collide with a marker's own id.
  let nextExtraId = markers.reduce((max, marker) => Math.max(max, marker.id), -1) + 1;

  for (const marker of markers) {
    const placeholder = `{{TC_${marker.id}}}`;
    const escapedContent = escapeXml(marker.content);
    const escapedAuthor = escapeXml(marker.author);
    const revisionAttrs = (id) =>
      `w:id="${id}" w:author="${escapedAuthor}" w:date="${now}"`;

    // xml:space="preserve" keeps leading/trailing blanks of the text
    const insertion = (id, escapedText) =>
      `<w:ins ${revisionAttrs(id)}><w:r><w:t xml:space="preserve">${escapedText}</w:t></w:r></w:ins>`;
    const deletion = (id, escapedText) =>
      `<w:del ${revisionAttrs(id)}><w:r><w:delText xml:space="preserve">${escapedText}</w:delText></w:r></w:del>`;

    let replacement = '';
    if (marker.type === 'insert') {
      replacement = insertion(marker.id, escapedContent);
    } else if (marker.type === 'delete') {
      replacement = deletion(marker.id, escapedContent);
    } else if (marker.type === 'substitute') {
      const escapedReplacement = escapeXml(marker.replacement);
      replacement =
        deletion(marker.id, escapedContent) + insertion(nextExtraId++, escapedReplacement);
    }

    // Use a replacer function: a replacement *string* would expand `$&`,
    // `$1`, `$$` etc. that may occur in the user's text.
    documentXml = documentXml.replace(placeholder, () => replacement);
  }

  try {
    zip.updateFile('word/document.xml', Buffer.from(documentXml));

    // Enable track revisions in settings.xml
    const settingsEntry = zip.getEntry('word/settings.xml');
    if (settingsEntry) {
      let settingsXml = zip.readAsText(settingsEntry);
      if (!settingsXml.includes('w:trackRevisions')) {
        settingsXml = settingsXml.replace(
          '</w:settings>',
          '<w:trackRevisions/></w:settings>'
        );
        zip.updateFile('word/settings.xml', Buffer.from(settingsXml));
      }
    }

    zip.writeZip(outputPath);
  } catch (err) {
    // Report write failures the same way as the input errors above
    return { success: false, message: `Could not write ${outputPath}: ${err.message}` };
  }

  return { success: true, message: `Created ${outputPath} with track changes` };
}
172
+
173
/**
 * Build a Word document with track changes from annotated markdown.
 *
 * Without annotations the markdown file is converted by pandoc directly.
 * Otherwise the annotations are replaced by placeholders, the result is
 * converted through temporary files next to `mdPath`, and the placeholders
 * are turned into track changes in the final document.
 *
 * @param {string} mdPath - Path to markdown file with CriticMarkup
 * @param {string} docxPath - Output path for Word document
 * @param {object} options - Options
 * @param {string} options.author - Author name for track changes
 * @returns {Promise<{success: boolean, message: string}>}
 */
export async function buildWithTrackChanges(mdPath, docxPath, options = {}) {
  const { author = 'Author' } = options;

  if (!fs.existsSync(mdPath)) {
    return { success: false, message: `File not found: ${mdPath}` };
  }

  const content = fs.readFileSync(mdPath, 'utf-8');
  const { text: prepared, markers } = prepareForTrackChanges(content, { author });

  // pandoc is started without a shell and receives the paths as separate
  // arguments, so quotes, spaces or `$(...)` in a path cannot alter the
  // command.
  const runPandoc = (inputPath, outputPath) =>
    execFileSync('pandoc', [inputPath, '-o', outputPath], { encoding: 'utf-8' });

  // If no annotations, just build normally
  if (markers.length === 0) {
    try {
      runPandoc(mdPath, docxPath);
      return { success: true, message: `Created ${docxPath}` };
    } catch (err) {
      return { success: false, message: err.message };
    }
  }

  // Temp files live next to the source file
  const tempDir = path.dirname(mdPath);
  const tempBase = `.temp-${Date.now()}-${process.pid}`;
  const tempMd = path.join(tempDir, `${tempBase}.md`);
  const tempDocx = path.join(tempDir, `${tempBase}.docx`);

  try {
    fs.writeFileSync(tempMd, prepared, 'utf-8');
    runPandoc(tempMd, tempDocx);
    return await applyTrackChangesToDocx(tempDocx, markers, docxPath);
  } catch (err) {
    return { success: false, message: err.message };
  } finally {
    // Best-effort cleanup on every path; a failed delete must not replace
    // the real result of the build.
    for (const tempFile of [tempMd, tempDocx]) {
      try {
        if (fs.existsSync(tempFile)) {
          fs.unlinkSync(tempFile);
        }
      } catch (cleanupErr) {
        // A leftover temp file is harmless; ignore.
      }
    }
  }
}