npm - docrev - Versions diffs - 0.2.1 → 0.5.0 - Mend

docrev 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/lib/spelling.js ADDED Viewed

@@ -0,0 +1,350 @@
+/**
+ * Spelling checker module with global and project dictionaries
+ *
+ * Uses nspell (Hunspell-compatible) for English spellchecking.
+ * Custom words stored in:
+ * - ~/.rev-dictionary (global)
+ * - .rev-dictionary (project-local)
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import nspell from 'nspell';
+import dictionaryEn from 'dictionary-en';
+import dictionaryEnGb from 'dictionary-en-gb';
+import { scientificWords } from './scientific-words.js';
+// File name used for both the global (home directory) and the project-local custom dictionary
+const DICT_NAME = '.rev-dictionary';
+// Per-language cache slots, filled by getSpellchecker() and reset to null by clearCache()
+const spellcheckerCache = {
+  en: null,
+  'en-gb': null,
+};
+/**
+ * Get the global dictionary path
+ * @returns {string}
+ */
+export function getGlobalDictPath() {
+  const home = process.env.HOME || process.env.USERPROFILE;
+  return path.join(home, DICT_NAME);
+}
+/**
+ * Get the project dictionary path
+ * @param {string} directory
+ * @returns {string}
+ */
+export function getProjectDictPath(directory = '.') {
+  return path.join(directory, DICT_NAME);
+}
+/**
+ * Load custom words from a dictionary file
+ * @param {string} dictPath
+ * @returns {Set<string>}
+ */
+export function loadDictionaryFile(dictPath) {
+  const words = new Set();
+  if (fs.existsSync(dictPath)) {
+    const content = fs.readFileSync(dictPath, 'utf-8');
+    for (const line of content.split('\n')) {
+      const word = line.trim();
+      if (word && !word.startsWith('#')) {
+        words.add(word.toLowerCase());
+      }
+    }
+  }
+  return words;
+}
+/**
+ * Save words to a dictionary file
+ * @param {Set<string>} words
+ * @param {string} dictPath
+ */
+export function saveDictionaryFile(words, dictPath) {
+  const header = `# Custom dictionary for docrev
+# One word per line, lines starting with # are comments
+`;
+  const content = header + [...words].sort().join('\n') + '\n';
+  // Ensure directory exists
+  const dir = path.dirname(dictPath);
+  if (!fs.existsSync(dir)) {
+    fs.mkdirSync(dir, { recursive: true });
+  }
+  fs.writeFileSync(dictPath, content, 'utf-8');
+}
+/**
+ * Load all custom words (global + project)
+ * @param {string} projectDir
+ * @returns {Set<string>}
+ */
+export function loadAllCustomWords(projectDir = '.') {
+  const globalWords = loadDictionaryFile(getGlobalDictPath());
+  const projectWords = loadDictionaryFile(getProjectDictPath(projectDir));
+  return new Set([...globalWords, ...projectWords]);
+}
+/**
+ * Add word to dictionary
+ * @param {string} word
+ * @param {boolean} global - Add to global dictionary
+ * @param {string} projectDir
+ * @returns {boolean} True if word was added
+ */
+export function addWord(word, global = true, projectDir = '.') {
+  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
+  const words = loadDictionaryFile(dictPath);
+  const normalizedWord = word.trim().toLowerCase();
+  if (words.has(normalizedWord)) {
+    return false;
+  }
+  words.add(normalizedWord);
+  saveDictionaryFile(words, dictPath);
+  // Clear cache so new word is picked up
+  clearCache();
+  return true;
+}
+/**
+ * Remove word from dictionary
+ * @param {string} word
+ * @param {boolean} global
+ * @param {string} projectDir
+ * @returns {boolean} True if word was removed
+ */
+export function removeWord(word, global = true, projectDir = '.') {
+  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
+  const words = loadDictionaryFile(dictPath);
+  const normalizedWord = word.trim().toLowerCase();
+  if (!words.has(normalizedWord)) {
+    return false;
+  }
+  words.delete(normalizedWord);
+  saveDictionaryFile(words, dictPath);
+  // Clear cache
+  clearCache();
+  return true;
+}
+/**
+ * List words in dictionary
+ * @param {boolean} global
+ * @param {string} projectDir
+ * @returns {string[]}
+ */
+export function listWords(global = true, projectDir = '.') {
+  const dictPath = global ? getGlobalDictPath() : getProjectDictPath(projectDir);
+  const words = loadDictionaryFile(dictPath);
+  return [...words].sort();
+}
+/**
+ * Initialize the spellchecker with custom words
+ * @param {string} projectDir
+ * @param {string} lang - Language: 'en' (US) or 'en-gb' (British)
+ * @returns {Promise<object>}
+ */
+export async function getSpellchecker(projectDir = '.', lang = 'en') {
+  if (spellcheckerCache[lang]) {
+    return spellcheckerCache[lang];
+  }
+  // Select dictionary based on language
+  const dictionary = lang === 'en-gb' ? dictionaryEnGb : dictionaryEn;
+  const spell = nspell(dictionary);
+  // Add scientific/academic words
+  for (const word of scientificWords) {
+    spell.add(word);
+  }
+  // Add custom words
+  const customWords = loadAllCustomWords(projectDir);
+  for (const word of customWords) {
+    spell.add(word);
+  }
+  spellcheckerCache[lang] = spell;
+  return spell;
+}
+/**
+ * Clear spellchecker cache (call after modifying dictionaries)
+ */
+export function clearCache() {
+  spellcheckerCache.en = null;
+  spellcheckerCache['en-gb'] = null;
+}
+/**
+ * Extract words from text, filtering out non-words
+ * @param {string} text
+ * @returns {Array<{word: string, line: number, column: number}>}
+ */
+export function extractWords(text) {
+  const words = [];
+  const lines = text.split('\n');
+  let inCodeBlock = false;
+  let inFrontmatter = false;
+  for (let lineNum = 0; lineNum < lines.length; lineNum++) {
+    const line = lines[lineNum];
+    const trimmed = line.trim();
+    // Track YAML frontmatter (only at start of file)
+    if (lineNum === 0 && trimmed === '---') {
+      inFrontmatter = true;
+      continue;
+    }
+    if (inFrontmatter) {
+      if (trimmed === '---') {
+        inFrontmatter = false;
+      }
+      continue;
+    }
+    // Track code blocks
+    if (trimmed.startsWith('```')) {
+      inCodeBlock = !inCodeBlock;
+      continue;
+    }
+    if (inCodeBlock) {
+      continue;
+    }
+    // Skip URLs and paths
+    if (trimmed.startsWith('http') || trimmed.startsWith('/')) {
+      continue;
+    }
+    // Remove markdown syntax, URLs, code spans, LaTeX, etc.
+    let cleanLine = line
+      .replace(/`[^`]+`/g, '')           // inline code
+      .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')  // links (keep text)
+      .replace(/!\[[^\]]*\]\([^)]+\)/g, '')     // images
+      .replace(/@(fig|tbl|eq):\w+/g, '')        // cross-refs
+      .replace(/\{[^}]+\}/g, '')                // CriticMarkup/templates
+      .replace(/https?:\/\/\S+/g, '')           // URLs
+      .replace(/\$[^$]+\$/g, '')                // inline LaTeX math
+      .replace(/\\\w+/g, '')                    // LaTeX commands like \frac
+      .replace(/[#*_~`>|]/g, ' ');              // markdown chars
+    // Extract words (letters and apostrophes only)
+    const wordPattern = /[a-zA-Z][a-zA-Z']*[a-zA-Z]|[a-zA-Z]/g;
+    let match;
+    while ((match = wordPattern.exec(cleanLine)) !== null) {
+      const word = match[0];
+      // Skip:
+      // - Very short words (1-2 chars)
+      // - All caps (acronyms like NASA)
+      // - File extensions (.md, .tex, .png)
+      // - CamelCase (likely code or citations like vanKleunen)
+      // - Words starting with capital in middle of sentence (proper nouns/names)
+      if (word.length < 3 ||
+          /^[A-Z]+$/.test(word) ||
+          /^\w{2,4}$/.test(word) && /^(md|tex|png|jpg|pdf|csv|js|py|html|css|yaml|json|docx|bib)$/i.test(word) ||
+          /[a-z][A-Z]/.test(word)) {
+        continue;
+      }
+      words.push({
+        word,
+        line: lineNum + 1,
+        column: match.index + 1,
+      });
+    }
+  }
+  return words;
+}
+/**
+ * Check if a word looks like a proper noun (name)
+ * @param {string} word
+ * @returns {boolean}
+ */
+function looksLikeName(word) {
+  // Capitalized, not all caps, reasonable length for a name
+  return /^[A-Z][a-z]{2,}$/.test(word);
+}
+/**
+ * Check spelling in text
+ * @param {string} text
+ * @param {object} options
+ * @param {string} options.projectDir
+ * @param {string} options.lang - 'en' (US) or 'en-gb' (British)
+ * @returns {Promise<{misspelled: Array, possibleNames: Array}>}
+ */
+export async function checkSpelling(text, options = {}) {
+  const { projectDir = '.', lang = 'en' } = options;
+  const spell = await getSpellchecker(projectDir, lang);
+  const words = extractWords(text);
+  const misspelled = [];
+  const possibleNames = [];
+  const seen = new Set();
+  const seenNames = new Set();
+  for (const { word, line, column } of words) {
+    // Skip if already reported this word
+    const key = word.toLowerCase();
+    if (seen.has(key) || seenNames.has(key)) {
+      continue;
+    }
+    if (!spell.correct(word)) {
+      // Check if it looks like a proper noun/name
+      if (looksLikeName(word)) {
+        seenNames.add(key);
+        possibleNames.push({ word, line, column });
+      } else {
+        seen.add(key);
+        misspelled.push({
+          word,
+          line,
+          column,
+          suggestions: spell.suggest(word).slice(0, 5),
+        });
+      }
+    }
+  }
+  return { misspelled, possibleNames };
+}
+/**
+ * Check spelling in a file
+ * @param {string} filePath
+ * @param {object} options
+ * @param {string} options.projectDir
+ * @param {string} options.lang
+ * @returns {Promise<{misspelled: Array, possibleNames: Array}>}
+ */
+export async function checkFile(filePath, options = {}) {
+  const text = fs.readFileSync(filePath, 'utf-8');
+  const result = await checkSpelling(text, options);
+  return {
+    misspelled: result.misspelled.map(issue => ({ ...issue, file: filePath })),
+    possibleNames: result.possibleNames.map(issue => ({ ...issue, file: filePath })),
+  };
+}

package/lib/trackchanges.js ADDED Viewed

@@ -0,0 +1,229 @@
+/**
+ * Track changes module - Apply markdown annotations as Word track changes
+ *
+ * Converts CriticMarkup annotations to Word OOXML track changes format.
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import { execSync } from 'child_process';
+import AdmZip from 'adm-zip';
+/**
+ * Escape XML special characters
+ */
+function escapeXml(str) {
+  return str
+    .replace(/&/g, '&amp;')
+    .replace(/</g, '&lt;')
+    .replace(/>/g, '&gt;')
+    .replace(/"/g, '&quot;')
+    .replace(/'/g, '&apos;');
+}
+/**
+ * Prepare text with CriticMarkup annotations for track changes
+ * Replaces annotations with markers that can be processed in DOCX
+ *
+ * @param {string} text - Text with CriticMarkup annotations
+ * @param {object} options - Options
+ * @param {string} options.author - Default author for track changes
+ * @returns {{text: string, markers: Array}} Processed text and marker info
+ */
+export function prepareForTrackChanges(text, options = {}) {
+  const { author = 'Reviewer' } = options;
+  const markers = [];
+  let markerId = 0;
+  let result = text;
+  // Process insertions: {++text++}
+  result = result.replace(/\{\+\+(.+?)\+\+\}/gs, (match, content) => {
+    const id = markerId++;
+    markers.push({
+      id,
+      type: 'insert',
+      content,
+      author,
+    });
+    return `{{TC_${id}}}`;
+  });
+  // Process deletions: {--text--}
+  result = result.replace(/\{--(.+?)--\}/gs, (match, content) => {
+    const id = markerId++;
+    markers.push({
+      id,
+      type: 'delete',
+      content,
+      author,
+    });
+    return `{{TC_${id}}}`;
+  });
+  // Process substitutions: {~~old~>new~~}
+  result = result.replace(/\{~~(.+?)~>(.+?)~~\}/gs, (match, old, replacement) => {
+    const id = markerId++;
+    markers.push({
+      id,
+      type: 'substitute',
+      content: old,
+      replacement,
+      author,
+    });
+    return `{{TC_${id}}}`;
+  });
+  // Process comments: {>>Author: comment<<}
+  result = result.replace(/\{>>(.+?)<<\}/gs, (match, content) => {
+    const id = markerId++;
+    // Extract author if present (format: "Author: comment")
+    const colonIdx = content.indexOf(':');
+    let commentAuthor = author;
+    let commentText = content;
+    if (colonIdx > 0 && colonIdx < 30) {
+      commentAuthor = content.slice(0, colonIdx).trim();
+      commentText = content.slice(colonIdx + 1).trim();
+    }
+    markers.push({
+      id,
+      type: 'comment',
+      content: commentText,
+      author: commentAuthor,
+    });
+    return `{{TC_${id}}}`;
+  });
+  return { text: result, markers };
+}
+/**
+ * Apply track changes markers to a Word document
+ *
+ * @param {string} docxPath - Path to input DOCX file
+ * @param {Array} markers - Markers from prepareForTrackChanges
+ * @param {string} outputPath - Path for output DOCX file
+ * @returns {Promise<{success: boolean, message: string}>}
+ */
+export async function applyTrackChangesToDocx(docxPath, markers, outputPath) {
+  if (!fs.existsSync(docxPath)) {
+    return { success: false, message: `File not found: ${docxPath}` };
+  }
+  let zip;
+  try {
+    zip = new AdmZip(docxPath);
+  } catch (err) {
+    return { success: false, message: `Invalid DOCX file: ${err.message}` };
+  }
+  // Read document.xml
+  const docEntry = zip.getEntry('word/document.xml');
+  if (!docEntry) {
+    return { success: false, message: 'Invalid DOCX: no document.xml' };
+  }
+  let documentXml = zip.readAsText(docEntry);
+  // Generate ISO date for track changes
+  const now = new Date().toISOString();
+  // Replace markers with track change XML
+  for (const marker of markers) {
+    const placeholder = `{{TC_${marker.id}}}`;
+    let replacement = '';
+    const escapedContent = escapeXml(marker.content);
+    const escapedAuthor = escapeXml(marker.author);
+    if (marker.type === 'insert') {
+      replacement = `<w:ins w:id="${marker.id}" w:author="${escapedAuthor}" w:date="${now}"><w:r><w:t>${escapedContent}</w:t></w:r></w:ins>`;
+    } else if (marker.type === 'delete') {
+      replacement = `<w:del w:id="${marker.id}" w:author="${escapedAuthor}" w:date="${now}"><w:r><w:delText>${escapedContent}</w:delText></w:r></w:del>`;
+    } else if (marker.type === 'substitute') {
+      const escapedReplacement = escapeXml(marker.replacement);
+      replacement = `<w:del w:id="${marker.id}" w:author="${escapedAuthor}" w:date="${now}"><w:r><w:delText>${escapedContent}</w:delText></w:r></w:del><w:ins w:id="${marker.id + 1000}" w:author="${escapedAuthor}" w:date="${now}"><w:r><w:t>${escapedReplacement}</w:t></w:r></w:ins>`;
+    }
+    documentXml = documentXml.replace(placeholder, replacement);
+  }
+  // Update document.xml
+  zip.updateFile('word/document.xml', Buffer.from(documentXml));
+  // Enable track revisions in settings.xml
+  const settingsEntry = zip.getEntry('word/settings.xml');
+  if (settingsEntry) {
+    let settingsXml = zip.readAsText(settingsEntry);
+    if (!settingsXml.includes('w:trackRevisions')) {
+      settingsXml = settingsXml.replace(
+        '</w:settings>',
+        '<w:trackRevisions/></w:settings>'
+      );
+      zip.updateFile('word/settings.xml', Buffer.from(settingsXml));
+    }
+  }
+  // Write output
+  zip.writeZip(outputPath);
+  return { success: true, message: `Created ${outputPath} with track changes` };
+}
+/**
+ * Build a Word document with track changes from annotated markdown
+ *
+ * @param {string} mdPath - Path to markdown file with CriticMarkup
+ * @param {string} docxPath - Output path for Word document
+ * @param {object} options - Options
+ * @param {string} options.author - Author name for track changes
+ * @returns {Promise<{success: boolean, message: string}>}
+ */
+export async function buildWithTrackChanges(mdPath, docxPath, options = {}) {
+  const { author = 'Author' } = options;
+  if (!fs.existsSync(mdPath)) {
+    return { success: false, message: `File not found: ${mdPath}` };
+  }
+  const content = fs.readFileSync(mdPath, 'utf-8');
+  // Prepare for track changes
+  const { text: prepared, markers } = prepareForTrackChanges(content, { author });
+  // If no annotations, just build normally
+  if (markers.length === 0) {
+    try {
+      execSync(`pandoc "${mdPath}" -o "${docxPath}"`, { encoding: 'utf-8' });
+      return { success: true, message: `Created ${docxPath}` };
+    } catch (err) {
+      return { success: false, message: err.message };
+    }
+  }
+  // Write prepared content to temp file
+  const tempDir = path.dirname(mdPath);
+  const tempMd = path.join(tempDir, `.temp-${Date.now()}.md`);
+  const tempDocx = path.join(tempDir, `.temp-${Date.now()}.docx`);
+  try {
+    fs.writeFileSync(tempMd, prepared, 'utf-8');
+    // Build with pandoc
+    execSync(`pandoc "${tempMd}" -o "${tempDocx}"`, { encoding: 'utf-8' });
+    // Apply track changes
+    const result = await applyTrackChangesToDocx(tempDocx, markers, docxPath);
+    // Clean up temp files
+    fs.unlinkSync(tempMd);
+    fs.unlinkSync(tempDocx);
+    return result;
+  } catch (err) {
+    // Clean up on error
+    if (fs.existsSync(tempMd)) fs.unlinkSync(tempMd);
+    if (fs.existsSync(tempDocx)) fs.unlinkSync(tempDocx);
+    return { success: false, message: err.message };
+  }
+}