npm - docrev - Versions diffs - 0.2.1 → 0.5.0 - Mend

docrev 0.2.1 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (23) hide show

package/lib/variables.js ADDED Viewed

@@ -0,0 +1,173 @@
+/**
+ * Template variable substitution for rev
+ *
+ * Supported variables:
+ *   {{date}}       - Current date (YYYY-MM-DD)
+ *   {{date:format}} - Custom date format (e.g., {{date:MMMM D, YYYY}})
+ *   {{version}}    - Version from rev.yaml
+ *   {{word_count}} - Total word count
+ *   {{author}}     - First author name
+ *   {{authors}}    - All authors (comma-separated)
+ *   {{title}}      - Document title
+ *   {{year}}       - Current year
+ */
+import * as fs from 'fs';
+/**
+ * Format a date using a small set of pattern tokens.
+ *
+ * Recognised tokens: YYYY (4-digit year), MMMM (full month name), MMM (short
+ * month name), MM (zero-padded month number), DD (zero-padded day) and D (day
+ * without padding, matched only as a standalone word). Any other text in the
+ * pattern is copied through unchanged. Values are read via the Date's local
+ * getters (getFullYear / getMonth / getDate).
+ *
+ * @param {Date} date
+ * @param {string} format - Pattern (YYYY, MM, DD, MMMM, MMM, D)
+ * @returns {string}
+ */
+function formatDate(date, format = 'YYYY-MM-DD') {
+  const months = [
+    'January', 'February', 'March', 'April', 'May', 'June',
+    'July', 'August', 'September', 'October', 'November', 'December'
+  ];
+  const monthsShort = [
+    'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+    'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'
+  ];
+  const month = date.getMonth();
+  const day = date.getDate();
+  const values = {
+    YYYY: date.getFullYear().toString(),
+    MMMM: months[month],
+    MMM: monthsShort[month],
+    MM: (month + 1).toString().padStart(2, '0'),
+    DD: day.toString().padStart(2, '0'),
+    D: day.toString(),
+  };
+  // One global pass: every occurrence of a token is replaced, and inserted text
+  // such as "December" is never rescanned for tokens. Longer tokens come first
+  // in the alternation so MMMM is not consumed as MMM followed by a stray M.
+  return format.replace(/YYYY|MMMM|MMM|MM|DD|\bD\b/g, (token) => values[token]);
+}
+/**
+ * Count words in text (excluding markdown syntax)
+ *
+ * Strips, in order: a YAML frontmatter block at the very start of the text,
+ * fenced code blocks, horizontal rules, images, link targets (link text is
+ * kept), header markers, emphasis/code markers, {...} annotations, @refs and
+ * @citations, and |...| table cells. What remains is split on whitespace.
+ *
+ * @param {string} text
+ * @returns {number} Number of whitespace-separated tokens left; 0 for empty input
+ */
+function countWords(text) {
+  if (!text) return 0;
+  return String(text)
+    // Not multiline: frontmatter is only frontmatter when it opens the document,
+    // otherwise two "---" rules in the body would swallow the prose between them.
+    .replace(/^---[ \t]*\r?\n(?:[\s\S]*?\r?\n)?---[ \t]*(?=\r?\n|$)/, '')
+    // Fenced code must go before the formatting pass below strips the backticks.
+    .replace(/```[\s\S]*?```/g, '') // Remove code blocks
+    .replace(/^[ \t]*-{3,}[ \t]*\r?$/gm, '') // Remove horizontal rules
+    .replace(/!\[.*?\]\(.*?\)/g, '') // Remove images
+    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1') // Keep link text
+    .replace(/#+\s*/g, '') // Remove headers
+    .replace(/\*\*|__|[*_`]/g, '') // Remove formatting
+    .replace(/\{[^}]+\}/g, '') // Remove annotations
+    .replace(/@\w+:\w+/g, '') // Remove refs
+    .replace(/@\w+/g, '') // Remove citations
+    .replace(/\|[^|]+\|/g, ' ') // Remove tables
+    .replace(/\n+/g, ' ')
+    .trim()
+    .split(/\s+/)
+    .filter(w => w.length > 0).length;
+}
+/**
+ * Get first author name from authors array
+ *
+ * Accepts an array of strings and/or objects with a `name` property, a single
+ * such object, or a plain string (returned as-is).
+ *
+ * @param {Array|string|object} authors
+ * @returns {string} The first author's name, or '' when none is available
+ */
+function getFirstAuthor(authors) {
+  if (!authors || authors.length === 0) return '';
+  const first = Array.isArray(authors) ? authors[0] : authors;
+  if (typeof first === 'string') return first;
+  // Optional chaining: a null/undefined first entry yields '' instead of throwing.
+  const name = first?.name;
+  return name ? String(name) : '';
+}
+/**
+ * Get all author names
+ *
+ * Accepts an array of strings and/or objects with a `name` property, a single
+ * such object, or a plain string (returned as-is). Entries without a usable
+ * name are skipped.
+ *
+ * @param {Array|string|object} authors
+ * @returns {string} Names joined with ", " ('' when there are none)
+ */
+function getAllAuthors(authors) {
+  if (!authors) return '';
+  if (typeof authors === 'string') return authors;
+  // A lone author object is treated as a one-element list.
+  const list = Array.isArray(authors) ? authors : [authors];
+  return list
+    .map((a) => (typeof a === 'string' ? a : a?.name))
+    .filter(Boolean)
+    .join(', ');
+}
+/**
+ * Process template variables in text
+ *
+ * All supported {{variable}} placeholders are replaced in one pass, so values
+ * are inserted literally: "$" sequences in a value are not interpreted as
+ * replacement patterns, and a value that itself contains "{{...}}" is not
+ * expanded again. Unrecognised placeholders are left untouched.
+ *
+ * @param {string} text - Text with {{variable}} placeholders
+ * @param {object} config - rev.yaml config (version, title and authors are read)
+ * @param {object} options - Additional options
+ * @param {string[]} [options.sectionContents] - Section file contents for word count
+ * @returns {string} Text with variables replaced
+ */
+export function processVariables(text, config = {}, options = {}) {
+  const now = new Date();
+  const cfg = config ?? {};
+  // Calculate word count from sections if provided
+  let wordCount = 0;
+  if (options?.sectionContents) {
+    for (const content of options.sectionContents) {
+      wordCount += countWords(content);
+    }
+  }
+  const values = {
+    date: formatDate(now),
+    year: now.getFullYear().toString(),
+    // ?? rather than ||, so a version of 0 is not silently dropped.
+    version: String(cfg.version ?? ''),
+    title: String(cfg.title ?? ''),
+    author: getFirstAuthor(cfg.authors),
+    authors: getAllAuthors(cfg.authors),
+    // toLocaleString() groups digits according to the runtime's default locale.
+    word_count: wordCount.toLocaleString(),
+  };
+  // The alternation is closed, so `key` is always "date:<format>" or an own key of `values`.
+  const placeholder = /\{\{(date:[^}]+|date|year|version|title|authors|author|word_count)\}\}/g;
+  return text.replace(placeholder, (match, key) => {
+    if (key.startsWith('date:')) {
+      return formatDate(now, key.slice('date:'.length));
+    }
+    return values[key];
+  });
+}
+/**
+ * Tell whether the text holds at least one {{...}} placeholder
+ * (two opening braces, one or more non-"}" characters, two closing braces).
+ * @param {string} text
+ * @returns {boolean}
+ */
+export function hasVariables(text) {
+  const placeholder = /\{\{[^}]+\}\}/;
+  return placeholder.exec(text) !== null;
+}
+/**
+ * Collect the distinct variable names used in text, in order of first
+ * appearance. A name is whatever sits between "{{" and "}}".
+ * @param {string} text
+ * @returns {string[]}
+ */
+export function findVariables(text) {
+  const names = new Set();
+  for (const [, inner] of text.matchAll(/\{\{([^}]+)\}\}/g)) {
+    names.add(inner);
+  }
+  return Array.from(names);
+}

package/lib/word.js ADDED Viewed

@@ -0,0 +1,225 @@
+/**
+ * Word document extraction utilities
+ * Handle reading text, comments, and anchors from .docx files
+ */
+import * as fs from 'fs';
+import * as path from 'path';
+import AdmZip from 'adm-zip';
+import { parseString } from 'xml2js';
+import { promisify } from 'util';
+// Promise-returning wrapper around xml2js's callback-style parseString, so it can be awaited.
+const parseXml = promisify(parseString);
+/**
+ * Extract comments from Word document's comments.xml
+ *
+ * Reads word/comments.xml from the .docx archive and, for each w:comment,
+ * collects the w:t text of the runs directly under each w:p paragraph.
+ * Paragraphs are joined with a single space so that a multi-paragraph comment
+ * does not run its sentences together. Comments without an id or without any
+ * text are skipped.
+ *
+ * @param {string} docxPath
+ * @returns {Promise<Array<{id: string, author: string, date: string, text: string}>>}
+ *   Empty array when the document has no comments part.
+ * @throws {Error} When docxPath does not exist
+ */
+export async function extractWordComments(docxPath) {
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const commentsEntry = zip.getEntry('word/comments.xml');
+  if (!commentsEntry) {
+    return []; // No comments in document
+  }
+  const parsed = await parseXml(zip.readAsText(commentsEntry));
+  // Optional chaining also covers a null/undefined parse result.
+  const rawComments = parsed?.['w:comments']?.['w:comment'];
+  if (!rawComments) {
+    return [];
+  }
+  const comments = [];
+  for (const comment of rawComments) {
+    const attrs = comment.$ ?? {};
+    const id = attrs['w:id'];
+    const author = attrs['w:author'] || 'Unknown';
+    const date = attrs['w:date'];
+    // One string per paragraph: concatenate the text nodes of its runs.
+    const paragraphTexts = (comment['w:p'] || []).map((para) =>
+      (para['w:r'] || [])
+        .flatMap((run) => run['w:t'] || [])
+        // A text node is either a bare string or an object whose text is under `_`.
+        .map((t) => (typeof t === 'string' ? t : (t._ || '')))
+        .join('')
+        .trim()
+    );
+    const text = paragraphTexts.filter(Boolean).join(' ');
+    if (id && text) {
+      comments.push({
+        id,
+        author,
+        date,
+        text,
+      });
+    }
+  }
+  return comments;
+}
+/**
+ * Extract comment anchors (where comments are attached) from document.xml
+ * Returns mapping of comment ID to the text they're anchored to
+ *
+ * The raw XML of word/document.xml is scanned for w:commentRangeStart /
+ * w:commentRangeEnd markers. For each id that has both, `text` is the w:t
+ * content between the markers and `context` is the last 100 characters of
+ * w:t content found in the 500 characters of XML preceding the start marker.
+ * XML entities (&amp;, &lt;, &#8217;, ...) are decoded in both strings.
+ *
+ * @param {string} docxPath
+ * @returns {Promise<Map<string, {text: string, context: string}>>}
+ * @throws {Error} When docxPath does not exist or the archive has no word/document.xml
+ */
+export async function extractCommentAnchors(docxPath) {
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const documentEntry = zip.getEntry('word/document.xml');
+  if (!documentEntry) {
+    throw new Error('Invalid docx: no document.xml');
+  }
+  const documentXml = zip.readAsText(documentEntry);
+  const namedEntities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
+  // Decode in a single pass so "&amp;lt;" becomes "&lt;" and is not decoded twice.
+  const decodeEntities = (raw) =>
+    raw.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (entity, body) => {
+      if (body[0] !== '#') return namedEntities[body];
+      const code = body[1] === 'x'
+        ? Number.parseInt(body.slice(2), 16)
+        : Number.parseInt(body.slice(1), 10);
+      // Leave out-of-range references as written; fromCodePoint would throw on them.
+      return code <= 0x10ffff ? String.fromCodePoint(code) : entity;
+    });
+  // Concatenate the content of every <w:t> element in an XML fragment.
+  // (?:\s[^>]*)? allows attributes but keeps <w:tab>, <w:tbl>, <w:tc>... from matching.
+  const collectText = (xml) => {
+    let out = '';
+    for (const m of xml.matchAll(/<w:t(?:\s[^>]*)?>([^<]*)<\/w:t>/g)) {
+      out += m[1];
+    }
+    return decodeEntities(out);
+  };
+  // Map comment id -> offset of the given self-closing marker tag.
+  // Extra attributes or whitespace before "/>" are tolerated.
+  const collectMarkers = (tagName) => {
+    const positions = new Map();
+    const pattern = new RegExp(`<w:${tagName}\\b[^>]*?\\bw:id="(\\d+)"[^>]*?/>`, 'g');
+    for (const m of documentXml.matchAll(pattern)) {
+      positions.set(m[1], m.index);
+    }
+    return positions;
+  };
+  const starts = collectMarkers('commentRangeStart');
+  const ends = collectMarkers('commentRangeEnd');
+  const anchors = new Map();
+  // For each comment, extract the text between start and end
+  for (const [id, startPos] of starts) {
+    const endPos = ends.get(id);
+    if (endPos === undefined) continue; // start marker without a matching end
+    const text = collectText(documentXml.slice(startPos, endPos));
+    // Get surrounding context (text before the anchor)
+    const contextStart = Math.max(0, startPos - 500);
+    const context = collectText(documentXml.slice(contextStart, startPos));
+    anchors.set(id, {
+      text: text.trim(),
+      context: context.slice(-100), // Last 100 chars of context
+    });
+  }
+  return anchors;
+}
+/**
+ * Read a Word document as plain text.
+ * mammoth is loaded on demand and its extractRawText() result value is returned.
+ * @param {string} docxPath
+ * @returns {Promise<string>}
+ * @throws {Error} When docxPath does not exist
+ */
+export async function extractTextFromWord(docxPath) {
+  const present = fs.existsSync(docxPath);
+  if (!present) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const lib = await import('mammoth');
+  const { value } = await lib.extractRawText({ path: docxPath });
+  return value;
+}
+/**
+ * Read a Word document both as plain text and as HTML.
+ * mammoth is loaded on demand; the raw-text and HTML conversions are started
+ * together and awaited as a pair.
+ * @param {string} docxPath
+ * @returns {Promise<{text: string, html: string}>}
+ * @throws {Error} When docxPath does not exist
+ */
+export async function extractFromWord(docxPath) {
+  const present = fs.existsSync(docxPath);
+  if (!present) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const lib = await import('mammoth');
+  const pending = [
+    lib.extractRawText({ path: docxPath }),
+    lib.convertToHtml({ path: docxPath }),
+  ];
+  const [raw, markup] = await Promise.all(pending);
+  const text = raw.value;
+  const html = markup.value;
+  return { text, html };
+}
+/**
+ * Get document metadata from Word file
+ *
+ * Reads docProps/core.xml from the archive and pulls out dc:title, dc:creator,
+ * dcterms:created and dcterms:modified with regular expressions. XML entities
+ * in the values (&amp;, &lt;, &#8217;, ...) are decoded. Fields that are not
+ * present in the XML are omitted from the result.
+ *
+ * @param {string} docxPath
+ * @returns {Promise<{title?: string, author?: string, created?: string, modified?: string}>}
+ *   Empty object when the archive has no docProps/core.xml.
+ * @throws {Error} When docxPath does not exist
+ */
+export async function getWordMetadata(docxPath) {
+  if (!fs.existsSync(docxPath)) {
+    throw new Error(`File not found: ${docxPath}`);
+  }
+  const zip = new AdmZip(docxPath);
+  const coreEntry = zip.getEntry('docProps/core.xml');
+  if (!coreEntry) {
+    return {};
+  }
+  const coreXml = zip.readAsText(coreEntry);
+  const namedEntities = { amp: '&', lt: '<', gt: '>', quot: '"', apos: "'" };
+  // Decode in a single pass so "&amp;lt;" becomes "&lt;" and is not decoded twice.
+  const decodeEntities = (raw) =>
+    raw.replace(/&(#x[0-9a-fA-F]+|#\d+|amp|lt|gt|quot|apos);/g, (entity, body) => {
+      if (body[0] !== '#') return namedEntities[body];
+      const code = body[1] === 'x'
+        ? Number.parseInt(body.slice(2), 16)
+        : Number.parseInt(body.slice(1), 10);
+      // Leave out-of-range references as written; fromCodePoint would throw on them.
+      return code <= 0x10ffff ? String.fromCodePoint(code) : entity;
+    });
+  const metadata = {};
+  // Extract common metadata fields
+  const patterns = {
+    title: /<dc:title(?:\s[^>]*)?>([^<]*)<\/dc:title>/,
+    author: /<dc:creator(?:\s[^>]*)?>([^<]*)<\/dc:creator>/,
+    created: /<dcterms:created[^>]*>([^<]*)<\/dcterms:created>/,
+    modified: /<dcterms:modified[^>]*>([^<]*)<\/dcterms:modified>/,
+  };
+  for (const [key, pattern] of Object.entries(patterns)) {
+    const match = coreXml.match(pattern);
+    if (match) {
+      metadata[key] = decodeEntities(match[1]);
+    }
+  }
+  return metadata;
+}
+/**
+ * Decide whether a path points at a usable Word document: the file must exist,
+ * carry a .docx extension (case-insensitive) and open as a zip archive that
+ * contains word/document.xml. Any error while opening the archive counts as "no".
+ * @param {string} filePath
+ * @returns {boolean}
+ */
+export function isWordDocument(filePath) {
+  const plausible = fs.existsSync(filePath) && filePath.toLowerCase().endsWith('.docx');
+  if (!plausible) {
+    return false;
+  }
+  try {
+    const archive = new AdmZip(filePath);
+    const mainPart = archive.getEntry('word/document.xml');
+    return mainPart !== null;
+  } catch {
+    return false;
+  }
+}

package/package.json CHANGED Viewed

@@ -1,14 +1,86 @@
 {
   "name": "docrev",
-  "version": "0.2.1",
+  "version": "0.5.0",
   "description": "Academic paper revision workflow: Word ↔ Markdown round-trips, DOI validation, reviewer comments",
   "type": "module",
+  "types": "types/index.d.ts",
+  "exports": {
+    ".": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/annotations.js"
+    },
+    "./annotations": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/annotations.js"
+    },
+    "./build": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/build.js"
+    },
+    "./citations": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/citations.js"
+    },
+    "./crossref": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/crossref.js"
+    },
+    "./doi": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/doi.js"
+    },
+    "./equations": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/equations.js"
+    },
+    "./git": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/git.js"
+    },
+    "./journals": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/journals.js"
+    },
+    "./merge": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/merge.js"
+    },
+    "./sections": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/sections.js"
+    },
+    "./word": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/word.js"
+    },
+    "./variables": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/variables.js"
+    },
+    "./grammar": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/grammar.js"
+    },
+    "./trackchanges": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/trackchanges.js"
+    },
+    "./spelling": {
+      "types": "./types/index.d.ts",
+      "import": "./lib/spelling.js"
+    }
+  },
+  "engines": {
+    "node": ">=18.0.0"
+  },
   "bin": {
     "rev": "bin/rev.js"
   },
   "scripts": {
     "build": "echo 'No build needed'",
-    "test": "node bin/rev.js --help"
+    "test": "node --test test/*.test.js",
+    "test:watch": "node --test --watch test/*.test.js",
+    "test:coverage": "c8 --reporter=text --reporter=lcov node --test test/*.test.js"
   },
   "repository": {
     "type": "git",
@@ -35,10 +107,16 @@
     "adm-zip": "^0.5.16",
     "chalk": "^5.3.0",
     "commander": "^12.0.0",
+    "dictionary-en": "^4.0.0",
+    "dictionary-en-gb": "^3.0.0",
     "diff": "^8.0.2",
     "js-yaml": "^4.1.1",
     "mammoth": "^1.6.0",
     "mathml-to-latex": "^1.5.0",
+    "nspell": "^2.1.5",
     "xml2js": "^0.6.2"
+  },
+  "devDependencies": {
+    "c8": "^10.1.2"
   }
 }