npm - docrev - Versions diffs - 0.9.5 → 0.9.7 - Mend

docrev 0.9.5 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (99) hide show

package/CHANGELOG.md +20 -0
package/dev_notes/bug_repro_comment_parser.md +71 -0
package/dist/lib/anchor-match.d.ts +41 -0
package/dist/lib/anchor-match.d.ts.map +1 -0
package/dist/lib/anchor-match.js +192 -0
package/dist/lib/anchor-match.js.map +1 -0
package/dist/lib/annotations.d.ts.map +1 -1
package/dist/lib/annotations.js +8 -5
package/dist/lib/annotations.js.map +1 -1
package/dist/lib/commands/file-ops.d.ts +11 -0
package/dist/lib/commands/file-ops.d.ts.map +1 -0
package/dist/lib/commands/file-ops.js +301 -0
package/dist/lib/commands/file-ops.js.map +1 -0
package/dist/lib/commands/index.d.ts +10 -1
package/dist/lib/commands/index.d.ts.map +1 -1
package/dist/lib/commands/index.js +19 -1
package/dist/lib/commands/index.js.map +1 -1
package/dist/lib/commands/merge-resolve.d.ts +12 -0
package/dist/lib/commands/merge-resolve.d.ts.map +1 -0
package/dist/lib/commands/merge-resolve.js +318 -0
package/dist/lib/commands/merge-resolve.js.map +1 -0
package/dist/lib/commands/preview.d.ts +11 -0
package/dist/lib/commands/preview.d.ts.map +1 -0
package/dist/lib/commands/preview.js +138 -0
package/dist/lib/commands/preview.js.map +1 -0
package/dist/lib/commands/project-info.d.ts +11 -0
package/dist/lib/commands/project-info.d.ts.map +1 -0
package/dist/lib/commands/project-info.js +187 -0
package/dist/lib/commands/project-info.js.map +1 -0
package/dist/lib/commands/quality.d.ts +11 -0
package/dist/lib/commands/quality.d.ts.map +1 -0
package/dist/lib/commands/quality.js +384 -0
package/dist/lib/commands/quality.js.map +1 -0
package/dist/lib/commands/section-boundaries.d.ts +22 -0
package/dist/lib/commands/section-boundaries.d.ts.map +1 -0
package/dist/lib/commands/section-boundaries.js +53 -0
package/dist/lib/commands/section-boundaries.js.map +1 -0
package/dist/lib/commands/sections.d.ts +3 -2
package/dist/lib/commands/sections.d.ts.map +1 -1
package/dist/lib/commands/sections.js +4 -736
package/dist/lib/commands/sections.js.map +1 -1
package/dist/lib/commands/sync.d.ts +11 -0
package/dist/lib/commands/sync.d.ts.map +1 -0
package/dist/lib/commands/sync.js +576 -0
package/dist/lib/commands/sync.js.map +1 -0
package/dist/lib/commands/text-ops.d.ts +11 -0
package/dist/lib/commands/text-ops.d.ts.map +1 -0
package/dist/lib/commands/text-ops.js +357 -0
package/dist/lib/commands/text-ops.js.map +1 -0
package/dist/lib/commands/utilities.d.ts +2 -4
package/dist/lib/commands/utilities.d.ts.map +1 -1
package/dist/lib/commands/utilities.js +3 -1572
package/dist/lib/commands/utilities.js.map +1 -1
package/dist/lib/commands/verify-anchors.d.ts +17 -0
package/dist/lib/commands/verify-anchors.d.ts.map +1 -0
package/dist/lib/commands/verify-anchors.js +215 -0
package/dist/lib/commands/verify-anchors.js.map +1 -0
package/dist/lib/commands/word-tools.d.ts +11 -0
package/dist/lib/commands/word-tools.d.ts.map +1 -0
package/dist/lib/commands/word-tools.js +272 -0
package/dist/lib/commands/word-tools.js.map +1 -0
package/dist/lib/diff-engine.d.ts +25 -0
package/dist/lib/diff-engine.d.ts.map +1 -0
package/dist/lib/diff-engine.js +354 -0
package/dist/lib/diff-engine.js.map +1 -0
package/dist/lib/import.d.ts +44 -118
package/dist/lib/import.d.ts.map +1 -1
package/dist/lib/import.js +25 -1173
package/dist/lib/import.js.map +1 -1
package/dist/lib/restore-references.d.ts +35 -0
package/dist/lib/restore-references.d.ts.map +1 -0
package/dist/lib/restore-references.js +188 -0
package/dist/lib/restore-references.js.map +1 -0
package/dist/lib/word-extraction.d.ts +100 -0
package/dist/lib/word-extraction.d.ts.map +1 -0
package/dist/lib/word-extraction.js +594 -0
package/dist/lib/word-extraction.js.map +1 -0
package/lib/anchor-match.ts +238 -0
package/lib/annotations.ts +9 -5
package/lib/commands/file-ops.ts +372 -0
package/lib/commands/index.ts +27 -0
package/lib/commands/merge-resolve.ts +378 -0
package/lib/commands/preview.ts +178 -0
package/lib/commands/project-info.ts +244 -0
package/lib/commands/quality.ts +517 -0
package/lib/commands/section-boundaries.ts +72 -0
package/lib/commands/sections.ts +3 -870
package/lib/commands/sync.ts +701 -0
package/lib/commands/text-ops.ts +449 -0
package/lib/commands/utilities.ts +62 -2043
package/lib/commands/verify-anchors.ts +261 -0
package/lib/commands/word-tools.ts +340 -0
package/lib/diff-engine.ts +465 -0
package/lib/import.ts +108 -1504
package/lib/restore-references.ts +240 -0
package/lib/word-extraction.ts +759 -0
package/package.json +1 -1
package/skill/REFERENCE.md +29 -2
package/skill/SKILL.md +12 -2

package/lib/diff-engine.ts ADDED Viewed

@@ -0,0 +1,465 @@
+/**
+ * Diff engine - diffing and annotation processing for Word→Markdown import
+ */
+import { diffWords, Change } from 'diff';
+import {
+  extractMarkdownPrefix,
+  protectAnchors,
+  restoreAnchors,
+  protectCrossrefs,
+  restoreCrossrefs,
+  protectMath,
+  restoreMath,
+  replaceRenderedMath,
+  protectCitations,
+  restoreCitations,
+  replaceRenderedCitations,
+  protectImages,
+  restoreImages,
+  matchWordImagesToOriginal,
+  protectTables,
+  restoreTables,
+} from './protect-restore.js';
+import { normalizeWhitespace } from './utils.js';
+import type { WordTable } from './word-extraction.js';
+// ============================================
+// Type Definitions
+// ============================================
+export interface GenerateSmartDiffOptions {
+  wordTables?: WordTable[]; // handed to injectWordTables by generateSmartDiff, which defaults it to []
+  imageRegistry?: any; // forwarded unchanged to protectImages and matchWordImagesToOriginal; generateSmartDiff defaults it to null
+}
+// ============================================
+// Functions
+// ============================================
+/**
+ * Fix citation and math annotations by preserving original markdown syntax.
+ *
+ * Runs an ordered series of regex rewrites over CriticMarkup-annotated text so
+ * that `$…$` / `$$…$$` math and `[@key]` citations are kept as plain markdown
+ * instead of being wrapped in deletion/substitution markers, and so that
+ * insertions that look like rendered citations (e.g. `(Smith 2020)`) are dropped.
+ * The order of the rewrites matters: later rules assume earlier ones already ran.
+ *
+ * @param text - Annotated text to clean.
+ * @param originalMd - Original markdown. Not consulted by the current rules; the
+ *   parameter is kept so existing callers keep working.
+ * @returns The cleaned text.
+ */
+export function fixCitationAnnotations(text: string, originalMd: string): string {
+  // Fix math annotations - preserve inline and display math
+  text = text.replace(/\{--(\$[^$]+\$)--\}/g, '$1');
+  text = text.replace(/\{--(\$\$[^$]+\$\$)--\}/g, '$1');
+  text = text.replace(/\{~~(\$[^$]+\$)~>[^~]+~~\}/g, '$1');
+  text = text.replace(/\{~~(\$\$[^$]+\$\$)~>[^~]+~~\}/g, '$1');
+  // Fix substitutions where left side is exactly a markdown citation
+  text = text.replace(/\{~~(\[@[^\]]+\])~>[^~]+~~\}/g, '$1');
+  // Fix substitutions where left side STARTS with a markdown citation:
+  // keep the citation and re-annotate only the text that follows it.
+  text = text.replace(/\{~~(\[@[^\]]+\])\s*([^~]*)~>([^~]*)~~\}/g, (_match, cite, oldText, newText) => {
+    const before = oldText.trim();
+    const after = newText.trim();
+    if (before === '' && after === '') {
+      return cite;
+    }
+    // Identical trailing text needs no annotation; the untrimmed new text is emitted as-is.
+    return before !== after ? `${cite} {~~${before}~>${after}~~}` : `${cite} ${newText}`;
+  });
+  // Fix deletions of markdown citations
+  text = text.replace(/\{--(\[@[^\]]+\])--\}/g, '$1');
+  // Fix insertions of rendered citations
+  text = text.replace(/\{\+\+\([A-Z][^)]*\d{4}[^)]*\)\+\+\}/g, '');
+  // Clean up broken multi-part substitutions
+  text = text.replace(/\{~~(@[A-Za-z]+\d{4})~>[^~]+~~\}/g, '[$1]');
+  // Fix citations split across substitution boundaries
+  text = text.replace(/\{~~\[@~>[^~]*~~\}([A-Za-z]+\d{4})\]/g, '[@$1]');
+  // Clean up any remaining partial citations
+  text = text.replace(/\{~~;\s*@([A-Za-z]+\d{4})\]~>[^~]*~~\}/g, '; [@$1]');
+  // Remove rendered citation insertions (with Unicode support)
+  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\+\+\}/gu, '');
+  text = text.replace(/\{\+\+\(\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
+  // Trailing citation fragments
+  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
+  text = text.replace(/\{\+\+\d{4}[a-z]?(?:[;,]\s*(?:\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+)?\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');
+  // Just year with closing paren
+  text = text.replace(/\{\+\+\d{4}[a-z]?\)\.\s*\+\+\}/g, '');
+  text = text.replace(/\{\+\+\d{4}[a-z]?\)\s*\+\+\}/g, '');
+  // Leading citation fragments
+  text = text.replace(/\{\+\+\(?\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s*\+\+\}/gu, '');
+  // Semicolon-separated fragments
+  text = text.replace(/\{\+\+[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?\+\+\}/gu, '');
+  // Year ranges with authors
+  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\s*\+\+\}/gu, '');
+  text = text.replace(/\{\+\+\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?(?:[;,]\s*\p{Lu}\p{L}*(?:\s+et\s+al\.?)?\s+\d{4}[a-z]?)*\)\.\s*\+\+\}/gu, '');
+  // Clean up double spaces and orphaned punctuation left behind by the removals above
+  text = text.replace(/  +/g, ' ');
+  text = text.replace(/\s+\./g, '.');
+  text = text.replace(/\s+,/g, ',');
+  // Final cleanup - remove empty annotations
+  text = text.replace(/\{~~\s*~>\s*~~\}/g, '');
+  text = text.replace(/\{\+\+\s*\+\+\}/g, '');
+  text = text.replace(/\{--\s*--\}/g, '');
+  return text;
+}
+/**
+ * Strip markdown syntax to get plain text.
+ *
+ * Removes a leading `---`-delimited block, heading markers, emphasis markers,
+ * images, link targets (link text is kept), fenced code blocks, inline-code
+ * backticks, blockquote markers, horizontal rules, list markers and table
+ * pipes/separator rows, then collapses runs of three or more newlines and
+ * trims the result.
+ *
+ * @param md - Markdown text.
+ * @returns The text with the markdown syntax above removed.
+ */
+function stripMarkdownSyntax(md: string): string {
+  return md
+    .replace(/^---[\s\S]*?---\n*/m, '')
+    .replace(/^#{1,6}\s+/gm, '')
+    .replace(/(\*\*|__)(.*?)\1/g, '$2')
+    .replace(/(\*|_)(.*?)\1/g, '$2')
+    // Images must be removed before links: the link rule also matches the
+    // `[alt](src)` tail of an image and would leave a stray `!alt` behind.
+    .replace(/!\[([^\]]*)\]\([^)]+\)/g, '')
+    .replace(/\[([^\]]+)\]\([^)]+\)/g, '$1')
+    // Fenced blocks must be removed before inline code: the inline rule would
+    // otherwise consume the inner backticks of a fence and keep the code body.
+    .replace(/```[\s\S]*?```/g, '')
+    .replace(/`([^`]+)`/g, '$1')
+    .replace(/^>\s*/gm, '')
+    .replace(/^[-*_]{3,}\s*$/gm, '')
+    .replace(/^[\s]*[-*+]\s+/gm, '')
+    .replace(/^[\s]*\d+\.\s+/gm, '')
+    .replace(/\|/g, ' ')
+    .replace(/^[-:]+$/gm, '')
+    .replace(/\n{3,}/g, '\n\n')
+    .trim();
+}
+/**
+ * Inject Word tables (extracted from XML) into pandoc text output.
+ *
+ * For each table, the first cell of its first non-blank markdown row and the
+ * last cell of its last non-blank row are searched for in the text. The
+ * blank-line-delimited region spanning both hits is replaced by the table's
+ * markdown followed by a blank line. Tables whose cells cannot be located are
+ * skipped and the text is left as it was for them.
+ *
+ * @param pandocText - Text to inject the tables into.
+ * @param wordTables - Tables to inject; only their `markdown` field is read here.
+ * @returns The text with every locatable table region replaced.
+ */
+function injectWordTables(pandocText: string, wordTables: WordTable[]): string {
+  if (!wordTables || wordTables.length === 0) {
+    return pandocText;
+  }
+  // Split a pipe-delimited row into its non-empty, trimmed cells.
+  const cellsOf = (row: string): string[] =>
+    row
+      .split('|')
+      .map((c) => c.trim())
+      .filter((c) => c.length > 0);
+  let result = pandocText;
+  for (const table of wordTables) {
+    // Ignore blank rows so a trailing newline in the markdown does not yield an
+    // empty "last row" (which previously made the lookup search for "undefined").
+    const rows = table.markdown.split('\n').filter((row) => row.trim().length > 0);
+    if (rows.length === 0) continue;
+    const headerCells = cellsOf(rows[0]);
+    if (headerCells.length === 0) continue;
+    const firstCell = headerCells[0];
+    const startIdx = result.indexOf(firstCell);
+    if (startIdx === -1) continue;
+    const lastCells = cellsOf(rows[rows.length - 1]);
+    const lastCell = lastCells[lastCells.length - 1];
+    if (!lastCell) continue;
+    const endIdx = result.indexOf(lastCell, startIdx);
+    if (endIdx === -1) continue;
+    // Widen the hit to the enclosing blank-line-delimited region.
+    let regionStart = result.lastIndexOf('\n\n', startIdx);
+    if (regionStart === -1) regionStart = 0;
+    else regionStart += 2;
+    let regionEnd = result.indexOf('\n\n', endIdx + lastCell.length);
+    if (regionEnd === -1) regionEnd = result.length;
+    result = result.slice(0, regionStart) + table.markdown + '\n\n' + result.slice(regionEnd);
+  }
+  return result;
+}
+/**
+ * Generate annotated markdown by diffing original MD against Word text.
+ *
+ * Both inputs go through `normalizeWhitespace` and are compared with
+ * `diffWords`. Added segments are wrapped as `{++…++}`, removed segments as
+ * `{--…--}`, and unchanged segments are emitted verbatim.
+ *
+ * @param originalMd - Original markdown.
+ * @param wordText - Text to compare against the original.
+ * @param author - Defaults to 'Reviewer'; not referenced in this function body.
+ * @returns The concatenated, annotated segments.
+ */
+export function generateAnnotatedDiff(originalMd: string, wordText: string, author: string = 'Reviewer'): string {
+  const segments = diffWords(normalizeWhitespace(originalMd), normalizeWhitespace(wordText));
+  return segments
+    .map((segment) => {
+      if (segment.added) return `{++${segment.value}++}`;
+      if (segment.removed) return `{--${segment.value}--}`;
+      return segment.value;
+    })
+    .join('');
+}
+/**
+ * Smart paragraph-level diff that preserves markdown structure.
+ *
+ * Steps, in the order the code runs them:
+ *  1. `options.wordTables` are injected into `wordText` via `injectWordTables`.
+ *  2. Both texts go through `protectTables` and `protectImages`; Word image
+ *     placeholders are rewritten to the matching original placeholders using
+ *     the mapping from `matchWordImagesToOriginal`.
+ *  3. Only the original markdown goes through `protectAnchors`,
+ *     `protectCrossrefs`, `protectMath` and `protectCitations`; the Word text
+ *     instead gets `replaceRenderedMath` and `replaceRenderedCitations`.
+ *  4. Both texts are split on blank lines. Each original paragraph is compared
+ *     with at most three Word paragraphs starting at the current cursor, scored
+ *     by shared words; a score above 0.3 is required for a match.
+ *  5. A matched paragraph is kept verbatim when its markdown-stripped form
+ *     equals the whitespace-normalized Word paragraph; otherwise it is
+ *     word-diffed into `{++…++}` / `{--…--}` annotations after the markdown
+ *     prefix. An unmatched paragraph is wrapped in `{--…--}` unless its prefix
+ *     is a heading marker, in which case it is kept as-is. Word paragraphs left
+ *     after the cursor are appended as `{++…++}`.
+ *  6. Citations, math, cross-references, anchors, images and tables are restored.
+ *
+ * @param originalMd - Original markdown source.
+ * @param wordText - Text from the Word document (the inline comments below refer to it as pandoc output).
+ * @param author - Defaults to 'Reviewer'; not referenced in this function body.
+ * @param options - Optional `wordTables` (default `[]`) and `imageRegistry` (default `null`).
+ * @returns The resulting paragraphs joined by blank lines, with protected content restored.
+ */
+export function generateSmartDiff(
+  originalMd: string,
+  wordText: string,
+  author: string = 'Reviewer',
+  options: GenerateSmartDiffOptions = {}
+): string {
+  const { wordTables = [], imageRegistry = null } = options;
+  // Inject Word tables into pandoc output
+  let wordTextWithTables = injectWordTables(wordText, wordTables);
+  // Protect markdown tables
+  const { text: mdWithTablesProtected, tables } = protectTables(originalMd);
+  // Also protect tables in Word text
+  const { text: wordWithTablesProtected, tables: wordTableBlocks } = protectTables(wordTextWithTables);
+  // Protect images
+  const { text: mdWithImagesProtected, images: origImages } = protectImages(mdWithTablesProtected, imageRegistry);
+  const { text: wordWithImagesProtected, images: wordImages } = protectImages(wordWithTablesProtected, imageRegistry);
+  // Match Word images to original images
+  const imageMapping = matchWordImagesToOriginal(origImages, wordImages, imageRegistry);
+  // Replace Word image placeholders with matching original placeholders
+  let wordWithMappedImages = wordWithImagesProtected;
+  for (const [wordPlaceholder, origPlaceholder] of imageMapping) {
+    wordWithMappedImages = wordWithMappedImages.split(wordPlaceholder).join(origPlaceholder); // literal replace-all, no regex escaping needed
+  }
+  // Protect figure/table anchors
+  const { text: mdWithAnchorsProtected, anchors: figAnchors } = protectAnchors(mdWithImagesProtected);
+  // Protect cross-references
+  const { text: mdWithXrefsProtected, crossrefs } = protectCrossrefs(mdWithAnchorsProtected);
+  // Protect math
+  const { text: mdWithMathProtected, mathBlocks } = protectMath(mdWithXrefsProtected);
+  // Protect citations
+  const { text: mdProtected, citations } = protectCitations(mdWithMathProtected);
+  // Replace rendered elements in Word text
+  let wordProtected = wordWithMappedImages;
+  wordProtected = replaceRenderedMath(wordProtected, mathBlocks);
+  wordProtected = replaceRenderedCitations(wordProtected, citations.length);
+  // Split into paragraphs (one or more blank lines act as the separator)
+  const originalParas = mdProtected.split(/\n\n+/);
+  const wordParas = wordProtected.split(/\n\n+/);
+  const result: string[] = [];
+  // Try to match paragraphs intelligently; wordIdx is the first Word paragraph not yet consumed
+  let wordIdx = 0;
+  for (let i = 0; i < originalParas.length; i++) {
+    const orig = originalParas[i] || '';
+    const { prefix: mdPrefix, content: origContent } = extractMarkdownPrefix(orig.split('\n')[0]); // only the paragraph's first line is used for matching
+    // Find best matching word paragraph
+    let bestMatch = -1;
+    let bestScore = 0;
+    for (let j = wordIdx; j < Math.min(wordIdx + 3, wordParas.length); j++) { // window of at most three Word paragraphs
+      const wordPara = wordParas[j] || '';
+      const origWords = new Set(origContent.toLowerCase().split(/\s+/)); // NOTE(review): does not depend on j, yet is rebuilt on every inner iteration
+      const wordWords = wordPara.toLowerCase().split(/\s+/);
+      const common = wordWords.filter((w) => origWords.has(w)).length;
+      const score = common / Math.max(origWords.size, wordWords.length); // shared words over the larger of the two word counts
+      if (score > bestScore && score > 0.3) {
+        bestScore = score;
+        bestMatch = j;
+      }
+    }
+    if (bestMatch === -1) {
+      if (mdPrefix && wordIdx < wordParas.length) {
+        const wordPara = wordParas[wordIdx];
+        if (wordPara.toLowerCase().includes(origContent.toLowerCase().slice(0, 20))) { // fallback for prefixed paragraphs: first 20 chars, case-insensitive
+          bestMatch = wordIdx;
+        }
+      }
+    }
+    if (bestMatch >= 0) {
+      const word = wordParas[bestMatch];
+      const origStripped = stripMarkdownSyntax(orig);
+      const wordNormalized = normalizeWhitespace(word);
+      if (origStripped === wordNormalized) {
+        result.push(orig); // textually unchanged: keep the original markdown untouched
+      } else {
+        const changes = diffWords(origStripped, wordNormalized);
+        let annotated = mdPrefix; // re-attach the prefix taken from the first line
+        for (const part of changes) {
+          if (part.added) {
+            annotated += `{++${part.value}++}`;
+          } else if (part.removed) {
+            annotated += `{--${part.value}--}`;
+          } else {
+            annotated += part.value;
+          }
+        }
+        result.push(annotated);
+      }
+      wordIdx = bestMatch + 1; // NOTE(review): Word paragraphs between the old cursor and bestMatch are skipped and never emitted by the additions loop below - confirm this is intended
+    } else {
+      // Paragraph has no match: keep headings as-is, mark anything else as deleted
+      if (mdPrefix && mdPrefix.match(/^#{1,6}\s+/)) {
+        result.push(orig);
+      } else {
+        result.push(`{--${orig}--}`);
+      }
+    }
+  }
+  // Any remaining word paragraphs are additions
+  for (let j = wordIdx; j < wordParas.length; j++) {
+    const word = wordParas[j];
+    if (word.trim()) {
+      result.push(`{++${word}++}`);
+    }
+  }
+  // Restore protected content (citations, math, cross-references, anchors, then images and tables from both texts)
+  let finalResult = result.join('\n\n');
+  finalResult = restoreCitations(finalResult, citations);
+  finalResult = restoreMath(finalResult, mathBlocks);
+  finalResult = restoreCrossrefs(finalResult, crossrefs);
+  finalResult = restoreAnchors(finalResult, figAnchors);
+  finalResult = restoreImages(finalResult, origImages);
+  finalResult = restoreImages(finalResult, wordImages);
+  finalResult = restoreTables(finalResult, tables);
+  finalResult = restoreTables(finalResult, wordTableBlocks);
+  return finalResult;
+}
+/**
+ * Clean up redundant adjacent annotations.
+ *
+ * First applies a fixed, ordered list of rewrites: adjacent delete/insert pairs
+ * become `{~~old~>new~~}` substitutions, a few malformed marker combinations are
+ * repaired, and empty deletions/insertions are dropped. Then collapses runs of
+ * spaces line by line, leaving untouched any line that is a dash separator row,
+ * lies inside a dash-separated table, is a spaced-column row followed by a
+ * separator row, or starts with `|`.
+ *
+ * @param text - Annotated text.
+ * @returns The cleaned text.
+ */
+export function cleanupAnnotations(text: string): string {
+  const separatorRow = /^[-]+(\s+[-]+)+\s*$/;
+  const spacedColumns = /\S+\s{2,}\S+/;
+  const emphasisOnlyLine = /^\*[^*]+\*\s*$/;
+  // Applied strictly in this order; later rules see the output of earlier ones.
+  const rewrites: Array<[RegExp, string]> = [
+    [/\{--(.+?)--\}\s*\{\+\+(.+?)\+\+\}/g, '{~~$1~>$2~~}'],
+    [/\{\+\+(.+?)\+\+\}\s*\{--(.+?)--\}/g, '{~~$2~>$1~~}'],
+    [/\{--([^}]+?)~>([^}]+?)~~\}/g, '{~~$1~>$2~~}'],
+    [/\{~~([^~]+)\s*--\}/g, '{--$1--}'],
+    [/\{\+\+([^+]+)~~\}/g, '{++$1++}'],
+    [/\{--\s*--\}/g, ''],
+    [/\{\+\+\s*\+\+\}/g, ''],
+  ];
+  let rewritten = text;
+  for (let r = 0; r < rewrites.length; r++) {
+    rewritten = rewritten.replace(rewrites[r][0], rewrites[r][1]);
+  }
+  const rows = rewritten.split('\n');
+  // After a blank row inside a table: does table-like content resume within the next rows?
+  const tableContinuesAfter = (blankIdx: number): boolean => {
+    const limit = Math.min(rows.length, blankIdx + 20);
+    for (let k = blankIdx + 1; k < limit; k++) {
+      const candidate = rows[k].trim();
+      if (candidate === '') continue;
+      if (separatorRow.test(candidate) || spacedColumns.test(candidate) || emphasisOnlyLine.test(candidate)) {
+        return true;
+      }
+      // Rows indented by two spaces are skipped over; anything else ends the search.
+      if (!rows[k].startsWith('  ')) return false;
+    }
+    return false;
+  };
+  // Is the next non-blank row a dash separator row?
+  const separatorFollows = (rowIdx: number): boolean => {
+    let k = rowIdx + 1;
+    while (k < rows.length && rows[k].trim() === '') {
+      k++;
+    }
+    return k < rows.length && separatorRow.test(rows[k].trim());
+  };
+  let insideTable = false;
+  const cleaned: string[] = [];
+  for (let rowIdx = 0; rowIdx < rows.length; rowIdx++) {
+    const row = rows[rowIdx];
+    const trimmed = row.trim();
+    if (separatorRow.test(trimmed)) {
+      insideTable = true;
+      cleaned.push(row);
+      continue;
+    }
+    if (insideTable) {
+      if (trimmed === '' && !tableContinuesAfter(rowIdx)) {
+        insideTable = false;
+      }
+      cleaned.push(row);
+      continue;
+    }
+    const keepSpacing = (spacedColumns.test(row) && separatorFollows(rowIdx)) || trimmed.startsWith('|');
+    cleaned.push(keepSpacing ? row : row.replace(/  +/g, ' '));
+  }
+  return cleaned.join('\n');
+}