npm - docrev - Versions diffs - 0.10.0 → 0.10.1 - Mend

docrev 0.10.0 → 0.10.1

This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (126)

package/.gitattributes +1 -1
package/CHANGELOG.md +173 -164
package/PLAN-tables-and-postprocess.md +850 -850
package/README.md +431 -431
package/bin/rev.js +11 -11
package/bin/rev.ts +145 -145
package/completions/rev.bash +127 -127
package/completions/rev.ps1 +210 -210
package/completions/rev.zsh +207 -207
package/dist/lib/anchor-match.d.ts +1 -1
package/dist/lib/anchor-match.d.ts.map +1 -1
package/dist/lib/anchor-match.js +17 -47
package/dist/lib/anchor-match.js.map +1 -1
package/dist/lib/build.js +4 -4
package/dist/lib/commands/context.d.ts +1 -1
package/dist/lib/commands/context.d.ts.map +1 -1
package/dist/lib/commands/context.js +1 -1
package/dist/lib/commands/context.js.map +1 -1
package/dist/lib/commands/sections.js +7 -7
package/dist/lib/commands/sections.js.map +1 -1
package/dist/lib/commands/sync.d.ts.map +1 -1
package/dist/lib/commands/sync.js +15 -14
package/dist/lib/commands/sync.js.map +1 -1
package/dist/lib/commands/utilities.js +164 -164
package/dist/lib/commands/verify-anchors.js +6 -6
package/dist/lib/commands/verify-anchors.js.map +1 -1
package/dist/lib/commands/word-tools.js +8 -8
package/dist/lib/grammar.js +3 -3
package/dist/lib/macro-filter.lua +201 -201
package/dist/lib/pdf-comments.js +44 -44
package/dist/lib/plugins.js +57 -57
package/dist/lib/pptx-color-filter.lua +37 -37
package/dist/lib/pptx-themes.js +115 -115
package/dist/lib/sections.d.ts +35 -0
package/dist/lib/sections.d.ts.map +1 -1
package/dist/lib/sections.js +81 -0
package/dist/lib/sections.js.map +1 -1
package/dist/lib/spelling.js +2 -2
package/dist/lib/templates.js +387 -387
package/dist/lib/themes.js +51 -51
package/docs-src/build.py +113 -113
package/docs-src/extra.css +208 -208
package/docs-src/md-to-html.lua +6 -6
package/docs-src/template.html +116 -116
package/eslint.config.js +27 -27
package/lib/anchor-match.ts +276 -308
package/lib/annotations.ts +644 -644
package/lib/build.ts +1766 -1766
package/lib/citations.ts +160 -160
package/lib/commands/build.ts +855 -855
package/lib/commands/citations.ts +515 -515
package/lib/commands/comments.ts +1050 -1050
package/lib/commands/context.ts +176 -174
package/lib/commands/core.ts +309 -309
package/lib/commands/doi.ts +435 -435
package/lib/commands/file-ops.ts +372 -372
package/lib/commands/history.ts +320 -320
package/lib/commands/index.ts +87 -87
package/lib/commands/init.ts +259 -259
package/lib/commands/merge-resolve.ts +378 -378
package/lib/commands/preview.ts +178 -178
package/lib/commands/project-info.ts +244 -244
package/lib/commands/quality.ts +517 -517
package/lib/commands/response.ts +454 -454
package/lib/commands/section-boundaries.ts +82 -82
package/lib/commands/sections.ts +451 -451
package/lib/commands/sync.ts +709 -706
package/lib/commands/text-ops.ts +449 -449
package/lib/commands/utilities.ts +448 -448
package/lib/commands/verify-anchors.ts +272 -272
package/lib/commands/word-tools.ts +340 -340
package/lib/comment-realign.ts +517 -517
package/lib/config.ts +84 -84
package/lib/crossref.ts +781 -781
package/lib/csl.ts +191 -191
package/lib/dependencies.ts +98 -98
package/lib/diff-engine.ts +465 -465
package/lib/doi-cache.ts +115 -115
package/lib/doi.ts +897 -897
package/lib/equations.ts +506 -506
package/lib/errors.ts +346 -346
package/lib/format.ts +541 -541
package/lib/git.ts +326 -326
package/lib/grammar.ts +303 -303
package/lib/image-registry.ts +180 -180
package/lib/import.ts +911 -911
package/lib/journals.ts +543 -543
package/lib/macro-filter.lua +201 -201
package/lib/macros.ts +273 -273
package/lib/merge.ts +633 -633
package/lib/orcid.ts +144 -144
package/lib/pdf-comments.ts +263 -263
package/lib/pdf-import.ts +524 -524
package/lib/plugins.ts +362 -362
package/lib/postprocess.ts +188 -188
package/lib/pptx-color-filter.lua +37 -37
package/lib/pptx-template.ts +469 -469
package/lib/pptx-themes.ts +483 -483
package/lib/protect-restore.ts +520 -520
package/lib/rate-limiter.ts +94 -94
package/lib/response.ts +197 -197
package/lib/restore-references.ts +240 -240
package/lib/review.ts +327 -327
package/lib/schema.ts +488 -488
package/lib/scientific-words.ts +73 -73
package/lib/sections.ts +425 -335
package/lib/slides.ts +756 -756
package/lib/spelling.ts +334 -334
package/lib/templates.ts +526 -526
package/lib/themes.ts +742 -742
package/lib/trackchanges.ts +247 -247
package/lib/tui.ts +450 -450
package/lib/types.ts +550 -550
package/lib/undo.ts +250 -250
package/lib/utils.ts +69 -69
package/lib/variables.ts +179 -179
package/lib/word-extraction.ts +806 -806
package/lib/word.ts +643 -643
package/lib/wordcomments.ts +840 -840
package/mkdocs.yml +64 -64
package/package.json +137 -137
package/scripts/postbuild.js +47 -47
package/skill/REFERENCE.md +539 -539
package/skill/SKILL.md +295 -295
package/tsconfig.json +26 -26
package/types/index.d.ts +525 -525

package/lib/pdf-import.ts CHANGED Viewed

@@ -1,524 +1,524 @@
-/**
- * PDF comment extraction for docrev
- *
- * Extracts annotations (comments, highlights, sticky notes) from PDF files
- * and converts them to CriticMarkup format for insertion into markdown.
- * Also extracts the actual text content under highlights using pdfjs-dist.
- */
-import * as fs from 'fs';
-import { PDFDocument } from 'pdf-lib';
-/**
- * Annotation types we care about
- */
-const COMMENT_TYPES = [
-  'Text',      // Sticky notes
-  'FreeText',  // Text boxes
-  'Highlight', // Highlighted text with comment
-  'Underline', // Underlined text with comment
-  'StrikeOut', // Strikethrough (deletion suggestion)
-  'Squiggly',  // Squiggly underline
-  'Popup',     // Popup comments (attached to other annotations)
-];
-/**
- * Raw PDF annotation extracted from pdf-lib
- */
-export interface PdfAnnotation {
-  type: string;
-  page: number;
-  contents: string;
-  author: string;
-  date: string;
-  rect: number[];
-  quadPoints: number[];
-}
-/**
- * PDF comment converted to CriticMarkup format
- */
-export interface PdfComment {
-  author: string;
-  text: string;
-  page: number;
-  type: string;
-  date?: string;
-}
-/**
- * PDF annotation with extracted highlighted text
- */
-export interface PdfAnnotationWithText extends PdfAnnotation {
-  highlightedText: string;
-}
-/**
- * Options for PDF extraction
- */
-export interface ExtractOptions {
-  timeout?: number;
-}
-/**
- * Options for markdown insertion
- */
-export interface InsertOptions {
-  sectionPerPage?: boolean;
-}
-/**
- * Statistics about PDF comments
- */
-export interface PdfCommentStats {
-  total: number;
-  byType: Record<string, number>;
-  byAuthor: Record<string, number>;
-  byPage: Record<number, number>;
-}
-/**
- * Text item from pdfjs-dist
- */
-interface PdfTextItem {
-  str: string;
-  x: number;
-  y: number;
-  width: number;
-  height: number;
-}
-/**
- * Extract raw annotations from a PDF file
- * @param pdfPath - Path to PDF file
- * @param options - { timeout: number (ms) }
- * @returns Array of PDF annotations
- */
-export async function extractPdfAnnotations(
-  pdfPath: string,
-  options: ExtractOptions = {}
-): Promise<PdfAnnotation[]> {
-  const { timeout = 30000 } = options;
-  // Validate file exists
-  if (!fs.existsSync(pdfPath)) {
-    throw new Error(`File not found: ${pdfPath}`);
-  }
-  let pdfBytes: Buffer;
-  try {
-    pdfBytes = fs.readFileSync(pdfPath);
-  } catch (err) {
-    const error = err as Error;
-    throw new Error(`Cannot read PDF file: ${error.message}`);
-  }
-  // Create a promise that rejects after timeout
-  const timeoutPromise = new Promise<never>((_, reject) => {
-    setTimeout(() => reject(new Error(`PDF extraction timed out after ${timeout / 1000}s`)), timeout);
-  });
-  let pdfDoc: PDFDocument;
-  try {
-    pdfDoc = await Promise.race([
-      PDFDocument.load(pdfBytes, { ignoreEncryption: true }),
-      timeoutPromise,
-    ]);
-  } catch (err) {
-    const error = err as Error;
-    if (error.message.includes('timed out')) {
-      throw error;
-    }
-    throw new Error(`Invalid or corrupted PDF file: ${error.message}`);
-  }
-  const annotations: PdfAnnotation[] = [];
-  const pages = pdfDoc.getPages();
-  for (let pageNum = 0; pageNum < pages.length; pageNum++) {
-    const page = pages[pageNum];
-    const annots = page.node.Annots();
-    if (!annots) continue;
-    const annotRefs = annots.asArray();
-    for (const annotRef of annotRefs) {
-      try {
-        const annot = (annotRef as any).dict || annotRef;
-        if (!annot) continue;
-        // Get annotation type
-        const subtypeName = annot.get(pdfDoc.context.obj('Subtype'));
-        const subtype = subtypeName?.toString?.()?.replace('/', '') || '';
-        if (!COMMENT_TYPES.includes(subtype)) continue;
-        // Extract contents (the comment text)
-        const contentsObj = annot.get(pdfDoc.context.obj('Contents'));
-        const contents = contentsObj?.toString?.() || contentsObj?.decodeText?.() || '';
-        // Extract author (T field in PDF spec)
-        const authorObj = annot.get(pdfDoc.context.obj('T'));
-        const author = authorObj?.toString?.() || authorObj?.decodeText?.() || 'Unknown';
-        // Extract modification date
-        const dateObj = annot.get(pdfDoc.context.obj('M'));
-        const dateStr = dateObj?.toString?.() || '';
-        const date = parsePdfDate(dateStr);
-        // Extract rectangle (position on page)
-        const rectObj = annot.get(pdfDoc.context.obj('Rect'));
-        const rect = rectObj?.asArray?.()?.map((n: any) => n?.asNumber?.() || 0) || [0, 0, 0, 0];
-        // Extract QuadPoints for highlights (the actual text bounds)
-        const quadObj = annot.get(pdfDoc.context.obj('QuadPoints'));
-        const quadPoints = quadObj?.asArray?.()?.map((n: any) => n?.asNumber?.() || 0) || [];
-        // Skip empty annotations
-        if (!contents.trim() && subtype !== 'StrikeOut') continue;
-        annotations.push({
-          type: subtype,
-          page: pageNum + 1,
-          contents: cleanPdfString(contents),
-          author: cleanPdfString(author),
-          date,
-          rect,
-          quadPoints,
-        });
-      } catch (err) {
-        // Skip malformed annotations
-        continue;
-      }
-    }
-  }
-  // Sort by page, then by vertical position (top to bottom)
-  annotations.sort((a, b) => {
-    if (a.page !== b.page) return a.page - b.page;
-    // Higher Y = higher on page in PDF coords
-    return (b.rect[1] || 0) - (a.rect[1] || 0);
-  });
-  return annotations;
-}
-/**
- * Parse PDF date string (D:YYYYMMDDHHmmSS format)
- * @param dateStr - PDF date string
- * @returns ISO date string
- */
-function parsePdfDate(dateStr: string): string {
-  if (!dateStr) return '';
-  // Remove D: prefix and timezone info
-  const clean = dateStr.replace(/^D:/, '').replace(/[Z+-].*$/, '');
-  if (clean.length >= 8) {
-    const year = clean.slice(0, 4);
-    const month = clean.slice(4, 6);
-    const day = clean.slice(6, 8);
-    return `${year}-${month}-${day}`;
-  }
-  return '';
-}
-/**
- * Clean PDF string (remove parentheses, decode escape sequences)
- * @param str - Raw PDF string
- * @returns Cleaned string
- */
-function cleanPdfString(str: string): string {
-  if (!str) return '';
-  return str
-    .replace(/^\(/, '')  // Remove leading paren
-    .replace(/\)$/, '')  // Remove trailing paren
-    .replace(/\\n/g, '\n')  // Newlines
-    .replace(/\\r/g, '')    // Carriage returns
-    .replace(/\\t/g, ' ')   // Tabs
-    .replace(/\\\(/g, '(')  // Escaped parens
-    .replace(/\\\)/g, ')')
-    .replace(/\\\\/g, '\\') // Escaped backslash
-    .trim();
-}
-/**
- * Convert PDF annotations to CriticMarkup comments
- * @param annotations - From extractPdfAnnotations
- * @returns Array of PDF comments
- */
-export function annotationsToComments(annotations: PdfAnnotation[]): PdfComment[] {
-  return annotations
-    .filter(a => a.contents.trim())
-    .map(a => ({
-      author: a.author || 'Reviewer',
-      text: a.contents,
-      page: a.page,
-      type: a.type,
-      date: a.date,
-    }));
-}
-/**
- * Extract comments from PDF and format for display
- * @param pdfPath - Path to PDF file
- * @returns Array of PDF comments
- */
-export async function extractPdfComments(pdfPath: string): Promise<PdfComment[]> {
-  const annotations = await extractPdfAnnotations(pdfPath);
-  return annotationsToComments(annotations);
-}
-/**
- * Insert PDF comments into markdown based on page/position heuristics
- * Since PDFs don't have direct text anchors like Word, we use page numbers
- * and append comments to the end of corresponding sections
- *
- * @param markdown - The markdown content
- * @param comments - Comments from extractPdfComments
- * @param options - { sectionPerPage: boolean }
- * @returns Markdown with comments inserted
- */
-export function insertPdfCommentsIntoMarkdown(
-  markdown: string,
-  comments: PdfComment[],
-  options: InsertOptions = {}
-): string {
-  if (comments.length === 0) return markdown;
-  // Group comments by page
-  const commentsByPage = new Map<number, PdfComment[]>();
-  for (const c of comments) {
-    if (!commentsByPage.has(c.page)) {
-      commentsByPage.set(c.page, []);
-    }
-    commentsByPage.get(c.page)!.push(c);
-  }
-  // Strategy: Append all comments at the end with page references
-  // This is the safest approach since we can't reliably map PDF positions to markdown
-  const lines = markdown.split('\n');
-  const commentBlock: string[] = [];
-  commentBlock.push('');
-  commentBlock.push('<!-- PDF Comments -->');
-  for (const [page, pageComments] of Array.from(commentsByPage.entries())) {
-    for (const c of pageComments) {
-      const authorPrefix = c.author ? `${c.author}: ` : '';
-      const pageRef = `[p.${page}]`;
-      commentBlock.push(`{>>${authorPrefix}${pageRef} ${c.text}<<}`);
-    }
-  }
-  return lines.join('\n') + commentBlock.join('\n');
-}
-/**
- * Format PDF comments for CLI display
- * @param comments - Array of PDF comments
- * @returns Formatted string
- */
-export function formatPdfComments(comments: PdfComment[]): string {
-  if (comments.length === 0) {
-    return 'No comments found in PDF.';
-  }
-  const lines: string[] = [];
-  let currentPage = 0;
-  for (const c of comments) {
-    if (c.page !== currentPage) {
-      if (currentPage > 0) lines.push('');
-      lines.push(`Page ${c.page}:`);
-      currentPage = c.page;
-    }
-    const typeIcon = getTypeIcon(c.type);
-    const author = c.author || 'Unknown';
-    lines.push(`  ${typeIcon} [${author}] ${c.text}`);
-  }
-  return lines.join('\n');
-}
-/**
- * Get icon for annotation type
- * @param type - Annotation type
- * @returns Icon string
- */
-function getTypeIcon(type: string): string {
-  switch (type) {
-    case 'Text': return '📝';      // Sticky note
-    case 'FreeText': return '💬';  // Text box
-    case 'Highlight': return '🖍️'; // Highlight
-    case 'Underline': return '📍'; // Underline
-    case 'StrikeOut': return '❌'; // Strikethrough
-    case 'Squiggly': return '〰️';  // Squiggly
-    default: return '💬';
-  }
-}
-/**
- * Get statistics about PDF comments
- * @param comments - Array of PDF comments
- * @returns Statistics object
- */
-export function getPdfCommentStats(comments: PdfComment[]): PdfCommentStats {
-  const stats: PdfCommentStats = {
-    total: comments.length,
-    byType: {},
-    byAuthor: {},
-    byPage: {},
-  };
-  for (const c of comments) {
-    stats.byType[c.type] = (stats.byType[c.type] || 0) + 1;
-    stats.byAuthor[c.author] = (stats.byAuthor[c.author] || 0) + 1;
-    stats.byPage[c.page] = (stats.byPage[c.page] || 0) + 1;
-  }
-  return stats;
-}
-/**
- * Extract text content from a PDF page
- * @param page - pdfjs page object
- * @returns Array of text items with positions
- */
-async function getPageTextItems(page: any): Promise<PdfTextItem[]> {
-  const textContent = await page.getTextContent();
-  return textContent.items.map((item: any) => ({
-    str: item.str,
-    x: item.transform[4],
-    y: item.transform[5],
-    width: item.width,
-    height: item.height,
-  }));
-}
-/**
- * Check if a point is inside a quadrilateral defined by QuadPoints
- * QuadPoints format: [x1,y1, x2,y2, x3,y3, x4,y4] for each quad
- * @param x - X coordinate
- * @param y - Y coordinate
- * @param quad - 8 numbers defining corners
- * @returns True if point is inside quad
- */
-function isPointInQuad(x: number, y: number, quad: number[]): boolean {
-  if (quad.length < 8) return false;
-  // Get bounding box from quad points
-  const xs = [quad[0], quad[2], quad[4], quad[6]];
-  const ys = [quad[1], quad[3], quad[5], quad[7]];
-  const minX = Math.min(...xs);
-  const maxX = Math.max(...xs);
-  const minY = Math.min(...ys);
-  const maxY = Math.max(...ys);
-  return x >= minX && x <= maxX && y >= minY && y <= maxY;
-}
-/**
- * Extract highlighted text from a PDF using QuadPoints
- * @param pdfPath - Path to PDF file
- * @param annotations - Annotations with quadPoints from extractPdfAnnotations
- * @returns Annotations with highlighted text extracted
- */
-export async function extractHighlightedText(
-  pdfPath: string,
-  annotations: PdfAnnotation[]
-): Promise<PdfAnnotationWithText[]> {
-  const pdfBytes = fs.readFileSync(pdfPath);
-  const data = new Uint8Array(pdfBytes);
-  // Load pdfjs-dist dynamically (requires DOMMatrix, not available in Node 18)
-  const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs');
-  const loadingTask = getDocument({ data, useSystemFonts: true });
-  const pdfDoc = await loadingTask.promise;
-  const results: PdfAnnotationWithText[] = [];
-  for (const annot of annotations) {
-    // Only process text markup annotations (Highlight, Underline, StrikeOut, Squiggly)
-    if (!['Highlight', 'Underline', 'StrikeOut', 'Squiggly'].includes(annot.type)) {
-      results.push({ ...annot, highlightedText: '' });
-      continue;
-    }
-    if (!annot.quadPoints || annot.quadPoints.length < 8) {
-      results.push({ ...annot, highlightedText: '' });
-      continue;
-    }
-    try {
-      const page = await pdfDoc.getPage(annot.page);
-      const textItems = await getPageTextItems(page);
-      // Split quadPoints into individual quads (8 numbers each)
-      const quads: number[][] = [];
-      for (let i = 0; i < annot.quadPoints.length; i += 8) {
-        quads.push(annot.quadPoints.slice(i, i + 8));
-      }
-      // Find text items that fall within any of the quads
-      const matchedText: string[] = [];
-      for (const item of textItems) {
-        // Check if text item center is in any quad
-        const centerX = item.x + (item.width || 0) / 2;
-        const centerY = item.y + (item.height || 0) / 2;
-        for (const quad of quads) {
-          if (isPointInQuad(centerX, centerY, quad) || isPointInQuad(item.x, item.y, quad)) {
-            matchedText.push(item.str);
-            break;
-          }
-        }
-      }
-      results.push({
-        ...annot,
-        highlightedText: matchedText.join(' ').trim(),
-      });
-    } catch (err) {
-      // If text extraction fails, just return empty
-      results.push({ ...annot, highlightedText: '' });
-    }
-  }
-  return results;
-}
-/**
- * Extract annotations with highlighted text in one call
- * @param pdfPath - Path to PDF file
- * @returns Annotations with highlighted text
- */
-export async function extractPdfAnnotationsWithText(pdfPath: string): Promise<PdfAnnotationWithText[]> {
-  const annotations = await extractPdfAnnotations(pdfPath);
-  return extractHighlightedText(pdfPath, annotations);
-}
-/**
- * Format annotation with highlighted text for display
- * @param annot - Annotation with highlightedText
- * @returns Formatted string
- */
-export function formatAnnotationWithText(annot: PdfAnnotationWithText): string {
-  const typeIcon = getTypeIcon(annot.type);
-  const author = annot.author || 'Unknown';
-  const parts: string[] = [`${typeIcon} [${author}]`];
-  if (annot.highlightedText) {
-    parts.push(`"${annot.highlightedText}"`);
-  }
-  if (annot.contents) {
-    parts.push(`→ ${annot.contents}`);
-  }
-  return parts.join(' ');
-}
+/**
+ * PDF comment extraction for docrev
+ *
+ * Extracts annotations (comments, highlights, sticky notes) from PDF files
+ * and converts them to CriticMarkup format for insertion into markdown.
+ * Also extracts the actual text content under highlights using pdfjs-dist.
+ */
+import * as fs from 'fs';
+import { PDFDocument } from 'pdf-lib';
+/**
+ * Annotation types we care about
+ */
+const COMMENT_TYPES = [
+  'Text',      // Sticky notes
+  'FreeText',  // Text boxes
+  'Highlight', // Highlighted text with comment
+  'Underline', // Underlined text with comment
+  'StrikeOut', // Strikethrough (deletion suggestion)
+  'Squiggly',  // Squiggly underline
+  'Popup',     // Popup comments (attached to other annotations)
+];
+/**
+ * Raw PDF annotation extracted from pdf-lib
+ */
+export interface PdfAnnotation {
+  type: string;
+  page: number;
+  contents: string;
+  author: string;
+  date: string;
+  rect: number[];
+  quadPoints: number[];
+}
+/**
+ * PDF comment converted to CriticMarkup format
+ */
+export interface PdfComment {
+  author: string;
+  text: string;
+  page: number;
+  type: string;
+  date?: string;
+}
+/**
+ * PDF annotation with extracted highlighted text
+ */
+export interface PdfAnnotationWithText extends PdfAnnotation {
+  highlightedText: string;
+}
+/**
+ * Options for PDF extraction
+ */
+export interface ExtractOptions {
+  timeout?: number;
+}
+/**
+ * Options for markdown insertion
+ */
+export interface InsertOptions {
+  sectionPerPage?: boolean;
+}
+/**
+ * Statistics about PDF comments
+ */
+export interface PdfCommentStats {
+  total: number;
+  byType: Record<string, number>;
+  byAuthor: Record<string, number>;
+  byPage: Record<number, number>;
+}
+/**
+ * Text item from pdfjs-dist
+ */
+interface PdfTextItem {
+  str: string;
+  x: number;
+  y: number;
+  width: number;
+  height: number;
+}
+/**
+ * Extract raw annotations from a PDF file
+ * @param pdfPath - Path to PDF file
+ * @param options - { timeout: number (ms) }
+ * @returns Array of PDF annotations
+ */
+export async function extractPdfAnnotations(
+  pdfPath: string,
+  options: ExtractOptions = {}
+): Promise<PdfAnnotation[]> {
+  const { timeout = 30000 } = options;
+  // Validate file exists
+  if (!fs.existsSync(pdfPath)) {
+    throw new Error(`File not found: ${pdfPath}`);
+  }
+  let pdfBytes: Buffer;
+  try {
+    pdfBytes = fs.readFileSync(pdfPath);
+  } catch (err) {
+    const error = err as Error;
+    throw new Error(`Cannot read PDF file: ${error.message}`);
+  }
+  // Create a promise that rejects after timeout
+  const timeoutPromise = new Promise<never>((_, reject) => {
+    setTimeout(() => reject(new Error(`PDF extraction timed out after ${timeout / 1000}s`)), timeout);
+  });
+  let pdfDoc: PDFDocument;
+  try {
+    pdfDoc = await Promise.race([
+      PDFDocument.load(pdfBytes, { ignoreEncryption: true }),
+      timeoutPromise,
+    ]);
+  } catch (err) {
+    const error = err as Error;
+    if (error.message.includes('timed out')) {
+      throw error;
+    }
+    throw new Error(`Invalid or corrupted PDF file: ${error.message}`);
+  }
+  const annotations: PdfAnnotation[] = [];
+  const pages = pdfDoc.getPages();
+  for (let pageNum = 0; pageNum < pages.length; pageNum++) {
+    const page = pages[pageNum];
+    const annots = page.node.Annots();
+    if (!annots) continue;
+    const annotRefs = annots.asArray();
+    for (const annotRef of annotRefs) {
+      try {
+        const annot = (annotRef as any).dict || annotRef;
+        if (!annot) continue;
+        // Get annotation type
+        const subtypeName = annot.get(pdfDoc.context.obj('Subtype'));
+        const subtype = subtypeName?.toString?.()?.replace('/', '') || '';
+        if (!COMMENT_TYPES.includes(subtype)) continue;
+        // Extract contents (the comment text)
+        const contentsObj = annot.get(pdfDoc.context.obj('Contents'));
+        const contents = contentsObj?.toString?.() || contentsObj?.decodeText?.() || '';
+        // Extract author (T field in PDF spec)
+        const authorObj = annot.get(pdfDoc.context.obj('T'));
+        const author = authorObj?.toString?.() || authorObj?.decodeText?.() || 'Unknown';
+        // Extract modification date
+        const dateObj = annot.get(pdfDoc.context.obj('M'));
+        const dateStr = dateObj?.toString?.() || '';
+        const date = parsePdfDate(dateStr);
+        // Extract rectangle (position on page)
+        const rectObj = annot.get(pdfDoc.context.obj('Rect'));
+        const rect = rectObj?.asArray?.()?.map((n: any) => n?.asNumber?.() || 0) || [0, 0, 0, 0];
+        // Extract QuadPoints for highlights (the actual text bounds)
+        const quadObj = annot.get(pdfDoc.context.obj('QuadPoints'));
+        const quadPoints = quadObj?.asArray?.()?.map((n: any) => n?.asNumber?.() || 0) || [];
+        // Skip empty annotations
+        if (!contents.trim() && subtype !== 'StrikeOut') continue;
+        annotations.push({
+          type: subtype,
+          page: pageNum + 1,
+          contents: cleanPdfString(contents),
+          author: cleanPdfString(author),
+          date,
+          rect,
+          quadPoints,
+        });
+      } catch (err) {
+        // Skip malformed annotations
+        continue;
+      }
+    }
+  }
+  // Sort by page, then by vertical position (top to bottom)
+  annotations.sort((a, b) => {
+    if (a.page !== b.page) return a.page - b.page;
+    // Higher Y = higher on page in PDF coords
+    return (b.rect[1] || 0) - (a.rect[1] || 0);
+  });
+  return annotations;
+}
+/**
+ * Parse PDF date string (D:YYYYMMDDHHmmSS format)
+ * @param dateStr - PDF date string
+ * @returns ISO date string
+ */
+function parsePdfDate(dateStr: string): string {
+  if (!dateStr) return '';
+  // Remove D: prefix and timezone info
+  const clean = dateStr.replace(/^D:/, '').replace(/[Z+-].*$/, '');
+  if (clean.length >= 8) {
+    const year = clean.slice(0, 4);
+    const month = clean.slice(4, 6);
+    const day = clean.slice(6, 8);
+    return `${year}-${month}-${day}`;
+  }
+  return '';
+}
+/**
+ * Clean PDF string (remove parentheses, decode escape sequences)
+ * @param str - Raw PDF string
+ * @returns Cleaned string
+ */
+function cleanPdfString(str: string): string {
+  if (!str) return '';
+  return str
+    .replace(/^\(/, '')  // Remove leading paren
+    .replace(/\)$/, '')  // Remove trailing paren
+    .replace(/\\n/g, '\n')  // Newlines
+    .replace(/\\r/g, '')    // Carriage returns
+    .replace(/\\t/g, ' ')   // Tabs
+    .replace(/\\\(/g, '(')  // Escaped parens
+    .replace(/\\\)/g, ')')
+    .replace(/\\\\/g, '\\') // Escaped backslash
+    .trim();
+}
+/**
+ * Convert PDF annotations to CriticMarkup comments
+ * @param annotations - From extractPdfAnnotations
+ * @returns Array of PDF comments
+ */
+export function annotationsToComments(annotations: PdfAnnotation[]): PdfComment[] {
+  return annotations
+    .filter(a => a.contents.trim())
+    .map(a => ({
+      author: a.author || 'Reviewer',
+      text: a.contents,
+      page: a.page,
+      type: a.type,
+      date: a.date,
+    }));
+}
+/**
+ * Extract comments from PDF and format for display
+ * @param pdfPath - Path to PDF file
+ * @returns Array of PDF comments
+ */
+export async function extractPdfComments(pdfPath: string): Promise<PdfComment[]> {
+  const annotations = await extractPdfAnnotations(pdfPath);
+  return annotationsToComments(annotations);
+}
+/**
+ * Insert PDF comments into markdown based on page/position heuristics
+ * Since PDFs don't have direct text anchors like Word, we use page numbers
+ * and append comments to the end of corresponding sections
+ *
+ * @param markdown - The markdown content
+ * @param comments - Comments from extractPdfComments
+ * @param options - { sectionPerPage: boolean }
+ * @returns Markdown with comments inserted
+ */
+export function insertPdfCommentsIntoMarkdown(
+  markdown: string,
+  comments: PdfComment[],
+  options: InsertOptions = {}
+): string {
+  if (comments.length === 0) return markdown;
+  // Group comments by page
+  const commentsByPage = new Map<number, PdfComment[]>();
+  for (const c of comments) {
+    if (!commentsByPage.has(c.page)) {
+      commentsByPage.set(c.page, []);
+    }
+    commentsByPage.get(c.page)!.push(c);
+  }
+  // Strategy: Append all comments at the end with page references
+  // This is the safest approach since we can't reliably map PDF positions to markdown
+  const lines = markdown.split('\n');
+  const commentBlock: string[] = [];
+  commentBlock.push('');
+  commentBlock.push('<!-- PDF Comments -->');
+  for (const [page, pageComments] of Array.from(commentsByPage.entries())) {
+    for (const c of pageComments) {
+      const authorPrefix = c.author ? `${c.author}: ` : '';
+      const pageRef = `[p.${page}]`;
+      commentBlock.push(`{>>${authorPrefix}${pageRef} ${c.text}<<}`);
+    }
+  }
+  return lines.join('\n') + commentBlock.join('\n');
+}
+/**
+ * Format PDF comments for CLI display
+ * @param comments - Array of PDF comments
+ * @returns Formatted string
+ */
+export function formatPdfComments(comments: PdfComment[]): string {
+  if (comments.length === 0) {
+    return 'No comments found in PDF.';
+  }
+  const lines: string[] = [];
+  let currentPage = 0;
+  for (const c of comments) {
+    if (c.page !== currentPage) {
+      if (currentPage > 0) lines.push('');
+      lines.push(`Page ${c.page}:`);
+      currentPage = c.page;
+    }
+    const typeIcon = getTypeIcon(c.type);
+    const author = c.author || 'Unknown';
+    lines.push(`  ${typeIcon} [${author}] ${c.text}`);
+  }
+  return lines.join('\n');
+}
+/**
+ * Get icon for annotation type
+ * @param type - Annotation type
+ * @returns Icon string
+ */
+function getTypeIcon(type: string): string {
+  switch (type) {
+    case 'Text': return '📝';      // Sticky note
+    case 'FreeText': return '💬';  // Text box
+    case 'Highlight': return '🖍️'; // Highlight
+    case 'Underline': return '📍'; // Underline
+    case 'StrikeOut': return '❌'; // Strikethrough
+    case 'Squiggly': return '〰️';  // Squiggly
+    default: return '💬';
+  }
+}
+/**
+ * Get statistics about PDF comments
+ * @param comments - Array of PDF comments
+ * @returns Statistics object
+ */
+export function getPdfCommentStats(comments: PdfComment[]): PdfCommentStats {
+  const stats: PdfCommentStats = {
+    total: comments.length,
+    byType: {},
+    byAuthor: {},
+    byPage: {},
+  };
+  for (const c of comments) {
+    stats.byType[c.type] = (stats.byType[c.type] || 0) + 1;
+    stats.byAuthor[c.author] = (stats.byAuthor[c.author] || 0) + 1;
+    stats.byPage[c.page] = (stats.byPage[c.page] || 0) + 1;
+  }
+  return stats;
+}
+/**
+ * Extract text content from a PDF page
+ * @param page - pdfjs page object
+ * @returns Array of text items with positions
+ */
+async function getPageTextItems(page: any): Promise<PdfTextItem[]> {
+  const textContent = await page.getTextContent();
+  return textContent.items.map((item: any) => ({
+    str: item.str,
+    x: item.transform[4],
+    y: item.transform[5],
+    width: item.width,
+    height: item.height,
+  }));
+}
+/**
+ * Check if a point is inside a quadrilateral defined by QuadPoints
+ * QuadPoints format: [x1,y1, x2,y2, x3,y3, x4,y4] for each quad
+ * @param x - X coordinate
+ * @param y - Y coordinate
+ * @param quad - 8 numbers defining corners
+ * @returns True if point is inside quad
+ */
+function isPointInQuad(x: number, y: number, quad: number[]): boolean {
+  if (quad.length < 8) return false;
+  // Get bounding box from quad points
+  const xs = [quad[0], quad[2], quad[4], quad[6]];
+  const ys = [quad[1], quad[3], quad[5], quad[7]];
+  const minX = Math.min(...xs);
+  const maxX = Math.max(...xs);
+  const minY = Math.min(...ys);
+  const maxY = Math.max(...ys);
+  return x >= minX && x <= maxX && y >= minY && y <= maxY;
+}
+/**
+ * Extract highlighted text from a PDF using QuadPoints
+ * @param pdfPath - Path to PDF file
+ * @param annotations - Annotations with quadPoints from extractPdfAnnotations
+ * @returns Annotations with highlighted text extracted
+ */
+export async function extractHighlightedText(
+  pdfPath: string,
+  annotations: PdfAnnotation[]
+): Promise<PdfAnnotationWithText[]> {
+  const pdfBytes = fs.readFileSync(pdfPath);
+  const data = new Uint8Array(pdfBytes);
+  // Load pdfjs-dist dynamically (requires DOMMatrix, not available in Node 18)
+  const { getDocument } = await import('pdfjs-dist/legacy/build/pdf.mjs');
+  const loadingTask = getDocument({ data, useSystemFonts: true });
+  const pdfDoc = await loadingTask.promise;
+  const results: PdfAnnotationWithText[] = [];
+  for (const annot of annotations) {
+    // Only process text markup annotations (Highlight, Underline, StrikeOut, Squiggly)
+    if (!['Highlight', 'Underline', 'StrikeOut', 'Squiggly'].includes(annot.type)) {
+      results.push({ ...annot, highlightedText: '' });
+      continue;
+    }
+    if (!annot.quadPoints || annot.quadPoints.length < 8) {
+      results.push({ ...annot, highlightedText: '' });
+      continue;
+    }
+    try {
+      const page = await pdfDoc.getPage(annot.page);
+      const textItems = await getPageTextItems(page);
+      // Split quadPoints into individual quads (8 numbers each)
+      const quads: number[][] = [];
+      for (let i = 0; i < annot.quadPoints.length; i += 8) {
+        quads.push(annot.quadPoints.slice(i, i + 8));
+      }
+      // Find text items that fall within any of the quads
+      const matchedText: string[] = [];
+      for (const item of textItems) {
+        // Check if text item center is in any quad
+        const centerX = item.x + (item.width || 0) / 2;
+        const centerY = item.y + (item.height || 0) / 2;
+        for (const quad of quads) {
+          if (isPointInQuad(centerX, centerY, quad) || isPointInQuad(item.x, item.y, quad)) {
+            matchedText.push(item.str);
+            break;
+          }
+        }
+      }
+      results.push({
+        ...annot,
+        highlightedText: matchedText.join(' ').trim(),
+      });
+    } catch (err) {
+      // If text extraction fails, just return empty
+      results.push({ ...annot, highlightedText: '' });
+    }
+  }
+  return results;
+}
+/**
+ * Extract annotations with highlighted text in one call
+ * @param pdfPath - Path to PDF file
+ * @returns Annotations with highlighted text
+ */
+export async function extractPdfAnnotationsWithText(pdfPath: string): Promise<PdfAnnotationWithText[]> {
+  const annotations = await extractPdfAnnotations(pdfPath);
+  return extractHighlightedText(pdfPath, annotations);
+}
+/**
+ * Format annotation with highlighted text for display
+ * @param annot - Annotation with highlightedText
+ * @returns Formatted string
+ */
+export function formatAnnotationWithText(annot: PdfAnnotationWithText): string {
+  const typeIcon = getTypeIcon(annot.type);
+  const author = annot.author || 'Unknown';
+  const parts: string[] = [`${typeIcon} [${author}]`];
+  if (annot.highlightedText) {
+    parts.push(`"${annot.highlightedText}"`);
+  }
+  if (annot.contents) {
+    parts.push(`→ ${annot.contents}`);
+  }
+  return parts.join(' ');
+}