npm - @okrapdf/cli - Versions diffs - 0.3.7 → 0.4.1 - Mend

@okrapdf/cli 0.3.7 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29) hide show

package/dist/cli.d.ts.map +1 -1
package/dist/cli.js +32 -52
package/dist/cli.js.map +1 -1
package/dist/cli.test.js +12 -6
package/dist/cli.test.js.map +1 -1
package/dist/commands/browse.d.ts +10 -0
package/dist/commands/browse.d.ts.map +1 -0
package/dist/commands/browse.js +86 -0
package/dist/commands/browse.js.map +1 -0
package/dist/commands/chat.d.ts.map +1 -1
package/dist/commands/chat.js +26 -4
package/dist/commands/chat.js.map +1 -1
package/dist/commands/elements.d.ts +9 -0
package/dist/commands/elements.d.ts.map +1 -0
package/dist/commands/elements.js +857 -0
package/dist/commands/elements.js.map +1 -0
package/dist/commands/read.d.ts +13 -0
package/dist/commands/read.d.ts.map +1 -0
package/dist/commands/read.js +143 -0
package/dist/commands/read.js.map +1 -0
package/dist/lib/cache.d.ts +10 -0
package/dist/lib/cache.d.ts.map +1 -1
package/dist/lib/cache.js +41 -0
package/dist/lib/cache.js.map +1 -1
package/dist/lib/resolver.d.ts +28 -0
package/dist/lib/resolver.d.ts.map +1 -0
package/dist/lib/resolver.js +117 -0
package/dist/lib/resolver.js.map +1 -0
package/package.json +13 -14

package/dist/commands/elements.js ADDED Viewed

@@ -0,0 +1,857 @@
+/**
+ * Consolidated element management commands
+ *
+ * Combines entities, tables, toc, and review into a single top-level command.
+ * All extracted content from PDFs (tables, figures, footnotes, etc.) lives here.
+ */
+import { Command } from 'commander';
+import { writeFileSync, mkdirSync, existsSync, readFileSync } from 'fs';
+import { join, resolve } from 'path';
+import chalk from 'chalk';
+import { get, post, patch, OkraApiError, EXIT_CODES } from '../lib/client.js';
+import { formatOutput, formatDate, formatStatus as formatOutputStatus, success, error, info, warn, } from '../lib/output.js';
+import { withSpinner } from '../lib/progress.js';
+import { getDefaultFormat, shouldUseJsonOutput } from '../lib/config.js';
+import { openInBrowser, getJobWebUrl } from '../lib/browser.js';
+import { CacheManager } from '../lib/cache.js';
+import { validateJobReady, throwValidationError } from '../lib/validator.js';
+// ─── Table columns ───────────────────────────────────────────────────────────
+// Column layout passed to formatOutput when `elements list` renders entities.
+// `title_fmt` and `has_bbox` are display-only fields computed in the `list` action.
+const ENTITY_COLUMNS = [
+    { key: 'id', header: 'Entity ID', width: 25 },
+    { key: 'type', header: 'Type', width: 12 },
+    { key: 'page', header: 'Page', width: 6 },
+    { key: 'title_fmt', header: 'Title', width: 40 },
+    { key: 'has_bbox', header: 'BBox', width: 6 },
+];
+// Column layout passed to formatOutput when `elements review pages` renders
+// the per-page verification rows.
+const REVIEW_PAGE_COLUMNS = [
+    { key: 'page', header: 'Page', width: 6 },
+    { key: 'status', header: 'Status', width: 12 },
+    { key: 'resolution', header: 'Resolution', width: 12 },
+    { key: 'ocrLineCount', header: 'OCR Lines', width: 10 },
+    { key: 'hasOcr', header: 'Has OCR', width: 8 },
+];
+// ─── Helpers ─────────────────────────────────────────────────────────────────
+function truncate(str, maxLength) {
+    if (str.length <= maxLength)
+        return str;
+    return str.slice(0, maxLength - 3) + '...';
+}
+function formatReviewStatus(status) {
+    switch (status) {
+        case 'complete': return chalk.green(status);
+        case 'partial': return chalk.yellow(status);
+        case 'flagged': return chalk.red(status);
+        case 'pending': return chalk.yellow(status);
+        case 'gap': return chalk.magenta(status);
+        case 'empty': return chalk.dim(status);
+        default: return status;
+    }
+}
+function markdownTableToCsv(markdown) {
+    const lines = markdown.trim().split('\n');
+    const csvLines = [];
+    for (const line of lines) {
+        if (line.match(/^\|[\s-:|]+\|$/))
+            continue;
+        if (line.startsWith('|') && line.endsWith('|')) {
+            const cells = line
+                .slice(1, -1)
+                .split('|')
+                .map(cell => cell.trim());
+            const escapedCells = cells.map(cell => {
+                if (cell.includes(',') || cell.includes('"') || cell.includes('\n')) {
+                    return `"${cell.replace(/"/g, '""')}"`;
+                }
+                return cell;
+            });
+            csvLines.push(escapedCells.join(','));
+        }
+    }
+    return csvLines.join('\n');
+}
+function formatTocAsMarkdown(toc) {
+    const lines = [];
+    lines.push(chalk.bold('Table of Contents'));
+    lines.push('');
+    for (const entry of toc.toc) {
+        const indent = '  '.repeat(entry.level - 1);
+        const pageNum = chalk.dim(`(p. ${entry.page})`);
+        lines.push(`${indent}- ${entry.title} ${pageNum}`);
+    }
+    lines.push('');
+    lines.push(chalk.dim(`Total entries: ${toc.total_entries}`));
+    return lines.join('\n');
+}
+// ─── Main command ────────────────────────────────────────────────────────────
+export function createElementsCommand() {
+    const elements = new Command('elements')
+        .description('Manage extracted elements (tables, figures, footnotes, TOC, review)');
+    // ── elements list ──────────────────────────────────────────────────────────
+    elements
+        .command('list <jobId>')
+        .alias('ls')
+        .description('List elements from a job')
+        .option('-o, --output <format>', 'Output format (table, json, csv)', getDefaultFormat())
+        .option('-t, --type <type>', 'Filter by type (tables, figures, footnotes, summaries, signatures, all)', 'all')
+        .option('-p, --page <n>', 'Filter by page number')
+        .option('--with-bbox', 'Only show elements with bounding boxes')
+        .action(async (jobId, options) => {
+        const params = {
+            type: options.type,
+        };
+        const response = await withSpinner('Fetching elements', () => get(`api/ocr/jobs/${jobId}/entities`, params));
+        let entities = response.entities;
+        if (options.page) {
+            const pageNum = parseInt(options.page, 10);
+            entities = entities.filter(e => e.page === pageNum);
+        }
+        if (options.withBbox) {
+            entities = entities.filter(e => e.bbox != null);
+        }
+        if (entities.length === 0) {
+            console.log(chalk.dim('No elements found'));
+            return;
+        }
+        const formatted = entities.map(entity => ({
+            ...entity,
+            title_fmt: truncate(entity.title || '-', 40),
+            has_bbox: entity.bbox ? chalk.green('Yes') : chalk.dim('No'),
+        }));
+        console.log(formatOutput(formatted, options.output, ENTITY_COLUMNS));
+        if (!shouldUseJsonOutput(options.output)) {
+            console.log(chalk.dim(`\nTotal: ${entities.length} elements`));
+            console.log(chalk.dim(`Status: ${formatOutputStatus(response.extractionStatus)}`));
+        }
+    });
+    // ── elements get ───────────────────────────────────────────────────────────
+    elements
+        .command('get <elementId>')
+        .description('Get a single table/element by ID')
+        .option('-o, --output <format>', 'Output format (markdown, json)', 'markdown')
+        .action(async (elementId, options) => {
+        try {
+            const table = await withSpinner('Fetching element', () => get(`api/extractions/tables/${elementId}`));
+            if (options.output === 'json') {
+                console.log(formatOutput(table, 'json'));
+            }
+            else {
+                console.log(chalk.bold(`Table (Page ${table.page_number})`));
+                console.log(chalk.dim('-'.repeat(50)));
+                console.log(table.content_markdown);
+                console.log();
+                console.log(chalk.dim(`ID: ${table.id}`));
+                console.log(chalk.dim(`Source: ${table.processor_type}`));
+                if (table.confidence !== null) {
+                    console.log(chalk.dim(`Confidence: ${(table.confidence * 100).toFixed(1)}%`));
+                }
+            }
+        }
+        catch (err) {
+            if (err instanceof OkraApiError && err.statusCode === 404) {
+                error(`Element not found: ${elementId}`);
+                process.exit(EXIT_CODES.NOT_FOUND);
+            }
+            throw err;
+        }
+    });
+    // ── elements export ────────────────────────────────────────────────────────
+    elements
+        .command('export <jobId>')
+        .description('Export elements to disk (tables as CSV/JSON/MD, images as PNG/JPG)')
+        .option('-t, --type <type>', 'Element type to export (tables, figures, all)', 'all')
+        .option('-f, --format <fmt>', 'Export format (csv, json, md, png, jpg)', 'csv')
+        .option('-d, --output-dir <dir>', 'Output directory', './element-export')
+        .option('-q, --quality <n>', 'Image quality for JPEG (1-100)', '90')
+        .option('-s, --scale <n>', 'Image scale factor (1-4)', '2')
+        .option('--padding <n>', 'Padding around crop region in pixels', '10')
+        .option('-o, --output <format>', 'Result output format (table, json)', 'table')
+        .action(async (jobId, options) => {
+        const useJson = shouldUseJsonOutput(options.output);
+        const format = options.format.toLowerCase();
+        const isImageFormat = format === 'png' || format === 'jpg';
+        if (isImageFormat) {
+            // Image export path (from entities images)
+            await exportImages(jobId, { ...options, format }, useJson);
+        }
+        else {
+            // Table data export path (from tables export)
+            await exportTableData(jobId, { ...options, format }, useJson);
+        }
+    });
+    // ── elements toc ───────────────────────────────────────────────────────────
+    elements
+        .command('toc <jobId>')
+        .description('Extract table of contents from PDF')
+        .option('-f, --format <format>', 'Output format (markdown, json)', 'markdown')
+        .option('--max-depth <n>', 'Maximum TOC depth level', parseInt)
+        .option('--refresh', 'Force refresh from API (ignore cache)')
+        .action(async (jobId, options) => {
+        try {
+            const cache = new CacheManager();
+            const validation = await validateJobReady(jobId, cache);
+            if (!validation.valid) {
+                throwValidationError(validation.error);
+            }
+            if (!options.refresh && cache.getToc(jobId)) {
+                const cachedToc = cache.getToc(jobId);
+                if (cachedToc) {
+                    info('Using cached TOC data');
+                    if (options.format === 'json') {
+                        console.log(JSON.stringify(cachedToc, null, 2));
+                    }
+                    else {
+                        console.log(formatTocAsMarkdown(cachedToc));
+                    }
+                    return;
+                }
+            }
+            const params = {};
+            if (options.maxDepth) {
+                params.max_depth = options.maxDepth.toString();
+            }
+            info('Extracting TOC from PDF...');
+            const apiResponse = await withSpinner('Running TOC extraction (this may take ~30s)', () => get(`api/steps/table-of-content/${jobId}`, params));
+            if (!apiResponse.success) {
+                error('TOC extraction failed');
+                process.exit(EXIT_CODES.GENERAL_ERROR);
+            }
+            const tocResult = {
+                toc: apiResponse.toc.map(entry => ({
+                    level: entry.level,
+                    title: entry.title,
+                    page: entry.page,
+                })),
+                total_entries: apiResponse.total_entries,
+            };
+            cache.setToc(jobId, tocResult);
+            success('TOC cached locally');
+            cache.logCommand('toc', jobId, {
+                format: options.format,
+                maxDepth: options.maxDepth,
+                refresh: options.refresh,
+            });
+            cache.logResult(true, {
+                total_entries: tocResult.total_entries,
+                strategy: apiResponse.strategy,
+            });
+            if (options.format === 'json') {
+                console.log(JSON.stringify(tocResult, null, 2));
+            }
+            else {
+                console.log(formatTocAsMarkdown(tocResult));
+            }
+            console.log(chalk.dim(`\nExtraction: ${apiResponse.strategy} strategy, ${apiResponse.total_elapsed_ms}ms`));
+        }
+        catch (err) {
+            const cache = new CacheManager();
+            const errorMessage = err instanceof Error ? err.message : String(err);
+            cache.logResult(false, {}, errorMessage);
+            if (err instanceof OkraApiError) {
+                error(err.message);
+                process.exit(err.exitCode);
+            }
+            error(`TOC extraction failed: ${errorMessage}`);
+            process.exit(EXIT_CODES.GENERAL_ERROR);
+        }
+    });
+    // ── elements count ─────────────────────────────────────────────────────────
+    elements
+        .command('count <jobId>')
+        .description('Get element counts by type')
+        .option('-o, --output <format>', 'Output format (table, json)', getDefaultFormat())
+        .action(async (jobId, options) => {
+        const response = await withSpinner('Fetching element counts', () => get(`api/ocr/jobs/${jobId}/entities`));
+        if (options.output === 'json') {
+            console.log(formatOutput({
+                job_id: jobId,
+                counts: response.counts,
+                extraction_status: response.extractionStatus,
+                total_pages: response.totalPages,
+            }, 'json'));
+        }
+        else {
+            console.log(chalk.bold('\nElement Counts'));
+            console.log(chalk.dim('-'.repeat(30)));
+            console.log(chalk.bold('Tables:'), response.counts.tables);
+            console.log(chalk.bold('Figures:'), response.counts.figures);
+            console.log(chalk.bold('Footnotes:'), response.counts.footnotes);
+            console.log(chalk.bold('Summaries:'), response.counts.summaries);
+            console.log(chalk.bold('Signatures:'), response.counts.signatures);
+            console.log();
+            console.log(chalk.dim(`Status: ${response.extractionStatus}`));
+            if (response.totalPages) {
+                console.log(chalk.dim(`Total pages: ${response.totalPages}`));
+            }
+        }
+    });
+    // ── elements review ────────────────────────────────────────────────────────
+    const review = elements
+        .command('review')
+        .description('Review job verification status and page content');
+    // review status
+    review
+        .command('status <jobId>')
+        .description('Get verification status summary for a job')
+        .option('-o, --output <format>', 'Output format (table, json)', getDefaultFormat())
+        .option('-w, --web', 'Open job review page in browser')
+        .action(async (jobId, options) => {
+        if (options.web) {
+            const url = `${getJobWebUrl(jobId)}/review`;
+            console.error(`Opening ${url} in your browser.`);
+            await openInBrowser(url);
+            return;
+        }
+        const tree = await withSpinner('Fetching verification status', () => get(`api/ocr/jobs/${jobId}/verification-tree`));
+        if (options.output === 'json') {
+            console.log(formatOutput(tree, 'json'));
+            return;
+        }
+        console.log(chalk.bold('Verification Status'));
+        console.log(chalk.dim('-'.repeat(50)));
+        console.log(chalk.bold('Job:'), jobId);
+        console.log(chalk.bold('Total Pages:'), tree.totalPages);
+        console.log();
+        console.log(chalk.bold('Summary:'));
+        console.log(`  ${chalk.green('Complete:')} ${tree.summary.complete}`);
+        console.log(`  ${chalk.yellow('Pending:')} ${tree.summary.pending}`);
+        console.log(`  ${chalk.red('Flagged:')} ${tree.summary.flagged}`);
+        console.log(`  ${chalk.magenta('Gap:')} ${tree.summary.gap}`);
+        console.log(`  ${chalk.blue('Resolved:')} ${tree.summary.resolved}`);
+        if (tree.summary.stale > 0) {
+            console.log(`  ${chalk.dim('Stale:')} ${tree.summary.stale}`);
+        }
+    });
+    // review pages
+    review
+        .command('pages <jobId>')
+        .description('List pages with verification status')
+        .option('-o, --output <format>', 'Output format (table, json)', getDefaultFormat())
+        .option('-s, --status <status>', 'Filter by status (complete, pending, flagged, gap)')
+        .action(async (jobId, options) => {
+        const tree = await withSpinner('Fetching pages', () => get(`api/ocr/jobs/${jobId}/verification-tree`));
+        let pages = tree.pages;
+        if (options.status) {
+            pages = pages.filter(p => p.status === options.status);
+        }
+        if (pages.length === 0) {
+            console.log(chalk.dim('No pages found'));
+            return;
+        }
+        if (options.output === 'json') {
+            console.log(formatOutput(pages, 'json'));
+            return;
+        }
+        const formatted = pages.map(p => ({
+            ...p,
+            status: formatReviewStatus(p.status),
+            resolution: p.resolution || chalk.dim('-'),
+            hasOcr: p.hasOcr ? chalk.green('\u2713') : chalk.dim('\u2717'),
+        }));
+        console.log(formatOutput(formatted, 'table', REVIEW_PAGE_COLUMNS));
+        console.log(chalk.dim(`\n${pages.length} pages`));
+    });
+    // review page
+    review
+        .command('page <jobId> <pageNum>')
+        .description('Get page content (markdown and OCR blocks)')
+        .option('-o, --output <format>', 'Output format (markdown, json)', 'markdown')
+        .option('--ocr', 'Show OCR blocks instead of markdown')
+        .option('--raw', 'Output raw content without formatting')
+        .action(async (jobId, pageNum, options) => {
+        const page = await withSpinner('Fetching page content', () => get(`api/ocr/jobs/${jobId}/pages/${pageNum}`));
+        if (options.output === 'json') {
+            console.log(formatOutput(page, 'json'));
+            return;
+        }
+        if (options.ocr) {
+            if (!page.blocks || page.blocks.length === 0) {
+                console.log(chalk.dim('No OCR blocks available'));
+                return;
+            }
+            if (options.raw) {
+                for (const block of page.blocks) {
+                    console.log(block.text);
+                }
+                return;
+            }
+            console.log(chalk.bold(`OCR Blocks - Page ${pageNum}`));
+            console.log(chalk.dim('-'.repeat(50)));
+            for (let i = 0; i < page.blocks.length; i++) {
+                const block = page.blocks[i];
+                const conf = block.confidence !== undefined ? ` (${(block.confidence * 100).toFixed(0)}%)` : '';
+                console.log(chalk.cyan(`[${i + 1}]${conf}`), block.text);
+            }
+            console.log(chalk.dim(`\n${page.blocks.length} blocks`));
+            return;
+        }
+        if (options.raw) {
+            console.log(page.content);
+            return;
+        }
+        console.log(chalk.bold(`Page ${pageNum} Content`));
+        console.log(chalk.dim('-'.repeat(50)));
+        console.log(page.content);
+        console.log();
+        console.log(chalk.dim(`Version: ${page.version}`));
+        if (page.dimension) {
+            console.log(chalk.dim(`Dimension: ${page.dimension.width}x${page.dimension.height}`));
+        }
+        if (page.blocks) {
+            console.log(chalk.dim(`OCR Blocks: ${page.blocks.length}`));
+        }
+    });
+    // review resolve
+    review
+        .command('resolve <jobId> <pageNum>')
+        .description('Mark a page as reviewed')
+        .option('-r, --resolution <type>', 'Resolution type (reviewed, skipped, flagged)', 'reviewed')
+        .option('-n, --note <text>', 'Add a note to the resolution')
+        .option('-o, --output <format>', 'Output format (table, json)', 'table')
+        .action(async (jobId, pageNum, options) => {
+        const useJson = shouldUseJsonOutput(options.output);
+        try {
+            await withSpinner(`Resolving page ${pageNum}`, () => post(`api/ocr/jobs/${jobId}/pages/${pageNum}/resolve`, {
+                resolution: options.resolution,
+                note: options.note,
+            }));
+            if (useJson) {
+                console.log(formatOutput({
+                    success: true,
+                    job_id: jobId,
+                    page: parseInt(pageNum),
+                    resolution: options.resolution,
+                }, 'json'));
+            }
+            else {
+                success(`Page ${pageNum} marked as ${options.resolution}`);
+            }
+        }
+        catch (err) {
+            if (err instanceof OkraApiError) {
+                if (useJson) {
+                    console.log(formatOutput({ success: false, error: err.message }, 'json'));
+                }
+                else {
+                    error(err.message);
+                }
+                process.exit(err.exitCode);
+            }
+            throw err;
+        }
+    });
+    // review history
+    review
+        .command('history <jobId>')
+        .description('Get verification audit trail')
+        .option('-o, --output <format>', 'Output format (table, json)', getDefaultFormat())
+        .option('-l, --limit <n>', 'Limit results', '20')
+        .option('-p, --page <n>', 'Filter by page number')
+        .action(async (jobId, options) => {
+        const params = { limit: options.limit };
+        if (options.page)
+            params.page = options.page;
+        const response = await withSpinner('Fetching history', () => get(`api/ocr/jobs/${jobId}/history`, params));
+        if (response.history.length === 0) {
+            console.log(chalk.dim('No history found'));
+            return;
+        }
+        if (options.output === 'json') {
+            console.log(formatOutput(response.history, 'json'));
+            return;
+        }
+        console.log(chalk.bold('Verification History'));
+        console.log(chalk.dim('-'.repeat(60)));
+        for (const entry of response.history) {
+            const page = entry.pageNumber ? `Page ${entry.pageNumber}` : '';
+            const entity = entry.entityType ? `${entry.entityType}` : '';
+            const target = [page, entity].filter(Boolean).join(' - ') || 'Job';
+            console.log(chalk.dim(formatDate(entry.createdAt)), chalk.cyan(entry.action), chalk.white(target), chalk.dim(`by ${entry.triggeredBy}`));
+        }
+    });
+    // review save
+    review
+        .command('save <jobId> <pageNum>')
+        .description('Save/update page markdown content')
+        .option('-f, --file <path>', 'Read content from file')
+        .option('-c, --content <text>', 'Content to save (use - for stdin)')
+        .option('-o, --output <format>', 'Output format (table, json)', 'table')
+        .action(async (jobId, pageNum, options) => {
+        const useJson = shouldUseJsonOutput(options.output);
+        let content;
+        if (options.file) {
+            if (!existsSync(options.file)) {
+                if (useJson) {
+                    console.log(formatOutput({ success: false, error: `File not found: ${options.file}` }, 'json'));
+                }
+                else {
+                    error(`File not found: ${options.file}`);
+                }
+                process.exit(EXIT_CODES.INVALID_ARGS);
+            }
+            content = readFileSync(options.file, 'utf-8');
+        }
+        else if (options.content) {
+            if (options.content === '-') {
+                const chunks = [];
+                for await (const chunk of process.stdin) {
+                    chunks.push(chunk);
+                }
+                content = Buffer.concat(chunks).toString('utf-8');
+            }
+            else {
+                content = options.content;
+            }
+        }
+        else {
+            if (useJson) {
+                console.log(formatOutput({ success: false, error: 'Either --file or --content is required' }, 'json'));
+            }
+            else {
+                error('Either --file or --content is required');
+            }
+            process.exit(EXIT_CODES.INVALID_ARGS);
+        }
+        try {
+            const result = await withSpinner(`Saving page ${pageNum}`, () => patch(`api/ocr/jobs/${jobId}/pages/${pageNum}`, { content }));
+            if (useJson) {
+                console.log(formatOutput({
+                    success: true,
+                    job_id: jobId,
+                    page: parseInt(pageNum),
+                    version: result.version,
+                }, 'json'));
+            }
+            else {
+                success(`Page ${pageNum} saved (version ${result.version})`);
+            }
+        }
+        catch (err) {
+            if (err instanceof OkraApiError) {
+                if (useJson) {
+                    console.log(formatOutput({ success: false, error: err.message }, 'json'));
+                }
+                else {
+                    error(err.message);
+                }
+                process.exit(err.exitCode);
+            }
+            throw err;
+        }
+    });
+    // review versions
+    review
+        .command('versions <jobId> <pageNum>')
+        .description('List page content versions')
+        .option('-o, --output <format>', 'Output format (table, json)', getDefaultFormat())
+        .action(async (jobId, pageNum, options) => {
+        const response = await withSpinner('Fetching versions', () => get(`api/ocr/jobs/${jobId}/pages/${pageNum}/versions`));
+        if (response.versions.length === 0) {
+            console.log(chalk.dim('No versions found'));
+            return;
+        }
+        if (options.output === 'json') {
+            console.log(formatOutput(response.versions, 'json'));
+            return;
+        }
+        console.log(chalk.bold(`Page ${pageNum} Versions`));
+        console.log(chalk.dim('-'.repeat(50)));
+        for (const v of response.versions) {
+            console.log(`  v${v.version} - ${formatDate(v.createdAt)} by ${v.createdBy}`);
+        }
+    });
+    // review version
+    review
+        .command('version <jobId> <pageNum> <version>')
+        .description('Get specific version of page content')
+        .option('-o, --output <format>', 'Output format (markdown, json)', 'markdown')
+        .option('--raw', 'Output raw content without formatting')
+        .action(async (jobId, pageNum, version, options) => {
+        const page = await withSpinner(`Fetching version ${version}`, () => get(`api/ocr/jobs/${jobId}/pages/${pageNum}/versions/${version}`));
+        if (options.output === 'json') {
+            console.log(formatOutput(page, 'json'));
+            return;
+        }
+        if (options.raw) {
+            console.log(page.content);
+            return;
+        }
+        console.log(chalk.bold(`Page ${pageNum} - Version ${version}`));
+        console.log(chalk.dim('-'.repeat(50)));
+        console.log(page.content);
+    });
+    // review diff
+    review
+        .command('diff <jobId> <pageNum>')
+        .description('Show diff between current and previous version')
+        .option('--from <v>', 'Compare from version')
+        .option('--to <v>', 'Compare to version')
+        .option('-o, --output <format>', 'Output format (table, json)', 'table')
+        .action(async (jobId, pageNum, options) => {
+        const useJson = shouldUseJsonOutput(options.output);
+        const current = await get(`api/ocr/jobs/${jobId}/pages/${pageNum}`);
+        let previousVersion = (current.version || 1) - 1;
+        if (options.from)
+            previousVersion = parseInt(options.from);
+        if (previousVersion < 1) {
+            if (useJson) {
+                console.log(formatOutput({
+                    job_id: jobId,
+                    page: parseInt(pageNum),
+                    current_version: current.version,
+                    previous_version: null,
+                    message: 'No previous version to compare',
+                    changes: [],
+                }, 'json'));
+            }
+            else {
+                console.log(chalk.dim('No previous version to compare'));
+            }
+            return;
+        }
+        const previous = await get(`api/ocr/jobs/${jobId}/pages/${pageNum}/versions/${previousVersion}`);
+        const currentLines = current.content.split('\n');
+        const previousLines = previous.content.split('\n');
+        const changes = [];
+        for (let i = 0; i < Math.max(currentLines.length, previousLines.length); i++) {
+            const curr = currentLines[i] ?? '';
+            const prev = previousLines[i] ?? '';
+            if (curr !== prev) {
+                if (prev)
+                    changes.push({ line: i + 1, type: 'removed', content: prev });
+                if (curr)
+                    changes.push({ line: i + 1, type: 'added', content: curr });
+            }
+        }
+        if (useJson) {
+            console.log(formatOutput({
+                job_id: jobId,
+                page: parseInt(pageNum),
+                from_version: previousVersion,
+                to_version: current.version,
+                changes,
+            }, 'json'));
+            return;
+        }
+        console.log(chalk.bold(`Diff: v${previousVersion} \u2192 v${current.version}`));
+        console.log(chalk.dim('-'.repeat(50)));
+        for (const change of changes) {
+            if (change.type === 'removed') {
+                console.log(chalk.red(`- ${change.content}`));
+            }
+            else {
+                console.log(chalk.green(`+ ${change.content}`));
+            }
+        }
+    });
+    return elements;
+}
+// ─── Export helpers ──────────────────────────────────────────────────────────
+async function exportImages(jobId, options, useJson) {
+    const format = options.format;
+    if (format !== 'png' && format !== 'jpg') {
+        if (useJson) {
+            console.log(formatOutput({ success: false, error: 'Invalid image format. Use: png or jpg' }, 'json'));
+        }
+        else {
+            error('Invalid image format. Use: png or jpg');
+        }
+        process.exit(EXIT_CODES.INVALID_ARGS);
+    }
+    const entitiesResponse = await withSpinner('Fetching elements', () => get(`api/ocr/jobs/${jobId}/entities`, { type: options.type }));
+    let entities = entitiesResponse.entities.filter(e => e.bbox != null);
+    if (options.page) {
+        const pageNum = parseInt(options.page, 10);
+        entities = entities.filter(e => e.page === pageNum);
+    }
+    if (entities.length === 0) {
+        if (useJson) {
+            console.log(formatOutput({
+                success: true,
+                job_id: jobId,
+                message: 'No elements with bounding boxes found',
+                exported: 0,
+            }, 'json'));
+        }
+        else {
+            warn('No elements with bounding boxes found');
+        }
+        return;
+    }
+    info(`Found ${entities.length} elements with bounding boxes`);
+    const job = await withSpinner('Fetching job info', () => get(`api/v1/jobs/${jobId}`));
+    const jobData = job;
+    if (!jobData.document_uuid) {
+        if (useJson) {
+            console.log(formatOutput({ success: false, error: 'Job has no associated document' }, 'json'));
+        }
+        else {
+            error('Job has no associated document');
+        }
+        process.exit(EXIT_CODES.GENERAL_ERROR);
+    }
+    const docInfo = await withSpinner('Getting document URL', () => get(`api/documents/${jobData.document_uuid}/download`));
+    const got = (await import('got')).default;
+    info('Downloading PDF...');
+    const pdfResponse = await got(docInfo.signed_url, { responseType: 'buffer' });
+    const pdfBuffer = pdfResponse.body;
+    const { PdfImageRenderer } = await import('../lib/pdf-image.js');
+    const outDir = resolve(options.outputDir);
+    if (!existsSync(outDir)) {
+        mkdirSync(outDir, { recursive: true });
+    }
+    const renderer = await PdfImageRenderer.fromBuffer(pdfBuffer);
+    const exported = [];
+    const errors = [];
+    const renderOptions = {
+        format,
+        quality: parseInt(options.quality, 10),
+        scale: parseFloat(options.scale),
+        padding: parseInt(options.padding, 10),
+    };
+    console.log(chalk.dim(`Exporting ${entities.length} elements to ${outDir}...`));
+    for (const entity of entities) {
+        try {
+            const result = await renderer.renderRegion(entity.page, entity.bbox, renderOptions);
+            const safeTitle = (entity.title || 'untitled')
+                .toLowerCase()
+                .replace(/[^a-z0-9]+/g, '-')
+                .slice(0, 30);
+            const filename = `${entity.type}-p${entity.page}-${safeTitle}.${format}`;
+            const filepath = join(outDir, filename);
+            writeFileSync(filepath, result.buffer);
+            exported.push({
+                entity_id: entity.id,
+                file: filepath,
+                page: entity.page,
+                type: entity.type,
+            });
+            if (!useJson) {
+                console.log(chalk.green('  \u2713'), chalk.dim(`${entity.type} p${entity.page}:`), filename);
+            }
+        }
+        catch (err) {
+            const errorMsg = err instanceof Error ? err.message : 'Unknown error';
+            errors.push({ entity_id: entity.id, error: errorMsg });
+            if (!useJson) {
+                console.log(chalk.red('  \u2717'), chalk.dim(`${entity.type} p${entity.page}:`), errorMsg);
+            }
+        }
+    }
+    renderer.close();
+    if (useJson) {
+        console.log(formatOutput({
+            success: errors.length === 0,
+            job_id: jobId,
+            output_dir: outDir,
+            format,
+            exported: exported.length,
+            errors: errors.length,
+            files: exported,
+            error_details: errors.length > 0 ? errors : undefined,
+        }, 'json'));
+    }
+    else {
+        console.log();
+        if (exported.length > 0) {
+            success(`Exported ${exported.length} images to: ${outDir}`);
+        }
+        if (errors.length > 0) {
+            warn(`${errors.length} elements failed to export`);
+        }
+    }
+}
+async function exportTableData(jobId, options, useJson) {
+    const format = options.format;
+    if (!['csv', 'json', 'md', 'markdown'].includes(format)) {
+        if (useJson) {
+            console.log(formatOutput({ success: false, error: `Invalid format: ${format}. Use: csv, json, md, png, jpg` }, 'json'));
+        }
+        else {
+            error(`Invalid format: ${format}. Use: csv, json, md, png, jpg`);
+        }
+        process.exit(EXIT_CODES.INVALID_ARGS);
+    }
+    // Fetch entities to get table IDs
+    const entitiesResponse = await withSpinner('Fetching elements', () => get(`api/ocr/jobs/${jobId}/entities`, { type: 'tables' }));
+    const tableEntities = entitiesResponse.entities.filter(e => e.type === 'table');
+    if (tableEntities.length === 0) {
+        if (useJson) {
+            console.log(formatOutput({ success: true, job_id: jobId, message: 'No tables found', exported: 0 }, 'json'));
+        }
+        else {
+            warn('No tables found to export');
+        }
+        return;
+    }
+    const outDir = resolve(options.outputDir);
+    if (!existsSync(outDir)) {
+        mkdirSync(outDir, { recursive: true });
+    }
+    info(`Exporting ${tableEntities.length} tables to ${outDir}...`);
+    const exported = [];
+    const errors = [];
+    for (const entity of tableEntities) {
+        try {
+            const table = await get(`api/extractions/tables/${entity.id}`);
+            let content;
+            let ext;
+            switch (format) {
+                case 'json':
+                    content = JSON.stringify(table, null, 2);
+                    ext = 'json';
+                    break;
+                case 'csv':
+                    content = markdownTableToCsv(table.content_markdown);
+                    ext = 'csv';
+                    break;
+                case 'markdown':
+                case 'md':
+                    content = table.content_markdown;
+                    ext = 'md';
+                    break;
+                default:
+                    continue;
+            }
+            const safeTitle = (entity.title || 'untitled')
+                .toLowerCase()
+                .replace(/[^a-z0-9]+/g, '-')
+                .slice(0, 30);
+            const filename = `table-p${entity.page}-${safeTitle}.${ext}`;
+            const filepath = join(outDir, filename);
+            writeFileSync(filepath, content, 'utf-8');
+            exported.push({ table_id: entity.id, file: filepath, page: entity.page });
+            if (!useJson) {
+                console.log(chalk.green('  \u2713'), chalk.dim(`p${entity.page}:`), filename);
+            }
+        }
+        catch (err) {
+            const errorMsg = err instanceof Error ? err.message : 'Unknown error';
+            errors.push({ table_id: entity.id, error: errorMsg });
+            if (!useJson) {
+                console.log(chalk.red('  \u2717'), chalk.dim(`p${entity.page}:`), errorMsg);
+            }
+        }
+    }
+    if (useJson) {
+        console.log(formatOutput({
+            success: errors.length === 0,
+            job_id: jobId,
+            output_dir: outDir,
+            format,
+            exported: exported.length,
+            errors: errors.length,
+            files: exported,
+            error_details: errors.length > 0 ? errors : undefined,
+        }, 'json'));
+    }
+    else {
+        console.log();
+        if (exported.length > 0) {
+            success(`Exported ${exported.length} tables to: ${outDir}`);
+        }
+        if (errors.length > 0) {
+            warn(`${errors.length} tables failed to export`);
+        }
+    }
+}
+//# sourceMappingURL=elements.js.map