@heripo/pdf-parser 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +201 -0
- package/NOTICE +37 -0
- package/README.ko.md +426 -0
- package/README.md +426 -0
- package/dist/chunk-JVYF2SQS.js +495 -0
- package/dist/chunk-JVYF2SQS.js.map +1 -0
- package/dist/chunk-VUNV25KB.js +16 -0
- package/dist/chunk-VUNV25KB.js.map +1 -0
- package/dist/index.cjs +1323 -0
- package/dist/index.cjs.map +1 -0
- package/dist/index.d.cts +129 -0
- package/dist/index.d.ts +129 -0
- package/dist/index.js +1295 -0
- package/dist/index.js.map +1 -0
- package/dist/token-HEEJ7XHP.js +63 -0
- package/dist/token-HEEJ7XHP.js.map +1 -0
- package/dist/token-util-MKHXD2JQ.js +6 -0
- package/dist/token-util-MKHXD2JQ.js.map +1 -0
- package/package.json +96 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../src/index.ts","../src/core/pdf-parser.ts","../src/config/constants.ts","../../shared/src/utils/batch-processor.ts","../../shared/src/utils/spawn-utils.ts","../../shared/src/utils/llm-caller.ts","../../shared/src/utils/llm-token-usage-aggregator.ts","../src/environment/docling-environment.ts","../src/utils/python-version.ts","../src/core/pdf-converter.ts","../src/errors/image-pdf-fallback-error.ts","../src/processors/image-extractor.ts","../src/utils/jq.ts","../src/utils/local-file-server.ts","../src/core/image-pdf-converter.ts"],"sourcesContent":["export { PDFParser } from './core/pdf-parser';\nexport type { ConversionCompleteCallback } from './core/pdf-converter';\nexport { ImagePdfFallbackError } from './errors/image-pdf-fallback-error';\n","import type { LoggerMethods } from '@heripo/logger';\nimport type { ConversionOptions, DoclingAPIClient } from 'docling-sdk';\n\nimport { Docling } from 'docling-sdk';\nimport { execSync } from 'node:child_process';\nimport { platform } from 'node:os';\nimport { join } from 'node:path';\n\nimport { PDF_PARSER } from '../config/constants';\nimport { DoclingEnvironment } from '../environment/docling-environment';\nimport { type ConversionCompleteCallback, PDFConverter } from './pdf-converter';\n\ntype Options = {\n logger: LoggerMethods;\n timeout?: number;\n venvPath?: string;\n killExistingProcess?: boolean;\n /**\n * Enable fallback to image-based PDF when conversion fails.\n * Only works with local server mode (port option).\n * Requires ImageMagick and Ghostscript to be installed.\n */\n enableImagePdfFallback?: boolean;\n} & ({ port?: number } | { baseUrl: string });\n\n/**\n * PDFParser - A PDF document parser using docling-serve\n *\n * ## System Requirements\n * Before using PDFParser, ensure your system meets these requirements:\n *\n * ### Operating System\n * - macOS 10.15 (Catalina) or later\n *\n * ### Required Software\n * - `python3` (version 3.9 - 3.12)\n * - Python 3.13+ is NOT 
compatible with docling-serve\n * - Recommended: Python 3.11 or 3.12\n * - Install specific version: `pyenv install 3.12.0 && pyenv global 3.12.0`\n * - `jq` - JSON processor\n * - Install: `brew install jq`\n * - `lsof` - List open files (usually pre-installed on macOS)\n *\n * ## Initialization Process\n * When `init()` is called, the following setup occurs automatically:\n *\n * ### If using external server (baseUrl provided):\n * 1. Connects to the provided baseUrl\n * 2. Waits for server health check (up to 120 seconds)\n *\n * ### If using local server (default):\n * 1. **Python Environment Setup**\n * - Verifies Python version compatibility (3.9-3.12)\n * - Creates Python virtual environment at `venvPath` (default: `.venv`)\n * - Verifies virtual environment Python version\n *\n * 2. **Dependency Installation**\n * - Upgrades pip to latest version\n * - Installs setuptools and wheel\n * - Installs pyarrow (binary-only to avoid compilation)\n * - Installs docling-serve package\n *\n * 3. 
**Server Management**\n * - Checks if specified port is already in use\n * - If `killExistingProcess` is true, kills any process using the port\n * - If port is in use and `killExistingProcess` is false, reuses existing server\n * - Otherwise, starts new docling-serve instance on specified port\n * - Waits for server to become ready (health check, up to 120 seconds)\n *\n * ## Notes\n * - First initialization may take several minutes due to Python package downloads\n * - Subsequent initializations are faster if virtual environment already exists\n * - The virtual environment and packages are reused across sessions\n * - Server process runs in background and needs to be managed separately if needed\n */\nexport class PDFParser {\n private readonly logger: LoggerMethods;\n private readonly port?: number;\n private readonly baseUrl?: string;\n private readonly timeout: number;\n private readonly venvPath: string;\n private readonly killExistingProcess: boolean;\n private readonly enableImagePdfFallback: boolean;\n private client: DoclingAPIClient | null = null;\n\n constructor(options: Options) {\n const {\n logger,\n timeout = PDF_PARSER.DEFAULT_TIMEOUT_MS,\n venvPath,\n killExistingProcess = false,\n enableImagePdfFallback = false,\n } = options;\n\n this.logger = logger;\n\n if ('baseUrl' in options) {\n this.baseUrl = options.baseUrl;\n this.port = undefined;\n } else {\n this.port = options.port;\n this.baseUrl = undefined;\n }\n\n this.timeout = timeout;\n this.venvPath = venvPath || join(process.cwd(), '.venv');\n this.killExistingProcess = killExistingProcess;\n this.enableImagePdfFallback = enableImagePdfFallback;\n }\n\n async init(): Promise<void> {\n this.logger.info('[PDFParser] Initializing...');\n\n this.checkOperatingSystem();\n this.checkJqInstalled();\n this.checkMacOSVersion();\n\n // Check ImageMagick/Ghostscript only for local server mode with fallback enabled\n if (this.enableImagePdfFallback && !this.baseUrl) {\n 
this.checkImageMagickInstalled();\n this.checkGhostscriptInstalled();\n } else if (this.enableImagePdfFallback && this.baseUrl) {\n this.logger.warn(\n '[PDFParser] enableImagePdfFallback is ignored when using external server (baseUrl)',\n );\n }\n\n if (this.baseUrl) {\n this.logger.info('[PDFParser] Using external server:', this.baseUrl);\n this.client = new Docling({\n api: { baseUrl: this.baseUrl, timeout: this.timeout },\n });\n await this.waitForServerReady();\n return;\n }\n\n this.logger.info('[PDFParser] Setting up local server...');\n try {\n const environment = new DoclingEnvironment({\n logger: this.logger,\n venvPath: this.venvPath,\n port: this.port as number,\n killExistingProcess: this.killExistingProcess,\n });\n\n await environment.setup();\n\n const clientUrl = `http://localhost:${this.port}`;\n this.client = new Docling({\n api: {\n baseUrl: clientUrl,\n timeout: this.timeout,\n },\n });\n\n await this.waitForServerReady();\n this.logger.info('[PDFParser] Ready');\n } catch (error) {\n this.logger.error('[PDFParser] Initialization failed:', error);\n throw new Error(`Failed to initialize PDFParser: ${error}`);\n }\n }\n\n private checkOperatingSystem(): void {\n if (platform() !== 'darwin') {\n throw new Error(\n 'PDFParser is only supported on macOS. Current platform: ' + platform(),\n );\n }\n }\n\n private checkJqInstalled(): void {\n try {\n execSync('which jq', { stdio: 'ignore' });\n } catch {\n throw new Error(\n 'jq is not installed. Please install jq using: brew install jq',\n );\n }\n }\n\n private checkMacOSVersion(): void {\n try {\n const versionOutput = execSync('sw_vers -productVersion', {\n encoding: 'utf-8',\n }).trim();\n const versionMatch = versionOutput.match(/^(\\d+)\\.(\\d+)/);\n if (versionMatch) {\n const major = parseInt(versionMatch[1]);\n const minor = parseInt(versionMatch[2]);\n if (major < 10 || (major === 10 && minor < 15)) {\n throw new Error(\n `macOS 10.15 or later is required. 
Current version: ${versionOutput}`,\n );\n }\n }\n } catch (error) {\n if (error instanceof Error && error.message.includes('macOS 10.15')) {\n throw error;\n }\n throw new Error('Failed to check macOS version');\n }\n }\n\n private checkImageMagickInstalled(): void {\n try {\n execSync('which magick', { stdio: 'ignore' });\n } catch {\n throw new Error(\n 'ImageMagick is not installed but enableImagePdfFallback is enabled. ' +\n 'Please install ImageMagick using: brew install imagemagick',\n );\n }\n }\n\n private checkGhostscriptInstalled(): void {\n try {\n execSync('which gs', { stdio: 'ignore' });\n } catch {\n throw new Error(\n 'Ghostscript is not installed but enableImagePdfFallback is enabled. ' +\n 'Please install Ghostscript using: brew install ghostscript',\n );\n }\n }\n\n /**\n * Check if an error is a connection refused error (ECONNREFUSED).\n * This typically indicates the Docling server has crashed.\n */\n private isConnectionRefusedError(error: unknown): boolean {\n if (error instanceof Error) {\n const errorStr = JSON.stringify(error);\n return errorStr.includes('ECONNREFUSED');\n }\n return false;\n }\n\n /**\n * Restart the Docling server after it has crashed.\n * This kills any existing process on the port, starts a new server,\n * and waits for it to become ready.\n *\n * Note: This method is only called when canRecover is true,\n * which guarantees this.port is defined.\n */\n private async restartServer(): Promise<void> {\n this.logger.info('[PDFParser] Restarting server...');\n\n // Kill existing process on port\n // Note: this.port is guaranteed to be defined by the caller (canRecover check)\n await DoclingEnvironment.killProcessOnPort(this.logger, this.port!);\n\n // Start new server\n const environment = new DoclingEnvironment({\n logger: this.logger,\n venvPath: this.venvPath,\n port: this.port!,\n killExistingProcess: false, // Already killed above\n });\n\n await environment.startServer();\n\n // Recreate client\n 
this.client?.destroy();\n this.client = new Docling({\n api: {\n baseUrl: `http://localhost:${this.port}`,\n timeout: this.timeout,\n },\n });\n\n await this.waitForServerReady();\n this.logger.info('[PDFParser] Server restarted successfully');\n }\n\n private async waitForServerReady(): Promise<void> {\n const maxAttempts = PDF_PARSER.MAX_HEALTH_CHECK_ATTEMPTS;\n const checkInterval = PDF_PARSER.HEALTH_CHECK_INTERVAL_MS;\n const logInterval = PDF_PARSER.HEALTH_CHECK_LOG_INTERVAL_MS;\n let lastLogTime = 0;\n\n for (let attempt = 1; attempt <= maxAttempts; attempt++) {\n try {\n await this.client!.health();\n this.logger.info('[PDFParser] Server is ready');\n return;\n } catch {\n const now = Date.now();\n if (now - lastLogTime >= logInterval) {\n this.logger.info(\n '[PDFParser] Waiting for server... (attempt',\n attempt,\n '/',\n maxAttempts,\n ')',\n );\n lastLogTime = now;\n }\n\n if (attempt < maxAttempts) {\n await new Promise((resolve) => setTimeout(resolve, checkInterval));\n }\n }\n }\n\n throw new Error('Server failed to become ready after maximum attempts');\n }\n\n public async parse(\n url: string,\n reportId: string,\n onComplete: ConversionCompleteCallback,\n cleanupAfterCallback: boolean,\n options: Omit<\n ConversionOptions,\n | 'to_formats'\n | 'image_export_mode'\n | 'ocr_engine'\n | 'accelerator_options'\n | 'ocr_options'\n | 'generate_picture_images'\n | 'images_scale'\n | 'force_ocr'\n > & {\n num_threads?: number;\n },\n abortSignal?: AbortSignal,\n ) {\n if (!this.client) {\n throw new Error(\n 'PDFParser is not initialized. 
Call init() before using parse()',\n );\n }\n\n // Enable recovery only for local server mode\n const canRecover = !this.baseUrl && this.port !== undefined;\n const maxAttempts = PDF_PARSER.MAX_SERVER_RECOVERY_ATTEMPTS;\n let attempt = 0;\n\n while (attempt <= maxAttempts) {\n try {\n // Enable fallback only for local server mode\n const effectiveFallbackEnabled =\n this.enableImagePdfFallback && !this.baseUrl;\n const converter = new PDFConverter(\n this.logger,\n this.client,\n effectiveFallbackEnabled,\n );\n return await converter.convert(\n url,\n reportId,\n onComplete,\n cleanupAfterCallback,\n options,\n abortSignal,\n );\n } catch (error) {\n // If aborted, don't retry - re-throw immediately\n if (abortSignal?.aborted) {\n throw error;\n }\n\n // Attempt server recovery on ECONNREFUSED (server crashed)\n if (\n canRecover &&\n this.isConnectionRefusedError(error) &&\n attempt < maxAttempts\n ) {\n this.logger.warn(\n '[PDFParser] Connection refused, attempting server recovery...',\n );\n await this.restartServer();\n attempt++;\n continue;\n }\n throw error;\n }\n }\n }\n\n /**\n * Dispose the parser instance.\n * - Sets the internal client to null\n * - If a local docling server was started (no baseUrl), kills the process on the configured port\n */\n public async dispose(): Promise<void> {\n this.logger.info('[PDFParser] Disposing...');\n\n try {\n // Only manage local server lifecycle when we started it (i.e., no external baseUrl)\n if (!this.baseUrl && this.port) {\n await DoclingEnvironment.killProcessOnPort(this.logger, this.port);\n }\n } catch (error) {\n this.logger.error('[PDFParser] Error while disposing:', error);\n } finally {\n // Always clear the client reference\n this.client?.destroy();\n this.client = null;\n this.logger.info('[PDFParser] Disposed');\n }\n }\n}\n","/**\n * Configuration constants for PDFParser\n */\nexport const PDF_PARSER = {\n /**\n * Default timeout for API calls in milliseconds\n */\n DEFAULT_TIMEOUT_MS: 100000,\n\n 
/**\n * Maximum number of health check attempts before giving up\n */\n MAX_HEALTH_CHECK_ATTEMPTS: 60,\n\n /**\n * Interval between health check attempts in milliseconds\n */\n HEALTH_CHECK_INTERVAL_MS: 2000,\n\n /**\n * Interval between log messages during health check in milliseconds\n */\n HEALTH_CHECK_LOG_INTERVAL_MS: 5000,\n\n /**\n * Maximum retry attempts for server recovery on ECONNREFUSED\n */\n MAX_SERVER_RECOVERY_ATTEMPTS: 1,\n} as const;\n\n/**\n * Configuration constants for PDFConverter\n */\nexport const PDF_CONVERTER = {\n /**\n * Interval for progress polling in milliseconds\n */\n POLL_INTERVAL_MS: 1000,\n} as const;\n\n/**\n * Configuration constants for DoclingEnvironment\n */\nexport const DOCLING_ENVIRONMENT = {\n /**\n * Delay after starting docling-serve to allow startup\n */\n STARTUP_DELAY_MS: 2000,\n} as const;\n\n/**\n * Configuration constants for ImagePdfConverter\n */\nexport const IMAGE_PDF_CONVERTER = {\n /**\n * ImageMagick density option (DPI) for PDF to image conversion\n */\n DENSITY: 300,\n\n /**\n * ImageMagick quality option (1-100)\n */\n QUALITY: 100,\n} as const;\n","/**\n * BatchProcessor - Batch processing utility\n *\n * Provides functionality to split large arrays into batches for parallel processing.\n */\nexport class BatchProcessor {\n /**\n * Splits an array into batches of specified size.\n *\n * @param items - Array to split\n * @param batchSize - Size of each batch\n * @returns Array of batches\n *\n * @example\n * ```typescript\n * const items = [1, 2, 3, 4, 5];\n * const batches = BatchProcessor.createBatches(items, 2);\n * // [[1, 2], [3, 4], [5]]\n * ```\n */\n static createBatches<T>(items: T[], batchSize: number): T[][] {\n const batches: T[][] = [];\n for (let i = 0; i < items.length; i += batchSize) {\n batches.push(items.slice(i, i + batchSize));\n }\n return batches;\n }\n\n /**\n * Splits an array into batches and executes async function in parallel.\n *\n * @param items - Array to process\n * @param 
batchSize - Size of each batch\n * @param processFn - Async function to process each batch\n * @returns Flattened array of processed results\n *\n * @example\n * ```typescript\n * const texts = ['a', 'b', 'c', 'd', 'e'];\n * const results = await BatchProcessor.processBatch(\n * texts,\n * 2,\n * async (batch) => {\n * return batch.map(t => t.toUpperCase());\n * }\n * );\n * // ['A', 'B', 'C', 'D', 'E']\n * ```\n */\n static async processBatch<T, R>(\n items: T[],\n batchSize: number,\n processFn: (batch: T[]) => Promise<R[]>,\n ): Promise<R[]> {\n const batches = this.createBatches(items, batchSize);\n const results = await Promise.all(batches.map((batch) => processFn(batch)));\n return results.flat();\n }\n\n /**\n * Splits an array into batches and executes sync function in parallel.\n *\n * @param items - Array to process\n * @param batchSize - Size of each batch\n * @param processFn - Sync function to process each batch\n * @returns Flattened array of processed results\n *\n * @example\n * ```typescript\n * const numbers = [1, 2, 3, 4, 5];\n * const results = BatchProcessor.processBatchSync(\n * numbers,\n * 2,\n * (batch) => batch.map(n => n * 2)\n * );\n * // [2, 4, 6, 8, 10]\n * ```\n */\n static processBatchSync<T, R>(\n items: T[],\n batchSize: number,\n processFn: (batch: T[]) => R[],\n ): R[] {\n const batches = this.createBatches(items, batchSize);\n const results = batches.map((batch) => processFn(batch));\n return results.flat();\n }\n}\n","import type { SpawnOptions } from 'node:child_process';\n\nimport { spawn } from 'node:child_process';\n\n/**\n * Result of a spawn operation\n */\nexport interface SpawnResult {\n stdout: string;\n stderr: string;\n code: number;\n}\n\n/**\n * Extended spawn options with output capture control\n */\nexport interface SpawnAsyncOptions extends SpawnOptions {\n /**\n * Whether to capture stdout (default: true)\n */\n captureStdout?: boolean;\n\n /**\n * Whether to capture stderr (default: true)\n */\n 
captureStderr?: boolean;\n}\n\n/**\n * Execute a command asynchronously and return the result\n *\n * Eliminates the repetitive Promise wrapper pattern used throughout\n * DoclingEnvironment for spawn operations.\n *\n * @param command - The command to execute\n * @param args - Arguments to pass to the command\n * @param options - Spawn options with optional output capture control\n * @returns Promise resolving to stdout, stderr, and exit code\n *\n * @example\n * ```typescript\n * // Simple usage\n * const result = await spawnAsync('python3', ['--version']);\n * console.log(result.stdout); // \"Python 3.12.0\"\n *\n * // With options\n * const result = await spawnAsync('pip', ['install', 'package'], {\n * cwd: '/path/to/venv',\n * captureStderr: true,\n * });\n * ```\n */\nexport function spawnAsync(\n command: string,\n args: string[],\n options: SpawnAsyncOptions = {},\n): Promise<SpawnResult> {\n const {\n captureStdout = true,\n captureStderr = true,\n ...spawnOptions\n } = options;\n\n return new Promise((resolve, reject) => {\n const proc = spawn(command, args, spawnOptions);\n\n let stdout = '';\n let stderr = '';\n\n if (captureStdout && proc.stdout) {\n proc.stdout.on('data', (data) => {\n stdout += data.toString();\n });\n }\n\n if (captureStderr && proc.stderr) {\n proc.stderr.on('data', (data) => {\n stderr += data.toString();\n });\n }\n\n proc.on('close', (code) => {\n resolve({ stdout, stderr, code: code ?? 
0 });\n });\n\n proc.on('error', reject);\n });\n}\n","import type { z } from 'zod';\n\nimport { type LanguageModel, Output, generateText } from 'ai';\n\n/**\n * Configuration for LLM API call with retry and fallback support\n */\nexport interface LLMCallConfig<TSchema extends z.ZodType> {\n /**\n * Zod schema for response validation\n */\n schema: TSchema;\n\n /**\n * System prompt for LLM\n */\n systemPrompt: string;\n\n /**\n * User prompt for LLM\n */\n userPrompt: string;\n\n /**\n * Primary model for the call (required)\n */\n primaryModel: LanguageModel;\n\n /**\n * Fallback model for retry after primary model exhausts maxRetries (optional)\n */\n fallbackModel?: LanguageModel;\n\n /**\n * Maximum retry count per model (default: 3)\n */\n maxRetries: number;\n\n /**\n * Temperature for generation (optional, 0-1)\n */\n temperature?: number;\n\n /**\n * Abort signal for cancellation support\n */\n abortSignal?: AbortSignal;\n\n /**\n * Component name for tracking (e.g., 'TocExtractor', 'PageRangeParser')\n */\n component: string;\n\n /**\n * Phase name for tracking (e.g., 'extraction', 'validation', 'sampling')\n */\n phase: string;\n}\n\n/**\n * Configuration for LLM vision call with message format\n */\nexport interface LLMVisionCallConfig<TSchema extends z.ZodType> {\n /**\n * Zod schema for response validation\n */\n schema: TSchema;\n\n /**\n * Messages array for vision LLM (instead of systemPrompt/userPrompt)\n */\n messages: Array<{ role: 'user' | 'assistant'; content: any[] | string }>;\n\n /**\n * Primary model for the call (required)\n */\n primaryModel: LanguageModel;\n\n /**\n * Fallback model for retry after primary model exhausts maxRetries (optional)\n */\n fallbackModel?: LanguageModel;\n\n /**\n * Maximum retry count per model (default: 3)\n */\n maxRetries: number;\n\n /**\n * Temperature for generation (optional, 0-1)\n */\n temperature?: number;\n\n /**\n * Abort signal for cancellation support\n */\n abortSignal?: AbortSignal;\n\n /**\n * 
Component name for tracking (e.g., 'TocExtractor', 'PageRangeParser')\n */\n component: string;\n\n /**\n * Phase name for tracking (e.g., 'extraction', 'validation', 'sampling')\n */\n phase: string;\n}\n\n/**\n * Token usage information with model tracking\n */\nexport interface ExtendedTokenUsage {\n component: string;\n phase: string;\n model: 'primary' | 'fallback';\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n}\n\n/**\n * Result of LLM call including usage information\n */\nexport interface LLMCallResult<T> {\n output: T;\n usage: ExtendedTokenUsage;\n usedFallback: boolean;\n}\n\n/**\n * Base execution configuration for LLM calls\n */\ninterface ExecutionConfig {\n primaryModel: LanguageModel;\n fallbackModel?: LanguageModel;\n abortSignal?: AbortSignal;\n component: string;\n phase: string;\n}\n\n/**\n * LLMCaller - Centralized LLM API caller with retry and fallback support\n *\n * Wraps AI SDK's generateText with enhanced retry strategy:\n * 1. Try primary model with maxRetries\n * 2. If all attempts fail and fallbackModel provided, try fallback with maxRetries\n * 3. 
Return usage data with model type indicator\n *\n * @example\n * ```typescript\n * const result = await LLMCaller.call({\n * schema: MyZodSchema,\n * systemPrompt: 'You are a helpful assistant',\n * userPrompt: 'Extract the TOC from this markdown',\n * primaryModel: openai('gpt-5'),\n * fallbackModel: anthropic('claude-opus-4-5'),\n * maxRetries: 3,\n * component: 'TocExtractor',\n * phase: 'extraction',\n * });\n *\n * console.log(result.output); // Parsed result\n * console.log(result.usage); // Token usage with model info\n * console.log(result.usedFallback); // Whether fallback was used\n * ```\n */\nexport class LLMCaller {\n /**\n * Extract model name from LanguageModel object\n *\n * Attempts to get model ID from various possible fields in the LanguageModel object.\n */\n private static extractModelName(model: LanguageModel): string {\n const modelObj = model as Record<string, unknown>;\n\n // Try common field names\n if (typeof modelObj.modelId === 'string') return modelObj.modelId;\n if (typeof modelObj.id === 'string') return modelObj.id;\n if (typeof modelObj.model === 'string') return modelObj.model;\n if (typeof modelObj.name === 'string') return modelObj.name;\n\n // Fallback: return object representation\n return String(model);\n }\n\n /**\n * Build usage information from response\n */\n private static buildUsage(\n config: ExecutionConfig,\n modelName: string,\n response: {\n usage?: {\n inputTokens?: number;\n outputTokens?: number;\n totalTokens?: number;\n };\n },\n usedFallback: boolean,\n ): ExtendedTokenUsage {\n return {\n component: config.component,\n phase: config.phase,\n model: usedFallback ? 'fallback' : 'primary',\n modelName,\n inputTokens: response.usage?.inputTokens ?? 0,\n outputTokens: response.usage?.outputTokens ?? 0,\n totalTokens: response.usage?.totalTokens ?? 
0,\n };\n }\n\n /**\n * Execute LLM call with fallback support\n *\n * Common execution logic for both text and vision calls.\n */\n private static async executeWithFallback<TOutput>(\n config: ExecutionConfig,\n generateFn: (model: LanguageModel) => Promise<{\n output: TOutput;\n usage?: {\n inputTokens?: number;\n outputTokens?: number;\n totalTokens?: number;\n };\n }>,\n ): Promise<LLMCallResult<TOutput>> {\n const primaryModelName = this.extractModelName(config.primaryModel);\n\n // Attempt 1: Try primary model\n try {\n const response = await generateFn(config.primaryModel);\n\n return {\n output: response.output,\n usage: this.buildUsage(config, primaryModelName, response, false),\n usedFallback: false,\n };\n } catch (primaryError) {\n // If aborted, don't try fallback - re-throw immediately\n if (config.abortSignal?.aborted) {\n throw primaryError;\n }\n\n // If no fallback model, throw immediately\n if (!config.fallbackModel) {\n throw primaryError;\n }\n\n // Attempt 2: Try fallback model\n const fallbackModelName = this.extractModelName(config.fallbackModel);\n const response = await generateFn(config.fallbackModel);\n\n return {\n output: response.output,\n usage: this.buildUsage(config, fallbackModelName, response, true),\n usedFallback: true,\n };\n }\n }\n\n /**\n * Call LLM with retry and fallback support\n *\n * Retry Strategy:\n * 1. Try primary model up to maxRetries times\n * 2. If all fail and fallbackModel provided, try fallback up to maxRetries times\n * 3. 
Throw error if all attempts exhausted\n *\n * @template TOutput - Output type from schema validation\n * @param config - LLM call configuration\n * @returns Result with parsed object and usage information\n * @throws Error if all retry attempts fail\n */\n static async call<TOutput = unknown>(\n config: LLMCallConfig<z.ZodType<TOutput>>,\n ): Promise<LLMCallResult<TOutput>> {\n return this.executeWithFallback(config, (model) =>\n generateText({\n model,\n output: Output.object({\n schema: config.schema,\n }),\n system: config.systemPrompt,\n prompt: config.userPrompt,\n temperature: config.temperature,\n maxRetries: config.maxRetries,\n abortSignal: config.abortSignal,\n }),\n );\n }\n\n /**\n * Call LLM for vision tasks with message format support\n *\n * Same retry and fallback logic as call(), but using message format instead of system/user prompts.\n *\n * @template TOutput - Output type from schema validation\n * @param config - LLM vision call configuration\n * @returns Result with parsed object and usage information\n * @throws Error if all retry attempts fail\n */\n static async callVision<TOutput = unknown>(\n config: LLMVisionCallConfig<z.ZodType<TOutput>>,\n ): Promise<LLMCallResult<TOutput>> {\n return this.executeWithFallback(config, (model) =>\n generateText({\n model,\n output: Output.object({\n schema: config.schema,\n }),\n messages: config.messages,\n temperature: config.temperature,\n maxRetries: config.maxRetries,\n abortSignal: config.abortSignal,\n }),\n );\n }\n}\n","import type { LoggerMethods } from '@heripo/logger';\n\nimport type { ExtendedTokenUsage } from './llm-caller';\n\n/**\n * Token usage totals\n */\nexport interface TokenUsage {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n}\n\n/**\n * Format token usage as a human-readable string\n *\n * @param usage - Token usage object with input, output, and total counts\n * @returns Formatted string like \"1500 input, 300 output, 1800 total\"\n */\nfunction 
formatTokens(usage: TokenUsage): string {\n return `${usage.inputTokens} input, ${usage.outputTokens} output, ${usage.totalTokens} total`;\n}\n\n/**\n * Aggregated token usage for a specific component\n */\ninterface ComponentAggregate {\n component: string;\n phases: Record<\n string,\n {\n primary?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n fallback?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n }\n >;\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n}\n\n/**\n * LLMTokenUsageAggregator - Aggregates token usage across all LLM calls\n *\n * Unlike LLMTokenUsageTracker which logs immediately after each component,\n * this aggregator collects usage data from all components and logs a comprehensive\n * summary at the end of document processing.\n *\n * Tracks usage by:\n * - Component (TocExtractor, PageRangeParser, etc.)\n * - Phase (extraction, validation, sampling, etc.)\n * - Model (primary vs fallback)\n *\n * @example\n * ```typescript\n * const aggregator = new LLMTokenUsageAggregator();\n *\n * // Track usage from each LLM call\n * aggregator.track({\n * component: 'TocExtractor',\n * phase: 'extraction',\n * model: 'primary',\n * modelName: 'gpt-5',\n * inputTokens: 1500,\n * outputTokens: 300,\n * totalTokens: 1800,\n * });\n *\n * aggregator.track({\n * component: 'PageRangeParser',\n * phase: 'sampling',\n * model: 'fallback',\n * modelName: 'claude-opus-4-5',\n * inputTokens: 2000,\n * outputTokens: 100,\n * totalTokens: 2100,\n * });\n *\n * // Log comprehensive summary\n * aggregator.logSummary(logger);\n * // Outputs:\n * // [DocumentProcessor] Token usage summary:\n * // TocExtractor:\n * // - extraction (primary: gpt-5): 1500 input, 300 output, 1800 total\n * // TocExtractor total: 1500 input, 300 output, 1800 
total\n * // PageRangeParser:\n * // - sampling (fallback: claude-opus-4-5): 2000 input, 100 output, 2100 total\n * // PageRangeParser total: 2000 input, 100 output, 2100 total\n * // Grand total: 3500 input, 400 output, 3900 total\n * ```\n */\nexport class LLMTokenUsageAggregator {\n private usage: Record<string, ComponentAggregate> = {};\n\n /**\n * Track token usage from an LLM call\n *\n * @param usage - Extended token usage with component/phase/model information\n */\n track(usage: ExtendedTokenUsage): void {\n // Initialize component if not seen before\n if (!this.usage[usage.component]) {\n this.usage[usage.component] = {\n component: usage.component,\n phases: {},\n total: {\n inputTokens: 0,\n outputTokens: 0,\n totalTokens: 0,\n },\n };\n }\n\n const component = this.usage[usage.component];\n\n // Initialize phase if not seen before\n if (!component.phases[usage.phase]) {\n component.phases[usage.phase] = {\n total: {\n inputTokens: 0,\n outputTokens: 0,\n totalTokens: 0,\n },\n };\n }\n\n const phase = component.phases[usage.phase];\n\n // Track by model type\n if (usage.model === 'primary') {\n if (!phase.primary) {\n phase.primary = {\n modelName: usage.modelName,\n inputTokens: 0,\n outputTokens: 0,\n totalTokens: 0,\n };\n }\n\n phase.primary.inputTokens += usage.inputTokens;\n phase.primary.outputTokens += usage.outputTokens;\n phase.primary.totalTokens += usage.totalTokens;\n } else if (usage.model === 'fallback') {\n if (!phase.fallback) {\n phase.fallback = {\n modelName: usage.modelName,\n inputTokens: 0,\n outputTokens: 0,\n totalTokens: 0,\n };\n }\n\n phase.fallback.inputTokens += usage.inputTokens;\n phase.fallback.outputTokens += usage.outputTokens;\n phase.fallback.totalTokens += usage.totalTokens;\n }\n\n // Update phase total\n phase.total.inputTokens += usage.inputTokens;\n phase.total.outputTokens += usage.outputTokens;\n phase.total.totalTokens += usage.totalTokens;\n\n // Update component total\n component.total.inputTokens += 
usage.inputTokens;\n component.total.outputTokens += usage.outputTokens;\n component.total.totalTokens += usage.totalTokens;\n }\n\n /**\n * Get aggregated usage grouped by component\n *\n * @returns Array of component aggregates with phase breakdown\n */\n getByComponent(): ComponentAggregate[] {\n return Object.values(this.usage);\n }\n\n /**\n * Get token usage report in structured JSON format\n *\n * Converts internal usage data to external TokenUsageReport format suitable\n * for serialization and reporting. The report includes component breakdown,\n * phase-level details, and both primary and fallback model usage.\n *\n * @returns Structured token usage report with components and total\n */\n getReport(): {\n components: Array<{\n component: string;\n phases: Array<{\n phase: string;\n primary?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n fallback?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n }>;\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n }>;\n total: TokenUsage;\n } {\n const components: Array<{\n component: string;\n phases: Array<{\n phase: string;\n primary?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n fallback?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n }>;\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n }> = [];\n\n for (const component of Object.values(this.usage)) {\n const phases: Array<{\n phase: string;\n primary?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n fallback?: {\n modelName: string;\n inputTokens: number;\n 
outputTokens: number;\n totalTokens: number;\n };\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n }> = [];\n\n for (const [phaseName, phaseData] of Object.entries(component.phases)) {\n const phaseReport: {\n phase: string;\n primary?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n fallback?: {\n modelName: string;\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n total: {\n inputTokens: number;\n outputTokens: number;\n totalTokens: number;\n };\n } = {\n phase: phaseName,\n total: {\n inputTokens: phaseData.total.inputTokens,\n outputTokens: phaseData.total.outputTokens,\n totalTokens: phaseData.total.totalTokens,\n },\n };\n\n if (phaseData.primary) {\n phaseReport.primary = {\n modelName: phaseData.primary.modelName,\n inputTokens: phaseData.primary.inputTokens,\n outputTokens: phaseData.primary.outputTokens,\n totalTokens: phaseData.primary.totalTokens,\n };\n }\n\n if (phaseData.fallback) {\n phaseReport.fallback = {\n modelName: phaseData.fallback.modelName,\n inputTokens: phaseData.fallback.inputTokens,\n outputTokens: phaseData.fallback.outputTokens,\n totalTokens: phaseData.fallback.totalTokens,\n };\n }\n\n phases.push(phaseReport);\n }\n\n components.push({\n component: component.component,\n phases,\n total: {\n inputTokens: component.total.inputTokens,\n outputTokens: component.total.outputTokens,\n totalTokens: component.total.totalTokens,\n },\n });\n }\n\n const totalUsage = this.getTotalUsage();\n\n return {\n components,\n total: {\n inputTokens: totalUsage.inputTokens,\n outputTokens: totalUsage.outputTokens,\n totalTokens: totalUsage.totalTokens,\n },\n };\n }\n\n /**\n * Get total usage across all components and phases\n *\n * @returns Aggregated token usage totals\n */\n getTotalUsage(): TokenUsage {\n let totalInput = 0;\n let totalOutput = 0;\n let totalTokens = 0;\n\n for (const component of Object.values(this.usage)) {\n 
totalInput += component.total.inputTokens;\n totalOutput += component.total.outputTokens;\n totalTokens += component.total.totalTokens;\n }\n\n return {\n inputTokens: totalInput,\n outputTokens: totalOutput,\n totalTokens: totalTokens,\n };\n }\n\n /**\n * Log comprehensive token usage summary\n *\n * Outputs usage grouped by component, with phase and model breakdown.\n * Shows primary and fallback token usage separately for each phase.\n * Call this once at the end of document processing.\n *\n * @param logger - Logger instance for output\n */\n logSummary(logger: LoggerMethods): void {\n const components = this.getByComponent();\n\n if (components.length === 0) {\n logger.info('[DocumentProcessor] No token usage to report');\n return;\n }\n\n logger.info('[DocumentProcessor] Token usage summary:');\n logger.info('');\n\n let grandInputTokens = 0;\n let grandOutputTokens = 0;\n let grandTotalTokens = 0;\n let grandPrimaryInputTokens = 0;\n let grandPrimaryOutputTokens = 0;\n let grandPrimaryTotalTokens = 0;\n let grandFallbackInputTokens = 0;\n let grandFallbackOutputTokens = 0;\n let grandFallbackTotalTokens = 0;\n\n for (const component of components) {\n logger.info(`${component.component}:`);\n\n for (const [phase, phaseData] of Object.entries(component.phases)) {\n logger.info(` - ${phase}:`);\n\n // Show primary model usage\n if (phaseData.primary) {\n logger.info(\n ` primary (${phaseData.primary.modelName}): ${formatTokens(phaseData.primary)}`,\n );\n grandPrimaryInputTokens += phaseData.primary.inputTokens;\n grandPrimaryOutputTokens += phaseData.primary.outputTokens;\n grandPrimaryTotalTokens += phaseData.primary.totalTokens;\n }\n\n // Show fallback model usage\n if (phaseData.fallback) {\n logger.info(\n ` fallback (${phaseData.fallback.modelName}): ${formatTokens(phaseData.fallback)}`,\n );\n grandFallbackInputTokens += phaseData.fallback.inputTokens;\n grandFallbackOutputTokens += phaseData.fallback.outputTokens;\n grandFallbackTotalTokens += 
phaseData.fallback.totalTokens;\n }\n\n // Show phase subtotal\n logger.info(` subtotal: ${formatTokens(phaseData.total)}`);\n }\n\n logger.info(\n ` ${component.component} total: ${formatTokens(component.total)}`,\n );\n logger.info('');\n\n grandInputTokens += component.total.inputTokens;\n grandOutputTokens += component.total.outputTokens;\n grandTotalTokens += component.total.totalTokens;\n }\n\n // Show grand total with primary/fallback breakdown\n logger.info('--- Summary ---');\n if (grandPrimaryTotalTokens > 0) {\n logger.info(\n `Primary total: ${formatTokens({\n inputTokens: grandPrimaryInputTokens,\n outputTokens: grandPrimaryOutputTokens,\n totalTokens: grandPrimaryTotalTokens,\n })}`,\n );\n }\n if (grandFallbackTotalTokens > 0) {\n logger.info(\n `Fallback total: ${formatTokens({\n inputTokens: grandFallbackInputTokens,\n outputTokens: grandFallbackOutputTokens,\n totalTokens: grandFallbackTotalTokens,\n })}`,\n );\n }\n logger.info(\n `Grand total: ${formatTokens({\n inputTokens: grandInputTokens,\n outputTokens: grandOutputTokens,\n totalTokens: grandTotalTokens,\n })}`,\n );\n }\n\n /**\n * Reset all tracked usage\n *\n * Call this at the start of a new document processing run.\n */\n reset(): void {\n this.usage = {};\n }\n}\n","import type { LoggerMethods } from '@heripo/logger';\n\nimport { spawnAsync } from '@heripo/shared';\nimport { spawn } from 'node:child_process';\nimport { join } from 'node:path';\n\nimport { DOCLING_ENVIRONMENT } from '../config/constants';\nimport {\n PythonVersionError,\n type PythonVersionInfo,\n parsePythonVersion,\n validatePythonVersion,\n} from '../utils/python-version';\n\nexport class DoclingEnvironment {\n private readonly logger: LoggerMethods;\n private readonly venvPath: string;\n private readonly port: number;\n private readonly killExistingProcess: boolean;\n\n constructor(options: {\n logger: LoggerMethods;\n venvPath: string;\n port: number;\n killExistingProcess: boolean;\n }) {\n this.logger = 
options.logger;\n this.venvPath = options.venvPath;\n this.port = options.port;\n this.killExistingProcess = options.killExistingProcess;\n }\n\n async setup(): Promise<void> {\n this.logger.info('[DoclingEnvironment] Setting up Python environment...');\n\n await this.checkPythonVersion();\n await this.setupPythonEnvironment();\n await this.upgradePip();\n await this.installSetuptools();\n await this.installPyArrow();\n await this.installDoclingServe();\n\n // Check if server is already running\n const portInUse = await this.isPortInUse(this.port);\n\n if (portInUse && !this.killExistingProcess) {\n this.logger.info(\n '[DoclingEnvironment] Reusing existing server on port',\n this.port,\n );\n } else {\n await this.startDoclingServe();\n }\n\n this.logger.info('[DoclingEnvironment] Setup completed');\n }\n\n private async checkPythonVersion(): Promise<PythonVersionInfo> {\n const result = await spawnAsync('python3', ['--version']);\n\n if (result.code !== 0) {\n throw new Error('Failed to check Python version');\n }\n\n const output = result.stdout + result.stderr;\n const version = parsePythonVersion(output);\n\n if (!version) {\n throw new Error('Could not parse Python version');\n }\n\n this.logger.info(\n '[DoclingEnvironment] Python version:',\n version.versionString,\n );\n\n try {\n validatePythonVersion(version, 'system');\n } catch (error) {\n if (error instanceof PythonVersionError && version.minor >= 13) {\n this.logger.error(\n '[DoclingEnvironment] Python 3.13+ is not compatible. 
Install 3.11 or 3.12 with: pyenv install 3.12.0 && pyenv global 3.12.0',\n );\n }\n throw error;\n }\n\n return version;\n }\n\n private async setupPythonEnvironment(): Promise<void> {\n const result = await spawnAsync('python3', ['-m', 'venv', this.venvPath]);\n\n if (result.code !== 0) {\n throw new Error('Failed to create Python virtual environment');\n }\n\n await this.verifyVenvPythonVersion();\n }\n\n private async verifyVenvPythonVersion(): Promise<void> {\n const pythonPath = join(this.venvPath, 'bin', 'python');\n const result = await spawnAsync(pythonPath, ['--version']);\n\n if (result.code !== 0) {\n throw new Error('Failed to verify venv Python version');\n }\n\n const output = result.stdout + result.stderr;\n const version = parsePythonVersion(output);\n\n if (!version) {\n throw new Error('Could not parse venv Python version');\n }\n\n validatePythonVersion(version, 'venv');\n }\n\n private async upgradePip(): Promise<void> {\n const pipPath = join(this.venvPath, 'bin', 'pip');\n const result = await spawnAsync(pipPath, ['install', '--upgrade', 'pip']);\n\n if (result.code !== 0) {\n this.logger.error(\n '[DoclingEnvironment] Failed to upgrade pip:',\n result.stderr,\n );\n throw new Error(`Failed to upgrade pip. Exit code: ${result.code}`);\n }\n }\n\n private async installSetuptools(): Promise<void> {\n const pipPath = join(this.venvPath, 'bin', 'pip');\n const result = await spawnAsync(pipPath, [\n 'install',\n '--upgrade',\n 'setuptools',\n 'wheel',\n ]);\n\n if (result.code !== 0) {\n this.logger.error(\n '[DoclingEnvironment] Failed to install setuptools:',\n result.stderr,\n );\n throw new Error(\n `Failed to install setuptools. 
Exit code: ${result.code}`,\n );\n }\n }\n\n private async installPyArrow(): Promise<void> {\n const pipPath = join(this.venvPath, 'bin', 'pip');\n const result = await spawnAsync(pipPath, [\n 'install',\n '--only-binary',\n ':all:',\n 'pyarrow',\n ]);\n\n if (result.code !== 0) {\n this.logger.error(\n '[DoclingEnvironment] Failed to install pyarrow:',\n result.stderr,\n );\n throw new Error(`Failed to install pyarrow. Exit code: ${result.code}`);\n }\n }\n\n private async installDoclingServe(): Promise<void> {\n const pipPath = join(this.venvPath, 'bin', 'pip');\n const result = await spawnAsync(pipPath, ['install', 'docling-serve']);\n\n if (result.code !== 0) {\n this.logger.error(\n '[DoclingEnvironment] Failed to install docling-serve:',\n result.stderr,\n );\n throw new Error(\n `Failed to install docling-serve. Exit code: ${result.code}`,\n );\n }\n }\n\n private async isPortInUse(port: number): Promise<boolean> {\n try {\n const result = await spawnAsync('lsof', ['-ti', `:${port}`]);\n return result.code === 0 && !!result.stdout.trim();\n } catch {\n return false;\n }\n }\n\n /**\n * Start the docling-serve server without running full setup.\n * Useful for restarting the server after it has crashed.\n */\n public async startServer(): Promise<void> {\n await this.startDoclingServe();\n }\n\n // Process-killing logic is provided as a static method to allow reuse without instantiation\n public static async killProcessOnPort(\n logger: LoggerMethods,\n port: number,\n ): Promise<void> {\n return new Promise((resolve) => {\n const lsof = spawn('lsof', ['-ti', `:${port}`]);\n const pids: string[] = [];\n\n lsof.stdout?.on('data', (data) => {\n const txt: string = data.toString();\n pids.push(\n ...txt\n .split(/\\s+/)\n .map((s) => s.trim())\n .filter(Boolean),\n );\n });\n\n lsof.on('close', () => {\n if (pids.length === 0) return resolve();\n\n let remaining = pids.length;\n const done = () => {\n if (--remaining <= 0) resolve();\n };\n\n logger.info(\n 
'[DoclingEnvironment] Killing process',\n pids.join(', '),\n 'on port',\n port,\n );\n for (const pid of pids) {\n const killProc = spawn('kill', ['-9', pid]);\n\n killProc.on('close', (killCode) => {\n if (killCode !== 0) {\n logger.info('[DoclingEnvironment] Failed to kill process', pid);\n }\n done();\n });\n killProc.on('error', (Error) => {\n logger.info('[DoclingEnvironment] Failed to kill process', Error);\n done();\n });\n }\n });\n\n lsof.on('error', () => resolve());\n });\n }\n\n private async startDoclingServe(): Promise<void> {\n return new Promise(async (resolve, reject) => {\n // Kill any existing process on the port if option is enabled\n if (this.killExistingProcess) {\n await DoclingEnvironment.killProcessOnPort(this.logger, this.port);\n }\n\n const venvPath = this.venvPath;\n const doclingServePath = join(venvPath, 'bin', 'docling-serve');\n const args = ['run', '--port', this.port.toString()];\n\n this.logger.info(\n '[DoclingEnvironment] Starting docling-serve on port',\n this.port,\n );\n const doclingProcess = spawn(doclingServePath, args, {\n detached: true, // Detached from parent process\n stdio: 'ignore', // Remove stdio pipes to prevent event loop from hanging\n });\n\n doclingProcess.unref(); // Parent doesn't wait for child process to exit\n\n doclingProcess.on('error', (error) => {\n this.logger.error('[DoclingEnvironment] docling-serve error:', error);\n reject(error);\n });\n\n // Give docling-serve time to start\n setTimeout(() => {\n resolve();\n }, DOCLING_ENVIRONMENT.STARTUP_DELAY_MS);\n });\n }\n}\n","/**\n * Python version information\n */\nexport interface PythonVersionInfo {\n major: number;\n minor: number;\n versionString: string;\n}\n\n/**\n * Regex pattern to extract Python version from command output\n */\nexport const PYTHON_VERSION_REGEX = /Python (\\d+)\\.(\\d+)/;\n\n/**\n * Minimum supported Python version\n */\nexport const MIN_PYTHON_VERSION = { major: 3, minor: 9 };\n\n/**\n * Maximum supported Python version 
(exclusive upper bound)\n * Python 3.13+ is NOT compatible with docling-serve\n */\nexport const MAX_PYTHON_MINOR = 12;\n\n/**\n * Error thrown when Python version is invalid\n */\nexport class PythonVersionError extends Error {\n constructor(message: string) {\n super(message);\n this.name = 'PythonVersionError';\n }\n}\n\n/**\n * Parse Python version from command output\n *\n * @param output - Output from `python3 --version` command\n * @returns Parsed version info or null if parsing fails\n *\n * @example\n * ```typescript\n * const version = parsePythonVersion('Python 3.12.0');\n * // { major: 3, minor: 12, versionString: '3.12' }\n * ```\n */\nexport function parsePythonVersion(output: string): PythonVersionInfo | null {\n const match = output.match(PYTHON_VERSION_REGEX);\n if (!match) return null;\n\n const major = parseInt(match[1]);\n const minor = parseInt(match[2]);\n\n return {\n major,\n minor,\n versionString: `${major}.${minor}`,\n };\n}\n\n/**\n * Validate that Python version is within supported range\n *\n * @param version - Parsed Python version info\n * @param context - Context for error messages ('system' or 'venv')\n * @throws PythonVersionError if version is outside supported range\n *\n * @example\n * ```typescript\n * const version = parsePythonVersion('Python 3.12.0');\n * validatePythonVersion(version, 'system'); // OK\n *\n * const tooNew = parsePythonVersion('Python 3.13.0');\n * validatePythonVersion(tooNew, 'venv'); // throws PythonVersionError\n * ```\n */\nexport function validatePythonVersion(\n version: PythonVersionInfo,\n context: 'system' | 'venv' = 'system',\n): void {\n const { major, minor } = version;\n const prefix = context === 'venv' ? 'Venv Python' : 'Python';\n\n // Check if too new (3.13+)\n if (major === 3 && minor >= 13) {\n throw new PythonVersionError(\n `${prefix} ${major}.${minor} is too new. 
docling-serve requires Python 3.11 or 3.12.`,\n );\n }\n\n // Check if too old (< 3.9)\n if (major !== 3 || minor < MIN_PYTHON_VERSION.minor) {\n throw new PythonVersionError('Python 3.9 or higher is required');\n }\n}\n","import type { LoggerMethods } from '@heripo/logger';\nimport type {\n AsyncConversionTask,\n ConversionOptions,\n DoclingAPIClient,\n} from 'docling-sdk';\n\nimport { omit } from 'es-toolkit';\nimport { createWriteStream, existsSync, rmSync } from 'node:fs';\nimport { join } from 'node:path';\nimport { pipeline } from 'node:stream/promises';\n\nimport { PDF_CONVERTER } from '../config/constants';\nimport { ImagePdfFallbackError } from '../errors/image-pdf-fallback-error';\nimport { ImageExtractor } from '../processors/image-extractor';\nimport { LocalFileServer } from '../utils/local-file-server';\nimport { ImagePdfConverter } from './image-pdf-converter';\n\n/**\n * Callback function invoked after PDF conversion completes\n * @param outputPath Absolute path to the output directory containing result files\n */\nexport type ConversionCompleteCallback = (\n outputPath: string,\n) => Promise<void> | void;\n\nexport class PDFConverter {\n constructor(\n private readonly logger: LoggerMethods,\n private readonly client: DoclingAPIClient,\n private readonly enableImagePdfFallback: boolean = false,\n ) {}\n\n async convert(\n url: string,\n reportId: string,\n onComplete: ConversionCompleteCallback,\n cleanupAfterCallback: boolean,\n options: Omit<\n ConversionOptions,\n | 'to_formats'\n | 'image_export_mode'\n | 'ocr_engine'\n | 'accelerator_options'\n | 'ocr_options'\n | 'generate_picture_images'\n | 'images_scale'\n | 'force_ocr'\n > & {\n num_threads?: number;\n },\n abortSignal?: AbortSignal,\n ) {\n this.logger.info('[PDFConverter] Converting:', url);\n\n let originalError: Error | null = null;\n\n try {\n await this.performConversion(\n url,\n reportId,\n onComplete,\n cleanupAfterCallback,\n options,\n abortSignal,\n );\n return;\n } catch 
(error) {\n // If aborted, don't try fallback - re-throw immediately\n if (abortSignal?.aborted) {\n throw error;\n }\n\n originalError = error as Error;\n this.logger.error('[PDFConverter] Conversion failed:', error);\n\n if (!this.enableImagePdfFallback) {\n throw error;\n }\n }\n\n // Fallback: Convert to image PDF and retry\n this.logger.info('[PDFConverter] Attempting image PDF fallback...');\n const imagePdfConverter = new ImagePdfConverter(this.logger);\n let imagePdfPath: string | null = null;\n\n try {\n imagePdfPath = await imagePdfConverter.convert(url, reportId);\n\n // Use file:// URL for local file\n const localUrl = `file://${imagePdfPath}`;\n this.logger.info('[PDFConverter] Retrying with image PDF:', localUrl);\n\n await this.performConversion(\n localUrl,\n reportId,\n onComplete,\n cleanupAfterCallback,\n options,\n abortSignal,\n );\n\n this.logger.info('[PDFConverter] Fallback conversion succeeded');\n } catch (fallbackError) {\n this.logger.error(\n '[PDFConverter] Fallback conversion also failed:',\n fallbackError,\n );\n throw new ImagePdfFallbackError(originalError!, fallbackError as Error);\n } finally {\n // Cleanup temp image PDF\n if (imagePdfPath) {\n imagePdfConverter.cleanup(imagePdfPath);\n }\n }\n }\n\n private async performConversion(\n url: string,\n reportId: string,\n onComplete: ConversionCompleteCallback,\n cleanupAfterCallback: boolean,\n options: Omit<\n ConversionOptions,\n | 'to_formats'\n | 'image_export_mode'\n | 'ocr_engine'\n | 'accelerator_options'\n | 'ocr_options'\n | 'generate_picture_images'\n | 'images_scale'\n | 'force_ocr'\n > & {\n num_threads?: number;\n },\n abortSignal?: AbortSignal,\n ): Promise<void> {\n const startTime = Date.now();\n const conversionOptions = this.buildConversionOptions(options);\n\n this.logger.info(\n '[PDFConverter] Converting document with Async Source API...',\n );\n this.logger.info('[PDFConverter] Server will download from URL directly');\n this.logger.info(\n '[PDFConverter] 
Results will be returned as ZIP to avoid memory limits',\n );\n\n // Resolve URL (start local server for file:// URLs)\n const { httpUrl, server } = await this.resolveUrl(url);\n\n try {\n const task = await this.startConversionTask(httpUrl, conversionOptions);\n await this.trackTaskProgress(task);\n\n // Check abort after docling task completes\n if (abortSignal?.aborted) {\n this.logger.info(\n '[PDFConverter] Conversion aborted after docling completion',\n );\n const error = new Error('PDF conversion was aborted');\n error.name = 'AbortError';\n throw error;\n }\n\n await this.downloadResult(task.taskId);\n } finally {\n // Stop local file server if started\n if (server) {\n this.logger.info('[PDFConverter] Stopping local file server...');\n await server.stop();\n }\n }\n\n const cwd = process.cwd();\n const zipPath = join(cwd, 'result.zip');\n const extractDir = join(cwd, 'result_extracted');\n const outputDir = join(cwd, 'output', reportId);\n\n try {\n await this.processConvertedFiles(zipPath, extractDir, outputDir);\n\n // Check abort before callback\n if (abortSignal?.aborted) {\n this.logger.info('[PDFConverter] Conversion aborted before callback');\n const error = new Error('PDF conversion was aborted');\n error.name = 'AbortError';\n throw error;\n }\n\n // Execute callback with absolute output path\n this.logger.info('[PDFConverter] Executing completion callback...');\n await onComplete(outputDir);\n\n const duration = Date.now() - startTime;\n this.logger.info('[PDFConverter] Conversion completed successfully!');\n this.logger.info('[PDFConverter] Total time:', duration, 'ms');\n } finally {\n // Clean up temporary files (always cleanup temp files)\n this.logger.info('[PDFConverter] Cleaning up temporary files...');\n if (existsSync(zipPath)) {\n rmSync(zipPath, { force: true });\n }\n if (existsSync(extractDir)) {\n rmSync(extractDir, { recursive: true, force: true });\n }\n\n // Cleanup output directory only if requested\n if (cleanupAfterCallback) 
{\n this.logger.info(\n '[PDFConverter] Cleaning up output directory:',\n outputDir,\n );\n if (existsSync(outputDir)) {\n rmSync(outputDir, { recursive: true, force: true });\n }\n } else {\n this.logger.info('[PDFConverter] Output preserved at:', outputDir);\n }\n }\n }\n\n private buildConversionOptions(\n options: Omit<\n ConversionOptions,\n | 'to_formats'\n | 'image_export_mode'\n | 'ocr_engine'\n | 'accelerator_options'\n | 'ocr_options'\n | 'generate_picture_images'\n | 'images_scale'\n | 'force_ocr'\n > & {\n num_threads?: number;\n },\n ): ConversionOptions {\n return {\n ...omit(options, ['num_threads']),\n to_formats: ['json', 'html'],\n image_export_mode: 'embedded',\n ocr_engine: 'ocrmac',\n generate_picture_images: true,\n images_scale: 2.0,\n /**\n * While disabling this option yields the most accurate text extraction for readable PDFs,\n * text layers overlaid on images or drawings can introduce noise when not merged properly.\n * In practice, archaeological report PDFs almost always contain such overlapping cases.\n * Enabling force_ocr mitigates this risk. 
Although OCR may introduce minor errors compared\n * to direct text extraction, the accuracy remains high since the source is digital, not scanned paper.\n */\n force_ocr: true,\n accelerator_options: {\n device: 'mps',\n num_threads: options.num_threads,\n },\n };\n }\n\n private async startConversionTask(\n url: string,\n conversionOptions: ConversionOptions,\n ): Promise<AsyncConversionTask> {\n const task = await this.client.convertSourceAsync({\n sources: [\n {\n kind: 'http',\n url,\n },\n ],\n options: conversionOptions,\n target: {\n kind: 'zip',\n },\n });\n\n this.logger.info(`[PDFConverter] Task created: ${task.taskId}`);\n this.logger.info('[PDFConverter] Polling for progress...');\n\n return task;\n }\n\n /**\n * Start a local file server for file:// URLs\n *\n * @param url URL to check (file:// or http://)\n * @returns Object with httpUrl and optional server to stop later\n */\n private async resolveUrl(\n url: string,\n ): Promise<{ httpUrl: string; server?: LocalFileServer }> {\n if (url.startsWith('file://')) {\n const filePath = url.slice(7); // Remove 'file://' prefix\n const server = new LocalFileServer();\n const httpUrl = await server.start(filePath);\n\n this.logger.info('[PDFConverter] Started local file server:', httpUrl);\n\n return { httpUrl, server };\n }\n\n return { httpUrl: url };\n }\n\n private async trackTaskProgress(task: AsyncConversionTask): Promise<void> {\n const conversionStartTime = Date.now();\n let lastStatus = '';\n let isCompleted = false;\n\n const pollInterval = setInterval(() => {\n if (isCompleted) return;\n const elapsed = Math.floor((Date.now() - conversionStartTime) / 1000);\n process.stdout.write(\n `\\r[PDFConverter] Status: ${lastStatus || 'processing'} (${elapsed}s elapsed)`,\n );\n }, PDF_CONVERTER.POLL_INTERVAL_MS);\n\n task.on('progress', (status) => {\n lastStatus = status.task_status;\n if (status.task_position !== undefined) {\n process.stdout.write(\n `\\r[PDFConverter] Status: ${status.task_status} 
(position: ${status.task_position})`,\n );\n }\n });\n\n task.on('complete', () => {\n isCompleted = true;\n clearInterval(pollInterval);\n this.logger.info('\\n[PDFConverter] Conversion completed!');\n });\n\n task.on('error', (error) => {\n isCompleted = true;\n clearInterval(pollInterval);\n this.logger.error('\\n[PDFConverter] Conversion error:', error.message);\n });\n\n try {\n await task.waitForCompletion();\n } finally {\n isCompleted = true;\n clearInterval(pollInterval);\n }\n }\n\n private async downloadResult(taskId: string): Promise<void> {\n this.logger.info(\n '\\n[PDFConverter] Task completed, downloading ZIP file...',\n );\n\n const zipResult = await this.client.getTaskResultFile(taskId);\n\n if (!zipResult.success || !zipResult.fileStream) {\n throw new Error('Failed to get ZIP file result');\n }\n\n const zipPath = join(process.cwd(), 'result.zip');\n\n this.logger.info('[PDFConverter] Saving ZIP file to:', zipPath);\n const writeStream = createWriteStream(zipPath);\n await pipeline(zipResult.fileStream, writeStream);\n }\n\n private async processConvertedFiles(\n zipPath: string,\n extractDir: string,\n outputDir: string,\n ): Promise<void> {\n // Extract and save documents with images\n await ImageExtractor.extractAndSaveDocumentsFromZip(\n this.logger,\n zipPath,\n extractDir,\n outputDir,\n );\n }\n}\n","/**\n * Error thrown when both original PDF conversion and image PDF fallback fail.\n * Contains both errors for debugging purposes.\n */\nexport class ImagePdfFallbackError extends Error {\n public readonly name = 'ImagePdfFallbackError';\n\n constructor(\n public readonly originalError: Error,\n public readonly fallbackError: Error,\n ) {\n super(\n `PDF conversion failed with fallback. ` +\n `Original: ${originalError.message}. 
` +\n `Fallback: ${fallbackError.message}`,\n );\n }\n}\n","import type { LoggerMethods } from '@heripo/logger';\nimport type { DoclingDocument } from '@heripo/model';\n\nimport {\n createWriteStream,\n existsSync,\n mkdirSync,\n readFileSync,\n readdirSync,\n rmSync,\n writeFileSync,\n} from 'node:fs';\nimport { extname, join } from 'node:path';\nimport * as yauzl from 'yauzl';\n\nimport {\n jqExtractBase64PngStrings,\n jqReplaceBase64WithPaths,\n} from '../utils/jq';\n\n/**\n * ImageExtractor handles extraction and processing of images from PDF conversion results\n *\n * This class provides functionality to:\n * - Extract ZIP files containing converted PDF documents\n * - Extract base64-encoded images from JSON and HTML content\n * - Save images as separate PNG files\n * - Replace base64 data with relative file paths\n */\nexport class ImageExtractor {\n /**\n * Extract a ZIP file to a target directory\n */\n private static async extractZip(\n zipPath: string,\n targetDir: string,\n ): Promise<void> {\n return new Promise((resolve, reject) => {\n yauzl.open(zipPath, { lazyEntries: true }, (err, zipfile) => {\n if (err || !zipfile) {\n reject(err || new Error('Failed to open zip file'));\n return;\n }\n\n zipfile.readEntry();\n\n zipfile.on('entry', (entry) => {\n const entryPath = join(targetDir, entry.fileName);\n\n if (/\\/$/.test(entry.fileName)) {\n // Directory entry\n mkdirSync(entryPath, { recursive: true });\n zipfile.readEntry();\n } else {\n // File entry\n zipfile.openReadStream(entry, (err, readStream) => {\n if (err || !readStream) {\n reject(err || new Error('Failed to open read stream'));\n return;\n }\n\n mkdirSync(join(entryPath, '..'), { recursive: true });\n const writeStream = createWriteStream(entryPath);\n\n readStream.pipe(writeStream);\n writeStream.on('finish', () => {\n zipfile.readEntry();\n });\n writeStream.on('error', reject);\n });\n }\n });\n\n zipfile.on('end', () => {\n resolve();\n });\n\n zipfile.on('error', reject);\n });\n 
});\n }\n\n /**\n * Extract base64 images from JSON file using jq (for large files)\n * Returns array of base64 data strings\n */\n private static async extractBase64ImagesFromJsonWithJq(\n jsonPath: string,\n ): Promise<string[]> {\n return jqExtractBase64PngStrings(jsonPath);\n }\n\n /**\n * Replace base64 images with file paths in JSON using jq (for large files)\n * Uses reduce to maintain counter state while walking the JSON\n */\n private static async replaceBase64ImagesInJsonWithJq(\n jsonPath: string,\n outputPath: string,\n dirName: string,\n prefix: string,\n ): Promise<number> {\n const { data, count } = (await jqReplaceBase64WithPaths(\n jsonPath,\n dirName,\n prefix,\n )) as { data: DoclingDocument; count: number };\n\n // Write transformed JSON to output file\n writeFileSync(outputPath, JSON.stringify(data, null, 2), 'utf-8');\n\n return count;\n }\n\n /**\n * Extract a base64-encoded image to a file and return the relative path\n */\n private static extractBase64ImageToFile(\n base64Data: string,\n imagesDir: string,\n index: number,\n prefix: string,\n dirName: string,\n ): string {\n const PREFIX = 'data:image/png;base64,';\n const base64Content = base64Data.startsWith(PREFIX)\n ? base64Data.slice(PREFIX.length)\n : base64Data;\n\n const filename = `${prefix}_${index}.png`;\n const filepath = join(imagesDir, filename);\n\n // Convert base64 to buffer and write to file\n const buffer = Buffer.from(base64Content, 'base64');\n writeFileSync(filepath, buffer);\n\n return `${dirName}/${filename}`;\n }\n\n /**\n * Save JSON and HTML documents with base64 images extracted to separate files\n * Uses jq for JSON processing to handle large files\n *\n * This method:\n * 1. Extracts base64-encoded images from JSON and HTML content\n * 2. Saves images as separate PNG files\n * 3. Replaces base64 data with relative file paths\n * 4. 
Saves the transformed documents to the output directory\n */\n private static async saveDocumentsWithExtractedImages(\n logger: LoggerMethods,\n outputDir: string,\n filename: string,\n jsonSourcePath: string,\n htmlContent: string,\n ): Promise<void> {\n // Clear output directory completely at the start, then recreate it\n try {\n if (existsSync(outputDir)) {\n rmSync(outputDir, { recursive: true, force: true });\n }\n } catch (e) {\n logger.warn('[PDFConverter] Failed to clear output directory:', e);\n }\n mkdirSync(outputDir, { recursive: true });\n\n // Get filename without extension\n const baseName = filename.replace(extname(filename), '');\n\n // Save JSON with extracted images (using jq for large files)\n const jsonPath = join(outputDir, `${baseName}.json`);\n try {\n // Create pages directory for JSON images\n const pagesDir = join(outputDir, 'pages');\n if (!existsSync(pagesDir)) {\n mkdirSync(pagesDir, { recursive: true });\n }\n\n // Step 1: Extract base64 images using jq (doesn't load full JSON into memory)\n const base64Images =\n await ImageExtractor.extractBase64ImagesFromJsonWithJq(jsonSourcePath);\n\n // Step 2: Save each image to file\n base64Images.forEach((base64Data, index) => {\n ImageExtractor.extractBase64ImageToFile(\n base64Data,\n pagesDir,\n index,\n 'page',\n 'pages',\n );\n });\n\n logger.info(\n `[PDFConverter] Extracted ${base64Images.length} images from JSON to ${pagesDir}`,\n );\n\n // Step 3: Replace base64 images with file paths using jq\n const replacedCount =\n await ImageExtractor.replaceBase64ImagesInJsonWithJq(\n jsonSourcePath,\n jsonPath,\n 'pages',\n 'page',\n );\n\n logger.info(\n `[PDFConverter] Replaced ${replacedCount} base64 images with file paths`,\n );\n } catch (e) {\n logger.warn(\n '[PDFConverter] Failed to extract images from JSON using jq. 
Error:',\n e,\n );\n throw e;\n }\n logger.info('[PDFConverter] Saved JSON:', jsonPath);\n\n // Save HTML with extracted images\n const htmlPath = join(outputDir, `${baseName}.html`);\n try {\n // Create images directory for HTML images\n const imagesDir = join(outputDir, 'images');\n if (!existsSync(imagesDir)) {\n mkdirSync(imagesDir, { recursive: true });\n }\n\n // Extract base64 images from HTML src attributes, save them as PNG files, and replace with file paths\n let imageIndex = 0;\n const transformedHtml = htmlContent.replace(\n /src=\"data:image\\/png;base64,([^\"]+)\"/g,\n (_, base64Content) => {\n const filename = `image_${imageIndex}.png`;\n const filepath = join(imagesDir, filename);\n\n // Convert base64 to buffer and write to file\n const buffer = Buffer.from(base64Content, 'base64');\n writeFileSync(filepath, buffer);\n\n const relativePath = `images/${filename}`;\n imageIndex += 1;\n\n return `src=\"${relativePath}\"`;\n },\n );\n\n logger.info(\n `[PDFConverter] Extracted ${imageIndex} images from HTML to ${imagesDir}`,\n );\n writeFileSync(htmlPath, transformedHtml, 'utf-8');\n } catch (e) {\n logger.warn(\n '[PDFConverter] Failed to extract images from HTML, writing original. Error:',\n e,\n );\n writeFileSync(htmlPath, htmlContent, 'utf-8');\n }\n logger.info('[PDFConverter] Saved HTML:', htmlPath);\n }\n\n /**\n * Extract documents from ZIP and save with extracted images\n * Uses jq for JSON processing to handle large files without loading into Node.js memory\n *\n * Complete workflow:\n * 1. Extract ZIP file to temporary directory\n * 2. Find JSON and HTML files from extracted files\n * 3. Use jq to extract base64 images from JSON and save as separate files\n * 4. Use jq to replace base64 with file paths in JSON\n * 5. Process HTML with regex to extract and replace images\n * 6. 
Save transformed documents to output directory (as result.json and result.html)\n */\n static async extractAndSaveDocumentsFromZip(\n logger: LoggerMethods,\n zipPath: string,\n extractDir: string,\n outputDir: string,\n ): Promise<void> {\n // Extract zip file\n logger.info('[PDFConverter] Extracting ZIP file...');\n await ImageExtractor.extractZip(zipPath, extractDir);\n\n // Find JSON and HTML files dynamically\n const files = readdirSync(extractDir);\n const jsonFile = files.find((f) => extname(f).toLowerCase() === '.json');\n const htmlFile = files.find((f) => extname(f).toLowerCase() === '.html');\n\n if (!jsonFile || !htmlFile) {\n throw new Error(\n `Expected one JSON and one HTML file in extracted directory. Found: ${files.join(', ')}`,\n );\n }\n\n // Get file paths\n const jsonPath = join(extractDir, jsonFile);\n const htmlPath = join(extractDir, htmlFile);\n\n // Read HTML content (HTML is typically smaller, safe to read into memory)\n const htmlContent = readFileSync(htmlPath, 'utf-8');\n\n // Save converted files to output directory with extracted images\n // JSON is processed with jq to avoid loading large files into memory\n logger.info('[PDFConverter] Saving converted files to output...');\n await ImageExtractor.saveDocumentsWithExtractedImages(\n logger,\n outputDir,\n 'result',\n jsonPath,\n htmlContent,\n );\n\n logger.info('[PDFConverter] Files saved to:', outputDir);\n }\n}\n","import { spawn } from 'node:child_process';\n\n/**\n * Resolve jq executable path from environment or default to 'jq' in PATH.\n */\nfunction getJqPath(): string {\n const p = process.env.JQ_PATH?.trim();\n return p && p.length > 0 ? 
p : 'jq';\n}\n\n/**\n * Run a jq program against a JSON file and parse the JSON result.\n * - program: jq filter/program string\n * - filePath: path to the input JSON file\n * Returns parsed JSON as type T.\n */\nexport function runJqFileJson<T = unknown>(\n program: string,\n filePath: string,\n): Promise<T> {\n return new Promise<T>((resolve, reject) => {\n const jqPath = getJqPath();\n const args = [\n '-c', // compact output (single line when possible)\n program,\n filePath,\n ];\n\n const child = spawn(jqPath, args, {\n stdio: ['ignore', 'pipe', 'pipe'],\n env: process.env,\n });\n\n let stdout = '';\n let stderr = '';\n\n child.stdout.setEncoding('utf-8');\n child.stderr.setEncoding('utf-8');\n\n child.stdout.on('data', (chunk: string) => {\n stdout += chunk;\n });\n child.stderr.on('data', (chunk: string) => {\n stderr += chunk;\n });\n\n child.on('error', (err) => {\n reject(err);\n });\n\n child.on('close', (code) => {\n if (code !== 0) {\n const error = new Error(\n `jq exited with code ${code}. ${stderr ? 'Stderr: ' + stderr : ''}`,\n );\n return reject(error);\n }\n try {\n // jq may output trailing newlines; trim is safe for JSON\n const text = stdout.trim();\n const parsed = JSON.parse(text) as T;\n resolve(parsed);\n } catch (e) {\n reject(\n new Error(\n `Failed to parse jq output as JSON. Output length=${stdout.length}. Error: ${(e as Error).message}`,\n ),\n );\n }\n });\n });\n}\n\n/**\n * Convenience: extract all base64 PNG data-URI strings from a JSON file.\n */\nexport function jqExtractBase64PngStrings(filePath: string): Promise<string[]> {\n const program = `\n [\n .. 
|\n select(type == \"string\" and startswith(\"data:image/png;base64\"))\n ]\n `;\n return runJqFileJson<string[]>(program, filePath);\n}\n\n/**\n * Convenience: replace base64 PNG data-URIs with file paths like `${dirName}/${prefix}_<idx>.png`.\n * Returns an object: { data, count }\n */\nexport function jqReplaceBase64WithPaths(\n filePath: string,\n dirName: string,\n prefix: string,\n): Promise<{ data: unknown; count: number }> {\n const program = `\n reduce paths(type == \"string\" and startswith(\"data:image/png;base64\")) as $p (\n {data: ., counter: 0};\n .counter as $idx |\n .data |= setpath($p; \"${dirName}/${prefix}_\\\\($idx).png\") |\n .counter += 1\n ) | {data: .data, count: .counter}\n `;\n return runJqFileJson<{ data: unknown; count: number }>(program, filePath);\n}\n","import type { Server } from 'node:http';\n\nimport { createReadStream, statSync } from 'node:fs';\nimport { createServer } from 'node:http';\nimport { basename } from 'node:path';\n\n/**\n * Simple local HTTP server for serving a single file\n *\n * Used to serve local PDF files to docling-serve which requires HTTP URLs.\n */\nexport class LocalFileServer {\n private server: Server | null = null;\n private port: number = 0;\n\n /**\n * Start serving a file and return the URL\n *\n * @param filePath Absolute path to the file to serve\n * @returns URL to access the file\n */\n async start(filePath: string): Promise<string> {\n const filename = basename(filePath);\n const stat = statSync(filePath);\n\n return new Promise((resolve, reject) => {\n this.server = createServer((req, res) => {\n if (req.url === `/${filename}`) {\n res.writeHead(200, {\n 'Content-Type': 'application/pdf',\n 'Content-Length': stat.size,\n });\n createReadStream(filePath).pipe(res);\n } else {\n res.writeHead(404);\n res.end('Not Found');\n }\n });\n\n this.server.on('error', reject);\n\n // Listen on random available port\n this.server.listen(0, '127.0.0.1', () => {\n const address = this.server!.address();\n 
if (typeof address === 'object' && address !== null) {\n this.port = address.port;\n resolve(`http://127.0.0.1:${this.port}/${filename}`);\n } else {\n reject(new Error('Failed to get server address'));\n }\n });\n });\n }\n\n /**\n * Stop the server\n */\n stop(): Promise<void> {\n return new Promise((resolve) => {\n if (this.server) {\n this.server.close(() => {\n this.server = null;\n this.port = 0;\n resolve();\n });\n } else {\n resolve();\n }\n });\n }\n}\n","import type { LoggerMethods } from '@heripo/logger';\n\nimport { spawnAsync } from '@heripo/shared';\nimport { existsSync, rmSync } from 'node:fs';\nimport { tmpdir } from 'node:os';\nimport { join } from 'node:path';\n\nimport { IMAGE_PDF_CONVERTER } from '../config/constants';\n\n/**\n * Utility class for converting PDF to image-based PDF using ImageMagick.\n * Used as a fallback when regular PDF conversion fails due to encoding issues.\n *\n * ## System Requirements\n * - ImageMagick (`brew install imagemagick`)\n * - Ghostscript (`brew install ghostscript`)\n */\nexport class ImagePdfConverter {\n constructor(private readonly logger: LoggerMethods) {}\n\n /**\n * Convert a PDF file to an image-based PDF.\n * Downloads the PDF from URL, converts it using ImageMagick, and returns the path.\n *\n * @param pdfUrl - URL of the source PDF\n * @param reportId - Report identifier for temp file naming\n * @returns Path to the converted image PDF in temp directory\n */\n async convert(pdfUrl: string, reportId: string): Promise<string> {\n const timestamp = Date.now();\n const tempDir = tmpdir();\n const inputPath = join(tempDir, `${reportId}-${timestamp}-input.pdf`);\n const outputPath = join(tempDir, `${reportId}-${timestamp}-image.pdf`);\n\n try {\n this.logger.info('[ImagePdfConverter] Downloading PDF from URL...');\n await this.downloadPdf(pdfUrl, inputPath);\n\n this.logger.info('[ImagePdfConverter] Converting to image PDF...');\n await this.convertToImagePdf(inputPath, outputPath);\n\n 
this.logger.info('[ImagePdfConverter] Image PDF created:', outputPath);\n return outputPath;\n } finally {\n // Always cleanup input file\n if (existsSync(inputPath)) {\n rmSync(inputPath, { force: true });\n }\n }\n }\n\n /**\n * Download PDF from URL to local path using curl\n */\n private async downloadPdf(url: string, outputPath: string): Promise<void> {\n const result = await spawnAsync('curl', [\n '-L', // Follow redirects\n '-o',\n outputPath,\n '-s', // Silent mode\n '--fail', // Fail on HTTP errors\n url,\n ]);\n\n if (result.code !== 0) {\n throw new Error(\n `Failed to download PDF: ${result.stderr || 'Unknown error'}`,\n );\n }\n }\n\n /**\n * Convert PDF to image-based PDF using ImageMagick\n */\n private async convertToImagePdf(\n inputPath: string,\n outputPath: string,\n ): Promise<void> {\n const result = await spawnAsync('magick', [\n '-density',\n IMAGE_PDF_CONVERTER.DENSITY.toString(),\n inputPath,\n '-quality',\n IMAGE_PDF_CONVERTER.QUALITY.toString(),\n outputPath,\n ]);\n\n if (result.code !== 0) {\n throw new Error(\n `Failed to convert PDF to image PDF: ${result.stderr || 'Unknown error'}`,\n );\n }\n }\n\n /**\n * Cleanup the temporary image PDF file\n */\n cleanup(imagePdfPath: string): void {\n if (existsSync(imagePdfPath)) {\n this.logger.info(\n '[ImagePdfConverter] Cleaning up temp file:',\n imagePdfPath,\n );\n rmSync(imagePdfPath, { force: true });\n }\n 
}\n}\n"],"mappings":";;;;;;;;;;;;;;;;;;;;;;;;;;;;;;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;;;ACGA,yBAAwB;AACxB,IAAAA,6BAAyB;AACzB,IAAAC,kBAAyB;AACzB,IAAAC,oBAAqB;;;ACHd,IAAM,aAAa;AAAA;AAAA;AAAA;AAAA,EAIxB,oBAAoB;AAAA;AAAA;AAAA;AAAA,EAKpB,2BAA2B;AAAA;AAAA;AAAA;AAAA,EAK3B,0BAA0B;AAAA;AAAA;AAAA;AAAA,EAK1B,8BAA8B;AAAA;AAAA;AAAA;AAAA,EAK9B,8BAA8B;AAChC;AAKO,IAAM,gBAAgB;AAAA;AAAA;AAAA;AAAA,EAI3B,kBAAkB;AACpB;AAKO,IAAM,sBAAsB;AAAA;AAAA;AAAA;AAAA,EAIjC,kBAAkB;AACpB;AAKO,IAAM,sBAAsB;AAAA;AAAA;AAAA;AAAA,EAIjC,SAAS;AAAA;AAAA;AAAA;AAAA,EAKT,SAAS;AACX;;;AE7DA,2BAAsB;AAkDf,SAAS,WACd,SACA,MACA,UAA6B,CAAC,GACR;AACtB,QAAM;IACJ,gBAAgB;IAChB,gBAAgB;IAChB,GAAG;EACL,IAAI;AAEJ,SAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,UAAM,WAAO,4BAAM,SAAS,MAAM,YAAY;AAE9C,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,QAAI,iBAAiB,KAAK,QAAQ;AAChC,WAAK,OAAO,GAAG,QAAQ,CAAC,SAAS;AAC/B,kBAAU,KAAK,SAAS;MAC1B,CAAC;IACH;AAEA,QAAI,iBAAiB,KAAK,QAAQ;AAChC,WAAK,OAAO,GAAG,QAAQ,CAAC,SAAS;AAC/B,kBAAU,KAAK,SAAS;MAC1B,CAAC;IACH;AAEA,SAAK,GAAG,SAAS,CAAC,SAAS;AACzB,cAAQ,EAAE,QAAQ,QAAQ,MAAM,QAAQ,EAAE,CAAC;IAC7C,CAAC;AAED,SAAK,GAAG,SAAS,MAAM;EACzB,CAAC;AACH;;;AGpFA,gCAAsB;AACtB,uBAAqB;;;ACQd,IAAM,uBAAuB;AAK7B,IAAM,qBAAqB,EAAE,OAAO,GAAG,OAAO,EAAE;AAWhD,IAAM,qBAAN,cAAiC,MAAM;AAAA,EAC5C,YAAY,SAAiB;AAC3B,UAAM,OAAO;AACb,SAAK,OAAO;AAAA,EACd;AACF;AAcO,SAAS,mBAAmB,QAA0C;AAC3E,QAAM,QAAQ,OAAO,MAAM,oBAAoB;AAC/C,MAAI,CAAC,MAAO,QAAO;AAEnB,QAAM,QAAQ,SAAS,MAAM,CAAC,CAAC;AAC/B,QAAM,QAAQ,SAAS,MAAM,CAAC,CAAC;AAE/B,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA,eAAe,GAAG,KAAK,IAAI,KAAK;AAAA,EAClC;AACF;AAkBO,SAAS,sBACd,SACA,UAA6B,UACvB;AACN,QAAM,EAAE,OAAO,MAAM,IAAI;AACzB,QAAM,SAAS,YAAY,SAAS,gBAAgB;AAGpD,MAAI,UAAU,KAAK,SAAS,IAAI;AAC9B,UAAM,IAAI;AAAA,MACR,GAAG,MAAM,IAAI,KAAK,IAAI,KAAK;AAAA,IAC7B;AAAA,EACF;AAGA,MAAI,UAAU,KAAK,QAAQ,mBAAmB,OAAO;AACnD,UAAM,IAAI,mBAAmB,kCAAkC;AAAA,EACjE;AACF;;;ADjFO,IAAM,qBAAN,MAAM,oBAAmB;AAAA,EACb;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EAEjB,YAAY,SAKT;AACD,SAAK,SAAS,QAAQ;AACtB,SAAK,WAAW,QAAQ;AACxB,SAAK,OAAO,QAAQ;AACpB,SAAK,sBAAsB,QAAQ;AAAA,EACrC;AAAA,EAEA,MAAM,QAAuB
;AAC3B,SAAK,OAAO,KAAK,uDAAuD;AAExE,UAAM,KAAK,mBAAmB;AAC9B,UAAM,KAAK,uBAAuB;AAClC,UAAM,KAAK,WAAW;AACtB,UAAM,KAAK,kBAAkB;AAC7B,UAAM,KAAK,eAAe;AAC1B,UAAM,KAAK,oBAAoB;AAG/B,UAAM,YAAY,MAAM,KAAK,YAAY,KAAK,IAAI;AAElD,QAAI,aAAa,CAAC,KAAK,qBAAqB;AAC1C,WAAK,OAAO;AAAA,QACV;AAAA,QACA,KAAK;AAAA,MACP;AAAA,IACF,OAAO;AACL,YAAM,KAAK,kBAAkB;AAAA,IAC/B;AAEA,SAAK,OAAO,KAAK,sCAAsC;AAAA,EACzD;AAAA,EAEA,MAAc,qBAAiD;AAC7D,UAAM,SAAS,MAAM,WAAW,WAAW,CAAC,WAAW,CAAC;AAExD,QAAI,OAAO,SAAS,GAAG;AACrB,YAAM,IAAI,MAAM,gCAAgC;AAAA,IAClD;AAEA,UAAM,SAAS,OAAO,SAAS,OAAO;AACtC,UAAM,UAAU,mBAAmB,MAAM;AAEzC,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI,MAAM,gCAAgC;AAAA,IAClD;AAEA,SAAK,OAAO;AAAA,MACV;AAAA,MACA,QAAQ;AAAA,IACV;AAEA,QAAI;AACF,4BAAsB,SAAS,QAAQ;AAAA,IACzC,SAAS,OAAO;AACd,UAAI,iBAAiB,sBAAsB,QAAQ,SAAS,IAAI;AAC9D,aAAK,OAAO;AAAA,UACV;AAAA,QACF;AAAA,MACF;AACA,YAAM;AAAA,IACR;AAEA,WAAO;AAAA,EACT;AAAA,EAEA,MAAc,yBAAwC;AACpD,UAAM,SAAS,MAAM,WAAW,WAAW,CAAC,MAAM,QAAQ,KAAK,QAAQ,CAAC;AAExE,QAAI,OAAO,SAAS,GAAG;AACrB,YAAM,IAAI,MAAM,6CAA6C;AAAA,IAC/D;AAEA,UAAM,KAAK,wBAAwB;AAAA,EACrC;AAAA,EAEA,MAAc,0BAAyC;AACrD,UAAM,iBAAa,uBAAK,KAAK,UAAU,OAAO,QAAQ;AACtD,UAAM,SAAS,MAAM,WAAW,YAAY,CAAC,WAAW,CAAC;AAEzD,QAAI,OAAO,SAAS,GAAG;AACrB,YAAM,IAAI,MAAM,sCAAsC;AAAA,IACxD;AAEA,UAAM,SAAS,OAAO,SAAS,OAAO;AACtC,UAAM,UAAU,mBAAmB,MAAM;AAEzC,QAAI,CAAC,SAAS;AACZ,YAAM,IAAI,MAAM,qCAAqC;AAAA,IACvD;AAEA,0BAAsB,SAAS,MAAM;AAAA,EACvC;AAAA,EAEA,MAAc,aAA4B;AACxC,UAAM,cAAU,uBAAK,KAAK,UAAU,OAAO,KAAK;AAChD,UAAM,SAAS,MAAM,WAAW,SAAS,CAAC,WAAW,aAAa,KAAK,CAAC;AAExE,QAAI,OAAO,SAAS,GAAG;AACrB,WAAK,OAAO;AAAA,QACV;AAAA,QACA,OAAO;AAAA,MACT;AACA,YAAM,IAAI,MAAM,qCAAqC,OAAO,IAAI,EAAE;AAAA,IACpE;AAAA,EACF;AAAA,EAEA,MAAc,oBAAmC;AAC/C,UAAM,cAAU,uBAAK,KAAK,UAAU,OAAO,KAAK;AAChD,UAAM,SAAS,MAAM,WAAW,SAAS;AAAA,MACvC;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,CAAC;AAED,QAAI,OAAO,SAAS,GAAG;AACrB,WAAK,OAAO;AAAA,QACV;AAAA,QACA,OAAO;AAAA,MACT;AACA,YAAM,IAAI;AAAA,QACR,4CAA4C,OAAO,IAAI;AAAA,MACzD;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,iBAAgC;AAC5C,UAAM,cAAU,uBAAK,KAAK,UAAU,OAAO,KAAK;AAChD,UAAM,SAAS,MA
AM,WAAW,SAAS;AAAA,MACvC;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,CAAC;AAED,QAAI,OAAO,SAAS,GAAG;AACrB,WAAK,OAAO;AAAA,QACV;AAAA,QACA,OAAO;AAAA,MACT;AACA,YAAM,IAAI,MAAM,yCAAyC,OAAO,IAAI,EAAE;AAAA,IACxE;AAAA,EACF;AAAA,EAEA,MAAc,sBAAqC;AACjD,UAAM,cAAU,uBAAK,KAAK,UAAU,OAAO,KAAK;AAChD,UAAM,SAAS,MAAM,WAAW,SAAS,CAAC,WAAW,eAAe,CAAC;AAErE,QAAI,OAAO,SAAS,GAAG;AACrB,WAAK,OAAO;AAAA,QACV;AAAA,QACA,OAAO;AAAA,MACT;AACA,YAAM,IAAI;AAAA,QACR,+CAA+C,OAAO,IAAI;AAAA,MAC5D;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,YAAY,MAAgC;AACxD,QAAI;AACF,YAAM,SAAS,MAAM,WAAW,QAAQ,CAAC,OAAO,IAAI,IAAI,EAAE,CAAC;AAC3D,aAAO,OAAO,SAAS,KAAK,CAAC,CAAC,OAAO,OAAO,KAAK;AAAA,IACnD,QAAQ;AACN,aAAO;AAAA,IACT;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,MAAa,cAA6B;AACxC,UAAM,KAAK,kBAAkB;AAAA,EAC/B;AAAA;AAAA,EAGA,aAAoB,kBAClB,QACA,MACe;AACf,WAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,YAAM,WAAO,iCAAM,QAAQ,CAAC,OAAO,IAAI,IAAI,EAAE,CAAC;AAC9C,YAAM,OAAiB,CAAC;AAExB,WAAK,QAAQ,GAAG,QAAQ,CAAC,SAAS;AAChC,cAAM,MAAc,KAAK,SAAS;AAClC,aAAK;AAAA,UACH,GAAG,IACA,MAAM,KAAK,EACX,IAAI,CAAC,MAAM,EAAE,KAAK,CAAC,EACnB,OAAO,OAAO;AAAA,QACnB;AAAA,MACF,CAAC;AAED,WAAK,GAAG,SAAS,MAAM;AACrB,YAAI,KAAK,WAAW,EAAG,QAAO,QAAQ;AAEtC,YAAI,YAAY,KAAK;AACrB,cAAM,OAAO,MAAM;AACjB,cAAI,EAAE,aAAa,EAAG,SAAQ;AAAA,QAChC;AAEA,eAAO;AAAA,UACL;AAAA,UACA,KAAK,KAAK,IAAI;AAAA,UACd;AAAA,UACA;AAAA,QACF;AACA,mBAAW,OAAO,MAAM;AACtB,gBAAM,eAAW,iCAAM,QAAQ,CAAC,MAAM,GAAG,CAAC;AAE1C,mBAAS,GAAG,SAAS,CAAC,aAAa;AACjC,gBAAI,aAAa,GAAG;AAClB,qBAAO,KAAK,+CAA+C,GAAG;AAAA,YAChE;AACA,iBAAK;AAAA,UACP,CAAC;AACD,mBAAS,GAAG,SAAS,CAACC,WAAU;AAC9B,mBAAO,KAAK,+CAA+CA,MAAK;AAChE,iBAAK;AAAA,UACP,CAAC;AAAA,QACH;AAAA,MACF,CAAC;AAED,WAAK,GAAG,SAAS,MAAM,QAAQ,CAAC;AAAA,IAClC,CAAC;AAAA,EACH;AAAA,EAEA,MAAc,oBAAmC;AAC/C,WAAO,IAAI,QAAQ,OAAO,SAAS,WAAW;AAE5C,UAAI,KAAK,qBAAqB;AAC5B,cAAM,oBAAmB,kBAAkB,KAAK,QAAQ,KAAK,IAAI;AAAA,MACnE;AAEA,YAAM,WAAW,KAAK;AACtB,YAAM,uBAAmB,uBAAK,UAAU,OAAO,eAAe;AAC9D,YAAM,OAAO,CAAC,OAAO,UAAU,KAAK,KAAK,SAAS,CAAC;AAEnD,WAAK,OAAO;AAAA,QACV;AAAA,QACA,KAAK;AAAA,MACP;AACA,YAAM,qBAAiB,iCAAM,kBAAkB,MAAM;AAAA,QACnD,UAA
U;AAAA;AAAA,QACV,OAAO;AAAA;AAAA,MACT,CAAC;AAED,qBAAe,MAAM;AAErB,qBAAe,GAAG,SAAS,CAAC,UAAU;AACpC,aAAK,OAAO,MAAM,6CAA6C,KAAK;AACpE,eAAO,KAAK;AAAA,MACd,CAAC;AAGD,iBAAW,MAAM;AACf,gBAAQ;AAAA,MACV,GAAG,oBAAoB,gBAAgB;AAAA,IACzC,CAAC;AAAA,EACH;AACF;;;AExRA,wBAAqB;AACrB,IAAAC,kBAAsD;AACtD,IAAAC,oBAAqB;AACrB,sBAAyB;;;ACNlB,IAAM,wBAAN,cAAoC,MAAM;AAAA,EAG/C,YACkB,eACA,eAChB;AACA;AAAA,MACE,kDACe,cAAc,OAAO,eACrB,cAAc,OAAO;AAAA,IACtC;AAPgB;AACA;AAAA,EAOlB;AAAA,EAXgB,OAAO;AAYzB;;;ACdA,qBAQO;AACP,IAAAC,oBAA8B;AAC9B,YAAuB;;;ACbvB,IAAAC,6BAAsB;AAKtB,SAAS,YAAoB;AAC3B,QAAM,IAAI,QAAQ,IAAI,SAAS,KAAK;AACpC,SAAO,KAAK,EAAE,SAAS,IAAI,IAAI;AACjC;AAQO,SAAS,cACd,SACA,UACY;AACZ,SAAO,IAAI,QAAW,CAAC,SAAS,WAAW;AACzC,UAAM,SAAS,UAAU;AACzB,UAAM,OAAO;AAAA,MACX;AAAA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,UAAM,YAAQ,kCAAM,QAAQ,MAAM;AAAA,MAChC,OAAO,CAAC,UAAU,QAAQ,MAAM;AAAA,MAChC,KAAK,QAAQ;AAAA,IACf,CAAC;AAED,QAAI,SAAS;AACb,QAAI,SAAS;AAEb,UAAM,OAAO,YAAY,OAAO;AAChC,UAAM,OAAO,YAAY,OAAO;AAEhC,UAAM,OAAO,GAAG,QAAQ,CAAC,UAAkB;AACzC,gBAAU;AAAA,IACZ,CAAC;AACD,UAAM,OAAO,GAAG,QAAQ,CAAC,UAAkB;AACzC,gBAAU;AAAA,IACZ,CAAC;AAED,UAAM,GAAG,SAAS,CAAC,QAAQ;AACzB,aAAO,GAAG;AAAA,IACZ,CAAC;AAED,UAAM,GAAG,SAAS,CAAC,SAAS;AAC1B,UAAI,SAAS,GAAG;AACd,cAAM,QAAQ,IAAI;AAAA,UAChB,uBAAuB,IAAI,KAAK,SAAS,aAAa,SAAS,EAAE;AAAA,QACnE;AACA,eAAO,OAAO,KAAK;AAAA,MACrB;AACA,UAAI;AAEF,cAAM,OAAO,OAAO,KAAK;AACzB,cAAM,SAAS,KAAK,MAAM,IAAI;AAC9B,gBAAQ,MAAM;AAAA,MAChB,SAAS,GAAG;AACV;AAAA,UACE,IAAI;AAAA,YACF,oDAAoD,OAAO,MAAM,YAAa,EAAY,OAAO;AAAA,UACnG;AAAA,QACF;AAAA,MACF;AAAA,IACF,CAAC;AAAA,EACH,CAAC;AACH;AAKO,SAAS,0BAA0B,UAAqC;AAC7E,QAAM,UAAU;AAAA;AAAA;AAAA;AAAA;AAAA;AAMhB,SAAO,cAAwB,SAAS,QAAQ;AAClD;AAMO,SAAS,yBACd,UACA,SACA,QAC2C;AAC3C,QAAM,UAAU;AAAA;AAAA;AAAA;AAAA,gCAIc,OAAO,IAAI,MAAM;AAAA;AAAA;AAAA;AAI/C,SAAO,cAAgD,SAAS,QAAQ;AAC1E;;;AD3EO,IAAM,iBAAN,MAAM,gBAAe;AAAA;AAAA;AAAA;AAAA,EAI1B,aAAqB,WACnB,SACA,WACe;AACf,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,MAAM,WAAK,SAAS,EAAE,aAAa,KAAK,GAAG,CAAC,KAAK,YAAY;AAC3D,YAAI,OAAO,CAAC,SAAS;AACnB,iBAAO,OAAO,IAAI,MAAM,yBAAyB,CAAC
;AAClD;AAAA,QACF;AAEA,gBAAQ,UAAU;AAElB,gBAAQ,GAAG,SAAS,CAAC,UAAU;AAC7B,gBAAM,gBAAY,wBAAK,WAAW,MAAM,QAAQ;AAEhD,cAAI,MAAM,KAAK,MAAM,QAAQ,GAAG;AAE9B,0CAAU,WAAW,EAAE,WAAW,KAAK,CAAC;AACxC,oBAAQ,UAAU;AAAA,UACpB,OAAO;AAEL,oBAAQ,eAAe,OAAO,CAACC,MAAK,eAAe;AACjD,kBAAIA,QAAO,CAAC,YAAY;AACtB,uBAAOA,QAAO,IAAI,MAAM,4BAA4B,CAAC;AACrD;AAAA,cACF;AAEA,gDAAU,wBAAK,WAAW,IAAI,GAAG,EAAE,WAAW,KAAK,CAAC;AACpD,oBAAM,kBAAc,kCAAkB,SAAS;AAE/C,yBAAW,KAAK,WAAW;AAC3B,0BAAY,GAAG,UAAU,MAAM;AAC7B,wBAAQ,UAAU;AAAA,cACpB,CAAC;AACD,0BAAY,GAAG,SAAS,MAAM;AAAA,YAChC,CAAC;AAAA,UACH;AAAA,QACF,CAAC;AAED,gBAAQ,GAAG,OAAO,MAAM;AACtB,kBAAQ;AAAA,QACV,CAAC;AAED,gBAAQ,GAAG,SAAS,MAAM;AAAA,MAC5B,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,aAAqB,kCACnB,UACmB;AACnB,WAAO,0BAA0B,QAAQ;AAAA,EAC3C;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,aAAqB,gCACnB,UACA,YACA,SACA,QACiB;AACjB,UAAM,EAAE,MAAM,MAAM,IAAK,MAAM;AAAA,MAC7B;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAGA,sCAAc,YAAY,KAAK,UAAU,MAAM,MAAM,CAAC,GAAG,OAAO;AAEhE,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA,EAKA,OAAe,yBACb,YACA,WACA,OACA,QACA,SACQ;AACR,UAAM,SAAS;AACf,UAAM,gBAAgB,WAAW,WAAW,MAAM,IAC9C,WAAW,MAAM,OAAO,MAAM,IAC9B;AAEJ,UAAM,WAAW,GAAG,MAAM,IAAI,KAAK;AACnC,UAAM,eAAW,wBAAK,WAAW,QAAQ;AAGzC,UAAM,SAAS,OAAO,KAAK,eAAe,QAAQ;AAClD,sCAAc,UAAU,MAAM;AAE9B,WAAO,GAAG,OAAO,IAAI,QAAQ;AAAA,EAC/B;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAYA,aAAqB,iCACnB,QACA,WACA,UACA,gBACA,aACe;AAEf,QAAI;AACF,cAAI,2BAAW,SAAS,GAAG;AACzB,mCAAO,WAAW,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;AAAA,MACpD;AAAA,IACF,SAAS,GAAG;AACV,aAAO,KAAK,oDAAoD,CAAC;AAAA,IACnE;AACA,kCAAU,WAAW,EAAE,WAAW,KAAK,CAAC;AAGxC,UAAM,WAAW,SAAS,YAAQ,2BAAQ,QAAQ,GAAG,EAAE;AAGvD,UAAM,eAAW,wBAAK,WAAW,GAAG,QAAQ,OAAO;AACnD,QAAI;AAEF,YAAM,eAAW,wBAAK,WAAW,OAAO;AACxC,UAAI,KAAC,2BAAW,QAAQ,GAAG;AACzB,sCAAU,UAAU,EAAE,WAAW,KAAK,CAAC;AAAA,MACzC;AAGA,YAAM,eACJ,MAAM,gBAAe,kCAAkC,cAAc;AAGvE,mBAAa,QAAQ,CAAC,YAAY,UAAU;AAC1C,wBAAe;AAAA,UACb;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF,CAAC;AAED,aAAO;AAAA,QACL,4BAA4B,aAAa,MAAM,wBAAwB,QAAQ;AAAA,MACjF;
AAGA,YAAM,gBACJ,MAAM,gBAAe;AAAA,QACnB;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEF,aAAO;AAAA,QACL,2BAA2B,aAAa;AAAA,MAC1C;AAAA,IACF,SAAS,GAAG;AACV,aAAO;AAAA,QACL;AAAA,QACA;AAAA,MACF;AACA,YAAM;AAAA,IACR;AACA,WAAO,KAAK,8BAA8B,QAAQ;AAGlD,UAAM,eAAW,wBAAK,WAAW,GAAG,QAAQ,OAAO;AACnD,QAAI;AAEF,YAAM,gBAAY,wBAAK,WAAW,QAAQ;AAC1C,UAAI,KAAC,2BAAW,SAAS,GAAG;AAC1B,sCAAU,WAAW,EAAE,WAAW,KAAK,CAAC;AAAA,MAC1C;AAGA,UAAI,aAAa;AACjB,YAAM,kBAAkB,YAAY;AAAA,QAClC;AAAA,QACA,CAAC,GAAG,kBAAkB;AACpB,gBAAMC,YAAW,SAAS,UAAU;AACpC,gBAAM,eAAW,wBAAK,WAAWA,SAAQ;AAGzC,gBAAM,SAAS,OAAO,KAAK,eAAe,QAAQ;AAClD,4CAAc,UAAU,MAAM;AAE9B,gBAAM,eAAe,UAAUA,SAAQ;AACvC,wBAAc;AAEd,iBAAO,QAAQ,YAAY;AAAA,QAC7B;AAAA,MACF;AAEA,aAAO;AAAA,QACL,4BAA4B,UAAU,wBAAwB,SAAS;AAAA,MACzE;AACA,wCAAc,UAAU,iBAAiB,OAAO;AAAA,IAClD,SAAS,GAAG;AACV,aAAO;AAAA,QACL;AAAA,QACA;AAAA,MACF;AACA,wCAAc,UAAU,aAAa,OAAO;AAAA,IAC9C;AACA,WAAO,KAAK,8BAA8B,QAAQ;AAAA,EACpD;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAcA,aAAa,+BACX,QACA,SACA,YACA,WACe;AAEf,WAAO,KAAK,uCAAuC;AACnD,UAAM,gBAAe,WAAW,SAAS,UAAU;AAGnD,UAAM,YAAQ,4BAAY,UAAU;AACpC,UAAM,WAAW,MAAM,KAAK,CAAC,UAAM,2BAAQ,CAAC,EAAE,YAAY,MAAM,OAAO;AACvE,UAAM,WAAW,MAAM,KAAK,CAAC,UAAM,2BAAQ,CAAC,EAAE,YAAY,MAAM,OAAO;AAEvE,QAAI,CAAC,YAAY,CAAC,UAAU;AAC1B,YAAM,IAAI;AAAA,QACR,sEAAsE,MAAM,KAAK,IAAI,CAAC;AAAA,MACxF;AAAA,IACF;AAGA,UAAM,eAAW,wBAAK,YAAY,QAAQ;AAC1C,UAAM,eAAW,wBAAK,YAAY,QAAQ;AAG1C,UAAM,kBAAc,6BAAa,UAAU,OAAO;AAIlD,WAAO,KAAK,oDAAoD;AAChE,UAAM,gBAAe;AAAA,MACnB;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAEA,WAAO,KAAK,kCAAkC,SAAS;AAAA,EACzD;AACF;;;AEvTA,IAAAC,kBAA2C;AAC3C,uBAA6B;AAC7B,IAAAC,oBAAyB;AAOlB,IAAM,kBAAN,MAAsB;AAAA,EACnB,SAAwB;AAAA,EACxB,OAAe;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQvB,MAAM,MAAM,UAAmC;AAC7C,UAAM,eAAW,4BAAS,QAAQ;AAClC,UAAM,WAAO,0BAAS,QAAQ;AAE9B,WAAO,IAAI,QAAQ,CAAC,SAAS,WAAW;AACtC,WAAK,aAAS,+BAAa,CAAC,KAAK,QAAQ;AACvC,YAAI,IAAI,QAAQ,IAAI,QAAQ,IAAI;AAC9B,cAAI,UAAU,KAAK;AAAA,YACjB,gBAAgB;AAAA,YAChB,kBAAkB,KAAK;AAAA,UACzB,CAAC;AACD,gDAAiB,QAAQ,EAAE,KAAK,GAAG;AAAA,QACr
C,OAAO;AACL,cAAI,UAAU,GAAG;AACjB,cAAI,IAAI,WAAW;AAAA,QACrB;AAAA,MACF,CAAC;AAED,WAAK,OAAO,GAAG,SAAS,MAAM;AAG9B,WAAK,OAAO,OAAO,GAAG,aAAa,MAAM;AACvC,cAAM,UAAU,KAAK,OAAQ,QAAQ;AACrC,YAAI,OAAO,YAAY,YAAY,YAAY,MAAM;AACnD,eAAK,OAAO,QAAQ;AACpB,kBAAQ,oBAAoB,KAAK,IAAI,IAAI,QAAQ,EAAE;AAAA,QACrD,OAAO;AACL,iBAAO,IAAI,MAAM,8BAA8B,CAAC;AAAA,QAClD;AAAA,MACF,CAAC;AAAA,IACH,CAAC;AAAA,EACH;AAAA;AAAA;AAAA;AAAA,EAKA,OAAsB;AACpB,WAAO,IAAI,QAAQ,CAAC,YAAY;AAC9B,UAAI,KAAK,QAAQ;AACf,aAAK,OAAO,MAAM,MAAM;AACtB,eAAK,SAAS;AACd,eAAK,OAAO;AACZ,kBAAQ;AAAA,QACV,CAAC;AAAA,MACH,OAAO;AACL,gBAAQ;AAAA,MACV;AAAA,IACF,CAAC;AAAA,EACH;AACF;;;ACnEA,IAAAC,kBAAmC;AACnC,qBAAuB;AACvB,IAAAC,oBAAqB;AAYd,IAAM,oBAAN,MAAwB;AAAA,EAC7B,YAA6B,QAAuB;AAAvB;AAAA,EAAwB;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUrD,MAAM,QAAQ,QAAgB,UAAmC;AAC/D,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,cAAU,uBAAO;AACvB,UAAM,gBAAY,wBAAK,SAAS,GAAG,QAAQ,IAAI,SAAS,YAAY;AACpE,UAAM,iBAAa,wBAAK,SAAS,GAAG,QAAQ,IAAI,SAAS,YAAY;AAErE,QAAI;AACF,WAAK,OAAO,KAAK,iDAAiD;AAClE,YAAM,KAAK,YAAY,QAAQ,SAAS;AAExC,WAAK,OAAO,KAAK,gDAAgD;AACjE,YAAM,KAAK,kBAAkB,WAAW,UAAU;AAElD,WAAK,OAAO,KAAK,0CAA0C,UAAU;AACrE,aAAO;AAAA,IACT,UAAE;AAEA,cAAI,4BAAW,SAAS,GAAG;AACzB,oCAAO,WAAW,EAAE,OAAO,KAAK,CAAC;AAAA,MACnC;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,YAAY,KAAa,YAAmC;AACxE,UAAM,SAAS,MAAM,WAAW,QAAQ;AAAA,MACtC;AAAA;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA;AAAA,MACA;AAAA;AAAA,MACA;AAAA,IACF,CAAC;AAED,QAAI,OAAO,SAAS,GAAG;AACrB,YAAM,IAAI;AAAA,QACR,2BAA2B,OAAO,UAAU,eAAe;AAAA,MAC7D;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,MAAc,kBACZ,WACA,YACe;AACf,UAAM,SAAS,MAAM,WAAW,UAAU;AAAA,MACxC;AAAA,MACA,oBAAoB,QAAQ,SAAS;AAAA,MACrC;AAAA,MACA;AAAA,MACA,oBAAoB,QAAQ,SAAS;AAAA,MACrC;AAAA,IACF,CAAC;AAED,QAAI,OAAO,SAAS,GAAG;AACrB,YAAM,IAAI;AAAA,QACR,uCAAuC,OAAO,UAAU,eAAe;AAAA,MACzE;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA,EAKA,QAAQ,cAA4B;AAClC,YAAI,4BAAW,YAAY,GAAG;AAC5B,WAAK,OAAO;AAAA,QACV;AAAA,QACA;AAAA,MACF;AACA,kCAAO,cAAc,EAAE,OAAO,KAAK,CAAC;AAAA,IACtC;AAAA,EACF;AACF;;;ALhFO,IAAM,eAAN,MAAmB;AAAA,EACxB,YA
CmB,QACA,QACA,yBAAkC,OACnD;AAHiB;AACA;AACA;AAAA,EAChB;AAAA,EAEH,MAAM,QACJ,KACA,UACA,YACA,sBACA,SAaA,aACA;AACA,SAAK,OAAO,KAAK,8BAA8B,GAAG;AAElD,QAAI,gBAA8B;AAElC,QAAI;AACF,YAAM,KAAK;AAAA,QACT;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AACA;AAAA,IACF,SAAS,OAAO;AAEd,UAAI,aAAa,SAAS;AACxB,cAAM;AAAA,MACR;AAEA,sBAAgB;AAChB,WAAK,OAAO,MAAM,qCAAqC,KAAK;AAE5D,UAAI,CAAC,KAAK,wBAAwB;AAChC,cAAM;AAAA,MACR;AAAA,IACF;AAGA,SAAK,OAAO,KAAK,iDAAiD;AAClE,UAAM,oBAAoB,IAAI,kBAAkB,KAAK,MAAM;AAC3D,QAAI,eAA8B;AAElC,QAAI;AACF,qBAAe,MAAM,kBAAkB,QAAQ,KAAK,QAAQ;AAG5D,YAAM,WAAW,UAAU,YAAY;AACvC,WAAK,OAAO,KAAK,2CAA2C,QAAQ;AAEpE,YAAM,KAAK;AAAA,QACT;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,QACA;AAAA,MACF;AAEA,WAAK,OAAO,KAAK,8CAA8C;AAAA,IACjE,SAAS,eAAe;AACtB,WAAK,OAAO;AAAA,QACV;AAAA,QACA;AAAA,MACF;AACA,YAAM,IAAI,sBAAsB,eAAgB,aAAsB;AAAA,IACxE,UAAE;AAEA,UAAI,cAAc;AAChB,0BAAkB,QAAQ,YAAY;AAAA,MACxC;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,kBACZ,KACA,UACA,YACA,sBACA,SAaA,aACe;AACf,UAAM,YAAY,KAAK,IAAI;AAC3B,UAAM,oBAAoB,KAAK,uBAAuB,OAAO;AAE7D,SAAK,OAAO;AAAA,MACV;AAAA,IACF;AACA,SAAK,OAAO,KAAK,uDAAuD;AACxE,SAAK,OAAO;AAAA,MACV;AAAA,IACF;AAGA,UAAM,EAAE,SAAS,OAAO,IAAI,MAAM,KAAK,WAAW,GAAG;AAErD,QAAI;AACF,YAAM,OAAO,MAAM,KAAK,oBAAoB,SAAS,iBAAiB;AACtE,YAAM,KAAK,kBAAkB,IAAI;AAGjC,UAAI,aAAa,SAAS;AACxB,aAAK,OAAO;AAAA,UACV;AAAA,QACF;AACA,cAAM,QAAQ,IAAI,MAAM,4BAA4B;AACpD,cAAM,OAAO;AACb,cAAM;AAAA,MACR;AAEA,YAAM,KAAK,eAAe,KAAK,MAAM;AAAA,IACvC,UAAE;AAEA,UAAI,QAAQ;AACV,aAAK,OAAO,KAAK,8CAA8C;AAC/D,cAAM,OAAO,KAAK;AAAA,MACpB;AAAA,IACF;AAEA,UAAM,MAAM,QAAQ,IAAI;AACxB,UAAM,cAAU,wBAAK,KAAK,YAAY;AACtC,UAAM,iBAAa,wBAAK,KAAK,kBAAkB;AAC/C,UAAM,gBAAY,wBAAK,KAAK,UAAU,QAAQ;AAE9C,QAAI;AACF,YAAM,KAAK,sBAAsB,SAAS,YAAY,SAAS;AAG/D,UAAI,aAAa,SAAS;AACxB,aAAK,OAAO,KAAK,mDAAmD;AACpE,cAAM,QAAQ,IAAI,MAAM,4BAA4B;AACpD,cAAM,OAAO;AACb,cAAM;AAAA,MACR;AAGA,WAAK,OAAO,KAAK,iDAAiD;AAClE,YAAM,WAAW,SAAS;AAE1B,YAAM,WAAW,KAAK,IAAI,IAAI;AAC9B,WAAK,OAAO,KAAK,mDAAmD;AACpE,WAAK,OAAO,KAAK,8BAA8B,UAAU,IAAI;AAAA,IAC/D,UAAE;AAEA,WAAK,OAAO,KAAK,+CAA+C;AAChE
,cAAI,4BAAW,OAAO,GAAG;AACvB,oCAAO,SAAS,EAAE,OAAO,KAAK,CAAC;AAAA,MACjC;AACA,cAAI,4BAAW,UAAU,GAAG;AAC1B,oCAAO,YAAY,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;AAAA,MACrD;AAGA,UAAI,sBAAsB;AACxB,aAAK,OAAO;AAAA,UACV;AAAA,UACA;AAAA,QACF;AACA,gBAAI,4BAAW,SAAS,GAAG;AACzB,sCAAO,WAAW,EAAE,WAAW,MAAM,OAAO,KAAK,CAAC;AAAA,QACpD;AAAA,MACF,OAAO;AACL,aAAK,OAAO,KAAK,uCAAuC,SAAS;AAAA,MACnE;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,uBACN,SAamB;AACnB,WAAO;AAAA,MACL,OAAG,wBAAK,SAAS,CAAC,aAAa,CAAC;AAAA,MAChC,YAAY,CAAC,QAAQ,MAAM;AAAA,MAC3B,mBAAmB;AAAA,MACnB,YAAY;AAAA,MACZ,yBAAyB;AAAA,MACzB,cAAc;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,MAQd,WAAW;AAAA,MACX,qBAAqB;AAAA,QACnB,QAAQ;AAAA,QACR,aAAa,QAAQ;AAAA,MACvB;AAAA,IACF;AAAA,EACF;AAAA,EAEA,MAAc,oBACZ,KACA,mBAC8B;AAC9B,UAAM,OAAO,MAAM,KAAK,OAAO,mBAAmB;AAAA,MAChD,SAAS;AAAA,QACP;AAAA,UACE,MAAM;AAAA,UACN;AAAA,QACF;AAAA,MACF;AAAA,MACA,SAAS;AAAA,MACT,QAAQ;AAAA,QACN,MAAM;AAAA,MACR;AAAA,IACF,CAAC;AAED,SAAK,OAAO,KAAK,gCAAgC,KAAK,MAAM,EAAE;AAC9D,SAAK,OAAO,KAAK,wCAAwC;AAEzD,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAQA,MAAc,WACZ,KACwD;AACxD,QAAI,IAAI,WAAW,SAAS,GAAG;AAC7B,YAAM,WAAW,IAAI,MAAM,CAAC;AAC5B,YAAM,SAAS,IAAI,gBAAgB;AACnC,YAAM,UAAU,MAAM,OAAO,MAAM,QAAQ;AAE3C,WAAK,OAAO,KAAK,6CAA6C,OAAO;AAErE,aAAO,EAAE,SAAS,OAAO;AAAA,IAC3B;AAEA,WAAO,EAAE,SAAS,IAAI;AAAA,EACxB;AAAA,EAEA,MAAc,kBAAkB,MAA0C;AACxE,UAAM,sBAAsB,KAAK,IAAI;AACrC,QAAI,aAAa;AACjB,QAAI,cAAc;AAElB,UAAM,eAAe,YAAY,MAAM;AACrC,UAAI,YAAa;AACjB,YAAM,UAAU,KAAK,OAAO,KAAK,IAAI,IAAI,uBAAuB,GAAI;AACpE,cAAQ,OAAO;AAAA,QACb,4BAA4B,cAAc,YAAY,KAAK,OAAO;AAAA,MACpE;AAAA,IACF,GAAG,cAAc,gBAAgB;AAEjC,SAAK,GAAG,YAAY,CAAC,WAAW;AAC9B,mBAAa,OAAO;AACpB,UAAI,OAAO,kBAAkB,QAAW;AACtC,gBAAQ,OAAO;AAAA,UACb,4BAA4B,OAAO,WAAW,eAAe,OAAO,aAAa;AAAA,QACnF;AAAA,MACF;AAAA,IACF,CAAC;AAED,SAAK,GAAG,YAAY,MAAM;AACxB,oBAAc;AACd,oBAAc,YAAY;AAC1B,WAAK,OAAO,KAAK,wCAAwC;AAAA,IAC3D,CAAC;AAED,SAAK,GAAG,SAAS,CAAC,UAAU;AAC1B,oBAAc;AACd,oBAAc,YAAY;AAC1B,WAAK,OAAO,MAAM,sCAAsC,MAAM,OAAO;AAAA,IACvE,CAAC;AAED,QAAI;AACF,YAAM,KAAK,kBAAkB;AAAA,IAC/B,UAAE;AACA,oBAAc;AACd,oBAAc
,YAAY;AAAA,IAC5B;AAAA,EACF;AAAA,EAEA,MAAc,eAAe,QAA+B;AAC1D,SAAK,OAAO;AAAA,MACV;AAAA,IACF;AAEA,UAAM,YAAY,MAAM,KAAK,OAAO,kBAAkB,MAAM;AAE5D,QAAI,CAAC,UAAU,WAAW,CAAC,UAAU,YAAY;AAC/C,YAAM,IAAI,MAAM,+BAA+B;AAAA,IACjD;AAEA,UAAM,cAAU,wBAAK,QAAQ,IAAI,GAAG,YAAY;AAEhD,SAAK,OAAO,KAAK,sCAAsC,OAAO;AAC9D,UAAM,kBAAc,mCAAkB,OAAO;AAC7C,cAAM,0BAAS,UAAU,YAAY,WAAW;AAAA,EAClD;AAAA,EAEA,MAAc,sBACZ,SACA,YACA,WACe;AAEf,UAAM,eAAe;AAAA,MACnB,KAAK;AAAA,MACL;AAAA,MACA;AAAA,MACA;AAAA,IACF;AAAA,EACF;AACF;;;AR9SO,IAAM,YAAN,MAAgB;AAAA,EACJ;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,EACT,SAAkC;AAAA,EAE1C,YAAY,SAAkB;AAC5B,UAAM;AAAA,MACJ;AAAA,MACA,UAAU,WAAW;AAAA,MACrB;AAAA,MACA,sBAAsB;AAAA,MACtB,yBAAyB;AAAA,IAC3B,IAAI;AAEJ,SAAK,SAAS;AAEd,QAAI,aAAa,SAAS;AACxB,WAAK,UAAU,QAAQ;AACvB,WAAK,OAAO;AAAA,IACd,OAAO;AACL,WAAK,OAAO,QAAQ;AACpB,WAAK,UAAU;AAAA,IACjB;AAEA,SAAK,UAAU;AACf,SAAK,WAAW,gBAAY,wBAAK,QAAQ,IAAI,GAAG,OAAO;AACvD,SAAK,sBAAsB;AAC3B,SAAK,yBAAyB;AAAA,EAChC;AAAA,EAEA,MAAM,OAAsB;AAC1B,SAAK,OAAO,KAAK,6BAA6B;AAE9C,SAAK,qBAAqB;AAC1B,SAAK,iBAAiB;AACtB,SAAK,kBAAkB;AAGvB,QAAI,KAAK,0BAA0B,CAAC,KAAK,SAAS;AAChD,WAAK,0BAA0B;AAC/B,WAAK,0BAA0B;AAAA,IACjC,WAAW,KAAK,0BAA0B,KAAK,SAAS;AACtD,WAAK,OAAO;AAAA,QACV;AAAA,MACF;AAAA,IACF;AAEA,QAAI,KAAK,SAAS;AAChB,WAAK,OAAO,KAAK,sCAAsC,KAAK,OAAO;AACnE,WAAK,SAAS,IAAI,2BAAQ;AAAA,QACxB,KAAK,EAAE,SAAS,KAAK,SAAS,SAAS,KAAK,QAAQ;AAAA,MACtD,CAAC;AACD,YAAM,KAAK,mBAAmB;AAC9B;AAAA,IACF;AAEA,SAAK,OAAO,KAAK,wCAAwC;AACzD,QAAI;AACF,YAAM,cAAc,IAAI,mBAAmB;AAAA,QACzC,QAAQ,KAAK;AAAA,QACb,UAAU,KAAK;AAAA,QACf,MAAM,KAAK;AAAA,QACX,qBAAqB,KAAK;AAAA,MAC5B,CAAC;AAED,YAAM,YAAY,MAAM;AAExB,YAAM,YAAY,oBAAoB,KAAK,IAAI;AAC/C,WAAK,SAAS,IAAI,2BAAQ;AAAA,QACxB,KAAK;AAAA,UACH,SAAS;AAAA,UACT,SAAS,KAAK;AAAA,QAChB;AAAA,MACF,CAAC;AAED,YAAM,KAAK,mBAAmB;AAC9B,WAAK,OAAO,KAAK,mBAAmB;AAAA,IACtC,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,sCAAsC,KAAK;AAC7D,YAAM,IAAI,MAAM,mCAAmC,KAAK,EAAE;AAAA,IAC5D;AAAA,EACF;AAAA,EAEQ,uBAA6B;AACnC,YAAI,0BAAS,MAAM,UAAU;AAC3B,YAAM,IAAI;AAAA,QACR,iEAA6D,0BAAS;AAAA,MACxE;AAAA,IACF;AAAA,
EACF;AAAA,EAEQ,mBAAyB;AAC/B,QAAI;AACF,+CAAS,YAAY,EAAE,OAAO,SAAS,CAAC;AAAA,IAC1C,QAAQ;AACN,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,oBAA0B;AAChC,QAAI;AACF,YAAM,oBAAgB,qCAAS,2BAA2B;AAAA,QACxD,UAAU;AAAA,MACZ,CAAC,EAAE,KAAK;AACR,YAAM,eAAe,cAAc,MAAM,eAAe;AACxD,UAAI,cAAc;AAChB,cAAM,QAAQ,SAAS,aAAa,CAAC,CAAC;AACtC,cAAM,QAAQ,SAAS,aAAa,CAAC,CAAC;AACtC,YAAI,QAAQ,MAAO,UAAU,MAAM,QAAQ,IAAK;AAC9C,gBAAM,IAAI;AAAA,YACR,sDAAsD,aAAa;AAAA,UACrE;AAAA,QACF;AAAA,MACF;AAAA,IACF,SAAS,OAAO;AACd,UAAI,iBAAiB,SAAS,MAAM,QAAQ,SAAS,aAAa,GAAG;AACnE,cAAM;AAAA,MACR;AACA,YAAM,IAAI,MAAM,+BAA+B;AAAA,IACjD;AAAA,EACF;AAAA,EAEQ,4BAAkC;AACxC,QAAI;AACF,+CAAS,gBAAgB,EAAE,OAAO,SAAS,CAAC;AAAA,IAC9C,QAAQ;AACN,YAAM,IAAI;AAAA,QACR;AAAA,MAEF;AAAA,IACF;AAAA,EACF;AAAA,EAEQ,4BAAkC;AACxC,QAAI;AACF,+CAAS,YAAY,EAAE,OAAO,SAAS,CAAC;AAAA,IAC1C,QAAQ;AACN,YAAM,IAAI;AAAA,QACR;AAAA,MAEF;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMQ,yBAAyB,OAAyB;AACxD,QAAI,iBAAiB,OAAO;AAC1B,YAAM,WAAW,KAAK,UAAU,KAAK;AACrC,aAAO,SAAS,SAAS,cAAc;AAAA,IACzC;AACA,WAAO;AAAA,EACT;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAUA,MAAc,gBAA+B;AAC3C,SAAK,OAAO,KAAK,kCAAkC;AAInD,UAAM,mBAAmB,kBAAkB,KAAK,QAAQ,KAAK,IAAK;AAGlE,UAAM,cAAc,IAAI,mBAAmB;AAAA,MACzC,QAAQ,KAAK;AAAA,MACb,UAAU,KAAK;AAAA,MACf,MAAM,KAAK;AAAA,MACX,qBAAqB;AAAA;AAAA,IACvB,CAAC;AAED,UAAM,YAAY,YAAY;AAG9B,SAAK,QAAQ,QAAQ;AACrB,SAAK,SAAS,IAAI,2BAAQ;AAAA,MACxB,KAAK;AAAA,QACH,SAAS,oBAAoB,KAAK,IAAI;AAAA,QACtC,SAAS,KAAK;AAAA,MAChB;AAAA,IACF,CAAC;AAED,UAAM,KAAK,mBAAmB;AAC9B,SAAK,OAAO,KAAK,2CAA2C;AAAA,EAC9D;AAAA,EAEA,MAAc,qBAAoC;AAChD,UAAM,cAAc,WAAW;AAC/B,UAAM,gBAAgB,WAAW;AACjC,UAAM,cAAc,WAAW;AAC/B,QAAI,cAAc;AAElB,aAAS,UAAU,GAAG,WAAW,aAAa,WAAW;AACvD,UAAI;AACF,cAAM,KAAK,OAAQ,OAAO;AAC1B,aAAK,OAAO,KAAK,6BAA6B;AAC9C;AAAA,MACF,QAAQ;AACN,cAAM,MAAM,KAAK,IAAI;AACrB,YAAI,MAAM,eAAe,aAAa;AACpC,eAAK,OAAO;AAAA,YACV;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,YACA;AAAA,UACF;AACA,wBAAc;AAAA,QAChB;AAEA,YAAI,UAAU,aAAa;AACzB,gBAAM,IAAI,QAAQ,CAAC,YAAY,WAAW,SAAS,aAAa,CAAC;AAAA,QACnE;AAAA,MACF;AAAA,IACF;AAEA,UAA
M,IAAI,MAAM,sDAAsD;AAAA,EACxE;AAAA,EAEA,MAAa,MACX,KACA,UACA,YACA,sBACA,SAaA,aACA;AACA,QAAI,CAAC,KAAK,QAAQ;AAChB,YAAM,IAAI;AAAA,QACR;AAAA,MACF;AAAA,IACF;AAGA,UAAM,aAAa,CAAC,KAAK,WAAW,KAAK,SAAS;AAClD,UAAM,cAAc,WAAW;AAC/B,QAAI,UAAU;AAEd,WAAO,WAAW,aAAa;AAC7B,UAAI;AAEF,cAAM,2BACJ,KAAK,0BAA0B,CAAC,KAAK;AACvC,cAAM,YAAY,IAAI;AAAA,UACpB,KAAK;AAAA,UACL,KAAK;AAAA,UACL;AAAA,QACF;AACA,eAAO,MAAM,UAAU;AAAA,UACrB;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,UACA;AAAA,QACF;AAAA,MACF,SAAS,OAAO;AAEd,YAAI,aAAa,SAAS;AACxB,gBAAM;AAAA,QACR;AAGA,YACE,cACA,KAAK,yBAAyB,KAAK,KACnC,UAAU,aACV;AACA,eAAK,OAAO;AAAA,YACV;AAAA,UACF;AACA,gBAAM,KAAK,cAAc;AACzB;AACA;AAAA,QACF;AACA,cAAM;AAAA,MACR;AAAA,IACF;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA,EAOA,MAAa,UAAyB;AACpC,SAAK,OAAO,KAAK,0BAA0B;AAE3C,QAAI;AAEF,UAAI,CAAC,KAAK,WAAW,KAAK,MAAM;AAC9B,cAAM,mBAAmB,kBAAkB,KAAK,QAAQ,KAAK,IAAI;AAAA,MACnE;AAAA,IACF,SAAS,OAAO;AACd,WAAK,OAAO,MAAM,sCAAsC,KAAK;AAAA,IAC/D,UAAE;AAEA,WAAK,QAAQ,QAAQ;AACrB,WAAK,SAAS;AACd,WAAK,OAAO,KAAK,sBAAsB;AAAA,IACzC;AAAA,EACF;AACF;","names":["import_node_child_process","import_node_os","import_node_path","Error","import_node_fs","import_node_path","import_node_path","import_node_child_process","err","filename","import_node_fs","import_node_path","import_node_fs","import_node_path"]}
|
package/dist/index.d.cts
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { LoggerMethods } from '@heripo/logger';
|
|
2
|
+
import { ConversionOptions } from 'docling-sdk';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Callback function invoked after PDF conversion completes
|
|
6
|
+
* @param outputPath Absolute path to the output directory containing result files
|
|
7
|
+
*/
|
|
8
|
+
type ConversionCompleteCallback = (outputPath: string) => Promise<void> | void;
|
|
9
|
+
|
|
10
|
+
type Options = {
|
|
11
|
+
logger: LoggerMethods;
|
|
12
|
+
timeout?: number;
|
|
13
|
+
venvPath?: string;
|
|
14
|
+
killExistingProcess?: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Enable fallback to image-based PDF when conversion fails.
|
|
17
|
+
* Only works with local server mode (port option).
|
|
18
|
+
* Requires ImageMagick and Ghostscript to be installed.
|
|
19
|
+
*/
|
|
20
|
+
enableImagePdfFallback?: boolean;
|
|
21
|
+
} & ({
|
|
22
|
+
port?: number;
|
|
23
|
+
} | {
|
|
24
|
+
baseUrl: string;
|
|
25
|
+
});
|
|
26
|
+
/**
|
|
27
|
+
* PDFParser - A PDF document parser using docling-serve
|
|
28
|
+
*
|
|
29
|
+
* ## System Requirements
|
|
30
|
+
* Before using PDFParser, ensure your system meets these requirements:
|
|
31
|
+
*
|
|
32
|
+
* ### Operating System
|
|
33
|
+
* - macOS 10.15 (Catalina) or later
|
|
34
|
+
*
|
|
35
|
+
* ### Required Software
|
|
36
|
+
* - `python3` (version 3.9 - 3.12)
|
|
37
|
+
* - Python 3.13+ is NOT compatible with docling-serve
|
|
38
|
+
* - Recommended: Python 3.11 or 3.12
|
|
39
|
+
* - Install specific version: `pyenv install 3.12.0 && pyenv global 3.12.0`
|
|
40
|
+
* - `jq` - JSON processor
|
|
41
|
+
* - Install: `brew install jq`
|
|
42
|
+
* - `lsof` - List open files (usually pre-installed on macOS)
|
|
43
|
+
*
|
|
44
|
+
* ## Initialization Process
|
|
45
|
+
* When `init()` is called, the following setup occurs automatically:
|
|
46
|
+
*
|
|
47
|
+
* ### If using external server (baseUrl provided):
|
|
48
|
+
* 1. Connects to the provided baseUrl
|
|
49
|
+
* 2. Waits for server health check (up to 120 seconds)
|
|
50
|
+
*
|
|
51
|
+
* ### If using local server (default):
|
|
52
|
+
* 1. **Python Environment Setup**
|
|
53
|
+
* - Verifies Python version compatibility (3.9-3.12)
|
|
54
|
+
* - Creates Python virtual environment at `venvPath` (default: `.venv`)
|
|
55
|
+
* - Verifies virtual environment Python version
|
|
56
|
+
*
|
|
57
|
+
* 2. **Dependency Installation**
|
|
58
|
+
* - Upgrades pip to latest version
|
|
59
|
+
* - Installs setuptools and wheel
|
|
60
|
+
* - Installs pyarrow (binary-only to avoid compilation)
|
|
61
|
+
* - Installs docling-serve package
|
|
62
|
+
*
|
|
63
|
+
* 3. **Server Management**
|
|
64
|
+
* - Checks if specified port is already in use
|
|
65
|
+
* - If `killExistingProcess` is true, kills any process using the port
|
|
66
|
+
* - If port is in use and `killExistingProcess` is false, reuses existing server
|
|
67
|
+
* - Otherwise, starts new docling-serve instance on specified port
|
|
68
|
+
* - Waits for server to become ready (health check, up to 120 seconds)
|
|
69
|
+
*
|
|
70
|
+
* ## Notes
|
|
71
|
+
* - First initialization may take several minutes due to Python package downloads
|
|
72
|
+
* - Subsequent initializations are faster if virtual environment already exists
|
|
73
|
+
* - The virtual environment and packages are reused across sessions
|
|
74
|
+
 * - The local server process runs in the background; call `dispose()` to stop it, or manage it separately if needed
|
|
75
|
+
*/
|
|
76
|
+
declare class PDFParser {
|
|
77
|
+
private readonly logger;
|
|
78
|
+
private readonly port?;
|
|
79
|
+
private readonly baseUrl?;
|
|
80
|
+
private readonly timeout;
|
|
81
|
+
private readonly venvPath;
|
|
82
|
+
private readonly killExistingProcess;
|
|
83
|
+
private readonly enableImagePdfFallback;
|
|
84
|
+
private client;
|
|
85
|
+
constructor(options: Options);
|
|
86
|
+
init(): Promise<void>;
|
|
87
|
+
private checkOperatingSystem;
|
|
88
|
+
private checkJqInstalled;
|
|
89
|
+
private checkMacOSVersion;
|
|
90
|
+
private checkImageMagickInstalled;
|
|
91
|
+
private checkGhostscriptInstalled;
|
|
92
|
+
/**
|
|
93
|
+
* Check if an error is a connection refused error (ECONNREFUSED).
|
|
94
|
+
* This typically indicates the Docling server has crashed.
|
|
95
|
+
*/
|
|
96
|
+
private isConnectionRefusedError;
|
|
97
|
+
/**
|
|
98
|
+
* Restart the Docling server after it has crashed.
|
|
99
|
+
* This kills any existing process on the port, starts a new server,
|
|
100
|
+
* and waits for it to become ready.
|
|
101
|
+
*
|
|
102
|
+
* Note: This method is only called when canRecover is true,
|
|
103
|
+
* which guarantees this.port is defined.
|
|
104
|
+
*/
|
|
105
|
+
private restartServer;
|
|
106
|
+
private waitForServerReady;
|
|
107
|
+
parse(url: string, reportId: string, onComplete: ConversionCompleteCallback, cleanupAfterCallback: boolean, options: Omit<ConversionOptions, 'to_formats' | 'image_export_mode' | 'ocr_engine' | 'accelerator_options' | 'ocr_options' | 'generate_picture_images' | 'images_scale' | 'force_ocr'> & {
|
|
108
|
+
num_threads?: number;
|
|
109
|
+
}, abortSignal?: AbortSignal): Promise<void>;
|
|
110
|
+
/**
|
|
111
|
+
* Dispose the parser instance.
|
|
112
|
+
* - Sets the internal client to null
|
|
113
|
+
* - If a local docling server was started (no baseUrl), kills the process on the configured port
|
|
114
|
+
*/
|
|
115
|
+
dispose(): Promise<void>;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Error thrown when both original PDF conversion and image PDF fallback fail.
|
|
120
|
+
* Contains both errors for debugging purposes.
|
|
121
|
+
*/
|
|
122
|
+
declare class ImagePdfFallbackError extends Error {
|
|
123
|
+
readonly originalError: Error;
|
|
124
|
+
readonly fallbackError: Error;
|
|
125
|
+
readonly name = "ImagePdfFallbackError";
|
|
126
|
+
constructor(originalError: Error, fallbackError: Error);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
export { type ConversionCompleteCallback, ImagePdfFallbackError, PDFParser };
|
package/dist/index.d.ts
ADDED
|
@@ -0,0 +1,129 @@
|
|
|
1
|
+
import { LoggerMethods } from '@heripo/logger';
|
|
2
|
+
import { ConversionOptions } from 'docling-sdk';
|
|
3
|
+
|
|
4
|
+
/**
|
|
5
|
+
* Callback function invoked after PDF conversion completes
|
|
6
|
+
* @param outputPath Absolute path to the output directory containing result files
|
|
7
|
+
*/
|
|
8
|
+
type ConversionCompleteCallback = (outputPath: string) => Promise<void> | void;
|
|
9
|
+
|
|
10
|
+
type Options = {
|
|
11
|
+
logger: LoggerMethods;
|
|
12
|
+
timeout?: number;
|
|
13
|
+
venvPath?: string;
|
|
14
|
+
killExistingProcess?: boolean;
|
|
15
|
+
/**
|
|
16
|
+
* Enable fallback to image-based PDF when conversion fails.
|
|
17
|
+
* Only works with local server mode (port option).
|
|
18
|
+
* Requires ImageMagick and Ghostscript to be installed.
|
|
19
|
+
*/
|
|
20
|
+
enableImagePdfFallback?: boolean;
|
|
21
|
+
} & ({
|
|
22
|
+
port?: number;
|
|
23
|
+
} | {
|
|
24
|
+
baseUrl: string;
|
|
25
|
+
});
|
|
26
|
+
/**
|
|
27
|
+
* PDFParser - A PDF document parser using docling-serve
|
|
28
|
+
*
|
|
29
|
+
* ## System Requirements
|
|
30
|
+
* Before using PDFParser, ensure your system meets these requirements:
|
|
31
|
+
*
|
|
32
|
+
* ### Operating System
|
|
33
|
+
* - macOS 10.15 (Catalina) or later
|
|
34
|
+
*
|
|
35
|
+
* ### Required Software
|
|
36
|
+
* - `python3` (version 3.9 - 3.12)
|
|
37
|
+
* - Python 3.13+ is NOT compatible with docling-serve
|
|
38
|
+
* - Recommended: Python 3.11 or 3.12
|
|
39
|
+
* - Install specific version: `pyenv install 3.12.0 && pyenv global 3.12.0`
|
|
40
|
+
* - `jq` - JSON processor
|
|
41
|
+
* - Install: `brew install jq`
|
|
42
|
+
* - `lsof` - List open files (usually pre-installed on macOS)
|
|
43
|
+
*
|
|
44
|
+
* ## Initialization Process
|
|
45
|
+
* When `init()` is called, the following setup occurs automatically:
|
|
46
|
+
*
|
|
47
|
+
* ### If using external server (baseUrl provided):
|
|
48
|
+
* 1. Connects to the provided baseUrl
|
|
49
|
+
* 2. Waits for server health check (up to 120 seconds)
|
|
50
|
+
*
|
|
51
|
+
* ### If using local server (default):
|
|
52
|
+
* 1. **Python Environment Setup**
|
|
53
|
+
* - Verifies Python version compatibility (3.9-3.12)
|
|
54
|
+
* - Creates Python virtual environment at `venvPath` (default: `.venv`)
|
|
55
|
+
* - Verifies virtual environment Python version
|
|
56
|
+
*
|
|
57
|
+
* 2. **Dependency Installation**
|
|
58
|
+
* - Upgrades pip to latest version
|
|
59
|
+
* - Installs setuptools and wheel
|
|
60
|
+
* - Installs pyarrow (binary-only to avoid compilation)
|
|
61
|
+
* - Installs docling-serve package
|
|
62
|
+
*
|
|
63
|
+
* 3. **Server Management**
|
|
64
|
+
* - Checks if specified port is already in use
|
|
65
|
+
* - If `killExistingProcess` is true, kills any process using the port
|
|
66
|
+
* - If port is in use and `killExistingProcess` is false, reuses existing server
|
|
67
|
+
* - Otherwise, starts new docling-serve instance on specified port
|
|
68
|
+
* - Waits for server to become ready (health check, up to 120 seconds)
|
|
69
|
+
*
|
|
70
|
+
* ## Notes
|
|
71
|
+
* - First initialization may take several minutes due to Python package downloads
|
|
72
|
+
* - Subsequent initializations are faster if virtual environment already exists
|
|
73
|
+
* - The virtual environment and packages are reused across sessions
|
|
74
|
+
 * - The local server process runs in the background; call `dispose()` to stop it, or manage it separately if needed
|
|
75
|
+
*/
|
|
76
|
+
declare class PDFParser {
|
|
77
|
+
private readonly logger;
|
|
78
|
+
private readonly port?;
|
|
79
|
+
private readonly baseUrl?;
|
|
80
|
+
private readonly timeout;
|
|
81
|
+
private readonly venvPath;
|
|
82
|
+
private readonly killExistingProcess;
|
|
83
|
+
private readonly enableImagePdfFallback;
|
|
84
|
+
private client;
|
|
85
|
+
constructor(options: Options);
|
|
86
|
+
init(): Promise<void>;
|
|
87
|
+
private checkOperatingSystem;
|
|
88
|
+
private checkJqInstalled;
|
|
89
|
+
private checkMacOSVersion;
|
|
90
|
+
private checkImageMagickInstalled;
|
|
91
|
+
private checkGhostscriptInstalled;
|
|
92
|
+
/**
|
|
93
|
+
* Check if an error is a connection refused error (ECONNREFUSED).
|
|
94
|
+
* This typically indicates the Docling server has crashed.
|
|
95
|
+
*/
|
|
96
|
+
private isConnectionRefusedError;
|
|
97
|
+
/**
|
|
98
|
+
* Restart the Docling server after it has crashed.
|
|
99
|
+
* This kills any existing process on the port, starts a new server,
|
|
100
|
+
* and waits for it to become ready.
|
|
101
|
+
*
|
|
102
|
+
* Note: This method is only called when canRecover is true,
|
|
103
|
+
* which guarantees this.port is defined.
|
|
104
|
+
*/
|
|
105
|
+
private restartServer;
|
|
106
|
+
private waitForServerReady;
|
|
107
|
+
parse(url: string, reportId: string, onComplete: ConversionCompleteCallback, cleanupAfterCallback: boolean, options: Omit<ConversionOptions, 'to_formats' | 'image_export_mode' | 'ocr_engine' | 'accelerator_options' | 'ocr_options' | 'generate_picture_images' | 'images_scale' | 'force_ocr'> & {
|
|
108
|
+
num_threads?: number;
|
|
109
|
+
}, abortSignal?: AbortSignal): Promise<void>;
|
|
110
|
+
/**
|
|
111
|
+
* Dispose the parser instance.
|
|
112
|
+
* - Sets the internal client to null
|
|
113
|
+
* - If a local docling server was started (no baseUrl), kills the process on the configured port
|
|
114
|
+
*/
|
|
115
|
+
dispose(): Promise<void>;
|
|
116
|
+
}
|
|
117
|
+
|
|
118
|
+
/**
|
|
119
|
+
* Error thrown when both original PDF conversion and image PDF fallback fail.
|
|
120
|
+
* Contains both errors for debugging purposes.
|
|
121
|
+
*/
|
|
122
|
+
declare class ImagePdfFallbackError extends Error {
|
|
123
|
+
readonly originalError: Error;
|
|
124
|
+
readonly fallbackError: Error;
|
|
125
|
+
readonly name = "ImagePdfFallbackError";
|
|
126
|
+
constructor(originalError: Error, fallbackError: Error);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
export { type ConversionCompleteCallback, ImagePdfFallbackError, PDFParser };
|