hazo_pdf 1.7.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/SETUP_CHECKLIST.md +693 -0
- package/config/hazo_pdf_config.ini.sample +42 -0
- package/db_setup_postgres.sql +17 -0
- package/db_setup_sqlite.sql +13 -0
- package/dist/{chunk-NQ6KUJWG.js → chunk-7M53O3HF.js} +14 -4
- package/dist/chunk-7M53O3HF.js.map +1 -0
- package/dist/{chunk-4JJOUQ62.js → chunk-KDOQ3FIO.js} +176 -87
- package/dist/chunk-KDOQ3FIO.js.map +1 -0
- package/dist/{chunk-KHB3VZJQ.js → chunk-LFFCPDWC.js} +14 -3
- package/dist/chunk-LFFCPDWC.js.map +1 -0
- package/dist/{chunk-264BTVJT.js → chunk-TZJ5S57X.js} +18 -31
- package/dist/chunk-TZJ5S57X.js.map +1 -0
- package/dist/index.d.ts +9 -5
- package/dist/index.js +35 -16
- package/dist/index.js.map +1 -1
- package/dist/{pdf_saver-7FA4DAXI.js → pdf_saver-T6SEDYEE.js} +3 -3
- package/dist/{pdf_viewer-B6S5PJJB.js → pdf_viewer-TFCSUGWU.js} +3 -3
- package/dist/server/index.d.ts +5 -1
- package/dist/server/index.js +219 -81
- package/dist/server/index.js.map +1 -1
- package/dist/server/{text_search-2OZOVUIP.js → text_search-PVDG5Y6I.js} +14 -3
- package/dist/server/text_search-PVDG5Y6I.js.map +1 -0
- package/dist/styles/full.css +5821 -7156
- package/dist/styles/full.css.map +1 -1
- package/dist/styles/index.css +4844 -3929
- package/dist/styles/index.css.map +1 -1
- package/dist/{text_search-I2KZ7DTW.js → text_search-SO4ZOMIZ.js} +2 -2
- package/package.json +51 -36
- package/dist/chunk-264BTVJT.js.map +0 -1
- package/dist/chunk-4JJOUQ62.js.map +0 -1
- package/dist/chunk-KHB3VZJQ.js.map +0 -1
- package/dist/chunk-NQ6KUJWG.js.map +0 -1
- package/dist/server/text_search-2OZOVUIP.js.map +0 -1
- /package/dist/{pdf_saver-7FA4DAXI.js.map → pdf_saver-T6SEDYEE.js.map} +0 -0
- /package/dist/{pdf_viewer-B6S5PJJB.js.map → pdf_viewer-TFCSUGWU.js.map} +0 -0
- /package/dist/{text_search-I2KZ7DTW.js.map → text_search-SO4ZOMIZ.js.map} +0 -0
package/dist/server/index.js.map
CHANGED
|
@@ -1 +1 @@
|
|
|
1
|
-
{"version":3,"sources":["../../src/utils/logger.ts","../../src/server/extract.ts","../../src/server/snippet.ts","../../src/server/split.ts","../../src/server/index.ts"],"sourcesContent":["/**\n * Logger Utility for hazo_pdf\n * Provides a unified logging interface that can use hazo_logs or fallback to console\n */\n\n/**\n * Logger interface matching hazo_logs Logger type\n * This allows consumers to pass in their own logger instance\n */\nexport interface Logger {\n info: (message: string, data?: Record<string, unknown>) => void;\n debug: (message: string, data?: Record<string, unknown>) => void;\n warn: (message: string, data?: Record<string, unknown>) => void;\n error: (message: string, data?: Record<string, unknown>) => void;\n}\n\n/**\n * Fallback logger using console\n * Used when no external logger is provided\n */\nconst console_logger: Logger = {\n info: (message: string, data?: Record<string, unknown>) => {\n if (data) {\n console.log(`[hazo_pdf] ${message}`, data);\n } else {\n console.log(`[hazo_pdf] ${message}`);\n }\n },\n debug: (message: string, data?: Record<string, unknown>) => {\n if (data) {\n console.debug(`[hazo_pdf] ${message}`, data);\n } else {\n console.debug(`[hazo_pdf] ${message}`);\n }\n },\n warn: (message: string, data?: Record<string, unknown>) => {\n if (data) {\n console.warn(`[hazo_pdf] ${message}`, data);\n } else {\n console.warn(`[hazo_pdf] ${message}`);\n }\n },\n error: (message: string, data?: Record<string, unknown>) => {\n if (data) {\n console.error(`[hazo_pdf] ${message}`, data);\n } else {\n console.error(`[hazo_pdf] ${message}`);\n }\n },\n};\n\n/**\n * Current logger instance\n * Defaults to console_logger, can be set via set_logger()\n */\nlet current_logger: Logger = console_logger;\n\n/**\n * Set the logger instance to use throughout hazo_pdf\n * @param logger - Logger instance (from hazo_logs or custom), or undefined to reset to console\n */\nexport function set_logger(logger: Logger | undefined): void {\n current_logger = logger || console_logger;\n}\n\n/**\n * Get the current logger instance\n * @returns Current logger (either hazo_logs instance or console fallback)\n */\nexport function get_logger(): Logger {\n return current_logger;\n}\n","/**\n * Server-side document extraction utility\n * Uses hazo_llm_api for LLM-based extraction and hazo_files for storage\n */\n\nimport type { Logger } from '../utils/logger';\nimport { get_logger } from '../utils/logger';\nimport type { FileAccessProvider } from '../types/file_access';\nimport type {\n ExtractDocumentSource,\n ExtractDocumentOptions,\n ExtractDocumentResult,\n} from './types';\n\n// Lazy-loaded modules (server-only)\nlet is_initialized = false;\nlet hazo_files_adapter: unknown = null;\nlet llm_initialized = false;\n\n/**\n * Reset extraction state (for testing)\n */\nexport function reset_extraction_state(): void {\n is_initialized = false;\n hazo_files_adapter = null;\n llm_initialized = false;\n}\n\n/**\n * Initialize hazo_llm_api and SQLite adapter (lazy, once per process)\n */\nasync function ensure_initialized(\n sqlite_path?: string,\n logger: Logger = get_logger()\n): Promise<void> {\n if (is_initialized) {\n return;\n }\n\n try {\n // Dynamic import of server-only modules\n const { initialize_llm_api, get_current_config } = await import('hazo_llm_api/server');\n const { SqliteAdapter } = await import('hazo_connect/server');\n\n // Initialize hazo_llm_api if not already done\n if (!llm_initialized) {\n await initialize_llm_api({ logger });\n llm_initialized = true;\n logger.info('hazo_llm_api initialized');\n }\n\n // Determine SQLite path\n const config = get_current_config();\n const db_path = sqlite_path || config?.sqlite_path || 'prompt_library.sqlite';\n\n // Create hazo_connect SqliteAdapter\n hazo_files_adapter = new SqliteAdapter({\n type: 'sqlite',\n database_path: db_path,\n });\n\n // Ensure hazo_files table exists\n const { HAZO_FILES_TABLE_SCHEMA } = await import('hazo_files');\n const adapter = hazo_files_adapter as { rawQuery: (sql: string) => Promise<unknown> };\n await adapter.rawQuery(HAZO_FILES_TABLE_SCHEMA.sqlite.ddl);\n for (const idx of HAZO_FILES_TABLE_SCHEMA.sqlite.indexes) {\n await adapter.rawQuery(idx);\n }\n\n logger.debug('hazo_files table initialized', { sqlite_path: db_path });\n is_initialized = true;\n } catch (error) {\n logger.error('Failed to initialize extraction', {\n error: error instanceof Error ? error.message : String(error),\n });\n throw error;\n }\n}\n\n/**\n * Load document from file path or URL as base64\n */\nasync function load_document_as_base64(\n file_path: string,\n logger: Logger = get_logger()\n): Promise<{ base64: string; mime_type: string }> {\n // HTTP(S) URL\n if (file_path.startsWith('http://') || file_path.startsWith('https://')) {\n logger.debug('Loading document from URL', { url: file_path });\n const response = await fetch(file_path);\n if (!response.ok) {\n throw new Error(`Failed to fetch document: ${response.status} ${response.statusText}`);\n }\n const buffer = await response.arrayBuffer();\n const base64 = Buffer.from(buffer).toString('base64');\n const content_type = response.headers.get('content-type') || 'application/pdf';\n return { base64, mime_type: content_type };\n }\n\n // Local file path\n logger.debug('Loading document from filesystem', { path: file_path });\n const fs = await import('fs');\n const path = await import('path');\n\n if (!fs.existsSync(file_path)) {\n throw new Error(`File not found: ${file_path}`);\n }\n\n const buffer = fs.readFileSync(file_path);\n const base64 = buffer.toString('base64');\n\n // Determine MIME type from extension\n const ext = path.extname(file_path).toLowerCase();\n const mime_types: Record<string, string> = {\n '.pdf': 'application/pdf',\n '.png': 'image/png',\n '.jpg': 'image/jpeg',\n '.jpeg': 'image/jpeg',\n '.gif': 'image/gif',\n '.webp': 'image/webp',\n };\n const mime_type = mime_types[ext] || 'application/pdf';\n\n return { base64, mime_type };\n}\n\n/**\n * Load document by hazo_files record ID\n */\nasync function load_document_by_file_id(\n file_id: string,\n _storage_type: 'local' | 'google_drive', // Used as fallback, actual storage_type comes from record\n file_manager: FileAccessProvider | undefined,\n logger: Logger = get_logger()\n): Promise<{ base64: string; mime_type: string; file_path: string }> {\n // Get file record from hazo_files\n const { HAZO_FILES_TABLE_SCHEMA } = await import('hazo_files');\n const { createCrudService } = await import('hazo_connect/server');\n\n if (!hazo_files_adapter) {\n throw new Error('hazo_files adapter not initialized');\n }\n\n type FileRecord = { id: string; file_path: string; file_type: string; storage_type: string };\n type CrudServiceType = { findMany: (opts: { where: { id: string } }) => Promise<FileRecord[]> };\n\n const crudService = createCrudService(\n hazo_files_adapter as Parameters<typeof createCrudService>[0],\n HAZO_FILES_TABLE_SCHEMA.tableName\n );\n\n // Get file metadata by ID\n const files = await (crudService as unknown as CrudServiceType).findMany({\n where: { id: file_id },\n });\n\n if (!files || files.length === 0) {\n throw new Error(`File record not found: ${file_id}`);\n }\n\n const file_record = files[0];\n const file_path = file_record.file_path;\n const mime_type = file_record.file_type || 'application/pdf';\n\n logger.debug('Found file record', { file_id, file_path, storage_type: file_record.storage_type });\n\n // If Google Drive storage, use file_manager to download\n if (file_record.storage_type === 'google_drive') {\n if (!file_manager) {\n throw new Error('file_manager is required for Google Drive files');\n }\n if (!file_manager.isInitialized()) {\n throw new Error('file_manager is not initialized');\n }\n\n logger.debug('Downloading from Google Drive', { file_path });\n const result = await file_manager.downloadFile(file_path);\n if (!result.success || !result.data) {\n throw new Error(result.error || 'Failed to download file from Google Drive');\n }\n\n const buffer = Buffer.from(result.data as ArrayBuffer);\n const base64 = buffer.toString('base64');\n return { base64, mime_type, file_path };\n }\n\n // Local storage - read from filesystem\n return {\n ...(await load_document_as_base64(file_path, logger)),\n file_path,\n };\n}\n\n/**\n * Extract structured data from a PDF document using LLM\n *\n * @param source - Document source (file_path OR file_id)\n * @param options - Extraction options (prompt_area, prompt_key, etc.)\n * @returns Extraction result with data and metadata\n *\n * @example\n * // From file path\n * const result = await extract_document_data(\n * { file_path: '/path/to/doc.pdf' },\n * { prompt_area: 'document', prompt_key: 'initial_classification' }\n * );\n *\n * @example\n * // From hazo_files record ID\n * const result = await extract_document_data(\n * { file_id: 'abc-123' },\n * { prompt_area: 'invoice', prompt_key: 'extract_line_items', sqlite_path: './data/app.sqlite' }\n * );\n */\nexport async function extract_document_data(\n source: ExtractDocumentSource,\n options: ExtractDocumentOptions\n): Promise<ExtractDocumentResult> {\n const logger = options.logger || get_logger();\n const storage_type = options.storage_type || 'local';\n const save_to_hazo_files = options.save_to_hazo_files !== false;\n\n // Validate source\n if (!source.file_path && !source.file_id) {\n return {\n success: false,\n error: 'Either file_path or file_id is required',\n };\n }\n\n if (source.file_path && source.file_id) {\n return {\n success: false,\n error: 'Provide either file_path or file_id, not both',\n };\n }\n\n try {\n // Initialize hazo_llm_api and hazo_files\n await ensure_initialized(options.sqlite_path, logger);\n\n // Load document\n let document_b64: string;\n let document_mime_type: string;\n let resolved_file_path: string;\n\n if (source.file_path) {\n const loaded = await load_document_as_base64(source.file_path, logger);\n document_b64 = loaded.base64;\n document_mime_type = options.mime_type || loaded.mime_type;\n resolved_file_path = source.file_path;\n } else {\n const loaded = await load_document_by_file_id(\n source.file_id!,\n storage_type,\n options.file_manager,\n logger\n );\n document_b64 = loaded.base64;\n document_mime_type = options.mime_type || loaded.mime_type;\n resolved_file_path = loaded.file_path;\n }\n\n const doc_size_kb = Math.round((document_b64.length * 0.75) / 1024);\n logger.info('Starting extraction', {\n file_path: resolved_file_path,\n size_kb: doc_size_kb,\n prompt: `${options.prompt_area}/${options.prompt_key}`,\n });\n\n // Import hazo_llm_api functions\n const {\n hazo_llm_dynamic_data_extract,\n get_database,\n get_prompt_by_area_and_key,\n default_logger,\n } = await import('hazo_llm_api/server');\n\n // Verify prompt exists\n const db = get_database();\n if (!db) {\n return {\n success: false,\n error: 'LLM database not initialized',\n };\n }\n\n const prompt_record = get_prompt_by_area_and_key(\n db,\n options.prompt_area,\n options.prompt_key,\n default_logger\n );\n\n if (!prompt_record) {\n return {\n success: false,\n error: `Prompt not found: area=\"${options.prompt_area}\", key=\"${options.prompt_key}\"`,\n };\n }\n\n logger.debug('Found prompt', {\n has_next_prompt: !!prompt_record.next_prompt,\n });\n\n // Run dynamic extraction\n const dynamic_response = await hazo_llm_dynamic_data_extract({\n initial_prompt_area: options.prompt_area,\n initial_prompt_key: options.prompt_key,\n file_b64: document_b64,\n file_mime_type: document_mime_type,\n max_depth: options.max_depth ?? 10,\n continue_on_error: options.continue_on_error ?? false,\n });\n\n logger.info('Extraction completed', {\n successful_steps: dynamic_response.successful_steps,\n total_steps: dynamic_response.total_steps,\n stop_reason: dynamic_response.final_stop_reason,\n });\n\n if (!dynamic_response.success && dynamic_response.errors.length > 0) {\n logger.error('Extraction failed', { errors: dynamic_response.errors });\n return {\n success: false,\n error: dynamic_response.errors[0]?.error || 'Dynamic extraction failed',\n successful_steps: dynamic_response.successful_steps,\n total_steps: dynamic_response.total_steps,\n stop_reason: dynamic_response.final_stop_reason,\n };\n }\n\n const extracted_data = dynamic_response.merged_result;\n\n // Save to hazo_files if enabled\n let extraction_id: string | undefined;\n let file_id: string | undefined = source.file_id;\n\n // Use original_file_path for hazo_files storage if provided (when extraction uses temp file)\n const storage_file_path = options.original_file_path || resolved_file_path;\n\n if (save_to_hazo_files && hazo_files_adapter) {\n try {\n const { createFileMetadataService, HAZO_FILES_TABLE_SCHEMA } = await import('hazo_files');\n const { createCrudService } = await import('hazo_connect/server');\n\n type FileIdRecord = { id: string };\n type CrudServiceWithFindMany = {\n findMany: (opts: { where: { file_path: string; storage_type: string } }) => Promise<FileIdRecord[]>;\n };\n\n const crudService = createCrudService(\n hazo_files_adapter as Parameters<typeof createCrudService>[0],\n HAZO_FILES_TABLE_SCHEMA.tableName\n );\n // Cast to unknown first to avoid type conflicts with hazo_files internal types\n const metadataService = createFileMetadataService(crudService as unknown as Parameters<typeof createFileMetadataService>[0], { logger });\n\n // Check if file record exists (use storage_file_path for hazo_files)\n const existing = await metadataService.getExtractions(\n storage_file_path,\n storage_type\n );\n\n if (existing === null) {\n // Create file record first\n const filename =\n options.filename ||\n storage_file_path.split('/').pop() ||\n 'unknown';\n logger.debug('Creating file record', { file_path: storage_file_path, filename });\n\n await metadataService.recordUpload({\n filename,\n file_type: document_mime_type,\n file_path: storage_file_path,\n storage_type,\n });\n\n // Get the created file record ID\n const files = await (crudService as unknown as CrudServiceWithFindMany).findMany({\n where: { file_path: storage_file_path, storage_type },\n });\n if (files && files.length > 0) {\n file_id = files[0].id;\n }\n }\n\n // Add extraction\n const extraction_result = await metadataService.addExtraction(\n storage_file_path,\n storage_type,\n extracted_data,\n { source: 'hazo_llm_api' }\n );\n\n if (extraction_result) {\n extraction_id = extraction_result.id;\n logger.debug('Saved extraction to hazo_files', { extraction_id });\n }\n } catch (save_error) {\n // Don't fail extraction if saving fails\n const error_msg = save_error instanceof Error ? save_error.message : String(save_error);\n const error_stack = save_error instanceof Error ? save_error.stack : undefined;\n logger.error('Failed to save extraction to hazo_files', {\n error: error_msg,\n stack: error_stack,\n storage_file_path,\n storage_type,\n });\n }\n }\n\n return {\n success: true,\n data: extracted_data,\n extraction_id,\n file_id,\n file_path: storage_file_path,\n successful_steps: dynamic_response.successful_steps,\n total_steps: dynamic_response.total_steps,\n stop_reason: dynamic_response.final_stop_reason,\n step_results: dynamic_response.step_results?.map((s: { prompt_area: string; prompt_key: string; success: boolean; result?: Record<string, unknown>; error?: string }) => ({\n prompt_area: s.prompt_area,\n prompt_key: s.prompt_key,\n success: s.success,\n data: s.result,\n error: s.error,\n })),\n };\n } catch (error) {\n logger.error('Extraction error', {\n error: error instanceof Error ? error.message : String(error),\n });\n return {\n success: false,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n}\n","/**\n * PDF text snippet extraction utility\n * Finds text in a PDF, highlights it, and returns an image snippet of the surrounding area\n *\n * Server-only: requires @napi-rs/canvas (installed as transitive dep of pdfjs-dist)\n */\n\nimport type { PDFDocumentProxy } from 'pdfjs-dist';\nimport type { TextSearchResult } from '../utils/text_search';\nimport type {\n SnippetSource,\n SnippetOptions,\n SnippetResult,\n PageSnippet,\n SnippetSize,\n} from './snippet_types';\n\n/**\n * Extract a text snippet from a PDF document\n *\n * Finds the specified text, highlights it, and returns a cropped image snippet.\n * Works for text-based PDFs (precise highlighting) and image-based PDFs\n * (full page fallback, or approximate LLM-based highlighting).\n *\n * @param source - PDF source (file path or raw bytes)\n * @param options - Snippet extraction options\n * @returns SnippetResult with PNG image buffer(s)\n */\nexport async function extract_text_snippet(\n source: SnippetSource,\n options: SnippetOptions\n): Promise<SnippetResult> {\n const {\n search_text,\n page_index = 0,\n snippet_size = 'half',\n match_mode = 'first',\n render_scale = 2.0,\n highlight_color = 'rgba(255, 255, 0, 0.35)',\n use_llm_for_image_pdf = false,\n } = options;\n\n if (!search_text) {\n return { success: false, snippets: [], error: 'search_text is required' };\n }\n\n if (!source.file_path && !source.pdf_bytes) {\n return { success: false, snippets: [], error: 'Either file_path or pdf_bytes is required' };\n }\n\n try {\n // Load PDF bytes\n const pdf_bytes = await load_pdf_bytes(source);\n\n // Load pdfjs-dist legacy build for Node.js (avoids Path2D requirement)\n const pdfjs = await import('pdfjs-dist/legacy/build/pdf.mjs');\n\n // Resolve standard font data path for proper text rendering\n const path_mod = await import('path');\n const standard_font_data_url = path_mod.resolve(\n process.cwd(), 'node_modules/pdfjs-dist/standard_fonts/'\n ) + '/';\n\n // Load PDF document (Node.js mode - no web worker)\n const pdf = await pdfjs.getDocument({\n data: new Uint8Array(pdf_bytes),\n useSystemFonts: true,\n standardFontDataUrl: standard_font_data_url,\n verbosity: 0,\n }).promise;\n\n // Load text search utility\n const { find_all_text_in_pdf } = await import('../utils/text_search');\n\n // Determine which pages to search\n const total_pages = pdf.numPages;\n const snippets: PageSnippet[] = [];\n\n if (match_mode === 'first') {\n // Search from page_index through all pages, stop at first match\n for (let i = page_index; i < total_pages; i++) {\n const page_snippet = await process_page(\n pdf, i, search_text, snippet_size, render_scale,\n highlight_color, use_llm_for_image_pdf, find_all_text_in_pdf\n );\n if (page_snippet) {\n snippets.push(page_snippet);\n break;\n }\n }\n } else {\n // 'all' mode: search all pages starting from page_index\n for (let i = page_index; i < total_pages; i++) {\n const page_snippet = await process_page(\n pdf, i, search_text, snippet_size, render_scale,\n highlight_color, use_llm_for_image_pdf, find_all_text_in_pdf\n );\n if (page_snippet) {\n snippets.push(page_snippet);\n }\n }\n }\n\n pdf.destroy();\n\n if (snippets.length === 0) {\n return { success: false, snippets: [], error: `Text \"${search_text}\" not found in PDF` };\n }\n\n return { success: true, snippets };\n } catch (err) {\n return {\n success: false,\n snippets: [],\n error: `Snippet extraction failed: ${err instanceof Error ? err.message : String(err)}`,\n };\n }\n}\n\n/**\n * Process a single page: search for text, render, highlight, crop\n */\nasync function process_page(\n pdf: PDFDocumentProxy,\n page_index: number,\n search_text: string,\n snippet_size: SnippetSize,\n render_scale: number,\n highlight_color: string,\n use_llm_for_image_pdf: boolean,\n find_all_text_in_pdf: typeof import('../utils/text_search').find_all_text_in_pdf,\n): Promise<PageSnippet | null> {\n const page = await pdf.getPage(page_index + 1); // pdfjs uses 1-based\n const viewport = page.getViewport({ scale: render_scale });\n\n // Search for text on this page\n let matches = await find_all_text_in_pdf(pdf, search_text, { page_index });\n let highlight_approximate = false;\n\n // Check if this is an image-based PDF (no text content)\n const text_content = await page.getTextContent();\n const has_text_layer = text_content.items.some(\n (item) => 'str' in item && (item as { str: string }).str.trim().length > 0\n );\n\n if (matches.length === 0 && !has_text_layer) {\n // Image-based PDF\n if (use_llm_for_image_pdf) {\n // Try LLM-based approximate location\n const llm_matches = await find_text_with_llm(\n page, viewport, search_text, render_scale\n );\n if (llm_matches) {\n matches = llm_matches;\n highlight_approximate = true;\n } else {\n // LLM couldn't find it either - return full page, no highlight\n return await render_full_page_snippet(page, viewport, page_index);\n }\n } else {\n // No LLM - return full page as snippet, no highlight\n return await render_full_page_snippet(page, viewport, page_index);\n }\n }\n\n if (matches.length === 0) {\n return null; // Text not found on this page\n }\n\n // Render the page to canvas\n const { createCanvas } = await import('@napi-rs/canvas');\n const canvas = createCanvas(viewport.width, viewport.height);\n const context = canvas.getContext('2d');\n\n // White background\n context.fillStyle = '#ffffff';\n context.fillRect(0, 0, viewport.width, viewport.height);\n\n // Render PDF page\n await page.render({\n canvasContext: context as unknown as CanvasRenderingContext2D,\n viewport,\n }).promise;\n\n // Draw highlights over matches\n draw_highlights(context, viewport, matches, highlight_color);\n\n // Calculate snippet crop bounds\n const crop = calculate_crop_bounds(\n viewport, matches, snippet_size, render_scale\n );\n\n // Crop to snippet\n const snippet_canvas = createCanvas(crop.width, crop.height);\n const snippet_ctx = snippet_canvas.getContext('2d');\n snippet_ctx.drawImage(\n canvas,\n crop.x, crop.y, crop.width, crop.height,\n 0, 0, crop.width, crop.height\n );\n\n const image_buffer = snippet_canvas.toBuffer('image/png');\n const image_base64 = image_buffer.toString('base64');\n\n return {\n page_index,\n image_buffer,\n image_base64,\n matches,\n highlight_approximate,\n snippet_width: crop.width,\n snippet_height: crop.height,\n };\n}\n\n/**\n * Render full page as a snippet (used for image-based PDFs without text location)\n */\nasync function render_full_page_snippet(\n page: import('pdfjs-dist').PDFPageProxy,\n viewport: import('pdfjs-dist').PageViewport,\n page_index: number,\n): Promise<PageSnippet> {\n const { createCanvas } = await import('@napi-rs/canvas');\n const canvas = createCanvas(viewport.width, viewport.height);\n const context = canvas.getContext('2d');\n\n context.fillStyle = '#ffffff';\n context.fillRect(0, 0, viewport.width, viewport.height);\n\n await page.render({\n canvasContext: context as unknown as CanvasRenderingContext2D,\n viewport,\n }).promise;\n\n const image_buffer = canvas.toBuffer('image/png');\n const image_base64 = image_buffer.toString('base64');\n\n return {\n page_index,\n image_buffer,\n image_base64,\n matches: [],\n highlight_approximate: true,\n snippet_width: viewport.width,\n snippet_height: viewport.height,\n };\n}\n\n/**\n * Draw semi-transparent highlight rectangles over text matches\n */\nfunction draw_highlights(\n context: ReturnType<import('@napi-rs/canvas').Canvas['getContext']>,\n viewport: import('pdfjs-dist').PageViewport,\n matches: TextSearchResult[],\n highlight_color: string,\n): void {\n // Parse highlight color\n context.save();\n context.fillStyle = highlight_color;\n\n for (const match of matches) {\n // Convert PDF coordinates to viewport (screen) coordinates\n // PDF origin is bottom-left, viewport origin is top-left\n const [x1, y1] = viewport.convertToViewportPoint(match.x, match.y + match.height);\n const [x2, y2] = viewport.convertToViewportPoint(match.x + match.width, match.y);\n\n const rx = Math.min(x1, x2);\n const ry = Math.min(y1, y2);\n const rw = Math.abs(x2 - x1);\n const rh = Math.abs(y2 - y1);\n\n context.fillRect(rx, ry, rw, rh);\n }\n\n context.restore();\n}\n\n/**\n * Calculate the crop bounds for the snippet image\n * Centers on the match bounding box, expands to fit requested snippet size,\n * and clamps to page boundaries\n */\nfunction calculate_crop_bounds(\n viewport: import('pdfjs-dist').PageViewport,\n matches: TextSearchResult[],\n snippet_size: SnippetSize,\n _render_scale: number,\n): { x: number; y: number; width: number; height: number } {\n const page_width = viewport.width;\n const page_height = viewport.height;\n\n if (snippet_size === 'full') {\n return { x: 0, y: 0, width: page_width, height: page_height };\n }\n\n // Compute bounding box of all matches in viewport coordinates\n let min_x = Infinity, min_y = Infinity, max_x = -Infinity, max_y = -Infinity;\n\n for (const match of matches) {\n const [x1, y1] = viewport.convertToViewportPoint(match.x, match.y + match.height);\n const [x2, y2] = viewport.convertToViewportPoint(match.x + match.width, match.y);\n\n min_x = Math.min(min_x, x1, x2);\n min_y = Math.min(min_y, y1, y2);\n max_x = Math.max(max_x, x1, x2);\n max_y = Math.max(max_y, y1, y2);\n }\n\n // Add padding around matches (20px at render scale)\n const padding = 40;\n min_x -= padding;\n min_y -= padding;\n max_x += padding;\n max_y += padding;\n\n const match_width = max_x - min_x;\n const match_height = max_y - min_y;\n\n // Determine requested snippet dimensions\n // Always use full page width; only height varies by snippet size\n const snippet_width = page_width;\n let snippet_height: number;\n\n if (snippet_size === 'half') {\n snippet_height = page_height / 2;\n } else {\n // quarter\n snippet_height = page_height / 4;\n }\n\n // Use the larger of requested size vs match coverage\n const final_width = Math.max(snippet_width, match_width);\n const final_height = Math.max(snippet_height, match_height);\n\n // Center on the match bounding box center\n const match_center_x = (min_x + max_x) / 2;\n const match_center_y = (min_y + max_y) / 2;\n\n let crop_x = match_center_x - final_width / 2;\n let crop_y = match_center_y - final_height / 2;\n\n // Clamp to page boundaries\n crop_x = Math.max(0, Math.min(crop_x, page_width - final_width));\n crop_y = Math.max(0, Math.min(crop_y, page_height - final_height));\n\n // Ensure dimensions don't exceed page\n const clamped_width = Math.min(final_width, page_width);\n const clamped_height = Math.min(final_height, page_height);\n\n return {\n x: Math.round(crop_x),\n y: Math.round(crop_y),\n width: Math.round(clamped_width),\n height: Math.round(clamped_height),\n };\n}\n\n/**\n * Load PDF bytes from a SnippetSource\n */\nasync function load_pdf_bytes(source: SnippetSource): Promise<Uint8Array> {\n if (source.pdf_bytes) {\n return source.pdf_bytes;\n }\n\n if (!source.file_path) {\n throw new Error('Either file_path or pdf_bytes is required');\n }\n\n const file_path = source.file_path;\n\n // HTTP(S) URL\n if (file_path.startsWith('http://') || file_path.startsWith('https://')) {\n const response = await fetch(file_path);\n if (!response.ok) {\n throw new Error(`Failed to fetch PDF: ${response.status} ${response.statusText}`);\n }\n const buffer = await response.arrayBuffer();\n return new Uint8Array(buffer);\n }\n\n // Local file\n const fs = await import('fs');\n if (!fs.existsSync(file_path)) {\n throw new Error(`File not found: ${file_path}`);\n }\n const buffer = fs.readFileSync(file_path);\n return new Uint8Array(buffer);\n}\n\n/**\n * Use LLM vision to find approximate text location in an image-based PDF page\n * Returns approximate TextSearchResult[] or null if LLM can't find the text\n */\nasync function find_text_with_llm(\n page: import('pdfjs-dist').PDFPageProxy,\n viewport: import('pdfjs-dist').PageViewport,\n search_text: string,\n _render_scale: number,\n): Promise<TextSearchResult[] | null> {\n try {\n // Render page to PNG for LLM\n const { createCanvas } = await import('@napi-rs/canvas');\n const canvas = createCanvas(viewport.width, viewport.height);\n const context = canvas.getContext('2d');\n\n context.fillStyle = '#ffffff';\n context.fillRect(0, 0, viewport.width, viewport.height);\n\n await page.render({\n canvasContext: context as unknown as CanvasRenderingContext2D,\n viewport,\n }).promise;\n\n const image_buffer = canvas.toBuffer('image/png');\n const image_base64 = image_buffer.toString('base64');\n\n // Dynamic import of hazo_llm_api\n const { hazo_llm_image_text } = await import('hazo_llm_api/server');\n\n const prompt = `Find the text \"${search_text}\" in this document image.\nIf found, return ONLY a JSON object with the approximate bounding box as percentage coordinates (0-100):\n{\"found\": true, \"x_pct\": <left edge %>, \"y_pct\": <top edge %>, \"width_pct\": <width %>, \"height_pct\": <height %>}\n\nIf the text is not found, return:\n{\"found\": false}\n\nReturn ONLY the JSON object, nothing else.`;\n\n const response = await hazo_llm_image_text({\n image_b64: image_base64,\n image_mime_type: 'image/png',\n prompt,\n });\n\n if (!response || !response.text) {\n return null;\n }\n\n // Parse LLM response\n const json_match = response.text.match(/\\{[^}]+\\}/);\n if (!json_match) {\n return null;\n }\n\n const parsed = JSON.parse(json_match[0]);\n if (!parsed.found) {\n return null;\n }\n\n // Convert percentage coordinates to PDF coordinates\n // We need to return in PDF coordinate space for consistency\n const page_view = page.getViewport({ scale: 1.0 });\n const pdf_width = page_view.width;\n const pdf_height = page_view.height;\n\n const x = (parsed.x_pct / 100) * pdf_width;\n const y_from_top = (parsed.y_pct / 100) * pdf_height;\n const width = (parsed.width_pct / 100) * pdf_width;\n const height = (parsed.height_pct / 100) * pdf_height;\n\n // Convert from top-origin to PDF bottom-origin\n const y_pdf = pdf_height - y_from_top - height;\n\n return [{\n x,\n y: y_pdf,\n width,\n height,\n match_type: 'partial' as const,\n matched_text: search_text,\n }];\n } catch {\n // LLM not available or failed - return null to fall back to full page\n return null;\n }\n}\n","/**\n * Server-side PDF split utility\n * Splits a PDF into multiple files using pdf-lib and stores them via FileAccessProvider\n */\n\nimport { get_logger } from '../utils/logger';\nimport type { FileAccessProvider } from '../types/file_access';\nimport type { PdfSplitRequest, PdfSplitResult, PdfSplitOutput } from '../components/split_viewer/types';\n\n/**\n * Sanitize a label into a safe filename segment\n * lowercase, replace non-alphanumeric with underscore, trim, max 60 chars\n */\nfunction sanitize_label(label: string): string {\n return label\n .toLowerCase()\n .replace(/[^a-z0-9]+/g, '_')\n .replace(/^_+|_+$/g, '')\n .slice(0, 60);\n}\n\n/**\n * Generate an output filename from a label and page list\n * Example: \"Bank Statement - HSBC\" + [1,2,3] → \"bank_statement_hsbc_p1-p3.pdf\"\n */\nfunction generate_filename(label: string, pages: number[]): string {\n const safe_label = sanitize_label(label);\n const sorted = [...pages].sort((a, b) => a - b);\n const page_range =\n sorted.length === 1\n ? `p${sorted[0]}`\n : `p${sorted[0]}-p${sorted[sorted.length - 1]}`;\n return `${safe_label}_${page_range}.pdf`;\n}\n\n/**\n * Convert download data to Uint8Array\n */\nfunction to_uint8array(data: unknown): Uint8Array {\n if (!data) {\n throw new Error('No data received from downloadFile');\n }\n if (data instanceof Uint8Array) {\n return data;\n }\n if (data instanceof ArrayBuffer) {\n return new Uint8Array(data);\n }\n // Buffer (Node.js) — has .buffer, .byteOffset, .byteLength\n if (typeof Buffer !== 'undefined' && Buffer.isBuffer(data)) {\n return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);\n }\n throw new Error('Unsupported data type from downloadFile');\n}\n\n/**\n * Split a PDF into multiple files based on split instructions\n *\n * @param request - Split request with source file ID and split instructions\n * @param file_manager - FileAccessProvider for downloading and uploading files\n * @returns PdfSplitResult with output file metadata for each split\n *\n * @example\n * const result = await split_pdf(\n * {\n * source_file_id: 'path/to/document.pdf',\n * splits: [\n * { split_id: 'split-1', label: 'Invoice', pages: [1, 2] },\n * { split_id: 'split-2', label: 'Attachment', pages: [3, 4, 5] },\n * ],\n * output_folder: 'splits/',\n * },\n * file_manager\n * );\n */\nexport async function split_pdf(\n request: PdfSplitRequest,\n file_manager: FileAccessProvider\n): Promise<PdfSplitResult> {\n const logger = get_logger();\n\n logger.info('Starting PDF split', {\n source_file_id: request.source_file_id,\n split_count: request.splits.length,\n output_folder: request.output_folder,\n });\n\n // Dynamic import of pdf-lib (server-only)\n const { PDFDocument } = await import('pdf-lib');\n\n // Download source PDF\n logger.debug('Downloading source PDF', { file_id: request.source_file_id });\n const download_result = await file_manager.downloadFile(request.source_file_id);\n if (!download_result.success || !download_result.data) {\n throw new Error(\n download_result.error || `Failed to download source file: ${request.source_file_id}`\n );\n }\n\n // Convert to Uint8Array and load PDF\n const source_bytes = to_uint8array(download_result.data);\n logger.debug('Loading source PDF', { byte_size: source_bytes.byteLength });\n const source_pdf = await PDFDocument.load(source_bytes);\n const total_pages = source_pdf.getPageCount();\n\n logger.info('Source PDF loaded', { total_pages });\n\n // Validate splits: every page 1..N must appear in exactly one split\n const page_counts = new Array<number>(total_pages + 1).fill(0); // index 1..N\n for (const split of request.splits) {\n for (const page of split.pages) {\n if (page < 1 || page > total_pages) {\n throw new Error(\n `Split \"${split.label}\" references page ${page} which is out of range (1-${total_pages})`\n );\n }\n page_counts[page]++;\n }\n }\n for (let p = 1; p <= total_pages; p++) {\n if (page_counts[p] === 0) {\n throw new Error(`Page ${p} is not included in any split`);\n }\n if (page_counts[p] > 1) {\n throw new Error(`Page ${p} appears in multiple splits`);\n }\n }\n\n logger.debug('Split validation passed', { total_pages, split_count: request.splits.length });\n\n // Process each split instruction\n const outputs: PdfSplitOutput[] = [];\n const output_folder = request.output_folder || '';\n\n for (const instruction of request.splits) {\n logger.debug('Processing split', {\n split_id: instruction.split_id,\n label: instruction.label,\n pages: instruction.pages,\n });\n\n // Create new PDF document\n const new_pdf = await PDFDocument.create();\n\n // Copy pages (pdf-lib uses 0-indexed)\n const page_indices = instruction.pages.map((p) => p - 1);\n const copied_pages = await new_pdf.copyPages(source_pdf, page_indices);\n for (const page of copied_pages) {\n new_pdf.addPage(page);\n }\n\n // Serialize to bytes\n const pdf_bytes = await new_pdf.save();\n const byte_size = pdf_bytes.byteLength;\n\n // Generate output filename and path\n const file_name =\n instruction.output_filename || generate_filename(instruction.label, instruction.pages);\n const file_path = output_folder ? `${output_folder.replace(/\\/$/, '')}/${file_name}` : file_name;\n\n // Upload via file_manager\n logger.debug('Uploading split PDF', { file_path, byte_size });\n const upload_result = await file_manager.uploadFile(new Uint8Array(pdf_bytes), file_path, {\n overwrite: true,\n });\n\n if (!upload_result.success) {\n throw new Error(\n upload_result.error || `Failed to upload split \"${instruction.label}\" to ${file_path}`\n );\n }\n\n outputs.push({\n split_id: instruction.split_id,\n label: instruction.label,\n pages: instruction.pages,\n file_id: file_path,\n file_name,\n file_path,\n page_count: instruction.pages.length,\n byte_size,\n });\n\n logger.info('Split uploaded', {\n split_id: instruction.split_id,\n label: instruction.label,\n file_path,\n page_count: instruction.pages.length,\n byte_size,\n });\n }\n\n logger.info('PDF split complete', {\n source_file_id: request.source_file_id,\n output_count: outputs.length,\n });\n\n return {\n source_file_id: request.source_file_id,\n splits: outputs,\n };\n}\n","/**\n * Server-side entry point for hazo_pdf\n * Provides document extraction utilities for server-side usage\n *\n * IMPORTANT: This module is server-only and will throw if imported in the browser\n *\n * @example\n * import { extract_document_data } from 'hazo_pdf/server';\n *\n * const result = await extract_document_data(\n * { file_path: '/path/to/document.pdf' },\n * { prompt_area: 'document', prompt_key: 'initial_classification' }\n * );\n */\n\n// Runtime guard - prevent browser usage\nif (typeof window !== 'undefined') {\n throw new Error(\n 'hazo_pdf/server cannot be imported in the browser. ' +\n 'This module is server-only and requires Node.js runtime.'\n );\n}\n\n// Export extraction utilities\nexport { extract_document_data, reset_extraction_state } from './extract';\n\n// Export snippet utilities\nexport { extract_text_snippet } from './snippet';\n\n// Export split utilities\nexport { split_pdf } from './split';\nexport type {\n PdfSplitRequest,\n PdfSplitInstruction,\n PdfSplitResult,\n PdfSplitOutput,\n} from '../components/split_viewer/types';\n\n// Export types\nexport type {\n ExtractDocumentSource,\n ExtractDocumentOptions,\n ExtractDocumentResult,\n ExtractStepResult,\n} from './types';\n\nexport type {\n SnippetSource,\n SnippetOptions,\n SnippetResult,\n PageSnippet,\n SnippetSize,\n SnippetMatchMode,\n} from './snippet_types';\n"],"mappings":";AAoBA,IAAM,iBAAyB;AAAA,EAC7B,MAAM,CAAC,SAAiB,SAAmC;AACzD,QAAI,MAAM;AACR,cAAQ,IAAI,cAAc,OAAO,IAAI,IAAI;AAAA,IAC3C,OAAO;AACL,cAAQ,IAAI,cAAc,OAAO,EAAE;AAAA,IACrC;AAAA,EACF;AAAA,EACA,OAAO,CAAC,SAAiB,SAAmC;AAC1D,QAAI,MAAM;AACR,cAAQ,MAAM,cAAc,OAAO,IAAI,IAAI;AAAA,IAC7C,OAAO;AACL,cAAQ,MAAM,cAAc,OAAO,EAAE;AAAA,IACvC;AAAA,EACF;AAAA,EACA,MAAM,CAAC,SAAiB,SAAmC;AACzD,QAAI,MAAM;AACR,cAAQ,KAAK,cAAc,OAAO,IAAI,IAAI;AAAA,IAC5C,OAAO;AACL,cAAQ,KAAK,cAAc,OAAO,EAAE;AAAA,IACtC;AAAA,EACF;AAAA,EACA,OAAO,CAAC,SAAiB,SAAmC;AAC1D,QAAI,MAAM;AACR,cAAQ,MAAM,cAAc,OAAO,IAAI,IAAI;AAAA,IAC7C,OAAO;AACL,cAAQ,MAAM,cAAc,OAAO,EAAE;AAAA,IACvC;AAAA,EACF;AACF;AAMA,IAAI,iBAAyB;AActB,SAAS,aAAqB;AACnC,SAAO;AACT;;;ACxDA,IAAI,iBAAiB;AACrB,IAAI,qBAA8B;AAClC,IAAI,kBAAkB;AAKf,SAAS,yBAA+B;AAC7C,mBAAiB;AACjB,uBAAqB;AACrB,oBAAkB;AACpB;AAKA,eAAe,mBACb,aACA,SAAiB,WAAW,GACb;AACf,MAAI,gBAAgB;AAClB;AAAA,EACF;AAEA,MAAI;AAEF,UAAM,EAAE,oBAAoB,mBAAmB,IAAI,MAAM,OAAO,qBAAqB;AACrF,UAAM,EAAE,cAAc,IAAI,MAAM,OAAO,qBAAqB;AAG5D,QAAI,CAAC,iBAAiB;AACpB,YAAM,mBAAmB,EAAE,OAAO,CAAC;AACnC,wBAAkB;AAClB,aAAO,KAAK,0BAA0B;AAAA,IACxC;AAGA,UAAM,SAAS,mBAAmB;AAClC,UAAM,UAAU,eAAe,QAAQ,eAAe;AAGtD,yBAAqB,IAAI,cAAc;AAAA,MACrC,MAAM;AAAA,MACN,eAAe;AAAA,IACjB,CAAC;AAGD,UAAM,EAAE,wBAAwB,IAAI,MAAM,OAAO,YAAY;AAC7D,UAAM,UAAU;AAChB,UAAM,QAAQ,SAAS,wBAAwB,OAAO,GAAG;AACzD,eAAW,OAAO,wBAAwB,OAAO,SAAS;AACxD,YAAM,QAAQ,SAAS,GAAG;AAAA,IAC5B;AAEA,WAAO,MAAM,gCAAgC,EAAE,aAAa,QAAQ,CAAC;AACrE,qBAAiB;AAAA,EACnB,SAAS,OAAO;AACd,WAAO,MAAM,mCAAmC;AAAA,MAC9C,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,IAC9D,CAAC;AACD,UAAM;AAAA,EACR;AACF;AAKA,eAAe,wBACb,WACA,SAAiB,WAAW,GACoB;AAEhD,MAAI,UAAU,WAAW,SAAS,KAAK,UAAU,WAAW,UAAU,GAAG;AACvE,WAAO,MAAM,6BAA6B,EAAE,KAAK,UAAU,CAAC;AAC5D,UAAM,WAAW,MAAM,MAAM,SAAS;AACtC,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAI,MAAM,6BAA6B,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,IACvF;AACA,UAAMA,UAAS,MAAM,SAAS,YAAY;AAC1C,UAAMC,UAAS,OAAO,KAAKD,OAAM,EAAE,SAAS,QAAQ;AACpD,UAAM,eAAe,SAAS,QAAQ,IAAI,cAAc,KAAK;AAC7D,WAAO,EAAE,QAAAC,SAAQ,WAAW,aAAa;AAAA,EAC3C;AAGA,SAAO,MAAM,oCAAoC,EAAE,MAAM,UAAU,CAAC;AACpE,QAAM,KAAK,MAAM,OAAO,IAAI;AAC5B,QAAM,OAAO,MAAM,OAAO,MAAM;AAEhC,MAAI,CAAC,GAAG,WAAW,SAAS,GAAG;AAC7B,UAAM,IAAI,MAAM,mBAAmB,SAAS,EAAE;AAAA,EAChD;AAEA,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,QAAM,SAAS,OAAO,SAAS,QAAQ;AAGvC,QAAM,MAAM,KAAK,QAAQ,SAAS,EAAE,YAAY;AAChD,QAAM,aAAqC;AAAA,IACzC,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,QAAQ;AAAA,IACR,SAAS;AAAA,EACX;AACA,QAAM,YAAY,WAAW,GAAG,KAAK;AAErC,SAAO,EAAE,QAAQ,UAAU;AAC7B;AAKA,eAAe,yBACb,SACA,eACA,cACA,SAAiB,WAAW,GACuC;AAEnE,QAAM,EAAE,wBAAwB,IAAI,MAAM,OAAO,YAAY;AAC7D,QAAM,EAAE,kBAAkB,IAAI,MAAM,OAAO,qBAAqB;AAEhE,MAAI,CAAC,oBAAoB;AACvB,UAAM,IAAI,MAAM,oCAAoC;AAAA,EACtD;AAKA,QAAM,cAAc;AAAA,IAClB;AAAA,IACA,wBAAwB;AAAA,EAC1B;AAGA,QAAM,QAAQ,MAAO,YAA2C,SAAS;AAAA,IACvE,OAAO,EAAE,IAAI,QAAQ;AAAA,EACvB,CAAC;AAED,MAAI,CAAC,SAAS,MAAM,WAAW,GAAG;AAChC,UAAM,IAAI,MAAM,0BAA0B,OAAO,EAAE;AAAA,EACrD;AAEA,QAAM,cAAc,MAAM,CAAC;AAC3B,QAAM,YAAY,YAAY;AAC9B,QAAM,YAAY,YAAY,aAAa;AAE3C,SAAO,MAAM,qBAAqB,EAAE,SAAS,WAAW,cAAc,YAAY,aAAa,CAAC;AAGhG,MAAI,YAAY,iBAAiB,gBAAgB;AAC/C,QAAI,CAAC,cAAc;AACjB,YAAM,IAAI,MAAM,iDAAiD;AAAA,IACnE;AACA,QAAI,CAAC,aAAa,cAAc,GAAG;AACjC,YAAM,IAAI,MAAM,iCAAiC;AAAA,IACnD;AAEA,WAAO,MAAM,iCAAiC,EAAE,UAAU,CAAC;AAC3D,UAAM,SAAS,MAAM,aAAa,aAAa,SAAS;AACxD,QAAI,CAAC,OAAO,WAAW,CAAC,OAAO,MAAM;AACnC,YAAM,IAAI,MAAM,OAAO,SAAS,2CAA2C;AAAA,IAC7E;AAEA,UAAM,SAAS,OAAO,KAAK,OAAO,IAAmB;AACrD,UAAM,SAAS,OAAO,SAAS,QAAQ;AACvC,WAAO,EAAE,QAAQ,WAAW,UAAU;AAAA,EACxC;AAGA,SAAO;AAAA,IACL,GAAI,MAAM,wBAAwB,WAAW,MAAM;AAAA,IACnD;AAAA,EACF;AACF;AAuBA,eAAsB,sBACpB,QACA,SACgC;AAChC,QAAM,SAAS,QAAQ,UAAU,WAAW;AAC5C,QAAM,eAAe,QAAQ,gBAAgB;AAC7C,QAAM,qBAAqB,QAAQ,uBAAuB;AAG1D,MAAI,CAAC,OAAO,aAAa,CAAC,OAAO,SAAS;AACxC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI,OAAO,aAAa,OAAO,SAAS;AACtC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI;AAEF,UAAM,mBAAmB,QAAQ,aAAa,MAAM;AAGpD,QAAI;AACJ,QAAI;AACJ,QAAI;AAEJ,QAAI,OAAO,WAAW;AACpB,YAAM,SAAS,MAAM,wBAAwB,OAAO,WAAW,MAAM;AACrE,qBAAe,OAAO;AACtB,2BAAqB,QAAQ,aAAa,OAAO;AACjD,2BAAqB,OAAO;AAAA,IAC9B,OAAO;AACL,YAAM,SAAS,MAAM;AAAA,QACnB,OAAO;AAAA,QACP;AAAA,QACA,QAAQ;AAAA,QACR;AAAA,MACF;AACA,qBAAe,OAAO;AACtB,2BAAqB,QAAQ,aAAa,OAAO;AACjD,2BAAqB,OAAO;AAAA,IAC9B;AAEA,UAAM,cAAc,KAAK,MAAO,aAAa,SAAS,OAAQ,IAAI;AAClE,WAAO,KAAK,uBAAuB;AAAA,MACjC,WAAW;AAAA,MACX,SAAS;AAAA,MACT,QAAQ,GAAG,QAAQ,WAAW,IAAI,QAAQ,UAAU;AAAA,IACtD,CAAC;AAGD,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IAAI,MAAM,OAAO,qBAAqB;AAGtC,UAAM,KAAK,aAAa;AACxB,QAAI,CAAC,IAAI;AACP,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO;AAAA,MACT;AAAA,IACF;AAEA,UAAM,gBAAgB;AAAA,MACpB;AAAA,MACA,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR;AAAA,IACF;AAEA,QAAI,CAAC,eAAe;AAClB,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,2BAA2B,QAAQ,WAAW,WAAW,QAAQ,UAAU;AAAA,MACpF;AAAA,IACF;AAEA,WAAO,MAAM,gBAAgB;AAAA,MAC3B,iBAAiB,CAAC,CAAC,cAAc;AAAA,IACnC,CAAC;AAGD,UAAM,mBAAmB,MAAM,8BAA8B;AAAA,MAC3D,qBAAqB,QAAQ;AAAA,MAC7B,oBAAoB,QAAQ;AAAA,MAC5B,UAAU;AAAA,MACV,gBAAgB;AAAA,MAChB,WAAW,QAAQ,aAAa;AAAA,MAChC,mBAAmB,QAAQ,qBAAqB;AAAA,IAClD,CAAC;AAED,WAAO,KAAK,wBAAwB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,aAAa,iBAAiB;AAAA,MAC9B,aAAa,iBAAiB;AAAA,IAChC,CAAC;AAED,QAAI,CAAC,iBAAiB,WAAW,iBAAiB,OAAO,SAAS,GAAG;AACnE,aAAO,MAAM,qBAAqB,EAAE,QAAQ,iBAAiB,OAAO,CAAC;AACrE,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,iBAAiB,OAAO,CAAC,GAAG,SAAS;AAAA,QAC5C,kBAAkB,iBAAiB;AAAA,QACnC,aAAa,iBAAiB;AAAA,QAC9B,aAAa,iBAAiB;AAAA,MAChC;AAAA,IACF;AAEA,UAAM,iBAAiB,iBAAiB;AAGxC,QAAI;AACJ,QAAI,UAA8B,OAAO;AAGzC,UAAM,oBAAoB,QAAQ,sBAAsB;AAExD,QAAI,sBAAsB,oBAAoB;AAC5C,UAAI;AACF,cAAM,EAAE,2BAA2B,wBAAwB,IAAI,MAAM,OAAO,YAAY;AACxF,cAAM,EAAE,kBAAkB,IAAI,MAAM,OAAO,qBAAqB;AAOhE,cAAM,cAAc;AAAA,UAClB;AAAA,UACA,wBAAwB;AAAA,QAC1B;AAEA,cAAM,kBAAkB,0BAA0B,aAA2E,EAAE,OAAO,CAAC;AAGvI,cAAM,WAAW,MAAM,gBAAgB;AAAA,UACrC;AAAA,UACA;AAAA,QACF;AAEA,YAAI,aAAa,MAAM;AAErB,gBAAM,WACJ,QAAQ,YACR,kBAAkB,MAAM,GAAG,EAAE,IAAI,KACjC;AACF,iBAAO,MAAM,wBAAwB,EAAE,WAAW,mBAAmB,SAAS,CAAC;AAE/E,gBAAM,gBAAgB,aAAa;AAAA,YACjC;AAAA,YACA,WAAW;AAAA,YACX,WAAW;AAAA,YACX;AAAA,UACF,CAAC;AAGD,gBAAM,QAAQ,MAAO,YAAmD,SAAS;AAAA,YAC/E,OAAO,EAAE,WAAW,mBAAmB,aAAa;AAAA,UACtD,CAAC;AACD,cAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,sBAAU,MAAM,CAAC,EAAE;AAAA,UACrB;AAAA,QACF;AAGA,cAAM,oBAAoB,MAAM,gBAAgB;AAAA,UAC9C;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE,QAAQ,eAAe;AAAA,QAC3B;AAEA,YAAI,mBAAmB;AACrB,0BAAgB,kBAAkB;AAClC,iBAAO,MAAM,kCAAkC,EAAE,cAAc,CAAC;AAAA,QAClE;AAAA,MACF,SAAS,YAAY;AAEnB,cAAM,YAAY,sBAAsB,QAAQ,WAAW,UAAU,OAAO,UAAU;AACtF,cAAM,cAAc,sBAAsB,QAAQ,WAAW,QAAQ;AACrE,eAAO,MAAM,2CAA2C;AAAA,UACtD,OAAO;AAAA,UACP,OAAO;AAAA,UACP;AAAA,UACA;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,MAAM;AAAA,MACN;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,kBAAkB,iBAAiB;AAAA,MACnC,aAAa,iBAAiB;AAAA,MAC9B,aAAa,iBAAiB;AAAA,MAC9B,cAAc,iBAAiB,cAAc,IAAI,CAAC,OAAwH;AAAA,QACxK,aAAa,EAAE;AAAA,QACf,YAAY,EAAE;AAAA,QACd,SAAS,EAAE;AAAA,QACX,MAAM,EAAE;AAAA,QACR,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,IACJ;AAAA,EACF,SAAS,OAAO;AACd,WAAO,MAAM,oBAAoB;AAAA,MAC/B,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,IAC9D,CAAC;AACD,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,IAC9D;AAAA,EACF;AACF;;;AC3ZA,eAAsB,qBACpB,QACA,SACwB;AACxB,QAAM;AAAA,IACJ;AAAA,IACA,aAAa;AAAA,IACb,eAAe;AAAA,IACf,aAAa;AAAA,IACb,eAAe;AAAA,IACf,kBAAkB;AAAA,IAClB,wBAAwB;AAAA,EAC1B,IAAI;AAEJ,MAAI,CAAC,aAAa;AAChB,WAAO,EAAE,SAAS,OAAO,UAAU,CAAC,GAAG,OAAO,0BAA0B;AAAA,EAC1E;AAEA,MAAI,CAAC,OAAO,aAAa,CAAC,OAAO,WAAW;AAC1C,WAAO,EAAE,SAAS,OAAO,UAAU,CAAC,GAAG,OAAO,4CAA4C;AAAA,EAC5F;AAEA,MAAI;AAEF,UAAM,YAAY,MAAM,eAAe,MAAM;AAG7C,UAAM,QAAQ,MAAM,OAAO,iCAAiC;AAG5D,UAAM,WAAW,MAAM,OAAO,MAAM;AACpC,UAAM,yBAAyB,SAAS;AAAA,MACtC,QAAQ,IAAI;AAAA,MAAG;AAAA,IACjB,IAAI;AAGJ,UAAM,MAAM,MAAM,MAAM,YAAY;AAAA,MAClC,MAAM,IAAI,WAAW,SAAS;AAAA,MAC9B,gBAAgB;AAAA,MAChB,qBAAqB;AAAA,MACrB,WAAW;AAAA,IACb,CAAC,EAAE;AAGH,UAAM,EAAE,qBAAqB,IAAI,MAAM,OAAO,2BAAsB;AAGpE,UAAM,cAAc,IAAI;AACxB,UAAM,WAA0B,CAAC;AAEjC,QAAI,eAAe,SAAS;AAE1B,eAAS,IAAI,YAAY,IAAI,aAAa,KAAK;AAC7C,cAAM,eAAe,MAAM;AAAA,UACzB;AAAA,UAAK;AAAA,UAAG;AAAA,UAAa;AAAA,UAAc;AAAA,UACnC;AAAA,UAAiB;AAAA,UAAuB;AAAA,QAC1C;AACA,YAAI,cAAc;AAChB,mBAAS,KAAK,YAAY;AAC1B;AAAA,QACF;AAAA,MACF;AAAA,IACF,OAAO;AAEL,eAAS,IAAI,YAAY,IAAI,aAAa,KAAK;AAC7C,cAAM,eAAe,MAAM;AAAA,UACzB;AAAA,UAAK;AAAA,UAAG;AAAA,UAAa;AAAA,UAAc;AAAA,UACnC;AAAA,UAAiB;AAAA,UAAuB;AAAA,QAC1C;AACA,YAAI,cAAc;AAChB,mBAAS,KAAK,YAAY;AAAA,QAC5B;AAAA,MACF;AAAA,IACF;AAEA,QAAI,QAAQ;AAEZ,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO,EAAE,SAAS,OAAO,UAAU,CAAC,GAAG,OAAO,SAAS,WAAW,qBAAqB;AAAA,IACzF;AAEA,WAAO,EAAE,SAAS,MAAM,SAAS;AAAA,EACnC,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU,CAAC;AAAA,MACX,OAAO,8BAA8B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,IACvF;AAAA,EACF;AACF;AAKA,eAAe,aACb,KACA,YACA,aACA,cACA,cACA,iBACA,uBACA,sBAC6B;AAC7B,QAAM,OAAO,MAAM,IAAI,QAAQ,aAAa,CAAC;AAC7C,QAAM,WAAW,KAAK,YAAY,EAAE,OAAO,aAAa,CAAC;AAGzD,MAAI,UAAU,MAAM,qBAAqB,KAAK,aAAa,EAAE,WAAW,CAAC;AACzE,MAAI,wBAAwB;AAG5B,QAAM,eAAe,MAAM,KAAK,eAAe;AAC/C,QAAM,iBAAiB,aAAa,MAAM;AAAA,IACxC,CAAC,SAAS,SAAS,QAAS,KAAyB,IAAI,KAAK,EAAE,SAAS;AAAA,EAC3E;AAEA,MAAI,QAAQ,WAAW,KAAK,CAAC,gBAAgB;AAE3C,QAAI,uBAAuB;AAEzB,YAAM,cAAc,MAAM;AAAA,QACxB;AAAA,QAAM;AAAA,QAAU;AAAA,QAAa;AAAA,MAC/B;AACA,UAAI,aAAa;AACf,kBAAU;AACV,gCAAwB;AAAA,MAC1B,OAAO;AAEL,eAAO,MAAM,yBAAyB,MAAM,UAAU,UAAU;AAAA,MAClE;AAAA,IACF,OAAO;AAEL,aAAO,MAAM,yBAAyB,MAAM,UAAU,UAAU;AAAA,IAClE;AAAA,EACF;AAEA,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO;AAAA,EACT;AAGA,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AACvD,QAAM,SAAS,aAAa,SAAS,OAAO,SAAS,MAAM;AAC3D,QAAM,UAAU,OAAO,WAAW,IAAI;AAGtC,UAAQ,YAAY;AACpB,UAAQ,SAAS,GAAG,GAAG,SAAS,OAAO,SAAS,MAAM;AAGtD,QAAM,KAAK,OAAO;AAAA,IAChB,eAAe;AAAA,IACf;AAAA,EACF,CAAC,EAAE;AAGH,kBAAgB,SAAS,UAAU,SAAS,eAAe;AAG3D,QAAM,OAAO;AAAA,IACX;AAAA,IAAU;AAAA,IAAS;AAAA,IAAc;AAAA,EACnC;AAGA,QAAM,iBAAiB,aAAa,KAAK,OAAO,KAAK,MAAM;AAC3D,QAAM,cAAc,eAAe,WAAW,IAAI;AAClD,cAAY;AAAA,IACV;AAAA,IACA,KAAK;AAAA,IAAG,KAAK;AAAA,IAAG,KAAK;AAAA,IAAO,KAAK;AAAA,IACjC;AAAA,IAAG;AAAA,IAAG,KAAK;AAAA,IAAO,KAAK;AAAA,EACzB;AAEA,QAAM,eAAe,eAAe,SAAS,WAAW;AACxD,QAAM,eAAe,aAAa,SAAS,QAAQ;AAEnD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,eAAe,KAAK;AAAA,IACpB,gBAAgB,KAAK;AAAA,EACvB;AACF;AAKA,eAAe,yBACb,MACA,UACA,YACsB;AACtB,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AACvD,QAAM,SAAS,aAAa,SAAS,OAAO,SAAS,MAAM;AAC3D,QAAM,UAAU,OAAO,WAAW,IAAI;AAEtC,UAAQ,YAAY;AACpB,UAAQ,SAAS,GAAG,GAAG,SAAS,OAAO,SAAS,MAAM;AAEtD,QAAM,KAAK,OAAO;AAAA,IAChB,eAAe;AAAA,IACf;AAAA,EACF,CAAC,EAAE;AAEH,QAAM,eAAe,OAAO,SAAS,WAAW;AAChD,QAAM,eAAe,aAAa,SAAS,QAAQ;AAEnD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,CAAC;AAAA,IACV,uBAAuB;AAAA,IACvB,eAAe,SAAS;AAAA,IACxB,gBAAgB,SAAS;AAAA,EAC3B;AACF;AAKA,SAAS,gBACP,SACA,UACA,SACA,iBACM;AAEN,UAAQ,KAAK;AACb,UAAQ,YAAY;AAEpB,aAAW,SAAS,SAAS;AAG3B,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,GAAG,MAAM,IAAI,MAAM,MAAM;AAChF,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,IAAI,MAAM,OAAO,MAAM,CAAC;AAE/E,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE;AAC1B,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE;AAC1B,UAAM,KAAK,KAAK,IAAI,KAAK,EAAE;AAC3B,UAAM,KAAK,KAAK,IAAI,KAAK,EAAE;AAE3B,YAAQ,SAAS,IAAI,IAAI,IAAI,EAAE;AAAA,EACjC;AAEA,UAAQ,QAAQ;AAClB;AAOA,SAAS,sBACP,UACA,SACA,cACA,eACyD;AACzD,QAAM,aAAa,SAAS;AAC5B,QAAM,cAAc,SAAS;AAE7B,MAAI,iBAAiB,QAAQ;AAC3B,WAAO,EAAE,GAAG,GAAG,GAAG,GAAG,OAAO,YAAY,QAAQ,YAAY;AAAA,EAC9D;AAGA,MAAI,QAAQ,UAAU,QAAQ,UAAU,QAAQ,WAAW,QAAQ;AAEnE,aAAW,SAAS,SAAS;AAC3B,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,GAAG,MAAM,IAAI,MAAM,MAAM;AAChF,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,IAAI,MAAM,OAAO,MAAM,CAAC;AAE/E,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAC9B,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAC9B,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAC9B,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAAA,EAChC;AAGA,QAAM,UAAU;AAChB,WAAS;AACT,WAAS;AACT,WAAS;AACT,WAAS;AAET,QAAM,cAAc,QAAQ;AAC5B,QAAM,eAAe,QAAQ;AAI7B,QAAM,gBAAgB;AACtB,MAAI;AAEJ,MAAI,iBAAiB,QAAQ;AAC3B,qBAAiB,cAAc;AAAA,EACjC,OAAO;AAEL,qBAAiB,cAAc;AAAA,EACjC;AAGA,QAAM,cAAc,KAAK,IAAI,eAAe,WAAW;AACvD,QAAM,eAAe,KAAK,IAAI,gBAAgB,YAAY;AAG1D,QAAM,kBAAkB,QAAQ,SAAS;AACzC,QAAM,kBAAkB,QAAQ,SAAS;AAEzC,MAAI,SAAS,iBAAiB,cAAc;AAC5C,MAAI,SAAS,iBAAiB,eAAe;AAG7C,WAAS,KAAK,IAAI,GAAG,KAAK,IAAI,QAAQ,aAAa,WAAW,CAAC;AAC/D,WAAS,KAAK,IAAI,GAAG,KAAK,IAAI,QAAQ,cAAc,YAAY,CAAC;AAGjE,QAAM,gBAAgB,KAAK,IAAI,aAAa,UAAU;AACtD,QAAM,iBAAiB,KAAK,IAAI,cAAc,WAAW;AAEzD,SAAO;AAAA,IACL,GAAG,KAAK,MAAM,MAAM;AAAA,IACpB,GAAG,KAAK,MAAM,MAAM;AAAA,IACpB,OAAO,KAAK,MAAM,aAAa;AAAA,IAC/B,QAAQ,KAAK,MAAM,cAAc;AAAA,EACnC;AACF;AAKA,eAAe,eAAe,QAA4C;AACxE,MAAI,OAAO,WAAW;AACpB,WAAO,OAAO;AAAA,EAChB;AAEA,MAAI,CAAC,OAAO,WAAW;AACrB,UAAM,IAAI,MAAM,2CAA2C;AAAA,EAC7D;AAEA,QAAM,YAAY,OAAO;AAGzB,MAAI,UAAU,WAAW,SAAS,KAAK,UAAU,WAAW,UAAU,GAAG;AACvE,UAAM,WAAW,MAAM,MAAM,SAAS;AACtC,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAI,MAAM,wBAAwB,SAAS,MAAM,IAAI,SAAS,UAAU,EAAE;AAAA,IAClF;AACA,UAAMC,UAAS,MAAM,SAAS,YAAY;AAC1C,WAAO,IAAI,WAAWA,OAAM;AAAA,EAC9B;AAGA,QAAM,KAAK,MAAM,OAAO,IAAI;AAC5B,MAAI,CAAC,GAAG,WAAW,SAAS,GAAG;AAC7B,UAAM,IAAI,MAAM,mBAAmB,SAAS,EAAE;AAAA,EAChD;AACA,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,SAAO,IAAI,WAAW,MAAM;AAC9B;AAMA,eAAe,mBACb,MACA,UACA,aACA,eACoC;AACpC,MAAI;AAEF,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AACvD,UAAM,SAAS,aAAa,SAAS,OAAO,SAAS,MAAM;AAC3D,UAAM,UAAU,OAAO,WAAW,IAAI;AAEtC,YAAQ,YAAY;AACpB,YAAQ,SAAS,GAAG,GAAG,SAAS,OAAO,SAAS,MAAM;AAEtD,UAAM,KAAK,OAAO;AAAA,MAChB,eAAe;AAAA,MACf;AAAA,IACF,CAAC,EAAE;AAEH,UAAM,eAAe,OAAO,SAAS,WAAW;AAChD,UAAM,eAAe,aAAa,SAAS,QAAQ;AAGnD,UAAM,EAAE,oBAAoB,IAAI,MAAM,OAAO,qBAAqB;AAElE,UAAM,SAAS,kBAAkB,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAS5C,UAAM,WAAW,MAAM,oBAAoB;AAAA,MACzC,WAAW;AAAA,MACX,iBAAiB;AAAA,MACjB;AAAA,IACF,CAAC;AAED,QAAI,CAAC,YAAY,CAAC,SAAS,MAAM;AAC/B,aAAO;AAAA,IACT;AAGA,UAAM,aAAa,SAAS,KAAK,MAAM,WAAW;AAClD,QAAI,CAAC,YAAY;AACf,aAAO;AAAA,IACT;AAEA,UAAM,SAAS,KAAK,MAAM,WAAW,CAAC,CAAC;AACvC,QAAI,CAAC,OAAO,OAAO;AACjB,aAAO;AAAA,IACT;AAIA,UAAM,YAAY,KAAK,YAAY,EAAE,OAAO,EAAI,CAAC;AACjD,UAAM,YAAY,UAAU;AAC5B,UAAM,aAAa,UAAU;AAE7B,UAAM,IAAK,OAAO,QAAQ,MAAO;AACjC,UAAM,aAAc,OAAO,QAAQ,MAAO;AAC1C,UAAM,QAAS,OAAO,YAAY,MAAO;AACzC,UAAM,SAAU,OAAO,aAAa,MAAO;AAG3C,UAAM,QAAQ,aAAa,aAAa;AAExC,WAAO,CAAC;AAAA,MACN;AAAA,MACA,GAAG;AAAA,MACH;AAAA,MACA;AAAA,MACA,YAAY;AAAA,MACZ,cAAc;AAAA,IAChB,CAAC;AAAA,EACH,QAAQ;AAEN,WAAO;AAAA,EACT;AACF;;;ACjdA,SAAS,eAAe,OAAuB;AAC7C,SAAO,MACJ,YAAY,EACZ,QAAQ,eAAe,GAAG,EAC1B,QAAQ,YAAY,EAAE,EACtB,MAAM,GAAG,EAAE;AAChB;AAMA,SAAS,kBAAkB,OAAe,OAAyB;AACjE,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,SAAS,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC9C,QAAM,aACJ,OAAO,WAAW,IACd,IAAI,OAAO,CAAC,CAAC,KACb,IAAI,OAAO,CAAC,CAAC,KAAK,OAAO,OAAO,SAAS,CAAC,CAAC;AACjD,SAAO,GAAG,UAAU,IAAI,UAAU;AACpC;AAKA,SAAS,cAAc,MAA2B;AAChD,MAAI,CAAC,MAAM;AACT,UAAM,IAAI,MAAM,oCAAoC;AAAA,EACtD;AACA,MAAI,gBAAgB,YAAY;AAC9B,WAAO;AAAA,EACT;AACA,MAAI,gBAAgB,aAAa;AAC/B,WAAO,IAAI,WAAW,IAAI;AAAA,EAC5B;AAEA,MAAI,OAAO,WAAW,eAAe,OAAO,SAAS,IAAI,GAAG;AAC1D,WAAO,IAAI,WAAW,KAAK,QAAQ,KAAK,YAAY,KAAK,UAAU;AAAA,EACrE;AACA,QAAM,IAAI,MAAM,yCAAyC;AAC3D;AAsBA,eAAsB,UACpB,SACA,cACyB;AACzB,QAAM,SAAS,WAAW;AAE1B,SAAO,KAAK,sBAAsB;AAAA,IAChC,gBAAgB,QAAQ;AAAA,IACxB,aAAa,QAAQ,OAAO;AAAA,IAC5B,eAAe,QAAQ;AAAA,EACzB,CAAC;AAGD,QAAM,EAAE,YAAY,IAAI,MAAM,OAAO,SAAS;AAG9C,SAAO,MAAM,0BAA0B,EAAE,SAAS,QAAQ,eAAe,CAAC;AAC1E,QAAM,kBAAkB,MAAM,aAAa,aAAa,QAAQ,cAAc;AAC9E,MAAI,CAAC,gBAAgB,WAAW,CAAC,gBAAgB,MAAM;AACrD,UAAM,IAAI;AAAA,MACR,gBAAgB,SAAS,mCAAmC,QAAQ,cAAc;AAAA,IACpF;AAAA,EACF;AAGA,QAAM,eAAe,cAAc,gBAAgB,IAAI;AACvD,SAAO,MAAM,sBAAsB,EAAE,WAAW,aAAa,WAAW,CAAC;AACzE,QAAM,aAAa,MAAM,YAAY,KAAK,YAAY;AACtD,QAAM,cAAc,WAAW,aAAa;AAE5C,SAAO,KAAK,qBAAqB,EAAE,YAAY,CAAC;AAGhD,QAAM,cAAc,IAAI,MAAc,cAAc,CAAC,EAAE,KAAK,CAAC;AAC7D,aAAW,SAAS,QAAQ,QAAQ;AAClC,eAAW,QAAQ,MAAM,OAAO;AAC9B,UAAI,OAAO,KAAK,OAAO,aAAa;AAClC,cAAM,IAAI;AAAA,UACR,UAAU,MAAM,KAAK,qBAAqB,IAAI,6BAA6B,WAAW;AAAA,QACxF;AAAA,MACF;AACA,kBAAY,IAAI;AAAA,IAClB;AAAA,EACF;AACA,WAAS,IAAI,GAAG,KAAK,aAAa,KAAK;AACrC,QAAI,YAAY,CAAC,MAAM,GAAG;AACxB,YAAM,IAAI,MAAM,QAAQ,CAAC,+BAA+B;AAAA,IAC1D;AACA,QAAI,YAAY,CAAC,IAAI,GAAG;AACtB,YAAM,IAAI,MAAM,QAAQ,CAAC,6BAA6B;AAAA,IACxD;AAAA,EACF;AAEA,SAAO,MAAM,2BAA2B,EAAE,aAAa,aAAa,QAAQ,OAAO,OAAO,CAAC;AAG3F,QAAM,UAA4B,CAAC;AACnC,QAAM,gBAAgB,QAAQ,iBAAiB;AAE/C,aAAW,eAAe,QAAQ,QAAQ;AACxC,WAAO,MAAM,oBAAoB;AAAA,MAC/B,UAAU,YAAY;AAAA,MACtB,OAAO,YAAY;AAAA,MACnB,OAAO,YAAY;AAAA,IACrB,CAAC;AAGD,UAAM,UAAU,MAAM,YAAY,OAAO;AAGzC,UAAM,eAAe,YAAY,MAAM,IAAI,CAAC,MAAM,IAAI,CAAC;AACvD,UAAM,eAAe,MAAM,QAAQ,UAAU,YAAY,YAAY;AACrE,eAAW,QAAQ,cAAc;AAC/B,cAAQ,QAAQ,IAAI;AAAA,IACtB;AAGA,UAAM,YAAY,MAAM,QAAQ,KAAK;AACrC,UAAM,YAAY,UAAU;AAG5B,UAAM,YACJ,YAAY,mBAAmB,kBAAkB,YAAY,OAAO,YAAY,KAAK;AACvF,UAAM,YAAY,gBAAgB,GAAG,cAAc,QAAQ,OAAO,EAAE,CAAC,IAAI,SAAS,KAAK;AAGvF,WAAO,MAAM,uBAAuB,EAAE,WAAW,UAAU,CAAC;AAC5D,UAAM,gBAAgB,MAAM,aAAa,WAAW,IAAI,WAAW,SAAS,GAAG,WAAW;AAAA,MACxF,WAAW;AAAA,IACb,CAAC;AAED,QAAI,CAAC,cAAc,SAAS;AAC1B,YAAM,IAAI;AAAA,QACR,cAAc,SAAS,2BAA2B,YAAY,KAAK,QAAQ,SAAS;AAAA,MACtF;AAAA,IACF;AAEA,YAAQ,KAAK;AAAA,MACX,UAAU,YAAY;AAAA,MACtB,OAAO,YAAY;AAAA,MACnB,OAAO,YAAY;AAAA,MACnB,SAAS;AAAA,MACT;AAAA,MACA;AAAA,MACA,YAAY,YAAY,MAAM;AAAA,MAC9B;AAAA,IACF,CAAC;AAED,WAAO,KAAK,kBAAkB;AAAA,MAC5B,UAAU,YAAY;AAAA,MACtB,OAAO,YAAY;AAAA,MACnB;AAAA,MACA,YAAY,YAAY,MAAM;AAAA,MAC9B;AAAA,IACF,CAAC;AAAA,EACH;AAEA,SAAO,KAAK,sBAAsB;AAAA,IAChC,gBAAgB,QAAQ;AAAA,IACxB,cAAc,QAAQ;AAAA,EACxB,CAAC;AAED,SAAO;AAAA,IACL,gBAAgB,QAAQ;AAAA,IACxB,QAAQ;AAAA,EACV;AACF;;;ACzLA,IAAI,OAAO,WAAW,aAAa;AACjC,QAAM,IAAI;AAAA,IACR;AAAA,EAEF;AACF;","names":["buffer","base64","buffer"]}
|
|
1
|
+
{"version":3,"sources":["../../src/server/index.ts","../../src/server/extract.ts","../../src/utils/logger.ts","../../src/server/snippet.ts","../../src/server/split.ts"],"sourcesContent":["/**\n * Server-side entry point for hazo_pdf\n * Provides document extraction utilities for server-side usage\n *\n * IMPORTANT: This module is server-only and will throw if imported in the browser\n *\n * @example\n * import { extract_document_data } from 'hazo_pdf/server';\n *\n * const result = await extract_document_data(\n * { file_path: '/path/to/document.pdf' },\n * { prompt_area: 'document', prompt_key: 'initial_classification' }\n * );\n */\n\nimport { HazoInternalError } from 'hazo_core/errors';\n\n// Runtime guard - prevent browser usage\nif (typeof window !== 'undefined') {\n throw new HazoInternalError({\n code: 'HAZO_PDF_SERVER_ONLY',\n pkg: 'hazo_pdf',\n message:\n 'hazo_pdf/server cannot be imported in the browser. ' +\n 'This module is server-only and requires Node.js runtime.',\n });\n}\n\n// Export extraction utilities\nexport { extract_document_data, reset_extraction_state } from './extract';\n\n// Export snippet utilities\nexport { extract_text_snippet } from './snippet';\n\n// Export split utilities\nexport { split_pdf } from './split';\nexport type {\n PdfSplitRequest,\n PdfSplitInstruction,\n PdfSplitResult,\n PdfSplitOutput,\n} from '../components/split_viewer/types';\n\n// Export types\nexport type {\n ExtractDocumentSource,\n ExtractDocumentOptions,\n ExtractDocumentResult,\n ExtractStepResult,\n} from './types';\n\nexport type {\n SnippetSource,\n SnippetOptions,\n SnippetResult,\n PageSnippet,\n SnippetSize,\n SnippetMatchMode,\n} from './snippet_types';\n","/**\n * Server-side document extraction utility\n * Uses hazo_llm_api for LLM-based extraction and hazo_files for storage\n */\n\nimport {\n generateRequestId,\n getCorrelationId,\n optional_import,\n withContext,\n} from 'hazo_core';\nimport {\n HazoExternalError,\n HazoNotFoundError,\n HazoUnavailableError,\n HazoValidationError,\n} from 'hazo_core/errors';\nimport type { Logger } from '../utils/logger';\nimport { get_logger } from '../utils/logger';\nimport type { FileAccessProvider } from '../types/file_access';\nimport type {\n ExtractDocumentSource,\n ExtractDocumentOptions,\n ExtractDocumentResult,\n} from './types';\n\nasync function require_module<T>(pkg: string): Promise<T> {\n const mod = await optional_import<T>(pkg);\n if (!mod) {\n throw new HazoUnavailableError({\n code: 'HAZO_PDF_OPTIONAL_DEP_MISSING',\n pkg: 'hazo_pdf',\n message: `Required optional peer \"${pkg}\" is not installed`,\n });\n }\n return mod;\n}\n\n// Lazy-loaded modules (server-only)\nlet is_initialized = false;\nlet hazo_files_adapter: unknown = null;\nlet llm_initialized = false;\n\n/**\n * Reset extraction state (for testing)\n */\nexport function reset_extraction_state(): void {\n is_initialized = false;\n hazo_files_adapter = null;\n llm_initialized = false;\n}\n\n/**\n * Initialize hazo_llm_api and SQLite adapter (lazy, once per process)\n */\nasync function ensure_initialized(\n sqlite_path?: string,\n logger: Logger = get_logger()\n): Promise<void> {\n if (is_initialized) {\n return;\n }\n\n try {\n const llm_mod = await require_module<typeof import('hazo_llm_api/server')>('hazo_llm_api/server');\n const connect_mod = await require_module<typeof import('hazo_connect/server')>('hazo_connect/server');\n const { initialize_llm_api, get_current_config } = llm_mod;\n const { SqliteAdapter } = connect_mod;\n\n // Initialize hazo_llm_api if not already done\n if (!llm_initialized) {\n await initialize_llm_api({ logger });\n llm_initialized = true;\n logger.debug('extract.llm_api.initialized');\n }\n\n // Determine SQLite path\n const config = get_current_config();\n const db_path = sqlite_path || config?.sqlite_path || 'prompt_library.sqlite';\n\n // Create hazo_connect SqliteAdapter\n hazo_files_adapter = new SqliteAdapter({\n type: 'sqlite',\n database_path: db_path,\n });\n\n // Ensure hazo_files table exists\n const files_mod = await require_module<typeof import('hazo_files')>('hazo_files');\n const { HAZO_FILES_TABLE_SCHEMA } = files_mod;\n const adapter = hazo_files_adapter as { rawQuery: (sql: string) => Promise<unknown> };\n await adapter.rawQuery(HAZO_FILES_TABLE_SCHEMA.sqlite.ddl);\n for (const idx of HAZO_FILES_TABLE_SCHEMA.sqlite.indexes) {\n await adapter.rawQuery(idx);\n }\n\n logger.debug('extract.hazo_files.table_initialized', { sqlite_path: db_path });\n is_initialized = true;\n } catch (error) {\n logger.error('extract.initialize.failed', {\n error: error instanceof Error ? error.message : String(error),\n });\n throw error;\n }\n}\n\n/**\n * Load document from file path or URL as base64\n */\nasync function load_document_as_base64(\n file_path: string,\n logger: Logger = get_logger()\n): Promise<{ base64: string; mime_type: string }> {\n // HTTP(S) URL\n if (file_path.startsWith('http://') || file_path.startsWith('https://')) {\n logger.debug('extract.document.load_from_url', { url: file_path });\n const response = await fetch(file_path);\n if (!response.ok) {\n throw new HazoExternalError({\n code: 'HAZO_PDF_EXTERNAL_FETCH_FAILED',\n pkg: 'hazo_pdf',\n message: `Failed to fetch document: ${response.status} ${response.statusText}`,\n httpStatus: 502,\n });\n }\n const buffer = await response.arrayBuffer();\n const base64 = Buffer.from(buffer).toString('base64');\n const content_type = response.headers.get('content-type') || 'application/pdf';\n return { base64, mime_type: content_type };\n }\n\n // Local file path\n logger.debug('extract.document.load_from_fs', { path: file_path });\n const fs = await import('fs');\n const path = await import('path');\n\n if (!fs.existsSync(file_path)) {\n throw new HazoNotFoundError({\n code: 'HAZO_PDF_FILE_NOT_FOUND',\n pkg: 'hazo_pdf',\n message: `File not found: ${file_path}`,\n });\n }\n\n const buffer = fs.readFileSync(file_path);\n const base64 = buffer.toString('base64');\n\n // Determine MIME type from extension\n const ext = path.extname(file_path).toLowerCase();\n const mime_types: Record<string, string> = {\n '.pdf': 'application/pdf',\n '.png': 'image/png',\n '.jpg': 'image/jpeg',\n '.jpeg': 'image/jpeg',\n '.gif': 'image/gif',\n '.webp': 'image/webp',\n };\n const mime_type = mime_types[ext] || 'application/pdf';\n\n return { base64, mime_type };\n}\n\n/**\n * Load document by hazo_files record ID\n */\nasync function load_document_by_file_id(\n file_id: string,\n _storage_type: 'local' | 'google_drive', // Used as fallback, actual storage_type comes from record\n file_manager: FileAccessProvider | undefined,\n logger: Logger = get_logger()\n): Promise<{ base64: string; mime_type: string; file_path: string }> {\n // Get file record from hazo_files\n const files_mod = await require_module<typeof import('hazo_files')>('hazo_files');\n const connect_mod = await require_module<typeof import('hazo_connect/server')>('hazo_connect/server');\n const { HAZO_FILES_TABLE_SCHEMA } = files_mod;\n const { createCrudService } = connect_mod;\n\n if (!hazo_files_adapter) {\n throw new HazoUnavailableError({\n code: 'HAZO_PDF_ADAPTER_UNINITIALIZED',\n pkg: 'hazo_pdf',\n message: 'hazo_files adapter not initialized — call ensure_initialized() first',\n });\n }\n\n type FileRecord = { id: string; file_path: string; file_type: string; storage_type: string };\n type CrudServiceType = { findMany: (opts: { where: { id: string } }) => Promise<FileRecord[]> };\n\n const crudService = createCrudService(\n hazo_files_adapter as Parameters<typeof createCrudService>[0],\n HAZO_FILES_TABLE_SCHEMA.tableName\n );\n\n // Get file metadata by ID\n const files = await (crudService as unknown as CrudServiceType).findMany({\n where: { id: file_id },\n });\n\n if (!files || files.length === 0) {\n throw new HazoNotFoundError({\n code: 'HAZO_PDF_FILE_RECORD_NOT_FOUND',\n pkg: 'hazo_pdf',\n message: `File record not found: ${file_id}`,\n });\n }\n\n const file_record = files[0];\n const file_path = file_record.file_path;\n const mime_type = file_record.file_type || 'application/pdf';\n\n logger.debug('extract.file_record.found', { file_id, file_path, storage_type: file_record.storage_type });\n\n // If Google Drive storage, use file_manager to download\n if (file_record.storage_type === 'google_drive') {\n if (!file_manager) {\n throw new HazoValidationError({\n code: 'HAZO_PDF_FILE_MANAGER_REQUIRED',\n pkg: 'hazo_pdf',\n message: 'file_manager is required for Google Drive files',\n });\n }\n if (!file_manager.isInitialized()) {\n throw new HazoUnavailableError({\n code: 'HAZO_PDF_FILE_MANAGER_UNINITIALIZED',\n pkg: 'hazo_pdf',\n message: 'file_manager is not initialized',\n });\n }\n\n logger.debug('extract.google_drive.download', { file_path });\n const result = await file_manager.downloadFile(file_path);\n if (!result.success || !result.data) {\n throw new HazoExternalError({\n code: 'HAZO_PDF_EXTERNAL_DOWNLOAD_FAILED',\n pkg: 'hazo_pdf',\n message: result.error || 'Failed to download file from Google Drive',\n httpStatus: 502,\n });\n }\n\n const buffer = Buffer.from(result.data as ArrayBuffer);\n const base64 = buffer.toString('base64');\n return { base64, mime_type, file_path };\n }\n\n // Local storage - read from filesystem\n return {\n ...(await load_document_as_base64(file_path, logger)),\n file_path,\n };\n}\n\n/**\n * Extract structured data from a PDF document using LLM\n *\n * @param source - Document source (file_path OR file_id)\n * @param options - Extraction options (prompt_area, prompt_key, etc.)\n * @returns Extraction result with data and metadata\n *\n * @example\n * // From file path\n * const result = await extract_document_data(\n * { file_path: '/path/to/doc.pdf' },\n * { prompt_area: 'document', prompt_key: 'initial_classification' }\n * );\n *\n * @example\n * // From hazo_files record ID\n * const result = await extract_document_data(\n * { file_id: 'abc-123' },\n * { prompt_area: 'invoice', prompt_key: 'extract_line_items', sqlite_path: './data/app.sqlite' }\n * );\n */\nexport async function extract_document_data(\n source: ExtractDocumentSource,\n options: ExtractDocumentOptions\n): Promise<ExtractDocumentResult> {\n const correlationId = getCorrelationId() ?? generateRequestId();\n return withContext({ correlationId }, () => _extract_document_data(source, options));\n}\n\nasync function _extract_document_data(\n source: ExtractDocumentSource,\n options: ExtractDocumentOptions\n): Promise<ExtractDocumentResult> {\n const logger = options.logger || get_logger();\n const storage_type = options.storage_type || 'local';\n const save_to_hazo_files = options.save_to_hazo_files !== false;\n\n // Validate source\n if (!source.file_path && !source.file_id) {\n return {\n success: false,\n error: 'Either file_path or file_id is required',\n };\n }\n\n if (source.file_path && source.file_id) {\n return {\n success: false,\n error: 'Provide either file_path or file_id, not both',\n };\n }\n\n try {\n // Initialize hazo_llm_api and hazo_files\n await ensure_initialized(options.sqlite_path, logger);\n\n // Load document\n let document_b64: string;\n let document_mime_type: string;\n let resolved_file_path: string;\n\n if (source.file_path) {\n const loaded = await load_document_as_base64(source.file_path, logger);\n document_b64 = loaded.base64;\n document_mime_type = options.mime_type || loaded.mime_type;\n resolved_file_path = source.file_path;\n } else {\n const loaded = await load_document_by_file_id(\n source.file_id!,\n storage_type,\n options.file_manager,\n logger\n );\n document_b64 = loaded.base64;\n document_mime_type = options.mime_type || loaded.mime_type;\n resolved_file_path = loaded.file_path;\n }\n\n const doc_size_kb = Math.round((document_b64.length * 0.75) / 1024);\n logger.info('Starting extraction', {\n file_path: resolved_file_path,\n size_kb: doc_size_kb,\n prompt: `${options.prompt_area}/${options.prompt_key}`,\n });\n\n // Import hazo_llm_api functions\n const llm_mod = await require_module<typeof import('hazo_llm_api/server')>('hazo_llm_api/server');\n const {\n hazo_llm_dynamic_data_extract,\n get_database,\n get_prompt_by_area_and_key,\n default_logger,\n } = llm_mod;\n\n // Verify prompt exists\n const db = get_database();\n if (!db) {\n return {\n success: false,\n error: 'LLM database not initialized',\n };\n }\n\n const prompt_record = get_prompt_by_area_and_key(\n db,\n options.prompt_area,\n options.prompt_key,\n default_logger\n );\n\n if (!prompt_record) {\n return {\n success: false,\n error: `Prompt not found: area=\"${options.prompt_area}\", key=\"${options.prompt_key}\"`,\n };\n }\n\n logger.debug('Found prompt', {\n has_next_prompt: !!prompt_record.next_prompt,\n });\n\n // Run dynamic extraction\n const dynamic_response = await hazo_llm_dynamic_data_extract({\n initial_prompt_area: options.prompt_area,\n initial_prompt_key: options.prompt_key,\n file_b64: document_b64,\n file_mime_type: document_mime_type,\n max_depth: options.max_depth ?? 10,\n continue_on_error: options.continue_on_error ?? false,\n });\n\n logger.info('Extraction completed', {\n successful_steps: dynamic_response.successful_steps,\n total_steps: dynamic_response.total_steps,\n stop_reason: dynamic_response.final_stop_reason,\n });\n\n if (!dynamic_response.success && dynamic_response.errors.length > 0) {\n logger.error('Extraction failed', { errors: dynamic_response.errors });\n return {\n success: false,\n error: dynamic_response.errors[0]?.error || 'Dynamic extraction failed',\n successful_steps: dynamic_response.successful_steps,\n total_steps: dynamic_response.total_steps,\n stop_reason: dynamic_response.final_stop_reason,\n };\n }\n\n const extracted_data = dynamic_response.merged_result;\n\n // Save to hazo_files if enabled\n let extraction_id: string | undefined;\n let file_id: string | undefined = source.file_id;\n\n // Use original_file_path for hazo_files storage if provided (when extraction uses temp file)\n const storage_file_path = options.original_file_path || resolved_file_path;\n\n if (save_to_hazo_files && hazo_files_adapter) {\n try {\n const files_mod = await require_module<typeof import('hazo_files')>('hazo_files');\n const connect_mod = await require_module<typeof import('hazo_connect/server')>('hazo_connect/server');\n const { createFileMetadataService, HAZO_FILES_TABLE_SCHEMA } = files_mod;\n const { createCrudService } = connect_mod;\n\n type FileIdRecord = { id: string };\n type CrudServiceWithFindMany = {\n findMany: (opts: { where: { file_path: string; storage_type: string } }) => Promise<FileIdRecord[]>;\n };\n\n const crudService = createCrudService(\n hazo_files_adapter as Parameters<typeof createCrudService>[0],\n HAZO_FILES_TABLE_SCHEMA.tableName\n );\n // Cast to unknown first to avoid type conflicts with hazo_files internal types\n const metadataService = createFileMetadataService(crudService as unknown as Parameters<typeof createFileMetadataService>[0], { logger });\n\n // Check if file record exists (use storage_file_path for hazo_files)\n const existing = await metadataService.getExtractions(\n storage_file_path,\n storage_type\n );\n\n if (existing === null) {\n // Create file record first\n const filename =\n options.filename ||\n storage_file_path.split('/').pop() ||\n 'unknown';\n logger.debug('Creating file record', { file_path: storage_file_path, filename });\n\n await metadataService.recordUpload({\n filename,\n file_type: document_mime_type,\n file_path: storage_file_path,\n storage_type,\n });\n\n // Get the created file record ID\n const files = await (crudService as unknown as CrudServiceWithFindMany).findMany({\n where: { file_path: storage_file_path, storage_type },\n });\n if (files && files.length > 0) {\n file_id = files[0].id;\n }\n }\n\n // Add extraction\n const extraction_result = await metadataService.addExtraction(\n storage_file_path,\n storage_type,\n extracted_data,\n { source: 'hazo_llm_api' }\n );\n\n if (extraction_result) {\n extraction_id = extraction_result.id;\n logger.debug('Saved extraction to hazo_files', { extraction_id });\n }\n } catch (save_error) {\n // Don't fail extraction if saving fails\n const error_msg = save_error instanceof Error ? save_error.message : String(save_error);\n const error_stack = save_error instanceof Error ? save_error.stack : undefined;\n logger.error('Failed to save extraction to hazo_files', {\n error: error_msg,\n stack: error_stack,\n storage_file_path,\n storage_type,\n });\n }\n }\n\n return {\n success: true,\n data: extracted_data,\n extraction_id,\n file_id,\n file_path: storage_file_path,\n successful_steps: dynamic_response.successful_steps,\n total_steps: dynamic_response.total_steps,\n stop_reason: dynamic_response.final_stop_reason,\n step_results: dynamic_response.step_results?.map((s: { prompt_area: string; prompt_key: string; success: boolean; result?: Record<string, unknown>; error?: string }) => ({\n prompt_area: s.prompt_area,\n prompt_key: s.prompt_key,\n success: s.success,\n data: s.result,\n error: s.error,\n })),\n };\n } catch (error) {\n logger.error('Extraction error', {\n error: error instanceof Error ? error.message : String(error),\n });\n return {\n success: false,\n error: error instanceof Error ? error.message : String(error),\n };\n }\n}\n","/**\n * Logger Utility for hazo_pdf\n *\n * Default behavior: delegates to hazo_core's createLogger('hazo_pdf'), which\n * auto-injects correlationId, env, and writes structured JSON via hazo_logs.\n * Consumers can still override with set_logger() — useful for browser-only\n * loggers or app-specific instrumentation.\n */\n\nimport { createLogger } from 'hazo_core';\n\n/**\n * Logger interface matching hazo_logs Logger type\n * This allows consumers to pass in their own logger instance\n */\nexport interface Logger {\n info: (message: string, data?: Record<string, unknown>) => void;\n debug: (message: string, data?: Record<string, unknown>) => void;\n warn: (message: string, data?: Record<string, unknown>) => void;\n error: (message: string, data?: Record<string, unknown>) => void;\n}\n\n/**\n * Console fallback used only when running in a browser where hazo_logs\n * isn't installed or fails to load. hazo_core falls back to JSON stderr in\n * Node and to console in browsers, so this is rarely exercised.\n */\nconst console_logger: Logger = {\n info: (message, data) => (data ? console.log(`[hazo_pdf] ${message}`, data) : console.log(`[hazo_pdf] ${message}`)),\n debug: (message, data) => (data ? console.debug(`[hazo_pdf] ${message}`, data) : console.debug(`[hazo_pdf] ${message}`)),\n warn: (message, data) => (data ? console.warn(`[hazo_pdf] ${message}`, data) : console.warn(`[hazo_pdf] ${message}`)),\n error: (message, data) => (data ? console.error(`[hazo_pdf] ${message}`, data) : console.error(`[hazo_pdf] ${message}`)),\n};\n\nfunction build_default_logger(): Logger {\n try {\n return createLogger('hazo_pdf') as unknown as Logger;\n } catch {\n return console_logger;\n }\n}\n\nlet current_logger: Logger | null = null;\n\n/**\n * Set the logger instance to use throughout hazo_pdf.\n * Pass `undefined` to reset to the hazo_core default.\n */\nexport function set_logger(logger: Logger | undefined): void {\n current_logger = logger ?? null;\n}\n\n/**\n * Get the current logger instance — defaults to hazo_core's createLogger\n * on first access if no consumer override has been set.\n */\nexport function get_logger(): Logger {\n if (!current_logger) {\n current_logger = build_default_logger();\n }\n return current_logger;\n}\n","/**\n * PDF text snippet extraction utility\n * Finds text in a PDF, highlights it, and returns an image snippet of the surrounding area\n *\n * Server-only: requires @napi-rs/canvas (installed as transitive dep of pdfjs-dist)\n */\n\nimport type { PDFDocumentProxy } from 'pdfjs-dist';\nimport {\n generateRequestId,\n getCorrelationId,\n optional_import,\n withContext,\n} from 'hazo_core';\nimport {\n HazoExternalError,\n HazoNotFoundError,\n HazoUnavailableError,\n HazoValidationError,\n} from 'hazo_core/errors';\nimport type { TextSearchResult } from '../utils/text_search';\nimport type {\n SnippetSource,\n SnippetOptions,\n SnippetResult,\n PageSnippet,\n SnippetSize,\n} from './snippet_types';\n\nasync function require_module<T>(pkg: string): Promise<T> {\n const mod = await optional_import<T>(pkg);\n if (!mod) {\n throw new HazoUnavailableError({\n code: 'HAZO_PDF_OPTIONAL_DEP_MISSING',\n pkg: 'hazo_pdf',\n message: `Required optional peer \"${pkg}\" is not installed`,\n });\n }\n return mod;\n}\n\n/**\n * Extract a text snippet from a PDF document\n *\n * Finds the specified text, highlights it, and returns a cropped image snippet.\n * Works for text-based PDFs (precise highlighting) and image-based PDFs\n * (full page fallback, or approximate LLM-based highlighting).\n *\n * @param source - PDF source (file path or raw bytes)\n * @param options - Snippet extraction options\n * @returns SnippetResult with PNG image buffer(s)\n */\nexport async function extract_text_snippet(\n source: SnippetSource,\n options: SnippetOptions\n): Promise<SnippetResult> {\n const correlationId = getCorrelationId() ?? generateRequestId();\n return withContext({ correlationId }, () => _extract_text_snippet(source, options));\n}\n\nasync function _extract_text_snippet(\n source: SnippetSource,\n options: SnippetOptions\n): Promise<SnippetResult> {\n const {\n search_text,\n page_index = 0,\n snippet_size = 'half',\n match_mode = 'first',\n render_scale = 2.0,\n highlight_color = 'rgba(255, 255, 0, 0.35)',\n use_llm_for_image_pdf = false,\n } = options;\n\n if (!search_text) {\n return { success: false, snippets: [], error: 'search_text is required' };\n }\n\n if (!source.file_path && !source.pdf_bytes) {\n return { success: false, snippets: [], error: 'Either file_path or pdf_bytes is required' };\n }\n\n try {\n // Load PDF bytes\n const pdf_bytes = await load_pdf_bytes(source);\n\n // Load pdfjs-dist legacy build for Node.js (avoids Path2D requirement)\n const pdfjs = await import('pdfjs-dist/legacy/build/pdf.mjs');\n\n // Resolve standard font data path for proper text rendering\n const path_mod = await import('path');\n const standard_font_data_url = path_mod.resolve(\n process.cwd(), 'node_modules/pdfjs-dist/standard_fonts/'\n ) + '/';\n\n // Load PDF document (Node.js mode - no web worker)\n const pdf = await pdfjs.getDocument({\n data: new Uint8Array(pdf_bytes),\n useSystemFonts: true,\n standardFontDataUrl: standard_font_data_url,\n verbosity: 0,\n }).promise;\n\n // Load text search utility\n const { find_all_text_in_pdf } = await import('../utils/text_search');\n\n // Determine which pages to search\n const total_pages = pdf.numPages;\n const snippets: PageSnippet[] = [];\n\n if (match_mode === 'first') {\n // Search from page_index through all pages, stop at first match\n for (let i = page_index; i < total_pages; i++) {\n const page_snippet = await process_page(\n pdf, i, search_text, snippet_size, render_scale,\n highlight_color, use_llm_for_image_pdf, find_all_text_in_pdf\n );\n if (page_snippet) {\n snippets.push(page_snippet);\n break;\n }\n }\n } else {\n // 'all' mode: search all pages starting from page_index\n for (let i = page_index; i < total_pages; i++) {\n const page_snippet = await process_page(\n pdf, i, search_text, snippet_size, render_scale,\n highlight_color, use_llm_for_image_pdf, find_all_text_in_pdf\n );\n if (page_snippet) {\n snippets.push(page_snippet);\n }\n }\n }\n\n pdf.destroy();\n\n if (snippets.length === 0) {\n return { success: false, snippets: [], error: `Text \"${search_text}\" not found in PDF` };\n }\n\n return { success: true, snippets };\n } catch (err) {\n return {\n success: false,\n snippets: [],\n error: `Snippet extraction failed: ${err instanceof Error ? err.message : String(err)}`,\n };\n }\n}\n\n/**\n * Process a single page: search for text, render, highlight, crop\n */\nasync function process_page(\n pdf: PDFDocumentProxy,\n page_index: number,\n search_text: string,\n snippet_size: SnippetSize,\n render_scale: number,\n highlight_color: string,\n use_llm_for_image_pdf: boolean,\n find_all_text_in_pdf: typeof import('../utils/text_search').find_all_text_in_pdf,\n): Promise<PageSnippet | null> {\n const page = await pdf.getPage(page_index + 1); // pdfjs uses 1-based\n const viewport = page.getViewport({ scale: render_scale });\n\n // Search for text on this page\n let matches = await find_all_text_in_pdf(pdf, search_text, { page_index });\n let highlight_approximate = false;\n\n // Check if this is an image-based PDF (no text content)\n const text_content = await page.getTextContent();\n const has_text_layer = text_content.items.some(\n (item) => 'str' in item && (item as { str: string }).str.trim().length > 0\n );\n\n if (matches.length === 0 && !has_text_layer) {\n // Image-based PDF\n if (use_llm_for_image_pdf) {\n // Try LLM-based approximate location\n const llm_matches = await find_text_with_llm(\n page, viewport, search_text, render_scale\n );\n if (llm_matches) {\n matches = llm_matches;\n highlight_approximate = true;\n } else {\n // LLM couldn't find it either - return full page, no highlight\n return await render_full_page_snippet(page, viewport, page_index);\n }\n } else {\n // No LLM - return full page as snippet, no highlight\n return await render_full_page_snippet(page, viewport, page_index);\n }\n }\n\n if (matches.length === 0) {\n return null; // Text not found on this page\n }\n\n // Render the page to canvas\n const { createCanvas } = await import('@napi-rs/canvas');\n const canvas = createCanvas(viewport.width, viewport.height);\n const context = canvas.getContext('2d');\n\n // White background\n context.fillStyle = '#ffffff';\n context.fillRect(0, 0, viewport.width, viewport.height);\n\n // Render PDF page\n await page.render({\n canvasContext: context as unknown as CanvasRenderingContext2D,\n viewport,\n }).promise;\n\n // Draw highlights over matches\n draw_highlights(context, viewport, matches, highlight_color);\n\n // Calculate snippet crop bounds\n const crop = calculate_crop_bounds(\n viewport, matches, snippet_size, render_scale\n );\n\n // Crop to snippet\n const snippet_canvas = createCanvas(crop.width, crop.height);\n const snippet_ctx = snippet_canvas.getContext('2d');\n snippet_ctx.drawImage(\n canvas,\n crop.x, crop.y, crop.width, crop.height,\n 0, 0, crop.width, crop.height\n );\n\n const image_buffer = snippet_canvas.toBuffer('image/png');\n const image_base64 = image_buffer.toString('base64');\n\n return {\n page_index,\n image_buffer,\n image_base64,\n matches,\n highlight_approximate,\n snippet_width: crop.width,\n snippet_height: crop.height,\n };\n}\n\n/**\n * Render full page as a snippet (used for image-based PDFs without text location)\n */\nasync function render_full_page_snippet(\n page: import('pdfjs-dist').PDFPageProxy,\n viewport: import('pdfjs-dist').PageViewport,\n page_index: number,\n): Promise<PageSnippet> {\n const { createCanvas } = await import('@napi-rs/canvas');\n const canvas = createCanvas(viewport.width, viewport.height);\n const context = canvas.getContext('2d');\n\n context.fillStyle = '#ffffff';\n context.fillRect(0, 0, viewport.width, viewport.height);\n\n await page.render({\n canvasContext: context as unknown as CanvasRenderingContext2D,\n viewport,\n }).promise;\n\n const image_buffer = canvas.toBuffer('image/png');\n const image_base64 = image_buffer.toString('base64');\n\n return {\n page_index,\n image_buffer,\n image_base64,\n matches: [],\n highlight_approximate: true,\n snippet_width: viewport.width,\n snippet_height: viewport.height,\n };\n}\n\n/**\n * Draw semi-transparent highlight rectangles over text matches\n */\nfunction draw_highlights(\n context: ReturnType<import('@napi-rs/canvas').Canvas['getContext']>,\n viewport: import('pdfjs-dist').PageViewport,\n matches: TextSearchResult[],\n highlight_color: string,\n): void {\n // Parse highlight color\n context.save();\n context.fillStyle = highlight_color;\n\n for (const match of matches) {\n // Convert PDF coordinates to viewport (screen) coordinates\n // PDF origin is bottom-left, viewport origin is top-left\n const [x1, y1] = viewport.convertToViewportPoint(match.x, match.y + match.height);\n const [x2, y2] = viewport.convertToViewportPoint(match.x + match.width, match.y);\n\n const rx = Math.min(x1, x2);\n const ry = Math.min(y1, y2);\n const rw = Math.abs(x2 - x1);\n const rh = Math.abs(y2 - y1);\n\n context.fillRect(rx, ry, rw, rh);\n }\n\n context.restore();\n}\n\n/**\n * Calculate the crop bounds for the snippet image\n * Centers on the match bounding box, expands to fit requested snippet size,\n * and clamps to page boundaries\n */\nfunction calculate_crop_bounds(\n viewport: import('pdfjs-dist').PageViewport,\n matches: TextSearchResult[],\n snippet_size: SnippetSize,\n _render_scale: number,\n): { x: number; y: number; width: number; height: number } {\n const page_width = viewport.width;\n const page_height = viewport.height;\n\n if (snippet_size === 'full') {\n return { x: 0, y: 0, width: page_width, height: page_height };\n }\n\n // Compute bounding box of all matches in viewport coordinates\n let min_x = Infinity, min_y = Infinity, max_x = -Infinity, max_y = -Infinity;\n\n for (const match of matches) {\n const [x1, y1] = viewport.convertToViewportPoint(match.x, match.y + match.height);\n const [x2, y2] = viewport.convertToViewportPoint(match.x + match.width, match.y);\n\n min_x = Math.min(min_x, x1, x2);\n min_y = Math.min(min_y, y1, y2);\n max_x = Math.max(max_x, x1, x2);\n max_y = Math.max(max_y, y1, y2);\n }\n\n // Add padding around matches (20px at render scale)\n const padding = 40;\n min_x -= padding;\n min_y -= padding;\n max_x += padding;\n max_y += padding;\n\n const match_width = max_x - min_x;\n const match_height = max_y - min_y;\n\n // Determine requested snippet dimensions\n // Always use full page width; only height varies by snippet size\n const snippet_width = page_width;\n let snippet_height: number;\n\n if (snippet_size === 'half') {\n snippet_height = page_height / 2;\n } else {\n // quarter\n snippet_height = page_height / 4;\n }\n\n // Use the larger of requested size vs match coverage\n const final_width = Math.max(snippet_width, match_width);\n const final_height = Math.max(snippet_height, match_height);\n\n // Center on the match bounding box center\n const match_center_x = (min_x + max_x) / 2;\n const match_center_y = (min_y + max_y) / 2;\n\n let crop_x = match_center_x - final_width / 2;\n let crop_y = match_center_y - final_height / 2;\n\n // Clamp to page boundaries\n crop_x = Math.max(0, Math.min(crop_x, page_width - final_width));\n crop_y = Math.max(0, Math.min(crop_y, page_height - final_height));\n\n // Ensure dimensions don't exceed page\n const clamped_width = Math.min(final_width, page_width);\n const clamped_height = Math.min(final_height, page_height);\n\n return {\n x: Math.round(crop_x),\n y: Math.round(crop_y),\n width: Math.round(clamped_width),\n height: Math.round(clamped_height),\n };\n}\n\n/**\n * Load PDF bytes from a SnippetSource\n */\nasync function load_pdf_bytes(source: SnippetSource): Promise<Uint8Array> {\n if (source.pdf_bytes) {\n return source.pdf_bytes;\n }\n\n if (!source.file_path) {\n throw new HazoValidationError({\n code: 'HAZO_PDF_VALIDATION_ERROR',\n pkg: 'hazo_pdf',\n message: 'Either file_path or pdf_bytes is required',\n });\n }\n\n const file_path = source.file_path;\n\n // HTTP(S) URL\n if (file_path.startsWith('http://') || file_path.startsWith('https://')) {\n const response = await fetch(file_path);\n if (!response.ok) {\n throw new HazoExternalError({\n code: 'HAZO_PDF_EXTERNAL_FETCH_FAILED',\n pkg: 'hazo_pdf',\n message: `Failed to fetch PDF: ${response.status} ${response.statusText}`,\n httpStatus: 502,\n });\n }\n const buffer = await response.arrayBuffer();\n return new Uint8Array(buffer);\n }\n\n // Local file\n const fs = await import('fs');\n if (!fs.existsSync(file_path)) {\n throw new HazoNotFoundError({\n code: 'HAZO_PDF_FILE_NOT_FOUND',\n pkg: 'hazo_pdf',\n message: `File not found: ${file_path}`,\n });\n }\n const buffer = fs.readFileSync(file_path);\n return new Uint8Array(buffer);\n}\n\n/**\n * Use LLM vision to find approximate text location in an image-based PDF page\n * Returns approximate TextSearchResult[] or null if LLM can't find the text\n */\nasync function find_text_with_llm(\n page: import('pdfjs-dist').PDFPageProxy,\n viewport: import('pdfjs-dist').PageViewport,\n search_text: string,\n _render_scale: number,\n): Promise<TextSearchResult[] | null> {\n try {\n // Render page to PNG for LLM\n const { createCanvas } = await import('@napi-rs/canvas');\n const canvas = createCanvas(viewport.width, viewport.height);\n const context = canvas.getContext('2d');\n\n context.fillStyle = '#ffffff';\n context.fillRect(0, 0, viewport.width, viewport.height);\n\n await page.render({\n canvasContext: context as unknown as CanvasRenderingContext2D,\n viewport,\n }).promise;\n\n const image_buffer = canvas.toBuffer('image/png');\n const image_base64 = image_buffer.toString('base64');\n\n // Dynamic import of hazo_llm_api\n const llm_mod = await require_module<typeof import('hazo_llm_api/server')>('hazo_llm_api/server');\n const { hazo_llm_image_text } = llm_mod;\n\n const prompt = `Find the text \"${search_text}\" in this document image.\nIf found, return ONLY a JSON object with the approximate bounding box as percentage coordinates (0-100):\n{\"found\": true, \"x_pct\": <left edge %>, \"y_pct\": <top edge %>, \"width_pct\": <width %>, \"height_pct\": <height %>}\n\nIf the text is not found, return:\n{\"found\": false}\n\nReturn ONLY the JSON object, nothing else.`;\n\n const response = await hazo_llm_image_text({\n image_b64: image_base64,\n image_mime_type: 'image/png',\n prompt,\n });\n\n if (!response || !response.text) {\n return null;\n }\n\n // Parse LLM response\n const json_match = response.text.match(/\\{[^}]+\\}/);\n if (!json_match) {\n return null;\n }\n\n const parsed = JSON.parse(json_match[0]);\n if (!parsed.found) {\n return null;\n }\n\n // Convert percentage coordinates to PDF coordinates\n // We need to return in PDF coordinate space for consistency\n const page_view = page.getViewport({ scale: 1.0 });\n const pdf_width = page_view.width;\n const pdf_height = page_view.height;\n\n const x = (parsed.x_pct / 100) * pdf_width;\n const y_from_top = (parsed.y_pct / 100) * pdf_height;\n const width = (parsed.width_pct / 100) * pdf_width;\n const height = (parsed.height_pct / 100) * pdf_height;\n\n // Convert from top-origin to PDF bottom-origin\n const y_pdf = pdf_height - y_from_top - height;\n\n return [{\n x,\n y: y_pdf,\n width,\n height,\n match_type: 'partial' as const,\n matched_text: search_text,\n }];\n } catch {\n // LLM not available or failed - return null to fall back to full page\n return null;\n }\n}\n","/**\n * Server-side PDF split utility\n * Splits a PDF into multiple files using pdf-lib and stores them via FileAccessProvider\n */\n\nimport {\n generateRequestId,\n getCorrelationId,\n withContext,\n} from 'hazo_core';\nimport {\n HazoExternalError,\n HazoValidationError,\n} from 'hazo_core/errors';\nimport { get_logger } from '../utils/logger';\nimport type { FileAccessProvider } from '../types/file_access';\nimport type { PdfSplitRequest, PdfSplitResult, PdfSplitOutput } from '../components/split_viewer/types';\n\n/**\n * Sanitize a label into a safe filename segment\n * lowercase, replace non-alphanumeric with underscore, trim, max 60 chars\n */\nfunction sanitize_label(label: string): string {\n return label\n .toLowerCase()\n .replace(/[^a-z0-9]+/g, '_')\n .replace(/^_+|_+$/g, '')\n .slice(0, 60);\n}\n\n/**\n * Generate an output filename from a label and page list\n * Example: \"Bank Statement - HSBC\" + [1,2,3] → \"bank_statement_hsbc_p1-p3.pdf\"\n */\nfunction generate_filename(label: string, pages: number[]): string {\n const safe_label = sanitize_label(label);\n const sorted = [...pages].sort((a, b) => a - b);\n const page_range =\n sorted.length === 1\n ? `p${sorted[0]}`\n : `p${sorted[0]}-p${sorted[sorted.length - 1]}`;\n return `${safe_label}_${page_range}.pdf`;\n}\n\n/**\n * Convert download data to Uint8Array\n */\nfunction to_uint8array(data: unknown): Uint8Array {\n if (!data) {\n throw new HazoExternalError({\n code: 'HAZO_PDF_EXTERNAL_DOWNLOAD_EMPTY',\n pkg: 'hazo_pdf',\n message: 'No data received from downloadFile',\n httpStatus: 502,\n });\n }\n if (data instanceof Uint8Array) {\n return data;\n }\n if (data instanceof ArrayBuffer) {\n return new Uint8Array(data);\n }\n // Buffer (Node.js) — has .buffer, .byteOffset, .byteLength\n if (typeof Buffer !== 'undefined' && Buffer.isBuffer(data)) {\n return new Uint8Array(data.buffer, data.byteOffset, data.byteLength);\n }\n throw new HazoValidationError({\n code: 'HAZO_PDF_UNSUPPORTED_TYPE',\n pkg: 'hazo_pdf',\n message: 'Unsupported data type from downloadFile',\n });\n}\n\n/**\n * Split a PDF into multiple files based on split instructions\n *\n * @param request - Split request with source file ID and split instructions\n * @param file_manager - FileAccessProvider for downloading and uploading files\n * @returns PdfSplitResult with output file metadata for each split\n *\n * @example\n * const result = await split_pdf(\n * {\n * source_file_id: 'path/to/document.pdf',\n * splits: [\n * { split_id: 'split-1', label: 'Invoice', pages: [1, 2] },\n * { split_id: 'split-2', label: 'Attachment', pages: [3, 4, 5] },\n * ],\n * output_folder: 'splits/',\n * },\n * file_manager\n * );\n */\nexport async function split_pdf(\n request: PdfSplitRequest,\n file_manager: FileAccessProvider\n): Promise<PdfSplitResult> {\n const correlationId = getCorrelationId() ?? generateRequestId();\n return withContext({ correlationId }, () => _split_pdf(request, file_manager));\n}\n\nasync function _split_pdf(\n request: PdfSplitRequest,\n file_manager: FileAccessProvider\n): Promise<PdfSplitResult> {\n const logger = get_logger();\n\n logger.info('pdf.split.start', {\n source_file_id: request.source_file_id,\n split_count: request.splits.length,\n output_folder: request.output_folder,\n });\n\n // Dynamic import of pdf-lib (server-only)\n const { PDFDocument } = await import('pdf-lib');\n\n // Download source PDF\n logger.debug('split.source.download', { file_id: request.source_file_id });\n const download_result = await file_manager.downloadFile(request.source_file_id);\n if (!download_result.success || !download_result.data) {\n throw new HazoExternalError({\n code: 'HAZO_PDF_EXTERNAL_DOWNLOAD_FAILED',\n pkg: 'hazo_pdf',\n message: download_result.error || `Failed to download source file: ${request.source_file_id}`,\n httpStatus: 502,\n });\n }\n\n // Convert to Uint8Array and load PDF\n const source_bytes = to_uint8array(download_result.data);\n logger.debug('split.source.load', { byte_size: source_bytes.byteLength });\n const source_pdf = await PDFDocument.load(source_bytes);\n const total_pages = source_pdf.getPageCount();\n\n logger.debug('split.source.loaded', { total_pages });\n\n // Validate splits: every page 1..N must appear in exactly one split\n const page_counts = new Array<number>(total_pages + 1).fill(0); // index 1..N\n for (const split of request.splits) {\n for (const page of split.pages) {\n if (page < 1 || page > total_pages) {\n throw new HazoValidationError({\n code: 'HAZO_PDF_SPLIT_PAGE_OUT_OF_RANGE',\n pkg: 'hazo_pdf',\n message: `Split \"${split.label}\" references page ${page} which is out of range (1-${total_pages})`,\n });\n }\n page_counts[page]++;\n }\n }\n for (let p = 1; p <= total_pages; p++) {\n if (page_counts[p] === 0) {\n throw new HazoValidationError({\n code: 'HAZO_PDF_SPLIT_PAGE_MISSING',\n pkg: 'hazo_pdf',\n message: `Page ${p} is not included in any split`,\n });\n }\n if (page_counts[p] > 1) {\n throw new HazoValidationError({\n code: 'HAZO_PDF_SPLIT_PAGE_DUPLICATED',\n pkg: 'hazo_pdf',\n message: `Page ${p} appears in multiple splits`,\n });\n }\n }\n\n logger.debug('split.validation.passed', { total_pages, split_count: request.splits.length });\n\n // Process each split instruction\n const outputs: PdfSplitOutput[] = [];\n const output_folder = request.output_folder || '';\n\n for (const instruction of request.splits) {\n logger.debug('split.instruction.start', {\n split_id: instruction.split_id,\n label: instruction.label,\n pages: instruction.pages,\n });\n\n // Create new PDF document\n const new_pdf = await PDFDocument.create();\n\n // Copy pages (pdf-lib uses 0-indexed)\n const page_indices = instruction.pages.map((p) => p - 1);\n const copied_pages = await new_pdf.copyPages(source_pdf, page_indices);\n for (const page of copied_pages) {\n new_pdf.addPage(page);\n }\n\n // Serialize to bytes\n const pdf_bytes = await new_pdf.save();\n const byte_size = pdf_bytes.byteLength;\n\n // Generate output filename and path\n const file_name =\n instruction.output_filename || generate_filename(instruction.label, instruction.pages);\n const file_path = output_folder ? `${output_folder.replace(/\\/$/, '')}/${file_name}` : file_name;\n\n // Upload via file_manager\n logger.debug('split.upload', { file_path, byte_size });\n const upload_result = await file_manager.uploadFile(new Uint8Array(pdf_bytes), file_path, {\n overwrite: true,\n });\n\n if (!upload_result.success) {\n throw new HazoExternalError({\n code: 'HAZO_PDF_EXTERNAL_UPLOAD_FAILED',\n pkg: 'hazo_pdf',\n message: upload_result.error || `Failed to upload split \"${instruction.label}\" to ${file_path}`,\n httpStatus: 502,\n });\n }\n\n outputs.push({\n split_id: instruction.split_id,\n label: instruction.label,\n pages: instruction.pages,\n file_id: file_path,\n file_name,\n file_path,\n page_count: instruction.pages.length,\n byte_size,\n });\n\n logger.debug('split.upload.done', {\n split_id: instruction.split_id,\n label: instruction.label,\n file_path,\n page_count: instruction.pages.length,\n byte_size,\n });\n }\n\n logger.info('pdf.split.complete', {\n source_file_id: request.source_file_id,\n output_count: outputs.length,\n });\n\n return {\n source_file_id: request.source_file_id,\n splits: outputs,\n };\n}\n"],"mappings":";AAeA,SAAS,yBAAyB;;;ACVlC;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;AACP;AAAA,EACE;AAAA,EACA;AAAA,EACA;AAAA,EACA;AAAA,OACK;;;ACPP,SAAS,oBAAoB;AAkB7B,IAAM,iBAAyB;AAAA,EAC7B,MAAM,CAAC,SAAS,SAAU,OAAO,QAAQ,IAAI,cAAc,OAAO,IAAI,IAAI,IAAI,QAAQ,IAAI,cAAc,OAAO,EAAE;AAAA,EACjH,OAAO,CAAC,SAAS,SAAU,OAAO,QAAQ,MAAM,cAAc,OAAO,IAAI,IAAI,IAAI,QAAQ,MAAM,cAAc,OAAO,EAAE;AAAA,EACtH,MAAM,CAAC,SAAS,SAAU,OAAO,QAAQ,KAAK,cAAc,OAAO,IAAI,IAAI,IAAI,QAAQ,KAAK,cAAc,OAAO,EAAE;AAAA,EACnH,OAAO,CAAC,SAAS,SAAU,OAAO,QAAQ,MAAM,cAAc,OAAO,IAAI,IAAI,IAAI,QAAQ,MAAM,cAAc,OAAO,EAAE;AACxH;AAEA,SAAS,uBAA+B;AACtC,MAAI;AACF,WAAO,aAAa,UAAU;AAAA,EAChC,QAAQ;AACN,WAAO;AAAA,EACT;AACF;AAEA,IAAI,iBAAgC;AAc7B,SAAS,aAAqB;AACnC,MAAI,CAAC,gBAAgB;AACnB,qBAAiB,qBAAqB;AAAA,EACxC;AACA,SAAO;AACT;;;ADnCA,eAAe,eAAkB,KAAyB;AACxD,QAAM,MAAM,MAAM,gBAAmB,GAAG;AACxC,MAAI,CAAC,KAAK;AACR,UAAM,IAAI,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,2BAA2B,GAAG;AAAA,IACzC,CAAC;AAAA,EACH;AACA,SAAO;AACT;AAGA,IAAI,iBAAiB;AACrB,IAAI,qBAA8B;AAClC,IAAI,kBAAkB;AAKf,SAAS,yBAA+B;AAC7C,mBAAiB;AACjB,uBAAqB;AACrB,oBAAkB;AACpB;AAKA,eAAe,mBACb,aACA,SAAiB,WAAW,GACb;AACf,MAAI,gBAAgB;AAClB;AAAA,EACF;AAEA,MAAI;AACF,UAAM,UAAU,MAAM,eAAqD,qBAAqB;AAChG,UAAM,cAAc,MAAM,eAAqD,qBAAqB;AACpG,UAAM,EAAE,oBAAoB,mBAAmB,IAAI;AACnD,UAAM,EAAE,cAAc,IAAI;AAG1B,QAAI,CAAC,iBAAiB;AACpB,YAAM,mBAAmB,EAAE,OAAO,CAAC;AACnC,wBAAkB;AAClB,aAAO,MAAM,6BAA6B;AAAA,IAC5C;AAGA,UAAM,SAAS,mBAAmB;AAClC,UAAM,UAAU,eAAe,QAAQ,eAAe;AAGtD,yBAAqB,IAAI,cAAc;AAAA,MACrC,MAAM;AAAA,MACN,eAAe;AAAA,IACjB,CAAC;AAGD,UAAM,YAAY,MAAM,eAA4C,YAAY;AAChF,UAAM,EAAE,wBAAwB,IAAI;AACpC,UAAM,UAAU;AAChB,UAAM,QAAQ,SAAS,wBAAwB,OAAO,GAAG;AACzD,eAAW,OAAO,wBAAwB,OAAO,SAAS;AACxD,YAAM,QAAQ,SAAS,GAAG;AAAA,IAC5B;AAEA,WAAO,MAAM,wCAAwC,EAAE,aAAa,QAAQ,CAAC;AAC7E,qBAAiB;AAAA,EACnB,SAAS,OAAO;AACd,WAAO,MAAM,6BAA6B;AAAA,MACxC,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,IAC9D,CAAC;AACD,UAAM;AAAA,EACR;AACF;AAKA,eAAe,wBACb,WACA,SAAiB,WAAW,GACoB;AAEhD,MAAI,UAAU,WAAW,SAAS,KAAK,UAAU,WAAW,UAAU,GAAG;AACvE,WAAO,MAAM,kCAAkC,EAAE,KAAK,UAAU,CAAC;AACjE,UAAM,WAAW,MAAM,MAAM,SAAS;AACtC,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAI,kBAAkB;AAAA,QAC1B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS,6BAA6B,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,QAC5E,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AACA,UAAMA,UAAS,MAAM,SAAS,YAAY;AAC1C,UAAMC,UAAS,OAAO,KAAKD,OAAM,EAAE,SAAS,QAAQ;AACpD,UAAM,eAAe,SAAS,QAAQ,IAAI,cAAc,KAAK;AAC7D,WAAO,EAAE,QAAAC,SAAQ,WAAW,aAAa;AAAA,EAC3C;AAGA,SAAO,MAAM,iCAAiC,EAAE,MAAM,UAAU,CAAC;AACjE,QAAM,KAAK,MAAM,OAAO,IAAI;AAC5B,QAAM,OAAO,MAAM,OAAO,MAAM;AAEhC,MAAI,CAAC,GAAG,WAAW,SAAS,GAAG;AAC7B,UAAM,IAAI,kBAAkB;AAAA,MAC1B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,mBAAmB,SAAS;AAAA,IACvC,CAAC;AAAA,EACH;AAEA,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,QAAM,SAAS,OAAO,SAAS,QAAQ;AAGvC,QAAM,MAAM,KAAK,QAAQ,SAAS,EAAE,YAAY;AAChD,QAAM,aAAqC;AAAA,IACzC,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,QAAQ;AAAA,IACR,SAAS;AAAA,IACT,QAAQ;AAAA,IACR,SAAS;AAAA,EACX;AACA,QAAM,YAAY,WAAW,GAAG,KAAK;AAErC,SAAO,EAAE,QAAQ,UAAU;AAC7B;AAKA,eAAe,yBACb,SACA,eACA,cACA,SAAiB,WAAW,GACuC;AAEnE,QAAM,YAAY,MAAM,eAA4C,YAAY;AAChF,QAAM,cAAc,MAAM,eAAqD,qBAAqB;AACpG,QAAM,EAAE,wBAAwB,IAAI;AACpC,QAAM,EAAE,kBAAkB,IAAI;AAE9B,MAAI,CAAC,oBAAoB;AACvB,UAAM,IAAI,qBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAKA,QAAM,cAAc;AAAA,IAClB;AAAA,IACA,wBAAwB;AAAA,EAC1B;AAGA,QAAM,QAAQ,MAAO,YAA2C,SAAS;AAAA,IACvE,OAAO,EAAE,IAAI,QAAQ;AAAA,EACvB,CAAC;AAED,MAAI,CAAC,SAAS,MAAM,WAAW,GAAG;AAChC,UAAM,IAAI,kBAAkB;AAAA,MAC1B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,0BAA0B,OAAO;AAAA,IAC5C,CAAC;AAAA,EACH;AAEA,QAAM,cAAc,MAAM,CAAC;AAC3B,QAAM,YAAY,YAAY;AAC9B,QAAM,YAAY,YAAY,aAAa;AAE3C,SAAO,MAAM,6BAA6B,EAAE,SAAS,WAAW,cAAc,YAAY,aAAa,CAAC;AAGxG,MAAI,YAAY,iBAAiB,gBAAgB;AAC/C,QAAI,CAAC,cAAc;AACjB,YAAM,IAAI,oBAAoB;AAAA,QAC5B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS;AAAA,MACX,CAAC;AAAA,IACH;AACA,QAAI,CAAC,aAAa,cAAc,GAAG;AACjC,YAAM,IAAI,qBAAqB;AAAA,QAC7B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS;AAAA,MACX,CAAC;AAAA,IACH;AAEA,WAAO,MAAM,iCAAiC,EAAE,UAAU,CAAC;AAC3D,UAAM,SAAS,MAAM,aAAa,aAAa,SAAS;AACxD,QAAI,CAAC,OAAO,WAAW,CAAC,OAAO,MAAM;AACnC,YAAM,IAAI,kBAAkB;AAAA,QAC1B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS,OAAO,SAAS;AAAA,QACzB,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAEA,UAAM,SAAS,OAAO,KAAK,OAAO,IAAmB;AACrD,UAAM,SAAS,OAAO,SAAS,QAAQ;AACvC,WAAO,EAAE,QAAQ,WAAW,UAAU;AAAA,EACxC;AAGA,SAAO;AAAA,IACL,GAAI,MAAM,wBAAwB,WAAW,MAAM;AAAA,IACnD;AAAA,EACF;AACF;AAuBA,eAAsB,sBACpB,QACA,SACgC;AAChC,QAAM,gBAAgB,iBAAiB,KAAK,kBAAkB;AAC9D,SAAO,YAAY,EAAE,cAAc,GAAG,MAAM,uBAAuB,QAAQ,OAAO,CAAC;AACrF;AAEA,eAAe,uBACb,QACA,SACgC;AAChC,QAAM,SAAS,QAAQ,UAAU,WAAW;AAC5C,QAAM,eAAe,QAAQ,gBAAgB;AAC7C,QAAM,qBAAqB,QAAQ,uBAAuB;AAG1D,MAAI,CAAC,OAAO,aAAa,CAAC,OAAO,SAAS;AACxC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI,OAAO,aAAa,OAAO,SAAS;AACtC,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO;AAAA,IACT;AAAA,EACF;AAEA,MAAI;AAEF,UAAM,mBAAmB,QAAQ,aAAa,MAAM;AAGpD,QAAI;AACJ,QAAI;AACJ,QAAI;AAEJ,QAAI,OAAO,WAAW;AACpB,YAAM,SAAS,MAAM,wBAAwB,OAAO,WAAW,MAAM;AACrE,qBAAe,OAAO;AACtB,2BAAqB,QAAQ,aAAa,OAAO;AACjD,2BAAqB,OAAO;AAAA,IAC9B,OAAO;AACL,YAAM,SAAS,MAAM;AAAA,QACnB,OAAO;AAAA,QACP;AAAA,QACA,QAAQ;AAAA,QACR;AAAA,MACF;AACA,qBAAe,OAAO;AACtB,2BAAqB,QAAQ,aAAa,OAAO;AACjD,2BAAqB,OAAO;AAAA,IAC9B;AAEA,UAAM,cAAc,KAAK,MAAO,aAAa,SAAS,OAAQ,IAAI;AAClE,WAAO,KAAK,uBAAuB;AAAA,MACjC,WAAW;AAAA,MACX,SAAS;AAAA,MACT,QAAQ,GAAG,QAAQ,WAAW,IAAI,QAAQ,UAAU;AAAA,IACtD,CAAC;AAGD,UAAM,UAAU,MAAM,eAAqD,qBAAqB;AAChG,UAAM;AAAA,MACJ;AAAA,MACA;AAAA,MACA;AAAA,MACA;AAAA,IACF,IAAI;AAGJ,UAAM,KAAK,aAAa;AACxB,QAAI,CAAC,IAAI;AACP,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO;AAAA,MACT;AAAA,IACF;AAEA,UAAM,gBAAgB;AAAA,MACpB;AAAA,MACA,QAAQ;AAAA,MACR,QAAQ;AAAA,MACR;AAAA,IACF;AAEA,QAAI,CAAC,eAAe;AAClB,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,2BAA2B,QAAQ,WAAW,WAAW,QAAQ,UAAU;AAAA,MACpF;AAAA,IACF;AAEA,WAAO,MAAM,gBAAgB;AAAA,MAC3B,iBAAiB,CAAC,CAAC,cAAc;AAAA,IACnC,CAAC;AAGD,UAAM,mBAAmB,MAAM,8BAA8B;AAAA,MAC3D,qBAAqB,QAAQ;AAAA,MAC7B,oBAAoB,QAAQ;AAAA,MAC5B,UAAU;AAAA,MACV,gBAAgB;AAAA,MAChB,WAAW,QAAQ,aAAa;AAAA,MAChC,mBAAmB,QAAQ,qBAAqB;AAAA,IAClD,CAAC;AAED,WAAO,KAAK,wBAAwB;AAAA,MAClC,kBAAkB,iBAAiB;AAAA,MACnC,aAAa,iBAAiB;AAAA,MAC9B,aAAa,iBAAiB;AAAA,IAChC,CAAC;AAED,QAAI,CAAC,iBAAiB,WAAW,iBAAiB,OAAO,SAAS,GAAG;AACnE,aAAO,MAAM,qBAAqB,EAAE,QAAQ,iBAAiB,OAAO,CAAC;AACrE,aAAO;AAAA,QACL,SAAS;AAAA,QACT,OAAO,iBAAiB,OAAO,CAAC,GAAG,SAAS;AAAA,QAC5C,kBAAkB,iBAAiB;AAAA,QACnC,aAAa,iBAAiB;AAAA,QAC9B,aAAa,iBAAiB;AAAA,MAChC;AAAA,IACF;AAEA,UAAM,iBAAiB,iBAAiB;AAGxC,QAAI;AACJ,QAAI,UAA8B,OAAO;AAGzC,UAAM,oBAAoB,QAAQ,sBAAsB;AAExD,QAAI,sBAAsB,oBAAoB;AAC5C,UAAI;AACF,cAAM,YAAY,MAAM,eAA4C,YAAY;AAChF,cAAM,cAAc,MAAM,eAAqD,qBAAqB;AACpG,cAAM,EAAE,2BAA2B,wBAAwB,IAAI;AAC/D,cAAM,EAAE,kBAAkB,IAAI;AAO9B,cAAM,cAAc;AAAA,UAClB;AAAA,UACA,wBAAwB;AAAA,QAC1B;AAEA,cAAM,kBAAkB,0BAA0B,aAA2E,EAAE,OAAO,CAAC;AAGvI,cAAM,WAAW,MAAM,gBAAgB;AAAA,UACrC;AAAA,UACA;AAAA,QACF;AAEA,YAAI,aAAa,MAAM;AAErB,gBAAM,WACJ,QAAQ,YACR,kBAAkB,MAAM,GAAG,EAAE,IAAI,KACjC;AACF,iBAAO,MAAM,wBAAwB,EAAE,WAAW,mBAAmB,SAAS,CAAC;AAE/E,gBAAM,gBAAgB,aAAa;AAAA,YACjC;AAAA,YACA,WAAW;AAAA,YACX,WAAW;AAAA,YACX;AAAA,UACF,CAAC;AAGD,gBAAM,QAAQ,MAAO,YAAmD,SAAS;AAAA,YAC/E,OAAO,EAAE,WAAW,mBAAmB,aAAa;AAAA,UACtD,CAAC;AACD,cAAI,SAAS,MAAM,SAAS,GAAG;AAC7B,sBAAU,MAAM,CAAC,EAAE;AAAA,UACrB;AAAA,QACF;AAGA,cAAM,oBAAoB,MAAM,gBAAgB;AAAA,UAC9C;AAAA,UACA;AAAA,UACA;AAAA,UACA,EAAE,QAAQ,eAAe;AAAA,QAC3B;AAEA,YAAI,mBAAmB;AACrB,0BAAgB,kBAAkB;AAClC,iBAAO,MAAM,kCAAkC,EAAE,cAAc,CAAC;AAAA,QAClE;AAAA,MACF,SAAS,YAAY;AAEnB,cAAM,YAAY,sBAAsB,QAAQ,WAAW,UAAU,OAAO,UAAU;AACtF,cAAM,cAAc,sBAAsB,QAAQ,WAAW,QAAQ;AACrE,eAAO,MAAM,2CAA2C;AAAA,UACtD,OAAO;AAAA,UACP,OAAO;AAAA,UACP;AAAA,UACA;AAAA,QACF,CAAC;AAAA,MACH;AAAA,IACF;AAEA,WAAO;AAAA,MACL,SAAS;AAAA,MACT,MAAM;AAAA,MACN;AAAA,MACA;AAAA,MACA,WAAW;AAAA,MACX,kBAAkB,iBAAiB;AAAA,MACnC,aAAa,iBAAiB;AAAA,MAC9B,aAAa,iBAAiB;AAAA,MAC9B,cAAc,iBAAiB,cAAc,IAAI,CAAC,OAAwH;AAAA,QACxK,aAAa,EAAE;AAAA,QACf,YAAY,EAAE;AAAA,QACd,SAAS,EAAE;AAAA,QACX,MAAM,EAAE;AAAA,QACR,OAAO,EAAE;AAAA,MACX,EAAE;AAAA,IACJ;AAAA,EACF,SAAS,OAAO;AACd,WAAO,MAAM,oBAAoB;AAAA,MAC/B,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,IAC9D,CAAC;AACD,WAAO;AAAA,MACL,SAAS;AAAA,MACT,OAAO,iBAAiB,QAAQ,MAAM,UAAU,OAAO,KAAK;AAAA,IAC9D;AAAA,EACF;AACF;;;AEpfA;AAAA,EACE,qBAAAC;AAAA,EACA,oBAAAC;AAAA,EACA,mBAAAC;AAAA,EACA,eAAAC;AAAA,OACK;AACP;AAAA,EACE,qBAAAC;AAAA,EACA,qBAAAC;AAAA,EACA,wBAAAC;AAAA,EACA,uBAAAC;AAAA,OACK;AAUP,eAAeC,gBAAkB,KAAyB;AACxD,QAAM,MAAM,MAAMN,iBAAmB,GAAG;AACxC,MAAI,CAAC,KAAK;AACR,UAAM,IAAII,sBAAqB;AAAA,MAC7B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,2BAA2B,GAAG;AAAA,IACzC,CAAC;AAAA,EACH;AACA,SAAO;AACT;AAaA,eAAsB,qBACpB,QACA,SACwB;AACxB,QAAM,gBAAgBL,kBAAiB,KAAKD,mBAAkB;AAC9D,SAAOG,aAAY,EAAE,cAAc,GAAG,MAAM,sBAAsB,QAAQ,OAAO,CAAC;AACpF;AAEA,eAAe,sBACb,QACA,SACwB;AACxB,QAAM;AAAA,IACJ;AAAA,IACA,aAAa;AAAA,IACb,eAAe;AAAA,IACf,aAAa;AAAA,IACb,eAAe;AAAA,IACf,kBAAkB;AAAA,IAClB,wBAAwB;AAAA,EAC1B,IAAI;AAEJ,MAAI,CAAC,aAAa;AAChB,WAAO,EAAE,SAAS,OAAO,UAAU,CAAC,GAAG,OAAO,0BAA0B;AAAA,EAC1E;AAEA,MAAI,CAAC,OAAO,aAAa,CAAC,OAAO,WAAW;AAC1C,WAAO,EAAE,SAAS,OAAO,UAAU,CAAC,GAAG,OAAO,4CAA4C;AAAA,EAC5F;AAEA,MAAI;AAEF,UAAM,YAAY,MAAM,eAAe,MAAM;AAG7C,UAAM,QAAQ,MAAM,OAAO,iCAAiC;AAG5D,UAAM,WAAW,MAAM,OAAO,MAAM;AACpC,UAAM,yBAAyB,SAAS;AAAA,MACtC,QAAQ,IAAI;AAAA,MAAG;AAAA,IACjB,IAAI;AAGJ,UAAM,MAAM,MAAM,MAAM,YAAY;AAAA,MAClC,MAAM,IAAI,WAAW,SAAS;AAAA,MAC9B,gBAAgB;AAAA,MAChB,qBAAqB;AAAA,MACrB,WAAW;AAAA,IACb,CAAC,EAAE;AAGH,UAAM,EAAE,qBAAqB,IAAI,MAAM,OAAO,2BAAsB;AAGpE,UAAM,cAAc,IAAI;AACxB,UAAM,WAA0B,CAAC;AAEjC,QAAI,eAAe,SAAS;AAE1B,eAAS,IAAI,YAAY,IAAI,aAAa,KAAK;AAC7C,cAAM,eAAe,MAAM;AAAA,UACzB;AAAA,UAAK;AAAA,UAAG;AAAA,UAAa;AAAA,UAAc;AAAA,UACnC;AAAA,UAAiB;AAAA,UAAuB;AAAA,QAC1C;AACA,YAAI,cAAc;AAChB,mBAAS,KAAK,YAAY;AAC1B;AAAA,QACF;AAAA,MACF;AAAA,IACF,OAAO;AAEL,eAAS,IAAI,YAAY,IAAI,aAAa,KAAK;AAC7C,cAAM,eAAe,MAAM;AAAA,UACzB;AAAA,UAAK;AAAA,UAAG;AAAA,UAAa;AAAA,UAAc;AAAA,UACnC;AAAA,UAAiB;AAAA,UAAuB;AAAA,QAC1C;AACA,YAAI,cAAc;AAChB,mBAAS,KAAK,YAAY;AAAA,QAC5B;AAAA,MACF;AAAA,IACF;AAEA,QAAI,QAAQ;AAEZ,QAAI,SAAS,WAAW,GAAG;AACzB,aAAO,EAAE,SAAS,OAAO,UAAU,CAAC,GAAG,OAAO,SAAS,WAAW,qBAAqB;AAAA,IACzF;AAEA,WAAO,EAAE,SAAS,MAAM,SAAS;AAAA,EACnC,SAAS,KAAK;AACZ,WAAO;AAAA,MACL,SAAS;AAAA,MACT,UAAU,CAAC;AAAA,MACX,OAAO,8BAA8B,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,IACvF;AAAA,EACF;AACF;AAKA,eAAe,aACb,KACA,YACA,aACA,cACA,cACA,iBACA,uBACA,sBAC6B;AAC7B,QAAM,OAAO,MAAM,IAAI,QAAQ,aAAa,CAAC;AAC7C,QAAM,WAAW,KAAK,YAAY,EAAE,OAAO,aAAa,CAAC;AAGzD,MAAI,UAAU,MAAM,qBAAqB,KAAK,aAAa,EAAE,WAAW,CAAC;AACzE,MAAI,wBAAwB;AAG5B,QAAM,eAAe,MAAM,KAAK,eAAe;AAC/C,QAAM,iBAAiB,aAAa,MAAM;AAAA,IACxC,CAAC,SAAS,SAAS,QAAS,KAAyB,IAAI,KAAK,EAAE,SAAS;AAAA,EAC3E;AAEA,MAAI,QAAQ,WAAW,KAAK,CAAC,gBAAgB;AAE3C,QAAI,uBAAuB;AAEzB,YAAM,cAAc,MAAM;AAAA,QACxB;AAAA,QAAM;AAAA,QAAU;AAAA,QAAa;AAAA,MAC/B;AACA,UAAI,aAAa;AACf,kBAAU;AACV,gCAAwB;AAAA,MAC1B,OAAO;AAEL,eAAO,MAAM,yBAAyB,MAAM,UAAU,UAAU;AAAA,MAClE;AAAA,IACF,OAAO;AAEL,aAAO,MAAM,yBAAyB,MAAM,UAAU,UAAU;AAAA,IAClE;AAAA,EACF;AAEA,MAAI,QAAQ,WAAW,GAAG;AACxB,WAAO;AAAA,EACT;AAGA,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AACvD,QAAM,SAAS,aAAa,SAAS,OAAO,SAAS,MAAM;AAC3D,QAAM,UAAU,OAAO,WAAW,IAAI;AAGtC,UAAQ,YAAY;AACpB,UAAQ,SAAS,GAAG,GAAG,SAAS,OAAO,SAAS,MAAM;AAGtD,QAAM,KAAK,OAAO;AAAA,IAChB,eAAe;AAAA,IACf;AAAA,EACF,CAAC,EAAE;AAGH,kBAAgB,SAAS,UAAU,SAAS,eAAe;AAG3D,QAAM,OAAO;AAAA,IACX;AAAA,IAAU;AAAA,IAAS;AAAA,IAAc;AAAA,EACnC;AAGA,QAAM,iBAAiB,aAAa,KAAK,OAAO,KAAK,MAAM;AAC3D,QAAM,cAAc,eAAe,WAAW,IAAI;AAClD,cAAY;AAAA,IACV;AAAA,IACA,KAAK;AAAA,IAAG,KAAK;AAAA,IAAG,KAAK;AAAA,IAAO,KAAK;AAAA,IACjC;AAAA,IAAG;AAAA,IAAG,KAAK;AAAA,IAAO,KAAK;AAAA,EACzB;AAEA,QAAM,eAAe,eAAe,SAAS,WAAW;AACxD,QAAM,eAAe,aAAa,SAAS,QAAQ;AAEnD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA;AAAA,IACA,eAAe,KAAK;AAAA,IACpB,gBAAgB,KAAK;AAAA,EACvB;AACF;AAKA,eAAe,yBACb,MACA,UACA,YACsB;AACtB,QAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AACvD,QAAM,SAAS,aAAa,SAAS,OAAO,SAAS,MAAM;AAC3D,QAAM,UAAU,OAAO,WAAW,IAAI;AAEtC,UAAQ,YAAY;AACpB,UAAQ,SAAS,GAAG,GAAG,SAAS,OAAO,SAAS,MAAM;AAEtD,QAAM,KAAK,OAAO;AAAA,IAChB,eAAe;AAAA,IACf;AAAA,EACF,CAAC,EAAE;AAEH,QAAM,eAAe,OAAO,SAAS,WAAW;AAChD,QAAM,eAAe,aAAa,SAAS,QAAQ;AAEnD,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,SAAS,CAAC;AAAA,IACV,uBAAuB;AAAA,IACvB,eAAe,SAAS;AAAA,IACxB,gBAAgB,SAAS;AAAA,EAC3B;AACF;AAKA,SAAS,gBACP,SACA,UACA,SACA,iBACM;AAEN,UAAQ,KAAK;AACb,UAAQ,YAAY;AAEpB,aAAW,SAAS,SAAS;AAG3B,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,GAAG,MAAM,IAAI,MAAM,MAAM;AAChF,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,IAAI,MAAM,OAAO,MAAM,CAAC;AAE/E,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE;AAC1B,UAAM,KAAK,KAAK,IAAI,IAAI,EAAE;AAC1B,UAAM,KAAK,KAAK,IAAI,KAAK,EAAE;AAC3B,UAAM,KAAK,KAAK,IAAI,KAAK,EAAE;AAE3B,YAAQ,SAAS,IAAI,IAAI,IAAI,EAAE;AAAA,EACjC;AAEA,UAAQ,QAAQ;AAClB;AAOA,SAAS,sBACP,UACA,SACA,cACA,eACyD;AACzD,QAAM,aAAa,SAAS;AAC5B,QAAM,cAAc,SAAS;AAE7B,MAAI,iBAAiB,QAAQ;AAC3B,WAAO,EAAE,GAAG,GAAG,GAAG,GAAG,OAAO,YAAY,QAAQ,YAAY;AAAA,EAC9D;AAGA,MAAI,QAAQ,UAAU,QAAQ,UAAU,QAAQ,WAAW,QAAQ;AAEnE,aAAW,SAAS,SAAS;AAC3B,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,GAAG,MAAM,IAAI,MAAM,MAAM;AAChF,UAAM,CAAC,IAAI,EAAE,IAAI,SAAS,uBAAuB,MAAM,IAAI,MAAM,OAAO,MAAM,CAAC;AAE/E,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAC9B,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAC9B,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAC9B,YAAQ,KAAK,IAAI,OAAO,IAAI,EAAE;AAAA,EAChC;AAGA,QAAM,UAAU;AAChB,WAAS;AACT,WAAS;AACT,WAAS;AACT,WAAS;AAET,QAAM,cAAc,QAAQ;AAC5B,QAAM,eAAe,QAAQ;AAI7B,QAAM,gBAAgB;AACtB,MAAI;AAEJ,MAAI,iBAAiB,QAAQ;AAC3B,qBAAiB,cAAc;AAAA,EACjC,OAAO;AAEL,qBAAiB,cAAc;AAAA,EACjC;AAGA,QAAM,cAAc,KAAK,IAAI,eAAe,WAAW;AACvD,QAAM,eAAe,KAAK,IAAI,gBAAgB,YAAY;AAG1D,QAAM,kBAAkB,QAAQ,SAAS;AACzC,QAAM,kBAAkB,QAAQ,SAAS;AAEzC,MAAI,SAAS,iBAAiB,cAAc;AAC5C,MAAI,SAAS,iBAAiB,eAAe;AAG7C,WAAS,KAAK,IAAI,GAAG,KAAK,IAAI,QAAQ,aAAa,WAAW,CAAC;AAC/D,WAAS,KAAK,IAAI,GAAG,KAAK,IAAI,QAAQ,cAAc,YAAY,CAAC;AAGjE,QAAM,gBAAgB,KAAK,IAAI,aAAa,UAAU;AACtD,QAAM,iBAAiB,KAAK,IAAI,cAAc,WAAW;AAEzD,SAAO;AAAA,IACL,GAAG,KAAK,MAAM,MAAM;AAAA,IACpB,GAAG,KAAK,MAAM,MAAM;AAAA,IACpB,OAAO,KAAK,MAAM,aAAa;AAAA,IAC/B,QAAQ,KAAK,MAAM,cAAc;AAAA,EACnC;AACF;AAKA,eAAe,eAAe,QAA4C;AACxE,MAAI,OAAO,WAAW;AACpB,WAAO,OAAO;AAAA,EAChB;AAEA,MAAI,CAAC,OAAO,WAAW;AACrB,UAAM,IAAII,qBAAoB;AAAA,MAC5B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS;AAAA,IACX,CAAC;AAAA,EACH;AAEA,QAAM,YAAY,OAAO;AAGzB,MAAI,UAAU,WAAW,SAAS,KAAK,UAAU,WAAW,UAAU,GAAG;AACvE,UAAM,WAAW,MAAM,MAAM,SAAS;AACtC,QAAI,CAAC,SAAS,IAAI;AAChB,YAAM,IAAIH,mBAAkB;AAAA,QAC1B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS,wBAAwB,SAAS,MAAM,IAAI,SAAS,UAAU;AAAA,QACvE,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AACA,UAAMK,UAAS,MAAM,SAAS,YAAY;AAC1C,WAAO,IAAI,WAAWA,OAAM;AAAA,EAC9B;AAGA,QAAM,KAAK,MAAM,OAAO,IAAI;AAC5B,MAAI,CAAC,GAAG,WAAW,SAAS,GAAG;AAC7B,UAAM,IAAIJ,mBAAkB;AAAA,MAC1B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,mBAAmB,SAAS;AAAA,IACvC,CAAC;AAAA,EACH;AACA,QAAM,SAAS,GAAG,aAAa,SAAS;AACxC,SAAO,IAAI,WAAW,MAAM;AAC9B;AAMA,eAAe,mBACb,MACA,UACA,aACA,eACoC;AACpC,MAAI;AAEF,UAAM,EAAE,aAAa,IAAI,MAAM,OAAO,iBAAiB;AACvD,UAAM,SAAS,aAAa,SAAS,OAAO,SAAS,MAAM;AAC3D,UAAM,UAAU,OAAO,WAAW,IAAI;AAEtC,YAAQ,YAAY;AACpB,YAAQ,SAAS,GAAG,GAAG,SAAS,OAAO,SAAS,MAAM;AAEtD,UAAM,KAAK,OAAO;AAAA,MAChB,eAAe;AAAA,MACf;AAAA,IACF,CAAC,EAAE;AAEH,UAAM,eAAe,OAAO,SAAS,WAAW;AAChD,UAAM,eAAe,aAAa,SAAS,QAAQ;AAGnD,UAAM,UAAU,MAAMG,gBAAqD,qBAAqB;AAChG,UAAM,EAAE,oBAAoB,IAAI;AAEhC,UAAM,SAAS,kBAAkB,WAAW;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAAA;AAS5C,UAAM,WAAW,MAAM,oBAAoB;AAAA,MACzC,WAAW;AAAA,MACX,iBAAiB;AAAA,MACjB;AAAA,IACF,CAAC;AAED,QAAI,CAAC,YAAY,CAAC,SAAS,MAAM;AAC/B,aAAO;AAAA,IACT;AAGA,UAAM,aAAa,SAAS,KAAK,MAAM,WAAW;AAClD,QAAI,CAAC,YAAY;AACf,aAAO;AAAA,IACT;AAEA,UAAM,SAAS,KAAK,MAAM,WAAW,CAAC,CAAC;AACvC,QAAI,CAAC,OAAO,OAAO;AACjB,aAAO;AAAA,IACT;AAIA,UAAM,YAAY,KAAK,YAAY,EAAE,OAAO,EAAI,CAAC;AACjD,UAAM,YAAY,UAAU;AAC5B,UAAM,aAAa,UAAU;AAE7B,UAAM,IAAK,OAAO,QAAQ,MAAO;AACjC,UAAM,aAAc,OAAO,QAAQ,MAAO;AAC1C,UAAM,QAAS,OAAO,YAAY,MAAO;AACzC,UAAM,SAAU,OAAO,aAAa,MAAO;AAG3C,UAAM,QAAQ,aAAa,aAAa;AAExC,WAAO,CAAC;AAAA,MACN;AAAA,MACA,GAAG;AAAA,MACH;AAAA,MACA;AAAA,MACA,YAAY;AAAA,MACZ,cAAc;AAAA,IAChB,CAAC;AAAA,EACH,QAAQ;AAEN,WAAO;AAAA,EACT;AACF;;;ACvgBA;AAAA,EACE,qBAAAE;AAAA,EACA,oBAAAC;AAAA,EACA,eAAAC;AAAA,OACK;AACP;AAAA,EACE,qBAAAC;AAAA,EACA,uBAAAC;AAAA,OACK;AASP,SAAS,eAAe,OAAuB;AAC7C,SAAO,MACJ,YAAY,EACZ,QAAQ,eAAe,GAAG,EAC1B,QAAQ,YAAY,EAAE,EACtB,MAAM,GAAG,EAAE;AAChB;AAMA,SAAS,kBAAkB,OAAe,OAAyB;AACjE,QAAM,aAAa,eAAe,KAAK;AACvC,QAAM,SAAS,CAAC,GAAG,KAAK,EAAE,KAAK,CAAC,GAAG,MAAM,IAAI,CAAC;AAC9C,QAAM,aACJ,OAAO,WAAW,IACd,IAAI,OAAO,CAAC,CAAC,KACb,IAAI,OAAO,CAAC,CAAC,KAAK,OAAO,OAAO,SAAS,CAAC,CAAC;AACjD,SAAO,GAAG,UAAU,IAAI,UAAU;AACpC;AAKA,SAAS,cAAc,MAA2B;AAChD,MAAI,CAAC,MAAM;AACT,UAAM,IAAIC,mBAAkB;AAAA,MAC1B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS;AAAA,MACT,YAAY;AAAA,IACd,CAAC;AAAA,EACH;AACA,MAAI,gBAAgB,YAAY;AAC9B,WAAO;AAAA,EACT;AACA,MAAI,gBAAgB,aAAa;AAC/B,WAAO,IAAI,WAAW,IAAI;AAAA,EAC5B;AAEA,MAAI,OAAO,WAAW,eAAe,OAAO,SAAS,IAAI,GAAG;AAC1D,WAAO,IAAI,WAAW,KAAK,QAAQ,KAAK,YAAY,KAAK,UAAU;AAAA,EACrE;AACA,QAAM,IAAIC,qBAAoB;AAAA,IAC5B,MAAM;AAAA,IACN,KAAK;AAAA,IACL,SAAS;AAAA,EACX,CAAC;AACH;AAsBA,eAAsB,UACpB,SACA,cACyB;AACzB,QAAM,gBAAgBC,kBAAiB,KAAKC,mBAAkB;AAC9D,SAAOC,aAAY,EAAE,cAAc,GAAG,MAAM,WAAW,SAAS,YAAY,CAAC;AAC/E;AAEA,eAAe,WACb,SACA,cACyB;AACzB,QAAM,SAAS,WAAW;AAE1B,SAAO,KAAK,mBAAmB;AAAA,IAC7B,gBAAgB,QAAQ;AAAA,IACxB,aAAa,QAAQ,OAAO;AAAA,IAC5B,eAAe,QAAQ;AAAA,EACzB,CAAC;AAGD,QAAM,EAAE,YAAY,IAAI,MAAM,OAAO,SAAS;AAG9C,SAAO,MAAM,yBAAyB,EAAE,SAAS,QAAQ,eAAe,CAAC;AACzE,QAAM,kBAAkB,MAAM,aAAa,aAAa,QAAQ,cAAc;AAC9E,MAAI,CAAC,gBAAgB,WAAW,CAAC,gBAAgB,MAAM;AACrD,UAAM,IAAIJ,mBAAkB;AAAA,MAC1B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,gBAAgB,SAAS,mCAAmC,QAAQ,cAAc;AAAA,MAC3F,YAAY;AAAA,IACd,CAAC;AAAA,EACH;AAGA,QAAM,eAAe,cAAc,gBAAgB,IAAI;AACvD,SAAO,MAAM,qBAAqB,EAAE,WAAW,aAAa,WAAW,CAAC;AACxE,QAAM,aAAa,MAAM,YAAY,KAAK,YAAY;AACtD,QAAM,cAAc,WAAW,aAAa;AAE5C,SAAO,MAAM,uBAAuB,EAAE,YAAY,CAAC;AAGnD,QAAM,cAAc,IAAI,MAAc,cAAc,CAAC,EAAE,KAAK,CAAC;AAC7D,aAAW,SAAS,QAAQ,QAAQ;AAClC,eAAW,QAAQ,MAAM,OAAO;AAC9B,UAAI,OAAO,KAAK,OAAO,aAAa;AAClC,cAAM,IAAIC,qBAAoB;AAAA,UAC5B,MAAM;AAAA,UACN,KAAK;AAAA,UACL,SAAS,UAAU,MAAM,KAAK,qBAAqB,IAAI,6BAA6B,WAAW;AAAA,QACjG,CAAC;AAAA,MACH;AACA,kBAAY,IAAI;AAAA,IAClB;AAAA,EACF;AACA,WAAS,IAAI,GAAG,KAAK,aAAa,KAAK;AACrC,QAAI,YAAY,CAAC,MAAM,GAAG;AACxB,YAAM,IAAIA,qBAAoB;AAAA,QAC5B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS,QAAQ,CAAC;AAAA,MACpB,CAAC;AAAA,IACH;AACA,QAAI,YAAY,CAAC,IAAI,GAAG;AACtB,YAAM,IAAIA,qBAAoB;AAAA,QAC5B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS,QAAQ,CAAC;AAAA,MACpB,CAAC;AAAA,IACH;AAAA,EACF;AAEA,SAAO,MAAM,2BAA2B,EAAE,aAAa,aAAa,QAAQ,OAAO,OAAO,CAAC;AAG3F,QAAM,UAA4B,CAAC;AACnC,QAAM,gBAAgB,QAAQ,iBAAiB;AAE/C,aAAW,eAAe,QAAQ,QAAQ;AACxC,WAAO,MAAM,2BAA2B;AAAA,MACtC,UAAU,YAAY;AAAA,MACtB,OAAO,YAAY;AAAA,MACnB,OAAO,YAAY;AAAA,IACrB,CAAC;AAGD,UAAM,UAAU,MAAM,YAAY,OAAO;AAGzC,UAAM,eAAe,YAAY,MAAM,IAAI,CAAC,MAAM,IAAI,CAAC;AACvD,UAAM,eAAe,MAAM,QAAQ,UAAU,YAAY,YAAY;AACrE,eAAW,QAAQ,cAAc;AAC/B,cAAQ,QAAQ,IAAI;AAAA,IACtB;AAGA,UAAM,YAAY,MAAM,QAAQ,KAAK;AACrC,UAAM,YAAY,UAAU;AAG5B,UAAM,YACJ,YAAY,mBAAmB,kBAAkB,YAAY,OAAO,YAAY,KAAK;AACvF,UAAM,YAAY,gBAAgB,GAAG,cAAc,QAAQ,OAAO,EAAE,CAAC,IAAI,SAAS,KAAK;AAGvF,WAAO,MAAM,gBAAgB,EAAE,WAAW,UAAU,CAAC;AACrD,UAAM,gBAAgB,MAAM,aAAa,WAAW,IAAI,WAAW,SAAS,GAAG,WAAW;AAAA,MACxF,WAAW;AAAA,IACb,CAAC;AAED,QAAI,CAAC,cAAc,SAAS;AAC1B,YAAM,IAAID,mBAAkB;AAAA,QAC1B,MAAM;AAAA,QACN,KAAK;AAAA,QACL,SAAS,cAAc,SAAS,2BAA2B,YAAY,KAAK,QAAQ,SAAS;AAAA,QAC7F,YAAY;AAAA,MACd,CAAC;AAAA,IACH;AAEA,YAAQ,KAAK;AAAA,MACX,UAAU,YAAY;AAAA,MACtB,OAAO,YAAY;AAAA,MACnB,OAAO,YAAY;AAAA,MACnB,SAAS;AAAA,MACT;AAAA,MACA;AAAA,MACA,YAAY,YAAY,MAAM;AAAA,MAC9B;AAAA,IACF,CAAC;AAED,WAAO,MAAM,qBAAqB;AAAA,MAChC,UAAU,YAAY;AAAA,MACtB,OAAO,YAAY;AAAA,MACnB;AAAA,MACA,YAAY,YAAY,MAAM;AAAA,MAC9B;AAAA,IACF,CAAC;AAAA,EACH;AAEA,SAAO,KAAK,sBAAsB;AAAA,IAChC,gBAAgB,QAAQ;AAAA,IACxB,cAAc,QAAQ;AAAA,EACxB,CAAC;AAED,SAAO;AAAA,IACL,gBAAgB,QAAQ;AAAA,IACxB,QAAQ;AAAA,EACV;AACF;;;AJjOA,IAAI,OAAO,WAAW,aAAa;AACjC,QAAM,IAAI,kBAAkB;AAAA,IAC1B,MAAM;AAAA,IACN,KAAK;AAAA,IACL,SACE;AAAA,EAEJ,CAAC;AACH;","names":["buffer","base64","generateRequestId","getCorrelationId","optional_import","withContext","HazoExternalError","HazoNotFoundError","HazoUnavailableError","HazoValidationError","require_module","buffer","generateRequestId","getCorrelationId","withContext","HazoExternalError","HazoValidationError","HazoExternalError","HazoValidationError","getCorrelationId","generateRequestId","withContext"]}
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
// src/utils/text_search.ts
|
|
2
|
+
import { HazoInternalError } from "hazo_core/errors";
|
|
2
3
|
async function find_text_in_pdf(pdf, search_value, options) {
|
|
3
4
|
try {
|
|
4
5
|
const {
|
|
@@ -51,7 +52,12 @@ async function find_text_in_pdf(pdf, search_value, options) {
|
|
|
51
52
|
}
|
|
52
53
|
return null;
|
|
53
54
|
} catch (err) {
|
|
54
|
-
throw new
|
|
55
|
+
throw new HazoInternalError({
|
|
56
|
+
code: "HAZO_PDF_TEXT_SEARCH_FAILED",
|
|
57
|
+
pkg: "hazo_pdf",
|
|
58
|
+
message: `Text search failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
59
|
+
cause: err instanceof Error ? err : new Error(String(err))
|
|
60
|
+
});
|
|
55
61
|
}
|
|
56
62
|
}
|
|
57
63
|
async function find_all_text_in_pdf(pdf, search_value, options) {
|
|
@@ -107,7 +113,12 @@ async function find_all_text_in_pdf(pdf, search_value, options) {
|
|
|
107
113
|
}
|
|
108
114
|
return results;
|
|
109
115
|
} catch (err) {
|
|
110
|
-
throw new
|
|
116
|
+
throw new HazoInternalError({
|
|
117
|
+
code: "HAZO_PDF_TEXT_SEARCH_FAILED",
|
|
118
|
+
pkg: "hazo_pdf",
|
|
119
|
+
message: `Text search failed: ${err instanceof Error ? err.message : String(err)}`,
|
|
120
|
+
cause: err instanceof Error ? err : new Error(String(err))
|
|
121
|
+
});
|
|
111
122
|
}
|
|
112
123
|
}
|
|
113
124
|
function extract_text_position(text_item, padding_x, padding_y, y_offset) {
|
|
@@ -151,4 +162,4 @@ export {
|
|
|
151
162
|
find_all_text_in_pdf,
|
|
152
163
|
find_text_in_pdf
|
|
153
164
|
};
|
|
154
|
-
//# sourceMappingURL=text_search-
|
|
165
|
+
//# sourceMappingURL=text_search-PVDG5Y6I.js.map
|
|
@@ -0,0 +1 @@
|
|
|
1
|
+
{"version":3,"sources":["../../src/utils/text_search.ts"],"sourcesContent":["/**\n * Text search utility for finding text in PDF documents\n * Extracted from test app to be reusable across the library\n */\n\nimport type { PDFDocumentProxy } from 'pdfjs-dist';\nimport { HazoInternalError } from 'hazo_core/errors';\n\n/**\n * Options for text search in PDF\n */\nexport interface TextSearchOptions {\n /** Page index to search (0-based) */\n page_index: number;\n\n /** Whether to normalize text by removing commas and spaces (default: true) */\n normalize?: boolean;\n\n /** Minimum length for partial matches (default: 3) */\n min_partial_match_length?: number;\n\n /** Horizontal padding around text (default: 2) */\n padding_x?: number;\n\n /** Vertical padding around text (default: 1) */\n padding_y?: number;\n\n /** Y-axis offset to adjust highlight position (default: -3) */\n y_offset?: number;\n}\n\n/**\n * Result of text search in PDF\n */\nexport interface TextSearchResult {\n /** X coordinate in PDF coordinate system */\n x: number;\n\n /** Y coordinate in PDF coordinate system */\n y: number;\n\n /** Width of the text box */\n width: number;\n\n /** Height of the text box */\n height: number;\n\n /** Type of match found */\n match_type: 'exact' | 'partial';\n\n /** The actual text that was matched */\n matched_text: string;\n}\n\n/**\n * Find text in a PDF document and return its position\n *\n * @param pdf - PDF document proxy from pdfjs\n * @param search_value - Text to search for\n * @param options - Search options\n * @returns Position and dimensions of the text, or null if not found\n */\nexport async function find_text_in_pdf(\n pdf: PDFDocumentProxy,\n search_value: string,\n options: TextSearchOptions\n): Promise<TextSearchResult | null> {\n try {\n const {\n page_index,\n normalize = true,\n min_partial_match_length = 3,\n padding_x = 2,\n padding_y = 1,\n y_offset = -3,\n } = options;\n\n // Get page (pdfjs uses 1-based indexing)\n const page = await pdf.getPage(page_index + 1);\n const text_content = await page.getTextContent();\n\n // Normalize search value if enabled\n const normalized_search = normalize\n ? search_value.toString().replace(/[,\\s]/g, '').toLowerCase()\n : search_value.toString().toLowerCase();\n\n // First pass: look for exact match\n for (const item of text_content.items) {\n if ('str' in item) {\n const text_item = item as { str: string; transform: number[]; width?: number; height?: number };\n const text = normalize\n ? text_item.str.replace(/[,\\s]/g, '').toLowerCase()\n : text_item.str.toLowerCase();\n\n // Only match if this text item equals our search value\n if (text === normalized_search) {\n const result = extract_text_position(text_item, padding_x, padding_y, y_offset);\n return {\n ...result,\n match_type: 'exact',\n matched_text: text_item.str,\n };\n }\n }\n }\n\n // Second pass: look for partial match (text item contains search value)\n if (normalized_search.length >= min_partial_match_length) {\n for (const item of text_content.items) {\n if ('str' in item) {\n const text_item = item as { str: string; transform: number[]; width?: number; height?: number };\n const text = normalize\n ? text_item.str.replace(/[,\\s]/g, '').toLowerCase()\n : text_item.str.toLowerCase();\n\n if (text.includes(normalized_search)) {\n const result = extract_partial_match_position(\n text_item, text, normalized_search, padding_x, padding_y, y_offset\n );\n return {\n ...result,\n match_type: 'partial',\n matched_text: text_item.str,\n };\n }\n }\n }\n }\n\n return null;\n } catch (err) {\n throw new HazoInternalError({\n code: 'HAZO_PDF_TEXT_SEARCH_FAILED',\n pkg: 'hazo_pdf',\n message: `Text search failed: ${err instanceof Error ? err.message : String(err)}`,\n cause: err instanceof Error ? err : new Error(String(err)),\n });\n }\n}\n\n/**\n * Find ALL occurrences of text in a PDF page and return their positions\n *\n * @param pdf - PDF document proxy from pdfjs\n * @param search_value - Text to search for\n * @param options - Search options\n * @returns Array of all matching positions (empty if none found)\n */\nexport async function find_all_text_in_pdf(\n pdf: PDFDocumentProxy,\n search_value: string,\n options: TextSearchOptions\n): Promise<TextSearchResult[]> {\n try {\n const {\n page_index,\n normalize = true,\n min_partial_match_length = 3,\n padding_x = 2,\n padding_y = 1,\n y_offset = -3,\n } = options;\n\n const page = await pdf.getPage(page_index + 1);\n const text_content = await page.getTextContent();\n\n const normalized_search = normalize\n ? search_value.toString().replace(/[,\\s]/g, '').toLowerCase()\n : search_value.toString().toLowerCase();\n\n const results: TextSearchResult[] = [];\n\n // Collect all exact matches\n for (const item of text_content.items) {\n if ('str' in item) {\n const text_item = item as { str: string; transform: number[]; width?: number; height?: number };\n const text = normalize\n ? text_item.str.replace(/[,\\s]/g, '').toLowerCase()\n : text_item.str.toLowerCase();\n\n if (text === normalized_search) {\n const result = extract_text_position(text_item, padding_x, padding_y, y_offset);\n results.push({\n ...result,\n match_type: 'exact',\n matched_text: text_item.str,\n });\n }\n }\n }\n\n // If no exact matches, collect partial matches\n if (results.length === 0 && normalized_search.length >= min_partial_match_length) {\n for (const item of text_content.items) {\n if ('str' in item) {\n const text_item = item as { str: string; transform: number[]; width?: number; height?: number };\n const text = normalize\n ? text_item.str.replace(/[,\\s]/g, '').toLowerCase()\n : text_item.str.toLowerCase();\n\n if (text.includes(normalized_search)) {\n const result = extract_partial_match_position(\n text_item, text, normalized_search, padding_x, padding_y, y_offset\n );\n results.push({\n ...result,\n match_type: 'partial',\n matched_text: text_item.str,\n });\n }\n }\n }\n }\n\n return results;\n } catch (err) {\n throw new HazoInternalError({\n code: 'HAZO_PDF_TEXT_SEARCH_FAILED',\n pkg: 'hazo_pdf',\n message: `Text search failed: ${err instanceof Error ? err.message : String(err)}`,\n cause: err instanceof Error ? err : new Error(String(err)),\n });\n }\n}\n\n/**\n * Extract position and dimensions from a PDF text item (full item bounds)\n * @private\n */\nfunction extract_text_position(\n text_item: { str: string; transform: number[]; width?: number; height?: number },\n padding_x: number,\n padding_y: number,\n y_offset: number\n): { x: number; y: number; width: number; height: number } {\n // Extract position from transform matrix [a, b, c, d, x, y]\n const x = text_item.transform[4];\n const y = text_item.transform[5];\n\n // Calculate dimensions\n const font_size = Math.abs(text_item.transform[0]) || 10;\n const base_width = text_item.width || (text_item.str.length * font_size * 0.55);\n const base_height = text_item.height || font_size;\n\n // Add padding for better visibility\n const width = base_width + (padding_x * 2);\n const height = base_height + (padding_y * 2);\n\n return {\n x: x - padding_x,\n y: y - padding_y + y_offset,\n width,\n height,\n };\n}\n\n/**\n * Extract position for a partial match — narrows the bounding box\n * to just the matched substring within the full text item.\n * @private\n */\nfunction extract_partial_match_position(\n text_item: { str: string; transform: number[]; width?: number; height?: number },\n normalized_text: string,\n normalized_search: string,\n padding_x: number,\n padding_y: number,\n y_offset: number\n): { x: number; y: number; width: number; height: number } {\n const x = text_item.transform[4];\n const y = text_item.transform[5];\n const font_size = Math.abs(text_item.transform[0]) || 10;\n const base_width = text_item.width || (text_item.str.length * font_size * 0.55);\n const base_height = text_item.height || font_size;\n\n // Calculate the proportion of the text item that the match covers\n const total_chars = normalized_text.length;\n const match_start = normalized_text.indexOf(normalized_search);\n const match_chars = normalized_search.length;\n\n if (total_chars === 0 || match_start < 0) {\n // Fallback to full item bounds\n return extract_text_position(text_item, padding_x, padding_y, y_offset);\n }\n\n // Estimate x offset and width based on character proportions\n const char_width = base_width / total_chars;\n const match_x_offset = match_start * char_width;\n const match_width = match_chars * char_width;\n\n return {\n x: x + match_x_offset - padding_x,\n y: y - padding_y + y_offset,\n width: match_width + (padding_x * 2),\n height: base_height + (padding_y * 2),\n };\n}\n"],"mappings":";AAMA,SAAS,yBAAyB;AAwDlC,eAAsB,iBACpB,KACA,cACA,SACkC;AAClC,MAAI;AACF,UAAM;AAAA,MACJ;AAAA,MACA,YAAY;AAAA,MACZ,2BAA2B;AAAA,MAC3B,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ,WAAW;AAAA,IACb,IAAI;AAGJ,UAAM,OAAO,MAAM,IAAI,QAAQ,aAAa,CAAC;AAC7C,UAAM,eAAe,MAAM,KAAK,eAAe;AAG/C,UAAM,oBAAoB,YACtB,aAAa,SAAS,EAAE,QAAQ,UAAU,EAAE,EAAE,YAAY,IAC1D,aAAa,SAAS,EAAE,YAAY;AAGxC,eAAW,QAAQ,aAAa,OAAO;AACrC,UAAI,SAAS,MAAM;AACjB,cAAM,YAAY;AAClB,cAAM,OAAO,YACT,UAAU,IAAI,QAAQ,UAAU,EAAE,EAAE,YAAY,IAChD,UAAU,IAAI,YAAY;AAG9B,YAAI,SAAS,mBAAmB;AAC9B,gBAAM,SAAS,sBAAsB,WAAW,WAAW,WAAW,QAAQ;AAC9E,iBAAO;AAAA,YACL,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,cAAc,UAAU;AAAA,UAC1B;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAGA,QAAI,kBAAkB,UAAU,0BAA0B;AACxD,iBAAW,QAAQ,aAAa,OAAO;AACrC,YAAI,SAAS,MAAM;AACjB,gBAAM,YAAY;AAClB,gBAAM,OAAO,YACT,UAAU,IAAI,QAAQ,UAAU,EAAE,EAAE,YAAY,IAChD,UAAU,IAAI,YAAY;AAE9B,cAAI,KAAK,SAAS,iBAAiB,GAAG;AACpC,kBAAM,SAAS;AAAA,cACb;AAAA,cAAW;AAAA,cAAM;AAAA,cAAmB;AAAA,cAAW;AAAA,cAAW;AAAA,YAC5D;AACA,mBAAO;AAAA,cACL,GAAG;AAAA,cACH,YAAY;AAAA,cACZ,cAAc,UAAU;AAAA,YAC1B;AAAA,UACF;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,IAAI,kBAAkB;AAAA,MAC1B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,uBAAuB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,MAChF,OAAO,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAAA,IAC3D,CAAC;AAAA,EACH;AACF;AAUA,eAAsB,qBACpB,KACA,cACA,SAC6B;AAC7B,MAAI;AACF,UAAM;AAAA,MACJ;AAAA,MACA,YAAY;AAAA,MACZ,2BAA2B;AAAA,MAC3B,YAAY;AAAA,MACZ,YAAY;AAAA,MACZ,WAAW;AAAA,IACb,IAAI;AAEJ,UAAM,OAAO,MAAM,IAAI,QAAQ,aAAa,CAAC;AAC7C,UAAM,eAAe,MAAM,KAAK,eAAe;AAE/C,UAAM,oBAAoB,YACtB,aAAa,SAAS,EAAE,QAAQ,UAAU,EAAE,EAAE,YAAY,IAC1D,aAAa,SAAS,EAAE,YAAY;AAExC,UAAM,UAA8B,CAAC;AAGrC,eAAW,QAAQ,aAAa,OAAO;AACrC,UAAI,SAAS,MAAM;AACjB,cAAM,YAAY;AAClB,cAAM,OAAO,YACT,UAAU,IAAI,QAAQ,UAAU,EAAE,EAAE,YAAY,IAChD,UAAU,IAAI,YAAY;AAE9B,YAAI,SAAS,mBAAmB;AAC9B,gBAAM,SAAS,sBAAsB,WAAW,WAAW,WAAW,QAAQ;AAC9E,kBAAQ,KAAK;AAAA,YACX,GAAG;AAAA,YACH,YAAY;AAAA,YACZ,cAAc,UAAU;AAAA,UAC1B,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAGA,QAAI,QAAQ,WAAW,KAAK,kBAAkB,UAAU,0BAA0B;AAChF,iBAAW,QAAQ,aAAa,OAAO;AACrC,YAAI,SAAS,MAAM;AACjB,gBAAM,YAAY;AAClB,gBAAM,OAAO,YACT,UAAU,IAAI,QAAQ,UAAU,EAAE,EAAE,YAAY,IAChD,UAAU,IAAI,YAAY;AAE9B,cAAI,KAAK,SAAS,iBAAiB,GAAG;AACpC,kBAAM,SAAS;AAAA,cACb;AAAA,cAAW;AAAA,cAAM;AAAA,cAAmB;AAAA,cAAW;AAAA,cAAW;AAAA,YAC5D;AACA,oBAAQ,KAAK;AAAA,cACX,GAAG;AAAA,cACH,YAAY;AAAA,cACZ,cAAc,UAAU;AAAA,YAC1B,CAAC;AAAA,UACH;AAAA,QACF;AAAA,MACF;AAAA,IACF;AAEA,WAAO;AAAA,EACT,SAAS,KAAK;AACZ,UAAM,IAAI,kBAAkB;AAAA,MAC1B,MAAM;AAAA,MACN,KAAK;AAAA,MACL,SAAS,uBAAuB,eAAe,QAAQ,IAAI,UAAU,OAAO,GAAG,CAAC;AAAA,MAChF,OAAO,eAAe,QAAQ,MAAM,IAAI,MAAM,OAAO,GAAG,CAAC;AAAA,IAC3D,CAAC;AAAA,EACH;AACF;AAMA,SAAS,sBACP,WACA,WACA,WACA,UACyD;AAEzD,QAAM,IAAI,UAAU,UAAU,CAAC;AAC/B,QAAM,IAAI,UAAU,UAAU,CAAC;AAG/B,QAAM,YAAY,KAAK,IAAI,UAAU,UAAU,CAAC,CAAC,KAAK;AACtD,QAAM,aAAa,UAAU,SAAU,UAAU,IAAI,SAAS,YAAY;AAC1E,QAAM,cAAc,UAAU,UAAU;AAGxC,QAAM,QAAQ,aAAc,YAAY;AACxC,QAAM,SAAS,cAAe,YAAY;AAE1C,SAAO;AAAA,IACL,GAAG,IAAI;AAAA,IACP,GAAG,IAAI,YAAY;AAAA,IACnB;AAAA,IACA;AAAA,EACF;AACF;AAOA,SAAS,+BACP,WACA,iBACA,mBACA,WACA,WACA,UACyD;AACzD,QAAM,IAAI,UAAU,UAAU,CAAC;AAC/B,QAAM,IAAI,UAAU,UAAU,CAAC;AAC/B,QAAM,YAAY,KAAK,IAAI,UAAU,UAAU,CAAC,CAAC,KAAK;AACtD,QAAM,aAAa,UAAU,SAAU,UAAU,IAAI,SAAS,YAAY;AAC1E,QAAM,cAAc,UAAU,UAAU;AAGxC,QAAM,cAAc,gBAAgB;AACpC,QAAM,cAAc,gBAAgB,QAAQ,iBAAiB;AAC7D,QAAM,cAAc,kBAAkB;AAEtC,MAAI,gBAAgB,KAAK,cAAc,GAAG;AAExC,WAAO,sBAAsB,WAAW,WAAW,WAAW,QAAQ;AAAA,EACxE;AAGA,QAAM,aAAa,aAAa;AAChC,QAAM,iBAAiB,cAAc;AACrC,QAAM,cAAc,cAAc;AAElC,SAAO;AAAA,IACL,GAAG,IAAI,iBAAiB;AAAA,IACxB,GAAG,IAAI,YAAY;AAAA,IACnB,OAAO,cAAe,YAAY;AAAA,IAClC,QAAQ,cAAe,YAAY;AAAA,EACrC;AACF;","names":[]}
|