npm - @doclo/core - Versions diffs - 0.2.2 → 0.2.4 - Mend

@doclo/core 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +16 -15
package/dist/index.d.ts +2 -2
package/dist/index.js +11 -12
package/dist/index.js.map +1 -1
package/dist/internal/validation-utils.d.ts +1 -1
package/dist/internal/validation-utils.js +1 -1
package/dist/internal/validation-utils.js.map +1 -1
package/dist/pdf-utils.d.ts +12 -7
package/dist/pdf-utils.js +10 -11
package/dist/pdf-utils.js.map +1 -1
package/dist/{validation-BQO54qAY.d.ts → validation-C_RN-Xqr.d.ts} +35 -8
package/dist/validation.d.ts +1 -1
package/dist/validation.js.map +1 -1
package/package.json +1 -1

package/dist/pdf-utils.js.map CHANGED Viewed

	@@ -1 +1 @@
1	- {"version":3,"sources":["../src/pdf-utils.ts","../src/runtime/base64.ts"],"sourcesContent":["/*\n PDF Utilities\n \n Edge Runtime compatible PDF manipulation utilities using pdf-lib.\n * These functions work in Node.js, Vercel Edge Functions, Cloudflare Workers, and browsers.\n /\n\nimport { PDFDocument } from 'pdf-lib';\nimport { base64ToArrayBuffer, uint8ArrayToBase64 } from './runtime/base64.js';\nimport type { DocumentIR } from './internal/validation-utils.js';\n\n/\n Get the total number of pages in a PDF document\n \n @param dataUrl - PDF data URI in format: data:application/pdf;base64,{base64data}\n * @returns Total page count\n * @throws {Error} If the input is not a valid PDF data URL\n \n @example\n * ```typescript\n * const pageCount = await getPDFPageCount('data:application/pdf;base64,JVBERi0...');\n * console.log(`PDF has ${pageCount} pages`);\n * ```\n /\nexport async function getPDFPageCount(dataUrl: string): Promise<number> {\n const base64Match = dataUrl.match(/^data:application\\/pdf;base64,(.+)$/);\n if (!base64Match) {\n throw new Error('Invalid PDF data URL format. 
Expected: data:application/pdf;base64,{base64data}');\n }\n\n const base64Data = base64Match[1];\n const pdfBytes = base64ToArrayBuffer(base64Data);\n const pdfDoc = await PDFDocument.load(pdfBytes);\n return pdfDoc.getPageCount();\n}\n\n/\n Split a PDF into multiple smaller PDFs based on page ranges\n \n @param dataUrl - PDF data URI in format: data:application/pdf;base64,{base64data}\n * @param pageRanges - Array of [startPage, endPage] tuples (1-indexed, inclusive)\n * @returns Array of PDF data URLs, one for each page range\n * @throws {Error} If the input is not a valid PDF data URL or page ranges are invalid\n \n @example\n * ```typescript\n * // Split a 10-page PDF into three chunks\n * const chunks = await splitPDFIntoChunks(pdfDataUrl, [\n * [1, 3], // Pages 1-3\n * [4, 7], // Pages 4-7\n * [8, 10] // Pages 8-10\n * ]);\n * console.log(`Created ${chunks.length} PDF chunks`);\n * ```\n /\nexport async function splitPDFIntoChunks(\n dataUrl: string,\n pageRanges: Array<[number, number]>\n): Promise<string[]> {\n // Extract base64 data from data URL\n const base64Match = dataUrl.match(/^data:application\\/pdf;base64,(.+)$/);\n if (!base64Match) {\n throw new Error('Invalid PDF data URL format. Expected: data:application/pdf;base64,{base64data}');\n }\n\n const base64Data = base64Match[1];\n const pdfBytes = base64ToArrayBuffer(base64Data);\n\n // Load the PDF\n const pdfDoc = await PDFDocument.load(pdfBytes);\n const totalPages = pdfDoc.getPageCount();\n\n const chunks: string[] = [];\n\n for (const [startPage, endPage] of pageRanges) {\n // Validate page range\n if (startPage < 1 \|\| endPage > totalPages \|\| startPage > endPage) {\n throw new Error(\n `Invalid page range [${startPage}, ${endPage}] for PDF with ${totalPages} pages. 
` +\n `Page numbers must be 1-indexed and within bounds.`\n );\n }\n\n // Create new PDF with only these pages\n const chunkDoc = await PDFDocument.create();\n const pagesToCopy = Array.from(\n { length: endPage - startPage + 1 },\n (_, i) => startPage - 1 + i // Convert to 0-indexed\n );\n\n const copiedPages = await chunkDoc.copyPages(pdfDoc, pagesToCopy);\n copiedPages.forEach(page => chunkDoc.addPage(page));\n\n // Serialize to base64 using Edge Runtime compatible adapter\n const chunkBytes = await chunkDoc.save();\n const chunkBase64 = uint8ArrayToBase64(chunkBytes);\n chunks.push(`data:application/pdf;base64,${chunkBase64}`);\n }\n\n return chunks;\n}\n\n/\n Get the page count from a DocumentIR, with fallback logic\n \n This helper function checks multiple sources for page count:\n * 1. `extras.pageCount` (explicit page count from provider or PDF analysis)\n * 2. `pages.length` (fallback - number of pages in the IR)\n \n Note: For Unsiloed provider, `pages.length` represents semantic chunks,\n * not traditional pages. 
Use `extras.totalSemanticChunks` to distinguish.\n \n @param ir - DocumentIR to get page count from\n * @returns Page count (or chunk count for Unsiloed)\n \n @example\n * ```typescript\n * const ir = await parseNode.run(pdfUrl, { provider: ocrProvider });\n * const pageCount = getDocumentPageCount(ir);\n * console.log(`Document has ${pageCount} pages`);\n * ```\n /\nexport function getDocumentPageCount(ir: DocumentIR): number {\n // Prefer explicit pageCount from extras\n if (ir.extras?.pageCount !== undefined) {\n return ir.extras.pageCount;\n }\n\n // Fallback to pages array length\n return ir.pages.length;\n}\n\n/\n Get total page count across multiple DocumentIR objects (chunked results)\n \n For chunked parsing results, this sums up the page counts across all chunks.\n * It respects `extras.pageCount` if available, otherwise uses `pages.length`.\n \n @param irArray - Array of DocumentIR objects from chunked parsing\n * @returns Total page count across all chunks\n \n @example\n * ```typescript\n * const chunks = await parseNode.run(largePdfUrl, {\n * provider: ocrProvider,\n * chunked: { maxPagesPerChunk: 10 }\n * });\n * const totalPages = getTotalPageCount(chunks);\n * console.log(`Total pages across ${chunks.length} chunks: ${totalPages}`);\n * ```\n /\nexport function getTotalPageCount(irArray: DocumentIR[]): number {\n return irArray.reduce((sum, ir) => sum + getDocumentPageCount(ir), 0);\n}\n\n/\n Get comprehensive page-related metadata from a DocumentIR\n \n Returns detailed information about page counts, chunk information,\n * and whether the result is chunked or a complete document.\n \n @param ir - DocumentIR to analyze\n * @returns Metadata object with page count details\n \n @example\n * ```typescript\n * const metadata = getPageCountMetadata(ir);\n * console.log(`Document has ${metadata.pageCount} pages`);\n * if (metadata.isChunked) {\n * console.log(`This is chunk ${metadata.chunkIndex + 1} of ${metadata.totalChunks}`);\n * 
console.log(`Contains pages ${metadata.pageRange[0]} to ${metadata.pageRange[1]}`);\n * }\n * ```\n /\nexport function getPageCountMetadata(ir: DocumentIR): {\n /* Total page count (or chunk count for Unsiloed) /\n pageCount: number;\n /* Number of pages in the IR (may differ from pageCount for chunked docs) /\n pagesInIR: number;\n /* Whether this is a chunked result /\n isChunked: boolean;\n /* For chunked results: which chunk this is (0-indexed) /\n chunkIndex?: number;\n /* For chunked results: total number of chunks /\n totalChunks?: number;\n /* For chunked results: page range [start, end] (1-indexed, inclusive) /\n pageRange?: [number, number];\n /* For Unsiloed: total semantic chunks /\n totalSemanticChunks?: number;\n /* Whether this is from Unsiloed (semantic chunking, not traditional pages) /\n isSemanticChunking: boolean;\n} {\n const pagesInIR = ir.pages.length;\n const pageCount = ir.extras?.pageCount ?? pagesInIR;\n const isSemanticChunking = ir.extras?.totalSemanticChunks !== undefined;\n const isChunked = ir.extras?.chunkIndex !== undefined && ir.extras?.totalChunks !== undefined;\n\n return {\n pageCount,\n pagesInIR,\n isChunked,\n chunkIndex: ir.extras?.chunkIndex as number \| undefined,\n totalChunks: ir.extras?.totalChunks as number \| undefined,\n pageRange: ir.extras?.pageRange as [number, number] \| undefined,\n totalSemanticChunks: ir.extras?.totalSemanticChunks as number \| undefined,\n isSemanticChunking\n };\n}\n","/\n Universal Base64 Adapter\n \n Provides base64 encoding/decoding for both Node.js and Edge Runtime.\n * Replaces Node.js Buffer usage with Web APIs for Edge compatibility.\n \n @module @doclo/core/runtime/base64\n /\n\n/\n Convert ArrayBuffer to base64 string\n \n Uses different strategies depending on runtime:\n * - Edge Runtime / Browser: btoa() with binary string conversion\n * - Node.js: Buffer.toString('base64')\n \n @param buffer - ArrayBuffer to encode\n * @returns Base64 encoded string\n \n @example\n * 
```typescript\n * const buffer = new Uint8Array([72, 101, 108, 108, 111]).buffer;\n * const base64 = arrayBufferToBase64(buffer); // \"SGVsbG8=\"\n * ```\n /\nexport function arrayBufferToBase64(buffer: ArrayBuffer): string {\n // Node.js: Use Buffer for best performance\n if (typeof Buffer !== 'undefined') {\n return Buffer.from(buffer).toString('base64');\n }\n\n // Edge Runtime / Browser: Use btoa() with binary string\n const bytes = new Uint8Array(buffer);\n let binary = '';\n for (let i = 0; i < bytes.byteLength; i++) {\n binary += String.fromCharCode(bytes[i]);\n }\n return btoa(binary);\n}\n\n/\n Convert base64 string to ArrayBuffer\n \n Uses different strategies depending on runtime:\n * - Edge Runtime / Browser: atob() with Uint8Array conversion\n * - Node.js: Buffer.from(base64, 'base64')\n \n @param base64 - Base64 encoded string (with or without data URI prefix)\n * @returns Decoded ArrayBuffer\n \n @example\n * ```typescript\n * const buffer = base64ToArrayBuffer(\"SGVsbG8=\");\n * const text = new TextDecoder().decode(buffer); // \"Hello\"\n \n // Also handles data URIs\n * const buffer2 = base64ToArrayBuffer(\"data:image/png;base64,iVBORw0KG...\");\n * ```\n /\nexport function base64ToArrayBuffer(base64: string): ArrayBuffer {\n // Remove data URI prefix if present\n const cleanBase64 = base64.replace(/^data:[^;]+;base64,/, '');\n\n // Node.js: Use Buffer for best performance\n if (typeof Buffer !== 'undefined') {\n const buffer = Buffer.from(cleanBase64, 'base64');\n // Convert Node.js Buffer to ArrayBuffer\n return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);\n }\n\n // Edge Runtime / Browser: Use atob()\n const binaryString = atob(cleanBase64);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n return bytes.buffer;\n}\n\n/\n Convert Uint8Array to base64 string\n \n Convenience wrapper around arrayBufferToBase64 for 
Uint8Array inputs.\n \n @param bytes - Uint8Array to encode\n * @returns Base64 encoded string\n /\nexport function uint8ArrayToBase64(bytes: Uint8Array): string {\n return arrayBufferToBase64(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength) as ArrayBuffer);\n}\n\n/\n Convert base64 string to Uint8Array\n \n Convenience wrapper around base64ToArrayBuffer with Uint8Array result.\n \n @param base64 - Base64 encoded string\n * @returns Decoded Uint8Array\n /\nexport function base64ToUint8Array(base64: string): Uint8Array {\n return new Uint8Array(base64ToArrayBuffer(base64));\n}\n\n/\n Create a data URI from ArrayBuffer\n \n @param buffer - Data to encode\n * @param mimeType - MIME type (default: application/octet-stream)\n * @returns Data URI string\n \n @example\n * ```typescript\n * const buffer = new TextEncoder().encode(\"Hello, World!\");\n * const dataUri = createDataUri(buffer.buffer, 'text/plain');\n * // \"data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==\"\n * ```\n /\nexport function createDataUri(buffer: ArrayBuffer, mimeType = 'application/octet-stream'): string {\n const base64 = arrayBufferToBase64(buffer);\n return `data:${mimeType};base64,${base64}`;\n}\n\n/\n Check if a string is a valid data URI\n \n @param input - String to check\n * @returns True if valid data URI format\n /\nexport function isDataUri(input: string): boolean {\n return /^data:[^;,]+;base64,/.test(input);\n}\n\n/\n Extract MIME type from data URI\n \n @param dataUri - Data URI string\n * @returns MIME type or null if invalid\n \n @example\n * ```typescript\n * const mime = extractMimeType(\"data:image/png;base64,iVBOR...\");\n * console.log(mime); // \"image/png\"\n * ```\n */\nexport function extractMimeType(dataUri: string): string \| null {\n const match = dataUri.match(/^data:([^;,]+);base64,/);\n return match ? 
match[1] : null;\n}\n"],"mappings":";AAOA,SAAS,mBAAmB;;;ACkBrB,SAAS,oBAAoB,QAA6B;AAE/D,MAAI,OAAO,WAAW,aAAa;AACjC,WAAO,OAAO,KAAK,MAAM,EAAE,SAAS,QAAQ;AAAA,EAC9C;AAGA,QAAM,QAAQ,IAAI,WAAW,MAAM;AACnC,MAAI,SAAS;AACb,WAAS,IAAI,GAAG,IAAI,MAAM,YAAY,KAAK;AACzC,cAAU,OAAO,aAAa,MAAM,CAAC,CAAC;AAAA,EACxC;AACA,SAAO,KAAK,MAAM;AACpB;AAqBO,SAAS,oBAAoB,QAA6B;AAE/D,QAAM,cAAc,OAAO,QAAQ,uBAAuB,EAAE;AAG5D,MAAI,OAAO,WAAW,aAAa;AACjC,UAAM,SAAS,OAAO,KAAK,aAAa,QAAQ;AAEhD,WAAO,OAAO,OAAO,MAAM,OAAO,YAAY,OAAO,aAAa,OAAO,UAAU;AAAA,EACrF;AAGA,QAAM,eAAe,KAAK,WAAW;AACrC,QAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,WAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,UAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,EACtC;AACA,SAAO,MAAM;AACf;AAUO,SAAS,mBAAmB,OAA2B;AAC5D,SAAO,oBAAoB,MAAM,OAAO,MAAM,MAAM,YAAY,MAAM,aAAa,MAAM,UAAU,CAAgB;AACrH;;;ADjEA,eAAsB,gBAAgB,SAAkC;AACtE,QAAM,cAAc,QAAQ,MAAM,qCAAqC;AACvE,MAAI,CAAC,aAAa;AAChB,UAAM,IAAI,MAAM,iFAAiF;AAAA,EACnG;AAEA,QAAM,aAAa,YAAY,CAAC;AAChC,QAAM,WAAW,oBAAoB,UAAU;AAC/C,QAAM,SAAS,MAAM,YAAY,KAAK,QAAQ;AAC9C,SAAO,OAAO,aAAa;AAC7B;AAqBA,eAAsB,mBACpB,SACA,YACmB;AAEnB,QAAM,cAAc,QAAQ,MAAM,qCAAqC;AACvE,MAAI,CAAC,aAAa;AAChB,UAAM,IAAI,MAAM,iFAAiF;AAAA,EACnG;AAEA,QAAM,aAAa,YAAY,CAAC;AAChC,QAAM,WAAW,oBAAoB,UAAU;AAG/C,QAAM,SAAS,MAAM,YAAY,KAAK,QAAQ;AAC9C,QAAM,aAAa,OAAO,aAAa;AAEvC,QAAM,SAAmB,CAAC;AAE1B,aAAW,CAAC,WAAW,OAAO,KAAK,YAAY;AAE7C,QAAI,YAAY,KAAK,UAAU,cAAc,YAAY,SAAS;AAChE,YAAM,IAAI;AAAA,QACR,uBAAuB,SAAS,KAAK,OAAO,kBAAkB,UAAU;AAAA,MAE1E;AAAA,IACF;AAGA,UAAM,WAAW,MAAM,YAAY,OAAO;AAC1C,UAAM,cAAc,MAAM;AAAA,MACxB,EAAE,QAAQ,UAAU,YAAY,EAAE;AAAA,MAClC,CAAC,GAAG,MAAM,YAAY,IAAI;AAAA;AAAA,IAC5B;AAEA,UAAM,cAAc,MAAM,SAAS,UAAU,QAAQ,WAAW;AAChE,gBAAY,QAAQ,UAAQ,SAAS,QAAQ,IAAI,CAAC;AAGlD,UAAM,aAAa,MAAM,SAAS,KAAK;AACvC,UAAM,cAAc,mBAAmB,UAAU;AACjD,WAAO,KAAK,+BAA+B,WAAW,EAAE;AAAA,EAC1D;AAEA,SAAO;AACT;AAsBO,SAAS,qBAAqB,IAAwB;AAE3D,MAAI,GAAG,QAAQ,cAAc,QAAW;AACtC,WAAO,GAAG,OAAO;AAAA,EACnB;AAGA,SAAO,GAAG,MAAM;AAClB;AAqBO,SAAS,kBAAkB,SAA+B;AAC/D,SAAO,QAAQ,OAAO,CAAC,KAAK,OAAO,MAAM,qBAAqB,EAAE,GAAG,CAAC;AACtE;AAqBO,SAAS,qBAAqB,IAiBnC;
AACA,QAAM,YAAY,GAAG,MAAM;AAC3B,QAAM,YAAY,GAAG,QAAQ,aAAa;AAC1C,QAAM,qBAAqB,GAAG,QAAQ,wBAAwB;AAC9D,QAAM,YAAY,GAAG,QAAQ,eAAe,UAAa,GAAG,QAAQ,gBAAgB;AAEpF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,YAAY,GAAG,QAAQ;AAAA,IACvB,aAAa,GAAG,QAAQ;AAAA,IACxB,WAAW,GAAG,QAAQ;AAAA,IACtB,qBAAqB,GAAG,QAAQ;AAAA,IAChC;AAAA,EACF;AACF;","names":[]}
1	+ {"version":3,"sources":["../src/pdf-utils.ts","../src/runtime/base64.ts"],"sourcesContent":["/*\n PDF Utilities\n \n Edge Runtime compatible PDF manipulation utilities using pdf-lib.\n * These functions work in Node.js, Vercel Edge Functions, Cloudflare Workers, and browsers.\n /\n\nimport { PDFDocument } from 'pdf-lib';\nimport { base64ToArrayBuffer, uint8ArrayToBase64 } from './runtime/base64.js';\nimport type { DocumentIR } from './internal/validation-utils.js';\n\n/\n Extract raw base64 data from PDF input.\n * Accepts both data URL format and raw base64 strings.\n \n @param input - PDF data URL (data:application/pdf;base64,...) or raw base64 string\n * @returns Raw base64 string without data URL prefix\n /\nfunction extractPDFBase64(input: string): string {\n // If it's a data URL, extract the base64 part\n const dataUrlMatch = input.match(/^data:application\\/pdf;base64,(.+)$/);\n if (dataUrlMatch) {\n return dataUrlMatch[1];\n }\n\n // Otherwise assume it's raw base64\n return input;\n}\n\n/\n Get the total number of pages in a PDF document\n \n @param input - PDF data URL (data:application/pdf;base64,...) or raw base64 string\n * @returns Total page count\n * @throws {Error} If the input is not valid PDF data\n \n @example\n * ```typescript\n * // With data URL\n * const pageCount = await getPDFPageCount('data:application/pdf;base64,JVBERi0...');\n \n // With raw base64\n * const pageCount = await getPDFPageCount('JVBERi0xLjQK...');\n \n console.log(`PDF has ${pageCount} pages`);\n * ```\n /\nexport async function getPDFPageCount(input: string): Promise<number> {\n const base64Data = extractPDFBase64(input);\n const pdfBytes = base64ToArrayBuffer(base64Data);\n const pdfDoc = await PDFDocument.load(pdfBytes);\n return pdfDoc.getPageCount();\n}\n\n/\n Split a PDF into multiple smaller PDFs based on page ranges\n \n @param input - PDF data URL (data:application/pdf;base64,...) 
or raw base64 string\n * @param pageRanges - Array of [startPage, endPage] tuples (1-indexed, inclusive)\n * @returns Array of PDF data URLs, one for each page range\n * @throws {Error} If the input is not valid PDF data or page ranges are invalid\n \n @example\n * ```typescript\n * // Split a 10-page PDF into three chunks\n * const chunks = await splitPDFIntoChunks(pdfDataUrl, [\n * [1, 3], // Pages 1-3\n * [4, 7], // Pages 4-7\n * [8, 10] // Pages 8-10\n * ]);\n * console.log(`Created ${chunks.length} PDF chunks`);\n * ```\n /\nexport async function splitPDFIntoChunks(\n input: string,\n pageRanges: Array<[number, number]>\n): Promise<string[]> {\n // Extract base64 data (accepts both data URL and raw base64)\n const base64Data = extractPDFBase64(input);\n const pdfBytes = base64ToArrayBuffer(base64Data);\n\n // Load the PDF\n const pdfDoc = await PDFDocument.load(pdfBytes);\n const totalPages = pdfDoc.getPageCount();\n\n const chunks: string[] = [];\n\n for (const [startPage, endPage] of pageRanges) {\n // Validate page range\n if (startPage < 1 \|\| endPage > totalPages \|\| startPage > endPage) {\n throw new Error(\n `Invalid page range [${startPage}, ${endPage}] for PDF with ${totalPages} pages. 
` +\n `Page numbers must be 1-indexed and within bounds.`\n );\n }\n\n // Create new PDF with only these pages\n const chunkDoc = await PDFDocument.create();\n const pagesToCopy = Array.from(\n { length: endPage - startPage + 1 },\n (_, i) => startPage - 1 + i // Convert to 0-indexed\n );\n\n const copiedPages = await chunkDoc.copyPages(pdfDoc, pagesToCopy);\n copiedPages.forEach(page => chunkDoc.addPage(page));\n\n // Serialize to base64 using Edge Runtime compatible adapter\n const chunkBytes = await chunkDoc.save();\n const chunkBase64 = uint8ArrayToBase64(chunkBytes);\n chunks.push(`data:application/pdf;base64,${chunkBase64}`);\n }\n\n return chunks;\n}\n\n/\n Get the page count from a DocumentIR, with fallback logic\n \n This helper function checks multiple sources for page count:\n * 1. `extras.pageCount` (explicit page count from provider or PDF analysis)\n * 2. `pages.length` (fallback - number of pages in the IR)\n \n Note: For Unsiloed provider, `pages.length` represents semantic chunks,\n * not traditional pages. 
Use `extras.totalSemanticChunks` to distinguish.\n \n @param ir - DocumentIR to get page count from\n * @returns Page count (or chunk count for Unsiloed)\n \n @example\n * ```typescript\n * const ir = await parseNode.run(pdfUrl, { provider: ocrProvider });\n * const pageCount = getDocumentPageCount(ir);\n * console.log(`Document has ${pageCount} pages`);\n * ```\n /\nexport function getDocumentPageCount(ir: DocumentIR): number {\n // Prefer explicit pageCount from extras\n if (ir.extras?.pageCount !== undefined) {\n return ir.extras.pageCount;\n }\n\n // Fallback to pages array length\n return ir.pages.length;\n}\n\n/\n Get total page count across multiple DocumentIR objects (chunked results)\n \n For chunked parsing results, this sums up the page counts across all chunks.\n * It respects `extras.pageCount` if available, otherwise uses `pages.length`.\n \n @param irArray - Array of DocumentIR objects from chunked parsing\n * @returns Total page count across all chunks\n \n @example\n * ```typescript\n * const chunks = await parseNode.run(largePdfUrl, {\n * provider: ocrProvider,\n * chunked: { maxPagesPerChunk: 10 }\n * });\n * const totalPages = getTotalPageCount(chunks);\n * console.log(`Total pages across ${chunks.length} chunks: ${totalPages}`);\n * ```\n /\nexport function getTotalPageCount(irArray: DocumentIR[]): number {\n return irArray.reduce((sum, ir) => sum + getDocumentPageCount(ir), 0);\n}\n\n/\n Get comprehensive page-related metadata from a DocumentIR\n \n Returns detailed information about page counts, chunk information,\n * and whether the result is chunked or a complete document.\n \n @param ir - DocumentIR to analyze\n * @returns Metadata object with page count details\n \n @example\n * ```typescript\n * const metadata = getPageCountMetadata(ir);\n * console.log(`Document has ${metadata.pageCount} pages`);\n * if (metadata.isChunked) {\n * console.log(`This is chunk ${metadata.chunkIndex + 1} of ${metadata.totalChunks}`);\n * 
console.log(`Contains pages ${metadata.pageRange[0]} to ${metadata.pageRange[1]}`);\n * }\n * ```\n /\nexport function getPageCountMetadata(ir: DocumentIR): {\n /* Total page count (or chunk count for Unsiloed) /\n pageCount: number;\n /* Number of pages in the IR (may differ from pageCount for chunked docs) /\n pagesInIR: number;\n /* Whether this is a chunked result /\n isChunked: boolean;\n /* For chunked results: which chunk this is (0-indexed) /\n chunkIndex?: number;\n /* For chunked results: total number of chunks /\n totalChunks?: number;\n /* For chunked results: page range [start, end] (1-indexed, inclusive) /\n pageRange?: [number, number];\n /* For Unsiloed: total semantic chunks /\n totalSemanticChunks?: number;\n /* Whether this is from Unsiloed (semantic chunking, not traditional pages) /\n isSemanticChunking: boolean;\n} {\n const pagesInIR = ir.pages.length;\n const pageCount = ir.extras?.pageCount ?? pagesInIR;\n const isSemanticChunking = ir.extras?.totalSemanticChunks !== undefined;\n const isChunked = ir.extras?.chunkIndex !== undefined && ir.extras?.totalChunks !== undefined;\n\n return {\n pageCount,\n pagesInIR,\n isChunked,\n chunkIndex: ir.extras?.chunkIndex as number \| undefined,\n totalChunks: ir.extras?.totalChunks as number \| undefined,\n pageRange: ir.extras?.pageRange as [number, number] \| undefined,\n totalSemanticChunks: ir.extras?.totalSemanticChunks as number \| undefined,\n isSemanticChunking\n };\n}\n","/\n Universal Base64 Adapter\n \n Provides base64 encoding/decoding for both Node.js and Edge Runtime.\n * Replaces Node.js Buffer usage with Web APIs for Edge compatibility.\n \n @module @doclo/core/runtime/base64\n /\n\n/\n Convert ArrayBuffer to base64 string\n \n Uses different strategies depending on runtime:\n * - Edge Runtime / Browser: btoa() with binary string conversion\n * - Node.js: Buffer.toString('base64')\n \n @param buffer - ArrayBuffer to encode\n * @returns Base64 encoded string\n \n @example\n * 
```typescript\n * const buffer = new Uint8Array([72, 101, 108, 108, 111]).buffer;\n * const base64 = arrayBufferToBase64(buffer); // \"SGVsbG8=\"\n * ```\n /\nexport function arrayBufferToBase64(buffer: ArrayBuffer): string {\n // Node.js: Use Buffer for best performance\n if (typeof Buffer !== 'undefined') {\n return Buffer.from(buffer).toString('base64');\n }\n\n // Edge Runtime / Browser: Use btoa() with binary string\n const bytes = new Uint8Array(buffer);\n let binary = '';\n for (let i = 0; i < bytes.byteLength; i++) {\n binary += String.fromCharCode(bytes[i]);\n }\n return btoa(binary);\n}\n\n/\n Convert base64 string to ArrayBuffer\n \n Uses different strategies depending on runtime:\n * - Edge Runtime / Browser: atob() with Uint8Array conversion\n * - Node.js: Buffer.from(base64, 'base64')\n \n @param base64 - Base64 encoded string (with or without data URI prefix)\n * @returns Decoded ArrayBuffer\n \n @example\n * ```typescript\n * const buffer = base64ToArrayBuffer(\"SGVsbG8=\");\n * const text = new TextDecoder().decode(buffer); // \"Hello\"\n \n // Also handles data URIs\n * const buffer2 = base64ToArrayBuffer(\"data:image/png;base64,iVBORw0KG...\");\n * ```\n /\nexport function base64ToArrayBuffer(base64: string): ArrayBuffer {\n // Remove data URI prefix if present\n const cleanBase64 = base64.replace(/^data:[^;]+;base64,/, '');\n\n // Node.js: Use Buffer for best performance\n if (typeof Buffer !== 'undefined') {\n const buffer = Buffer.from(cleanBase64, 'base64');\n // Convert Node.js Buffer to ArrayBuffer\n return buffer.buffer.slice(buffer.byteOffset, buffer.byteOffset + buffer.byteLength);\n }\n\n // Edge Runtime / Browser: Use atob()\n const binaryString = atob(cleanBase64);\n const bytes = new Uint8Array(binaryString.length);\n for (let i = 0; i < binaryString.length; i++) {\n bytes[i] = binaryString.charCodeAt(i);\n }\n return bytes.buffer;\n}\n\n/\n Convert Uint8Array to base64 string\n \n Convenience wrapper around arrayBufferToBase64 for 
Uint8Array inputs.\n \n @param bytes - Uint8Array to encode\n * @returns Base64 encoded string\n /\nexport function uint8ArrayToBase64(bytes: Uint8Array): string {\n return arrayBufferToBase64(bytes.buffer.slice(bytes.byteOffset, bytes.byteOffset + bytes.byteLength) as ArrayBuffer);\n}\n\n/\n Convert base64 string to Uint8Array\n \n Convenience wrapper around base64ToArrayBuffer with Uint8Array result.\n \n @param base64 - Base64 encoded string\n * @returns Decoded Uint8Array\n /\nexport function base64ToUint8Array(base64: string): Uint8Array {\n return new Uint8Array(base64ToArrayBuffer(base64));\n}\n\n/\n Create a data URI from ArrayBuffer\n \n @param buffer - Data to encode\n * @param mimeType - MIME type (default: application/octet-stream)\n * @returns Data URI string\n \n @example\n * ```typescript\n * const buffer = new TextEncoder().encode(\"Hello, World!\");\n * const dataUri = createDataUri(buffer.buffer, 'text/plain');\n * // \"data:text/plain;base64,SGVsbG8sIFdvcmxkIQ==\"\n * ```\n /\nexport function createDataUri(buffer: ArrayBuffer, mimeType = 'application/octet-stream'): string {\n const base64 = arrayBufferToBase64(buffer);\n return `data:${mimeType};base64,${base64}`;\n}\n\n/\n Check if a string is a valid data URI\n \n @param input - String to check\n * @returns True if valid data URI format\n /\nexport function isDataUri(input: string): boolean {\n return /^data:[^;,]+;base64,/.test(input);\n}\n\n/\n Extract MIME type from data URI\n \n @param dataUri - Data URI string\n * @returns MIME type or null if invalid\n \n @example\n * ```typescript\n * const mime = extractMimeType(\"data:image/png;base64,iVBOR...\");\n * console.log(mime); // \"image/png\"\n * ```\n */\nexport function extractMimeType(dataUri: string): string \| null {\n const match = dataUri.match(/^data:([^;,]+);base64,/);\n return match ? 
match[1] : null;\n}\n"],"mappings":";AAOA,SAAS,mBAAmB;;;ACkBrB,SAAS,oBAAoB,QAA6B;AAE/D,MAAI,OAAO,WAAW,aAAa;AACjC,WAAO,OAAO,KAAK,MAAM,EAAE,SAAS,QAAQ;AAAA,EAC9C;AAGA,QAAM,QAAQ,IAAI,WAAW,MAAM;AACnC,MAAI,SAAS;AACb,WAAS,IAAI,GAAG,IAAI,MAAM,YAAY,KAAK;AACzC,cAAU,OAAO,aAAa,MAAM,CAAC,CAAC;AAAA,EACxC;AACA,SAAO,KAAK,MAAM;AACpB;AAqBO,SAAS,oBAAoB,QAA6B;AAE/D,QAAM,cAAc,OAAO,QAAQ,uBAAuB,EAAE;AAG5D,MAAI,OAAO,WAAW,aAAa;AACjC,UAAM,SAAS,OAAO,KAAK,aAAa,QAAQ;AAEhD,WAAO,OAAO,OAAO,MAAM,OAAO,YAAY,OAAO,aAAa,OAAO,UAAU;AAAA,EACrF;AAGA,QAAM,eAAe,KAAK,WAAW;AACrC,QAAM,QAAQ,IAAI,WAAW,aAAa,MAAM;AAChD,WAAS,IAAI,GAAG,IAAI,aAAa,QAAQ,KAAK;AAC5C,UAAM,CAAC,IAAI,aAAa,WAAW,CAAC;AAAA,EACtC;AACA,SAAO,MAAM;AACf;AAUO,SAAS,mBAAmB,OAA2B;AAC5D,SAAO,oBAAoB,MAAM,OAAO,MAAM,MAAM,YAAY,MAAM,aAAa,MAAM,UAAU,CAAgB;AACrH;;;ADvEA,SAAS,iBAAiB,OAAuB;AAE/C,QAAM,eAAe,MAAM,MAAM,qCAAqC;AACtE,MAAI,cAAc;AAChB,WAAO,aAAa,CAAC;AAAA,EACvB;AAGA,SAAO;AACT;AAoBA,eAAsB,gBAAgB,OAAgC;AACpE,QAAM,aAAa,iBAAiB,KAAK;AACzC,QAAM,WAAW,oBAAoB,UAAU;AAC/C,QAAM,SAAS,MAAM,YAAY,KAAK,QAAQ;AAC9C,SAAO,OAAO,aAAa;AAC7B;AAqBA,eAAsB,mBACpB,OACA,YACmB;AAEnB,QAAM,aAAa,iBAAiB,KAAK;AACzC,QAAM,WAAW,oBAAoB,UAAU;AAG/C,QAAM,SAAS,MAAM,YAAY,KAAK,QAAQ;AAC9C,QAAM,aAAa,OAAO,aAAa;AAEvC,QAAM,SAAmB,CAAC;AAE1B,aAAW,CAAC,WAAW,OAAO,KAAK,YAAY;AAE7C,QAAI,YAAY,KAAK,UAAU,cAAc,YAAY,SAAS;AAChE,YAAM,IAAI;AAAA,QACR,uBAAuB,SAAS,KAAK,OAAO,kBAAkB,UAAU;AAAA,MAE1E;AAAA,IACF;AAGA,UAAM,WAAW,MAAM,YAAY,OAAO;AAC1C,UAAM,cAAc,MAAM;AAAA,MACxB,EAAE,QAAQ,UAAU,YAAY,EAAE;AAAA,MAClC,CAAC,GAAG,MAAM,YAAY,IAAI;AAAA;AAAA,IAC5B;AAEA,UAAM,cAAc,MAAM,SAAS,UAAU,QAAQ,WAAW;AAChE,gBAAY,QAAQ,UAAQ,SAAS,QAAQ,IAAI,CAAC;AAGlD,UAAM,aAAa,MAAM,SAAS,KAAK;AACvC,UAAM,cAAc,mBAAmB,UAAU;AACjD,WAAO,KAAK,+BAA+B,WAAW,EAAE;AAAA,EAC1D;AAEA,SAAO;AACT;AAsBO,SAAS,qBAAqB,IAAwB;AAE3D,MAAI,GAAG,QAAQ,cAAc,QAAW;AACtC,WAAO,GAAG,OAAO;AAAA,EACnB;AAGA,SAAO,GAAG,MAAM;AAClB;AAqBO,SAAS,kBAAkB,SAA+B;AAC/D,SAAO,QAAQ,OAAO,CAAC,KAAK,OAAO,MAAM,qBAAqB,EAAE,GAAG,CAAC;AACtE;AAqBO,SAAS,qBAAqB,IAiBnC;AACA,QAAM,YAAY,GAAG,MAAM;AAC3B,QAAM,YAAY,GAAG,QAAQ,aAAa;AAC1
C,QAAM,qBAAqB,GAAG,QAAQ,wBAAwB;AAC9D,QAAM,YAAY,GAAG,QAAQ,eAAe,UAAa,GAAG,QAAQ,gBAAgB;AAEpF,SAAO;AAAA,IACL;AAAA,IACA;AAAA,IACA;AAAA,IACA,YAAY,GAAG,QAAQ;AAAA,IACvB,aAAa,GAAG,QAAQ;AAAA,IACxB,WAAW,GAAG,QAAQ;AAAA,IACtB,qBAAqB,GAAG,QAAQ;AAAA,IAChC;AAAA,EACF;AACF;","names":[]}

package/dist/{validation-BQO54qAY.d.ts → validation-C_RN-Xqr.d.ts} RENAMED Viewed

@@ -148,14 +148,20 @@ type MultimodalInput = {
         base64?: string;
         fileId?: string;
     }>;
+    /** Optional system prompt (text-only, prepended to conversation) */
+    systemPrompt?: string;
 };
+/** Effort level type for reasoning configuration */
+type ReasoningEffort = 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
 /** Reasoning configuration (normalized across providers) */
 type ReasoningConfig = {
-    /** Reasoning effort level: low (20% budget), medium (50%), high (80%) */
-    effort?: 'low' | 'medium' | 'high';
+    /** Effort level - normalized across providers (xhigh: 95%, high: 80%, medium: 50%, low: 20%, minimal: 10%, none: 0%) */
+    effort?: ReasoningEffort;
+    /** Direct token budget - used by Anthropic/Google/Qwen models */
+    max_tokens?: number;
     /** Exclude reasoning tokens from response (only use for accuracy, not visible) */
     exclude?: boolean;
-    /** Enable reasoning with default (medium) effort */
+    /** Enable reasoning with default (medium) effort. Set to false to explicitly disable. */
     enabled?: boolean;
 };
 /** Base LLM provider (text-only) */
@@ -179,6 +185,14 @@ type LLMProvider = {
         cacheReadInputTokens?: number;
     }>;
 };
+/** Text completion response (for non-JSON outputs like JSX/code) */
+type TextResponse = {
+    text: string;
+    rawText?: string;
+    inputTokens?: number;
+    outputTokens?: number;
+    costUSD?: number;
+};
 /** Vision-capable LLM provider */
 type VLMProvider = {
     /** Full 3-layer identity (provider/model/method) */
@@ -199,6 +213,15 @@ type VLMProvider = {
         cacheCreationInputTokens?: number;
         cacheReadInputTokens?: number;
     }>;
+    /**
+     * Complete a text prompt without JSON mode (optional).
+     * Use this when you need raw text output (JSX, code, markdown, etc.)
+     */
+    completeText?: (input: {
+        input: MultimodalInput;
+        max_tokens?: number;
+        reasoning?: ReasoningConfig;
+    }) => Promise<TextResponse>;
     capabilities: {
         supportsImages: true;
         supportsPDFs: boolean;
@@ -584,7 +607,8 @@ type ParseNodeConfig = {
      * ```
      */
     reasoning?: {
-        effort?: 'low' | 'medium' | 'high';
+        effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
+        max_tokens?: number;
         exclude?: boolean;
         enabled?: boolean;
     };
@@ -637,7 +661,8 @@ type SplitNodeConfig = {
      * ```
      */
     reasoning?: {
-        effort?: 'low' | 'medium' | 'high';
+        effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
+        max_tokens?: number;
         exclude?: boolean;
         enabled?: boolean;
     };
@@ -694,7 +719,8 @@ type CategorizeNodeConfig = {
      * ```
      */
     reasoning?: {
-        effort?: 'low' | 'medium' | 'high';
+        effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
+        max_tokens?: number;
         exclude?: boolean;
         enabled?: boolean;
     };
@@ -724,7 +750,8 @@ type ExtractNodeConfig<T = any> = {
     };
     consensus?: ConsensusConfig;
     reasoning?: {
-        effort?: 'low' | 'medium' | 'high';
+        effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
+        max_tokens?: number;
         exclude?: boolean;
         enabled?: boolean;
     };
@@ -1295,4 +1322,4 @@ declare const RESERVED_VARIABLES: {
  */
 declare function protectReservedVariables(nodeType: 'extract' | 'categorize' | 'parse', userVariables: Record<string, any> | undefined, autoInjectedVariables: Record<string, any>): Record<string, any>;
-export { type SegmentationResult as $, type AccessMethod as A, type BBox as B, type ConsensusConfig as C, type DocumentIR as D, type ExtractNodeConfig as E, type FieldVotingDetails as F, type AggregatedMetrics as G, type FlowContext as H, type IRLine as I, type NodeCtx as J, type NodeTypeInfo as K, type LLMProvider as L, type MultimodalInput as M, type NormalizedBBox as N, type OCRProvider as O, type ProviderVendor as P, type NodeDef as Q, type ReasoningConfig as R, type SplitDocument as S, type NodeTypeName as T, type CompatibilityRule as U, type VLMProvider as V, type ValidationResult as W, type JSONSchemaNode as X, type ProcessingMode as Y, type PageRangeOptions as Z, type LanguageOptions as _, type IRPage as a, type ExtractedImage as a0, type OCRProviderOptions as a1, type VLMProviderOptions as a2, type ProviderCitation as a3, type FlowStepLocation as a4, aggregateMetrics as a5, node as a6, runPipeline as a7, FlowExecutionError as a8, FlowValidationError as a9, NODE_COMPATIBILITY_MATRIX as aa, getNodeTypeName as ab, getNodeTypeInfo as ac, getCompatibleTargets as ad, getSuggestedConnections as ae, validateNodeConnection as af, getValidForEachStarters as ag, canStartForEachItemFlow as ah, validateJson as ai, RESERVED_VARIABLES as aj, protectReservedVariables as ak, extractErrorMessage as al, type ProviderIdentity as am, toProviderString as an, parseProviderString as ao, isLocalEndpoint as ap, createIdentity as aq, type OutputFormat as ar, type TableFormat as as, type ChunkingStrategy as at, type LLMDerivedOptions as au, type SupportedMimeType as av, type TraceContextLite as aw, type NodeObservabilityContext as ax, type DocumentIRExtras as b, type LLMJsonProvider as c, type ConsensusRunResult as d, type ConsensusMetadata as e, type OutputWithConsensus as f, type MaybeWithConsensusMetadata as g, type FlowInput as h, type FlowInputValidation as i, type FlowResult as j, type CitationSourceType as k, type LineCitation as l, type FieldCitation as m, type 
CitationConfig as n, type OutputWithCitations as o, type ParseNodeConfig as p, type SplitNodeConfig as q, type CategorizeNodeConfig as r, type ExtractInputMode as s, type ChunkMetadata as t, type ChunkOutput as u, type ChunkNodeConfig as v, type CombineNodeConfig as w, type OutputNodeConfig as x, type EnhancedExtractionSchema as y, type StepMetric as z };
+export { type LanguageOptions as $, type AccessMethod as A, type BBox as B, type ConsensusConfig as C, type DocumentIR as D, type ExtractNodeConfig as E, type FieldVotingDetails as F, type AggregatedMetrics as G, type FlowContext as H, type IRLine as I, type NodeCtx as J, type NodeTypeInfo as K, type LLMProvider as L, type MultimodalInput as M, type NormalizedBBox as N, type OCRProvider as O, type ProviderVendor as P, type NodeDef as Q, type ReasoningConfig as R, type SplitDocument as S, type TextResponse as T, type NodeTypeName as U, type VLMProvider as V, type CompatibilityRule as W, type ValidationResult as X, type JSONSchemaNode as Y, type ProcessingMode as Z, type PageRangeOptions as _, type IRPage as a, type SegmentationResult as a0, type ExtractedImage as a1, type OCRProviderOptions as a2, type VLMProviderOptions as a3, type ProviderCitation as a4, type FlowStepLocation as a5, aggregateMetrics as a6, node as a7, runPipeline as a8, FlowExecutionError as a9, FlowValidationError as aa, NODE_COMPATIBILITY_MATRIX as ab, getNodeTypeName as ac, getNodeTypeInfo as ad, getCompatibleTargets as ae, getSuggestedConnections as af, validateNodeConnection as ag, getValidForEachStarters as ah, canStartForEachItemFlow as ai, validateJson as aj, RESERVED_VARIABLES as ak, protectReservedVariables as al, extractErrorMessage as am, type ProviderIdentity as an, toProviderString as ao, parseProviderString as ap, isLocalEndpoint as aq, createIdentity as ar, type ReasoningEffort as as, type OutputFormat as at, type TableFormat as au, type ChunkingStrategy as av, type LLMDerivedOptions as aw, type SupportedMimeType as ax, type TraceContextLite as ay, type NodeObservabilityContext as az, type DocumentIRExtras as b, type LLMJsonProvider as c, type ConsensusRunResult as d, type ConsensusMetadata as e, type OutputWithConsensus as f, type MaybeWithConsensusMetadata as g, type FlowInput as h, type FlowInputValidation as i, type FlowResult as j, type CitationSourceType as k, type 
LineCitation as l, type FieldCitation as m, type CitationConfig as n, type OutputWithCitations as o, type ParseNodeConfig as p, type SplitNodeConfig as q, type CategorizeNodeConfig as r, type ExtractInputMode as s, type ChunkMetadata as t, type ChunkOutput as u, type ChunkNodeConfig as v, type CombineNodeConfig as w, type OutputNodeConfig as x, type EnhancedExtractionSchema as y, type StepMetric as z };

package/dist/validation.d.ts CHANGED Viewed

	@@ -1 +1 @@
1	- export { U as CompatibilityRule, a9 as FlowValidationError, aa as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, K as NodeTypeInfo, T as NodeTypeName, W as ValidationResult, ah as canStartForEachItemFlow, ad as getCompatibleTargets, ac as getNodeTypeInfo, ab as getNodeTypeName, ae as getSuggestedConnections, ag as getValidForEachStarters, af as validateNodeConnection } from './validation-BQO54qAY.js';
1	+ export { W as CompatibilityRule, aa as FlowValidationError, ab as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, K as NodeTypeInfo, U as NodeTypeName, X as ValidationResult, ai as canStartForEachItemFlow, ae as getCompatibleTargets, ad as getNodeTypeInfo, ac as getNodeTypeName, af as getSuggestedConnections, ah as getValidForEachStarters, ag as validateNodeConnection } from './validation-C_RN-Xqr.js';