@doclo/core 0.2.2 → 0.2.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1 +1 @@
1
- export { G as AggregatedMetrics, B as BBox, r as CategorizeNodeConfig, t as ChunkMetadata, v as ChunkNodeConfig, u as ChunkOutput, at as ChunkingStrategy, n as CitationConfig, k as CitationSourceType, w as CombineNodeConfig, U as CompatibilityRule, C as ConsensusConfig, e as ConsensusMetadata, d as ConsensusRunResult, D as DocumentIR, b as DocumentIRExtras, y as EnhancedExtractionSchema, s as ExtractInputMode, E as ExtractNodeConfig, a0 as ExtractedImage, m as FieldCitation, F as FieldVotingDetails, H as FlowContext, a8 as FlowExecutionError, h as FlowInput, i as FlowInputValidation, j as FlowResult, a4 as FlowStepLocation, a9 as FlowValidationError, I as IRLine, a as IRPage, X as JSONSchemaNode, au as LLMDerivedOptions, c as LLMJsonProvider, L as LLMProvider, _ as LanguageOptions, l as LineCitation, g as MaybeWithConsensusMetadata, M as MultimodalInput, aa as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, ax as NodeObservabilityContext, K as NodeTypeInfo, T as NodeTypeName, N as NormalizedBBox, O as OCRProvider, a1 as OCRProviderOptions, ar as OutputFormat, x as OutputNodeConfig, o as OutputWithCitations, f as OutputWithConsensus, Z as PageRangeOptions, p as ParseNodeConfig, Y as ProcessingMode, a3 as ProviderCitation, aj as RESERVED_VARIABLES, R as ReasoningConfig, $ as SegmentationResult, S as SplitDocument, q as SplitNodeConfig, z as StepMetric, av as SupportedMimeType, as as TableFormat, aw as TraceContextLite, V as VLMProvider, a2 as VLMProviderOptions, W as ValidationResult, a5 as aggregateMetrics, ah as canStartForEachItemFlow, al as extractErrorMessage, ad as getCompatibleTargets, ac as getNodeTypeInfo, ab as getNodeTypeName, ae as getSuggestedConnections, ag as getValidForEachStarters, a6 as node, ak as protectReservedVariables, a7 as runPipeline, ai as validateJson, af as validateNodeConnection } from '../validation-BQO54qAY.js';
1
+ export { G as AggregatedMetrics, B as BBox, r as CategorizeNodeConfig, t as ChunkMetadata, v as ChunkNodeConfig, u as ChunkOutput, av as ChunkingStrategy, n as CitationConfig, k as CitationSourceType, w as CombineNodeConfig, W as CompatibilityRule, C as ConsensusConfig, e as ConsensusMetadata, d as ConsensusRunResult, D as DocumentIR, b as DocumentIRExtras, y as EnhancedExtractionSchema, s as ExtractInputMode, E as ExtractNodeConfig, a1 as ExtractedImage, m as FieldCitation, F as FieldVotingDetails, H as FlowContext, a9 as FlowExecutionError, h as FlowInput, i as FlowInputValidation, j as FlowResult, a5 as FlowStepLocation, aa as FlowValidationError, I as IRLine, a as IRPage, Y as JSONSchemaNode, aw as LLMDerivedOptions, c as LLMJsonProvider, L as LLMProvider, $ as LanguageOptions, l as LineCitation, g as MaybeWithConsensusMetadata, M as MultimodalInput, ab as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, az as NodeObservabilityContext, K as NodeTypeInfo, U as NodeTypeName, N as NormalizedBBox, O as OCRProvider, a2 as OCRProviderOptions, at as OutputFormat, x as OutputNodeConfig, o as OutputWithCitations, f as OutputWithConsensus, _ as PageRangeOptions, p as ParseNodeConfig, Z as ProcessingMode, a4 as ProviderCitation, ak as RESERVED_VARIABLES, R as ReasoningConfig, as as ReasoningEffort, a0 as SegmentationResult, S as SplitDocument, q as SplitNodeConfig, z as StepMetric, ax as SupportedMimeType, au as TableFormat, T as TextResponse, ay as TraceContextLite, V as VLMProvider, a3 as VLMProviderOptions, X as ValidationResult, a6 as aggregateMetrics, ai as canStartForEachItemFlow, am as extractErrorMessage, ae as getCompatibleTargets, ad as getNodeTypeInfo, ac as getNodeTypeName, af as getSuggestedConnections, ah as getValidForEachStarters, a7 as node, al as protectReservedVariables, a8 as runPipeline, aj as validateJson, ag as validateNodeConnection } from '../validation-C_RN-Xqr.js';
@@ -610,7 +610,7 @@ function validateJson(data, schema) {
610
610
  const dangerousProps = ["__proto__", "constructor", "prototype"];
611
611
  if (schema2.additionalProperties === false && schema2.properties) {
612
612
  const allowedProps = Object.keys(schema2.properties);
613
- const requiredProps = schema2.required || [];
613
+ const requiredProps = Array.isArray(schema2.required) ? schema2.required : [];
614
614
  const allAllowedProps = /* @__PURE__ */ new Set([...allowedProps, ...requiredProps]);
615
615
  for (const key of [...Object.keys(value), ...Object.getOwnPropertyNames(value)]) {
616
616
  if (dangerousProps.includes(key)) {
@@ -1 +1 @@
1
- {"version":3,"sources":["../../src/internal/validation-utils.ts"],"sourcesContent":["/**\n * Browser-safe validation utilities\n *\n * This module contains all validation code with ZERO Node.js dependencies.\n * It can be safely bundled for browser environments.\n */\n\n// Edge Runtime compatible - no AJV dependency\n\n// Re-export all types and constants from the validation section of index.ts\n// This file has NO fs imports and is completely browser-safe\n\n/** Page-centric IR */\nexport type BBox = { x: number; y: number; w: number; h: number };\nexport type IRLine = {\n text: string;\n bbox?: BBox;\n startChar?: number; // Character offset in full document text\n endChar?: number; // Character offset in full document text\n lineId?: string; // Unique line identifier (e.g., \"p1_l5\" for page 1, line 5)\n};\nexport type IRPage = {\n pageNumber?: number; // Explicit 1-indexed page number (for chunked documents)\n width: number;\n height: number;\n lines: IRLine[];\n markdown?: string; // Rich markdown preserving layout (tables, headers, lists)\n html?: string; // Rich HTML preserving layout (tables, headers, lists)\n extras?: Record<string, unknown>\n};\n\n/** Standard extras fields for DocumentIR */\nexport type DocumentIRExtras = {\n /** Total number of pages in the original document (for PDFs, DOCX, etc.) 
*/\n pageCount?: number;\n /** Cost in USD for processing this document */\n costUSD?: number;\n /** Provider-specific raw response */\n raw?: unknown;\n /** For chunked documents: which chunk this is (0-indexed) */\n chunkIndex?: number;\n /** For chunked documents: total number of chunks */\n totalChunks?: number;\n /** For chunked documents: page range [startPage, endPage] (1-indexed, inclusive) */\n pageRange?: [number, number];\n /** For Unsiloed: total semantic chunks (not traditional pages) */\n totalSemanticChunks?: number;\n /** Allow arbitrary additional fields */\n [key: string]: unknown;\n};\n\nexport type DocumentIR = {\n pages: IRPage[];\n extras?: DocumentIRExtras;\n};\n\n/** Provider identity for 3-layer hierarchy (provider/model/method) */\nimport type { ProviderIdentity } from '../provider-identity.js';\n\n/** Provider capability contracts */\nexport type OCRProvider = {\n /** Full 3-layer identity (provider/model/method) */\n identity?: ProviderIdentity;\n /** Canonical name in \"provider:model\" format */\n name: string;\n parseToIR: (input: { url?: string; base64?: string }) => Promise<DocumentIR>;\n};\n\n/** Multimodal input for VLM providers */\nexport type MultimodalInput = {\n text?: string;\n images?: Array<{ url?: string; base64?: string; mimeType: string }>;\n pdfs?: Array<{ url?: string; base64?: string; fileId?: string }>;\n};\n\n/** Reasoning configuration (normalized across providers) */\nexport type ReasoningConfig = {\n /** Reasoning effort level: low (20% budget), medium (50%), high (80%) */\n effort?: 'low' | 'medium' | 'high';\n /** Exclude reasoning tokens from response (only use for accuracy, not visible) */\n exclude?: boolean;\n /** Enable reasoning with default (medium) effort */\n enabled?: boolean;\n};\n\n/** Base LLM provider (text-only) */\nexport type LLMProvider = {\n /** Full 3-layer identity (provider/model/method) */\n identity?: ProviderIdentity;\n /** Canonical name in \"provider:model\" format */\n name: 
string;\n completeJson: (input: { prompt: string; schema: object; max_tokens?: number; reasoning?: ReasoningConfig }) =>\n Promise<{ json: unknown; rawText?: string; costUSD?: number; inputTokens?: number; outputTokens?: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number }>;\n};\n\n/** Vision-capable LLM provider */\nexport type VLMProvider = {\n /** Full 3-layer identity (provider/model/method) */\n identity?: ProviderIdentity;\n /** Canonical name in \"provider:model\" format */\n name: string;\n completeJson: (input: { prompt: string | MultimodalInput; schema: object; max_tokens?: number; reasoning?: ReasoningConfig }) =>\n Promise<{ json: unknown; rawText?: string; costUSD?: number; inputTokens?: number; outputTokens?: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number }>;\n capabilities: {\n supportsImages: true;\n supportsPDFs: boolean;\n maxPDFPages?: number;\n };\n};\n\n/** Legacy alias for backward compatibility */\nexport type LLMJsonProvider = VLMProvider;\n\n// ============================================================================\n// Processing Options - Normalized types for provider-agnostic configuration\n// ============================================================================\n\n/**\n * Processing quality/speed tradeoff modes\n * Providers map their specific modes to these normalized values\n */\nexport type ProcessingMode = 'fast' | 'balanced' | 'high_accuracy';\n\n/**\n * Page range specification for partial document processing\n * Allows processing a subset of pages for cost savings\n */\nexport type PageRangeOptions = {\n /** Process only the first N pages */\n maxPages?: number;\n /** Specific page range (0-indexed), e.g., \"0,2-4,10\" */\n pageRange?: string;\n};\n\n/**\n * Language hints for OCR processing\n */\nexport type LanguageOptions = {\n /** ISO language codes for OCR, e.g., ['en', 'de', 'fr'] */\n langs?: string[];\n};\n\n/**\n * Document segmentation result for splitting 
\"stapled\" PDFs\n * Returns page boundaries for each detected document type\n */\nexport type SegmentationResult = {\n segments: Array<{\n /** Document type name (e.g., 'invoice', 'contract') */\n name: string;\n /** Page indices (0-indexed) belonging to this segment */\n pages: number[];\n /** Confidence level of segmentation */\n confidence: 'high' | 'medium' | 'low';\n }>;\n metadata: {\n /** Total pages in the original document */\n totalPages: number;\n /** How segmentation was performed */\n segmentationMethod: 'auto' | 'schema' | 'manual';\n };\n};\n\n/**\n * Extracted image from a document\n * Represents figures, charts, or embedded images\n */\nexport type ExtractedImage = {\n /** Block ID or reference (provider-specific) */\n id: string;\n /** Page number where image appears (0-indexed) */\n pageNumber: number;\n /** Base64-encoded image data */\n base64: string;\n /** MIME type of the image */\n mimeType: string;\n /** Location on page (normalized 0-1 coordinates) */\n bbox?: NormalizedBBox;\n /** Caption text if detected */\n caption?: string;\n};\n\n/**\n * Extended OCR provider options (beyond basic parseToIR)\n * These options are normalized across different OCR providers\n */\nexport type OCRProviderOptions = PageRangeOptions & LanguageOptions & {\n /** Processing quality/speed tradeoff */\n mode?: ProcessingMode;\n /** Force OCR even on text-based PDFs */\n forceOCR?: boolean;\n /** Extract embedded images from document */\n extractImages?: boolean;\n /** Add page delimiters to output */\n paginate?: boolean;\n /** Remove and redo existing OCR */\n stripExistingOCR?: boolean;\n};\n\n/**\n * Output format options for LLM-based text fields\n * Controls how text content is formatted in the response\n */\nexport type OutputFormat = 'markdown' | 'html' | 'json' | 'text';\n\n/**\n * Table format options for tabular data in responses\n */\nexport type TableFormat = 'markdown' | 'html' | 'csv';\n\n/**\n * Chunking strategy options for document 
segmentation\n */\nexport type ChunkingStrategy = 'page' | 'section' | 'paragraph' | 'semantic';\n\n/**\n * LLM-derived feature options\n * These features are implemented via prompting rather than native API support\n */\nexport type LLMDerivedOptions = {\n /** Format for text output in string fields */\n outputFormat?: OutputFormat;\n /** Format for tables within text fields */\n tableFormat?: TableFormat;\n /** Add page break markers (---) between pages */\n pageMarkers?: boolean;\n /** Include per-field confidence scores (attached to result, not in JSON) */\n includeConfidence?: boolean;\n /** Include source citations with bounding boxes (attached to result, not in JSON) */\n includeSources?: boolean;\n /** Include block type classification for each extracted element */\n includeBlockTypes?: boolean;\n /** Extract document headers (repeated content at top of pages) */\n extractHeaders?: boolean;\n /** Extract document footers (repeated content at bottom of pages) */\n extractFooters?: boolean;\n /** Document chunking strategy */\n chunkingStrategy?: ChunkingStrategy;\n /** Maximum chunk size in characters (when using chunking) */\n maxChunkSize?: number;\n /** Language hints for the document (e.g., ['English', 'German']) */\n languageHints?: string[];\n};\n\n/**\n * Extended VLM provider options for document extraction\n * These options are normalized across different VLM providers\n */\nexport type VLMProviderOptions = PageRangeOptions & LanguageOptions & LLMDerivedOptions & {\n /** Processing quality/speed tradeoff */\n mode?: ProcessingMode;\n /** Force OCR even on text-based PDFs */\n forceOCR?: boolean;\n /** Additional prompt/instructions for extraction */\n prompt?: string;\n /** Schema for auto-segmentation of multi-document PDFs */\n segmentationSchema?: object;\n};\n\n/**\n * Provider citation from source document\n * Maps extracted fields to their source locations\n */\nexport type ProviderCitation = {\n /** JSON path to extracted field (e.g., 
\"invoice.total\") */\n fieldPath: string;\n /** Source block IDs from the provider */\n blockIds: string[];\n /** Confidence score (0-1) */\n confidence?: number;\n};\n\n/** Consensus configuration for any node */\nexport type ConsensusConfig = {\n runs: number; // Number of times to run\n strategy?: 'majority' | 'unanimous'; // Default: majority\n onTie?: 'random' | 'fail' | 'retry'; // Default: random\n parallel?: boolean; // Run consensus in parallel (default: true)\n includeMetadata?: boolean; // Include detailed consensus metadata (default: false)\n level?: 'object' | 'field'; // Voting level: object (default) or per-field\n retryOnFailure?: boolean; // Retry failed/empty runs (default: false)\n maxRetries?: number; // Max retries per run (default: 1)\n};\n\n/** Individual consensus run result */\nexport type ConsensusRunResult<T = any> = {\n runIndex: number;\n value: T | null;\n success: boolean;\n error?: string;\n startTime: number;\n endTime: number;\n duration: number;\n attempts?: number; // Number of attempts (1 = no retry, >1 = retried)\n};\n\n/** Field-level voting details */\nexport type FieldVotingDetails = {\n fieldPath: string;\n values: Array<{\n /** The actual value for this voting option - can be any JSON-serializable type */\n value: unknown;\n count: number;\n percentage: number;\n runIndices: number[];\n }>;\n /** The winning value from consensus - can be any JSON-serializable type */\n winner: unknown;\n isTie: boolean;\n agreementScore: number; // 0.0 to 1.0\n};\n\n/** Consensus execution metadata */\nexport type ConsensusMetadata<T = unknown> = {\n totalRuns: number;\n successfulRuns: number;\n failedRuns: number;\n strategy: 'majority' | 'unanimous';\n selectedResult: T;\n selectedRunIndex: number;\n confidence: 'high' | 'medium' | 'low';\n overallAgreement: number; // 0.0 to 1.0\n fieldAgreement: Record<string, number>; // Field path -> agreement score\n votingDetails: FieldVotingDetails[];\n runs: ConsensusRunResult<T>[];\n 
executionTime: number;\n wasRetry: boolean;\n tieBreakerUsed?: 'random' | 'retry' | 'fail' | null;\n // New fields for enhanced consensus features\n votingLevel?: 'object' | 'field';\n isSyntheticResult?: boolean; // true if field-level voting composed a new object\n totalRetries?: number; // Total retry attempts across all runs\n emptyResultsFiltered?: number; // Number of empty results filtered out\n};\n\n/** Output with consensus metadata wrapper */\nexport type OutputWithConsensus<T = unknown> = {\n data: T;\n consensus: ConsensusMetadata<T>;\n};\n\n/** Conditional type helper for consensus metadata */\nexport type MaybeWithConsensusMetadata<T, Config> = Config extends { includeMetadata: true }\n ? OutputWithConsensus<T>\n : T;\n\n/** Flow input/output types */\nexport type FlowInput = {\n url?: string;\n base64?: string;\n pages?: number[]; // For post-split runs\n bounds?: BBox; // For post-split runs\n};\n\n/**\n * All MIME types supported by at least one provider.\n * This is the union of all provider capabilities.\n */\nexport type SupportedMimeType =\n // PDF\n | 'application/pdf'\n // Images - common\n | 'image/jpeg'\n | 'image/png'\n | 'image/gif'\n | 'image/webp'\n // Images - additional\n | 'image/tiff'\n | 'image/bmp'\n | 'image/heic'\n | 'image/heif'\n | 'image/vnd.adobe.photoshop' // PSD\n // Microsoft Office\n | 'application/msword' // DOC\n | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' // DOCX\n | 'application/vnd.ms-excel' // XLS\n | 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' // XLSX\n | 'application/vnd.ms-powerpoint' // PPT\n | 'application/vnd.openxmlformats-officedocument.presentationml.presentation' // PPTX\n // OpenDocument formats (Datalab)\n | 'application/vnd.oasis.opendocument.text' // ODT\n | 'application/vnd.oasis.opendocument.spreadsheet' // ODS\n | 'application/vnd.oasis.opendocument.presentation' // ODP\n // Text formats\n | 'text/plain' // TXT\n | 'text/csv' // CSV\n | 
'text/html' // HTML\n | 'application/rtf' // RTF\n // Other\n | 'application/epub+zip'; // EPUB\n\n/**\n * Flow-level input validation configuration\n *\n * Allows specifying accepted MIME types for early validation\n * before flow execution begins.\n */\nexport type FlowInputValidation = {\n /**\n * List of accepted MIME types.\n * If specified, input must match one of these types or validation fails.\n * If empty/undefined, all supported types are accepted.\n */\n acceptedFormats?: SupportedMimeType[];\n /**\n * Whether to throw on validation failure.\n * @default true\n */\n throwOnInvalid?: boolean;\n};\n\nexport type FlowResult<T = any> = {\n output: T;\n metrics: StepMetric[];\n aggregated: AggregatedMetrics;\n artifacts: Record<string, any>;\n error?: Error;\n};\n\nexport type SplitDocument = {\n type: string; // 'invoice', 'bunker', 'other'\n schema?: object; // Matched schema (optional - only present when schemas provided)\n pages: number[]; // Page numbers\n bounds?: BBox; // Bounding box\n input: FlowInput; // Original input for re-processing\n};\n\n/** Citation and source tracking types */\n\n/** Citation source type indicating data provenance */\nexport type CitationSourceType = 'ocr' | 'vlm' | 'llm' | 'inferred';\n\n/** Normalized bounding box (0-1 coordinates relative to page dimensions) */\nexport type NormalizedBBox = {\n x: number; // Left edge (0-1)\n y: number; // Top edge (0-1)\n w: number; // Width (0-1)\n h: number; // Height (0-1)\n};\n\n/** Line-level citation reference with spatial information */\nexport type LineCitation = {\n pageNumber: number; // 1-indexed page number\n lineIndex: number; // 0-indexed line position on page\n bbox?: NormalizedBBox; // Normalized bounding box (0-1 coordinates)\n text: string; // Text snippet for verification\n confidence?: number; // 0-1 confidence score\n sourceType: CitationSourceType;\n startChar?: number; // Character offset in full document\n endChar?: number; // Character offset in full 
document\n};\n\n/** Field-level citation mapping extracted values to sources */\nexport type FieldCitation = {\n fieldPath: string; // JSON path to field (e.g., \"invoice.lineItems[0].amount\")\n /** Extracted value - can be any JSON-serializable type */\n value: unknown;\n citations: LineCitation[]; // Source lines supporting this value\n reasoning?: string; // LLM explanation for inferred values\n confidence?: number; // Overall confidence (0-1)\n};\n\n/** Citation configuration for nodes */\nexport type CitationConfig = {\n enabled: boolean; // Enable citation tracking (default: false)\n includeTextSnippets?: boolean; // Include text snippets in citations (default: true)\n includeBoundingBoxes?: boolean; // Include bboxes when available (default: true)\n includeConfidence?: boolean; // Include confidence scores (default: true)\n minConfidence?: number; // Minimum confidence threshold (0-1, default: 0.0)\n detectInferred?: boolean; // Use LLM to detect inferred values (default: false)\n};\n\n/** Extended output with citations */\nexport type OutputWithCitations<T> = {\n data: T; // Extracted data\n citations: FieldCitation[]; // Field-level citations\n metadata: {\n totalPages?: number; // Total pages processed\n sourceType: CitationSourceType; // Primary source type\n hasInferredValues?: boolean; // Whether any values were inferred\n processingTime?: number; // Processing time in ms\n };\n};\n\n/** Node configuration types */\nexport type ParseNodeConfig = {\n provider: OCRProvider | VLMProvider;\n consensus?: ConsensusConfig;\n chunked?: {\n maxPagesPerChunk: number;\n overlap?: number; // Default: 0\n parallel?: boolean; // Default: true - process chunks in parallel for speed\n };\n format?: 'text' | 'markdown' | 'html'; // Output format: text (default, line-level citations), markdown/html (page-level citations, preserves structure)\n describeFigures?: boolean; // When true, VLM providers describe charts/figures/diagrams in text. 
Default: false\n includeImages?: boolean; // When true, providers extract images (figures/tables/charts) from documents. Supported by Surya/Marker. Default: false\n additionalPrompt?: string; // Custom OCR guidance or instructions\n citations?: CitationConfig; // Citation tracking config\n\n // NEW: Prompt asset support\n promptRef?: string; // Reference to prompt asset (e.g., \"default-parse@1.0.0\")\n /**\n * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).\n *\n * Auto-injected variables (no need to pass manually):\n * - format: From config.format\n * - schema: Constructed schema (if applicable)\n * - describeFigures: From config.describeFigures\n * - citationsEnabled: From config.citations?.enabled\n *\n * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).\n */\n promptVariables?: Record<string, any>;\n\n /**\n * Additional instructions to append to the default prompt.\n * This provides a simple way to customize the prompt without creating a custom prompt asset.\n * The instructions will be added after the main prompt content.\n *\n * @example\n * ```typescript\n * parse({\n * provider: vlmProvider,\n * format: 'markdown',\n * additionalInstructions: \"Pay special attention to preserving table structures and footnotes.\"\n * })\n * ```\n */\n additionalInstructions?: string;\n\n /**\n * When using promptRef, automatically inject format instruction if {{format}} placeholder is not present.\n * This ensures the UI format selection always takes effect.\n * Default: true\n *\n * @example\n * ```typescript\n * parse({\n * provider: vlmProvider,\n * format: 'markdown',\n * promptRef: 'my-custom-prompt',\n * autoInjectFormat: false // Disable auto-injection\n * })\n * ```\n */\n autoInjectFormat?: boolean;\n\n /**\n * Enable extended reasoning/thinking for VLM providers that support it.\n * Only applies when using a VLM provider (not OCR).\n *\n * @example\n * ```typescript\n * parse({\n 
* provider: vlmProvider,\n * format: 'markdown',\n * reasoning: { enabled: true, effort: 'medium' }\n * })\n * ```\n */\n reasoning?: {\n effort?: 'low' | 'medium' | 'high';\n exclude?: boolean;\n enabled?: boolean;\n };\n\n /**\n * Maximum tokens for the LLM response.\n * If not specified, the provider's default will be used (typically 4096).\n */\n maxTokens?: number;\n};\n\nexport type SplitNodeConfig = {\n provider: VLMProvider;\n\n /**\n * Simple category definitions (recommended).\n * Each category can be a string or an object with name and optional description.\n *\n * @example\n * ```typescript\n * split({\n * provider: vlmProvider,\n * categories: [\n * 'invoice',\n * { name: 'cover_letter', description: 'Cover letter or transmittal pages' },\n * { name: 'contract', description: 'Legal agreements with terms and signatures' }\n * ]\n * })\n * ```\n */\n categories?: (string | { name: string; description?: string })[];\n\n /**\n * @deprecated Use `categories` instead. Full schema definitions for backwards compatibility.\n * Schema names are used as category names, but schemas are no longer attached to output.\n */\n schemas?: Record<string, object>; // { invoice: Schema, bunker: Schema }\n\n includeOther?: boolean; // Default: true\n consensus?: ConsensusConfig;\n schemaRef?: string; // Reference to schema asset (e.g., \"document-split@2.0.0\")\n\n /**\n * Enable extended reasoning/thinking for providers that support it.\n *\n * @example\n * ```typescript\n * split({\n * provider: vlmProvider,\n * categories: ['invoice', 'receipt', 'contract'],\n * reasoning: { enabled: true, effort: 'high' }\n * })\n * ```\n */\n reasoning?: {\n effort?: 'low' | 'medium' | 'high';\n exclude?: boolean;\n enabled?: boolean;\n };\n\n /**\n * Maximum tokens for the LLM response.\n * If not specified, the provider's default will be used (typically 4096).\n */\n maxTokens?: number;\n};\n\nexport type CategorizeNodeConfig = {\n provider: LLMProvider | VLMProvider;\n categories: 
(string | { name: string; description?: string })[];\n consensus?: ConsensusConfig;\n additionalPrompt?: string; // Custom categorization instructions\n\n // NEW: Prompt asset support\n promptRef?: string; // Reference to prompt asset (e.g., \"default-categorize@1.0.0\")\n /**\n * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).\n *\n * Auto-injected variables (no need to pass manually):\n * - categories: From config.categories\n * - documentText: Computed from DocumentIR input\n *\n * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).\n */\n promptVariables?: Record<string, any>;\n\n /**\n * Additional instructions to append to the default prompt.\n * This provides a simple way to customize the prompt without creating a custom prompt asset.\n * The instructions will be added after the main prompt content.\n *\n * @example\n * ```typescript\n * categorize({\n * provider: llmProvider,\n * categories: ['invoice', 'receipt', 'contract'],\n * additionalInstructions: \"Consider the document's header and footer when categorizing.\"\n * })\n * ```\n */\n additionalInstructions?: string;\n\n /**\n * Enable extended reasoning/thinking for providers that support it.\n *\n * @example\n * ```typescript\n * categorize({\n * provider: vlmProvider,\n * categories: ['invoice', 'receipt', 'contract'],\n * reasoning: { enabled: true, effort: 'low' }\n * })\n * ```\n */\n reasoning?: {\n effort?: 'low' | 'medium' | 'high';\n exclude?: boolean;\n enabled?: boolean;\n };\n\n /**\n * Maximum tokens for the LLM response.\n * If not specified, the provider's default will be used (typically 4096).\n */\n maxTokens?: number;\n};\n\n/**\n * Controls what inputs the extract node ingests.\n * - 'auto': Automatically detect input type and route appropriately (default)\n * - 'ir': Only DocumentIR from previous step (text-only extraction)\n * - 'ir+source': Both DocumentIR AND source document (multimodal with parsed 
text)\n * - 'source': Only raw source document (direct VLM extraction, no parsed text)\n *\n * Auto mode logic:\n * - If DocumentIR available AND source available AND VLM provider -> 'ir+source'\n * - If only DocumentIR available -> 'ir'\n * - If only FlowInput available AND VLM provider -> 'source'\n */\nexport type ExtractInputMode = 'auto' | 'ir' | 'ir+source' | 'source';\n\nexport type ExtractNodeConfig<T = any> = {\n provider: LLMProvider | VLMProvider;\n schema: object | EnhancedExtractionSchema<T> | { ref: string }; // Accept plain, enhanced, or reference\n consensus?: ConsensusConfig;\n reasoning?: {\n effort?: 'low' | 'medium' | 'high';\n exclude?: boolean;\n enabled?: boolean;\n };\n additionalPrompt?: string; // Custom extraction instructions (appended after schema)\n citations?: CitationConfig; // Citation tracking config\n\n // NEW: Prompt asset support\n promptRef?: string; // Reference to prompt asset (e.g., \"default-extraction@1.0.0\")\n /**\n * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).\n *\n * Auto-injected variables (no need to pass manually):\n * - schema: From config.schema\n * - documentText: Computed from DocumentIR or FlowInput\n * - schemaTitle: From schema.title or default \"the provided schema\"\n * - schemaDescription: From schema.description or empty string\n * - structuredFormat: Generated formatting instructions (for markdown/html)\n *\n * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).\n */\n promptVariables?: Record<string, any>;\n\n /**\n * Additional instructions to append to the default prompt.\n * This provides a simple way to customize the prompt without creating a custom prompt asset.\n * The instructions will be added after the main prompt content.\n *\n * @example\n * ```typescript\n * extract({\n * provider: llmProvider,\n * schema: mySchema,\n * additionalInstructions: \"Be strict with date formats. 
Use YYYY-MM-DD format only."
 * })
 * ```
 */
  additionalInstructions?: string;

  /**
   * Controls what inputs the extract node ingests.
   * - 'auto': Automatically detect input type and route appropriately (default)
   * - 'ir': Only DocumentIR from previous step (text-only extraction)
   * - 'ir+source': Both DocumentIR AND source document (multimodal with parsed text)
   * - 'source': Only raw source document (direct VLM extraction, no parsed text)
   * @default 'auto'
   */
  inputMode?: ExtractInputMode;

  /**
   * In split/forEach contexts, use the original unsplit document instead of the segment.
   * Only applies when inputMode includes source ('ir+source' or 'source').
   * @default false (uses split segment source)
   */
  useOriginalSource?: boolean;

  /**
   * When auto mode has both IR and source available with VLM provider:
   * - true: use 'ir+source' for maximum context (hybrid multimodal)
   * - false: use 'ir' for text-only extraction (lower cost)
   * Only applies when inputMode='auto'.
   * @default true
   */
  preferVisual?: boolean;

  /**
   * Maximum tokens for the LLM response.
   * If not specified, the provider's default will be used (typically 4096).
   */
  maxTokens?: number;
};

/** Chunk output structure */
export type ChunkMetadata = {
  // Core content
  content: string;
  id: string; // Unique chunk identifier

  // Position metadata
  index: number; // Chunk position in sequence
  startChar: number;
  endChar: number;

  // Document context
  pageNumbers: number[]; // Pages this chunk spans
  section?: string; // Section/chapter title
  headers?: string[]; // Hierarchy of headers above this chunk

  // Chunking metadata
  strategy: string; // Which strategy created this chunk
  tokenCount?: number; // For LLM context planning
  wordCount: number;
  charCount: number;
};

export type ChunkOutput = {
  chunks: ChunkMetadata[];
  totalChunks: number;
  averageChunkSize: number;
  sourceMetadata?: {
    providerType?: string; // 'ocr' | 'vlm' - original provider type
  };
  sourceDocument?: DocumentIR; // Original DocumentIR for citation mapping
};

export type ChunkNodeConfig = {
  strategy: 'recursive' | 'section' | 'page' | 'fixed';
  maxSize?: number; // Max characters per chunk (recursive, section)
  minSize?: number; // Min characters per chunk (default: 100)
  overlap?: number; // Character overlap between chunks (default: 0)
  separators?: string[]; // Hierarchical separators (recursive)
  pagesPerChunk?: number; // Pages per chunk (page strategy)
  combineShortPages?: boolean; // Combine short pages (page strategy)
  minPageContent?: number; // Min content length to keep page (page strategy)
  size?: number; // Fixed size for fixed strategy
  unit?: 'tokens' | 'characters'; // Unit for fixed strategy
};

export type CombineNodeConfig = {
  strategy: 'merge' | 'concatenate' | 'first' | 'last';
};

export type OutputNodeConfig = {
  source?: string | string[];
  transform?: 'first' | 'last' | 'merge' | 'pick' | 'custom';
  fields?: string[];
  name?: string;
  /**
   * Custom transform function for 'custom' transform mode.
   * @param inputs - The input value(s) from the source step(s)
   * @param artifacts - All artifacts from the flow execution
   * @returns The transformed output value
   */
  customTransform?: (inputs: unknown | unknown[], artifacts: Record<string, unknown>) => unknown;
};

/** Enhanced extraction schema with examples and guidance */
export type EnhancedExtractionSchema<T = unknown> = {
  // Core schema (JSON Schema or Zod schema)
  schema: object;

  // Optional extraction enhancements
  examples?: Array<{
    description: string; // Description of this example
    input: string; // Sample input text
    output: T; // Expected output matching schema
  }>;

  extractionRules?: string; // Extraction guidelines (e.g., "Focus on tables in appendix")
  contextPrompt?: string; // Document context (e.g., "This is a legal document")
  hints?: string[]; // Additional hints for the extractor
};

/** Node & runner */
export type StepMetric = {
  step: string;
  configStepId?: string; // Flow-level step ID for config lookups (schemaRef, promptRef)
  startMs: number; // Absolute timestamp when step started (Date.now())
  provider?: string;
  model?: string;
  ms: number; // Total duration; for wrappers with rollup=true, includes child work
  costUSD?: number;
  inputTokens?: number;
  outputTokens?: number;
  cacheCreationInputTokens?: number;
  cacheReadInputTokens?: number;
  attemptNumber?: number; // Retry attempt number (1 = first attempt, 2+ = retries)
  metadata?: {
    kind?: 'leaf' | 'wrapper' | 'prep'; // 'leaf' = actual LLM call, 'wrapper' = composite overhead, 'prep' = preparation step
    rollup?: boolean; // True if ms includes child work (for wrappers with children)
    overheadMs?: number; // Pure overhead time excluding child work (for wrappers with children)
    /** Additional metadata fields */
    [key: string]: string | number | boolean | undefined;
  };
};

/** Aggregated metrics for multi-step flows */
export interface AggregatedMetrics {
  totalDurationMs: number;
  totalCostUSD: number;
  totalInputTokens: number;
  totalOutputTokens: number;
  totalCacheCreationTokens: number;
  totalCacheReadTokens: number;
  stepCount: number;
  byProvider: Record<string, {
    costUSD: number;
    inputTokens: number;
    outputTokens: number;
    callCount: number;
  }>;
}

/**
 * Aggregate metrics from multiple steps
 * @param metrics - Array of step metrics
 * @returns Aggregated totals and per-provider breakdowns
 * @remarks Optional numeric fields (cost, token counts) are treated as 0 when
 * absent; steps without a provider are counted in totals but not in byProvider.
 */
export function aggregateMetrics(metrics: StepMetric[]): AggregatedMetrics {
  const byProvider: Record<string, {
    costUSD: number;
    inputTokens: number;
    outputTokens: number;
    callCount: number;
  }> = {};

  const result = metrics.reduce((acc, m) => {
    acc.totalDurationMs += m.ms;
    acc.totalCostUSD += m.costUSD || 0;
    acc.totalInputTokens += m.inputTokens || 0;
acc.totalOutputTokens += m.outputTokens || 0;
    acc.totalCacheCreationTokens += m.cacheCreationInputTokens || 0;
    acc.totalCacheReadTokens += m.cacheReadInputTokens || 0;

    // Group by provider
    if (m.provider) {
      if (!byProvider[m.provider]) {
        byProvider[m.provider] = { costUSD: 0, inputTokens: 0, outputTokens: 0, callCount: 0 };
      }
      byProvider[m.provider].costUSD += m.costUSD || 0;
      byProvider[m.provider].inputTokens += m.inputTokens || 0;
      byProvider[m.provider].outputTokens += m.outputTokens || 0;
      byProvider[m.provider].callCount += 1;
    }

    return acc;
  }, {
    totalDurationMs: 0,
    totalCostUSD: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheCreationTokens: 0,
    totalCacheReadTokens: 0,
    stepCount: metrics.length,
    byProvider
  });

  return result;
}

/**
 * Execution context passed to conditional functions and trigger nodes
 * Provides access to artifacts and metrics from all previous steps
 */
export interface FlowContext {
  /** Outputs from all completed steps, indexed by step ID */
  artifacts: Record<string, any>;
  /** Performance metrics from all completed steps */
  metrics: StepMetric[];
  /** Call stack for tracking nested flow execution (for circular dependency detection) */
  callStack?: string[];
  /** Maximum nesting depth for flow triggers (default: 10) */
  maxDepth?: number;
}

/**
 * W3C Trace Context for distributed tracing.
 * Compatible with observability module's TraceContext.
 */
export interface TraceContextLite {
  traceId: string;
  spanId: string;
  parentSpanId?: string;
  traceFlags: number; // W3C trace flags (0x01 = sampled), required for compatibility
  traceState?: string;
}

/**
 * Observability context passed to node executions.
 * Uses 'any' for config and traceContext to avoid circular imports and
 * maintain compatibility with the full observability types.
 */
export type NodeObservabilityContext = {
  /** Observability configuration - full type in observability module */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  config?: any;
  flowId?: string;
  executionId?: string;
  stepId?: string;
  stepIndex?: number;
  /** W3C Trace Context - compatible with TraceContext from observability module */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  traceContext?: any;
  metadata?: Record<string, unknown>;
};

/** Per-step execution context handed to each node's run function. */
export type NodeCtx = {
  stepId?: string; // Flow-level step ID for metrics tracking
  artifacts: Record<string, unknown>;
  emit: (key: string, value: unknown) => void;
  metrics: { push: (m: StepMetric) => void };
  /** Observability context for hooks (optional) */
  observability?: NodeObservabilityContext;
};

/** Node type metadata for runtime validation */
export type NodeTypeInfo = {
  /** Input types this node accepts (e.g., ['FlowInput', 'DocumentIR']) */
  inputTypes: string[];
  /**
   * Output type this node produces - can be string or function for config-dependent types.
   * When a function, it receives the node's specific config and returns the output type string.
   * Uses 'any' parameter to allow nodes to use their specific config types.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  outputType: string | ((config: any) => string);
  /** Provider types this node requires (if any) */
  requiresProvider?: ('OCR' | 'VLM' | 'LLM')[];
  /** Whether this node can accept array input */
  acceptsArray?: boolean;
  /**
   * Whether this node always outputs an array (or function for config-dependent).
   * Uses 'any' parameter to allow nodes to use their specific config types.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  outputsArray?: boolean | ((config: any) => boolean);
  /** Human-readable description of what this node does */
  description?: string;
};

export type NodeDef<I, O> = {
  key: string;
  run: (input: I, ctx: NodeCtx) => Promise<O>;
  /** Optional type metadata for validation */
  __meta?:
NodeTypeInfo;\n};\n\nexport const node = <I, O>(key: string, run: NodeDef<I, O>[\"run\"]): NodeDef<I, O> => ({ key, run });\n\nexport async function runPipeline(\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n steps: NodeDef<any, any>[],\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n input: any,\n observabilityContext?: NodeObservabilityContext,\n // eslint-disable-next-line @typescript-eslint/no-explicit-any\n flowArtifacts?: Record<string, any>\n) {\n // Merge flow artifacts with local (flow artifacts as read-only base for source access)\n const artifacts: Record<string, unknown> = flowArtifacts ? { ...flowArtifacts } : {};\n const metrics: StepMetric[] = [];\n const ctx: NodeCtx = {\n stepId: observabilityContext?.stepId,\n artifacts,\n emit: (k, v) => { artifacts[k] = v; },\n metrics: { push: (m) => metrics.push(m) },\n observability: observabilityContext\n };\n let acc = input;\n for (const s of steps) {\n acc = await s.run(acc, ctx);\n ctx.emit(s.key, acc);\n }\n return { output: acc, artifacts, metrics };\n}\n\n/**\n * Flow execution error with step context\n *\n * Thrown when a flow step fails during execution. 
Includes:\n * - Which step failed (ID, index, type)\n * - Which steps completed successfully\n * - Partial artifacts from completed steps (for debugging)\n * - The original error that caused the failure\n *\n * This makes debugging flow failures much easier by showing exactly where the error occurred\n * and what data was produced before the failure.\n *\n * @example\n * ```typescript\n * try {\n * await flow.run(input);\n * } catch (error) {\n * if (error instanceof FlowExecutionError) {\n * console.error(`Failed at step ${error.failedStepIndex}: ${error.failedStepType}`);\n * console.error(`Step ID: ${error.failedStep}`);\n * console.error(`Completed: ${error.completedSteps.join(', ')}`);\n * console.error(`Original error: ${error.originalError.message}`);\n *\n * // Access partial results from completed steps\n * if (error.partialArtifacts?.qualify) {\n * console.log('Quality assessment completed:', error.partialArtifacts.qualify);\n * }\n * }\n * }\n * ```\n */\n\n/**\n * Extracts a human-readable error message from potentially JSON error responses.\n *\n * Handles common API error formats:\n * - { \"detail\": \"...\" } (Surya-style)\n * - { \"error\": { \"message\": \"...\" } } (OpenAI, Anthropic)\n * - { \"error\": \"...\" } (Simple format)\n * - { \"message\": \"...\" } (Direct format)\n * - Plain text (returned as-is)\n *\n * @param errorText - The error text which may contain JSON\n * @returns A human-readable error message\n */\nexport function extractErrorMessage(errorText: string): string {\n // If it's short or doesn't look like JSON, return as-is\n if (errorText.length < 10 || !errorText.trim().startsWith('{')) {\n return errorText;\n }\n\n try {\n const parsed = JSON.parse(errorText);\n\n // Surya-style: { \"detail\": \"...\" }\n if (parsed.detail) {\n return parsed.detail;\n }\n\n // OpenAI/Anthropic style: { error: { message: \"...\" } }\n if (parsed.error?.message) {\n return parsed.error.message;\n }\n\n // Simple style: { error: \"...\" }\n if 
(typeof parsed.error === 'string') {\n return parsed.error;\n }\n\n // Direct style: { message: \"...\" }\n if (parsed.message) {\n return parsed.message;\n }\n\n // Google style: { error: { status: \"...\", message: \"...\" } }\n if (parsed.error?.status && parsed.error?.message) {\n return `${parsed.error.status}: ${parsed.error.message}`;\n }\n\n // Fallback: return original but truncated if very long\n return errorText.length > 200\n ? errorText.substring(0, 200) + '...'\n : errorText;\n } catch {\n // Not valid JSON, return as-is (truncated if needed)\n return errorText.length > 500\n ? errorText.substring(0, 500) + '...'\n : errorText;\n }\n}\n\n/**\n * Represents a step location in a flow hierarchy.\n * Used to track the execution path through nested flows.\n */\nexport interface FlowStepLocation {\n /** Step ID */\n stepId: string;\n /** Step index within this flow (0-based) */\n stepIndex: number;\n /** Step type (e.g., 'parse', 'conditional', 'forEach') */\n stepType: string;\n /** Branch name if within a conditional (e.g., \"Invoice\", \"Receipt\") */\n branch?: string;\n /** Item index if within a forEach iteration */\n itemIndex?: number;\n}\n\nexport class FlowExecutionError extends Error {\n constructor(\n message: string,\n /** The ID of the step that failed (e.g., 'parse_node123') */\n public readonly failedStep: string,\n /** The index of the failed step in the flow (0-based) */\n public readonly failedStepIndex: number,\n /** The type of the failed step (e.g., 'parse', 'extract', 'step', 'conditional', 'forEach') */\n public readonly failedStepType: string,\n /** Array of step IDs that completed successfully before the failure */\n public readonly completedSteps: string[],\n /** The original error that caused the failure */\n public readonly originalError: Error,\n /** Partial artifacts from steps that completed before the failure */\n public readonly partialArtifacts?: Record<string, any>,\n /** Execution path through nested flows (for 
hierarchical context) */\n public readonly flowPath?: FlowStepLocation[],\n /** All completed steps aggregated across flow boundaries */\n public readonly allCompletedSteps?: string[]\n ) {\n super(message);\n this.name = 'FlowExecutionError';\n\n // Maintain proper stack trace for V8 engines\n if (Error.captureStackTrace) {\n Error.captureStackTrace(this, FlowExecutionError);\n }\n }\n\n /**\n * Returns a formatted string showing the execution path.\n * Example: \"parse → conditional:Invoice → extract\"\n */\n getFormattedPath(): string {\n if (!this.flowPath || this.flowPath.length === 0) {\n return this.failedStep;\n }\n\n return this.flowPath.map(loc => {\n let label = loc.stepId;\n if (loc.branch) {\n label += `:${loc.branch}`;\n }\n if (loc.itemIndex !== undefined) {\n label += `[${loc.itemIndex}]`;\n }\n return label;\n }).join(' → ');\n }\n\n /**\n * Returns the root cause error (innermost originalError).\n * Useful when errors are nested multiple levels deep.\n */\n getRootCause(): Error {\n let cause: Error = this.originalError;\n while (cause instanceof FlowExecutionError && cause.originalError) {\n cause = cause.originalError;\n }\n return cause;\n }\n}\n\n/**\n * Flow validation error for invalid node connections\n *\n * Thrown when building a flow with incompatible node connections.\n * Provides helpful error messages and suggestions for fixing the issue.\n *\n * @example\n * ```typescript\n * try {\n * const flow = createFlow()\n * .step('parse', parse({ provider: ocrProvider }))\n * .step('combine', combine()) // Invalid: combine needs array input\n * .build();\n * } catch (error) {\n * if (error instanceof FlowValidationError) {\n * console.error(error.message);\n * console.error('Reason:', error.reason);\n * console.log('Suggestions:', error.suggestions?.join('\\n'));\n * }\n * }\n * ```\n */\nexport class FlowValidationError extends Error {\n constructor(\n message: string,\n public readonly reason?: string,\n public readonly suggestions?: 
string[],
    public readonly sourceNode?: string,
    public readonly targetNode?: string,
    public readonly sourceOutputType?: string,
    public readonly targetInputTypes?: string[]
  ) {
    super(message);
    this.name = 'FlowValidationError';

    // Maintain proper stack trace for V8 engines
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, FlowValidationError);
    }
  }
}

/** Node type names for validation */
export type NodeTypeName = 'parse' | 'split' | 'categorize' | 'extract' | 'chunk' | 'combine' | 'trigger' | 'output';

/** Compatibility rule for node connections */
export type CompatibilityRule = {
  valid: boolean;
  requiresForEach?: boolean;
  /** Indicates this connection cannot be fully validated at build-time and requires runtime type checking */
  requiresRuntimeValidation?: boolean;
  reason?: string;
  note?: string;
};

/**
 * Node Compatibility Matrix
 *
 * Defines which nodes can connect to which other nodes.
 * This is the single source of truth for node connection validation.
 *
 * Rules based on input/output type compatibility:
 * - parse: FlowInput → DocumentIR (or DocumentIR[] if chunked)
 * - split: FlowInput → SplitDocument[] (requires forEach)
 * - categorize: DocumentIR|FlowInput → {input, category}
 * - extract: DocumentIR|FlowInput|ChunkOutput → T (typed JSON)
 * - chunk: DocumentIR|DocumentIR[] → ChunkOutput
 * - combine: T[] → T|T[] (merges forEach results)
 * - trigger: any → TOutput (depends on child flow)
 *
 * Special behaviors:
 * - forEach auto-unwraps SplitDocument.input → FlowInput
 * - Conditional auto-unwraps {input, category} → input
 * - parse with chunked:true outputs DocumentIR[] instead of DocumentIR
 *
 * Outer key = source node type, inner key = target node type.
 */
export const NODE_COMPATIBILITY_MATRIX: Record<NodeTypeName, Record<NodeTypeName, CompatibilityRule>> = {
  parse: {
    parse: {
      valid: false,
      reason: 'Cannot chain parse nodes. Parse is typically the starting node.'
    },
    split: {
      valid: false,
      reason: 'Split requires FlowInput, but parse outputs DocumentIR. Use split directly on input instead.',
      note: 'If you need to re-split after parsing, use trigger to invoke a child flow with FlowInput.'
    },
    categorize: {
      valid: true,
      note: 'categorize accepts DocumentIR and wraps it with {input, category}'
    },
    extract: {
      valid: true,
      note: 'extract accepts DocumentIR and produces typed JSON'
    },
    chunk: {
      valid: true,
      note: 'chunk accepts DocumentIR and produces ChunkOutput for RAG'
    },
    combine: {
      valid: false,
      reason: 'Parse outputs DocumentIR (single document), not an array. Combine requires array input from forEach.',
      note: 'Use parse with chunked:true to output DocumentIR[], then use combine.'
    },
    trigger: {
      valid: true,
      note: 'trigger accepts any input type'
    },
    output: {
      valid: true,
      note: 'output node can follow any node to select or transform results'
    }
  },
  split: {
    parse: {
      valid: true,
      requiresForEach: true,
      reason: 'Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input → FlowInput for parse.',
      note: 'Enable forEach on split node before connecting to parse.'
    },
    split: {
      valid: false,
      reason: 'Cannot nest split operations. Split nodes cannot appear in forEach itemFlow.'
    },
    categorize: {
      valid: true,
      requiresForEach: true,
      reason: 'Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input for categorize.'
    },
    extract: {
      valid: true,
      requiresForEach: true,
      reason: 'Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input for extract.'
    },
    chunk: {
      valid: false,
      reason: 'SplitDocument output is incompatible with Chunk input. Chunk expects DocumentIR or DocumentIR[].',
      note: 'Use parse in forEach after split to convert SplitDocument → DocumentIR, then chunk.'
    },
    combine: {
      valid: false,
      reason: 'Combine should appear AFTER forEach completes, not as a forEach itemFlow step.',
      note: 'Place combine after the forEach block to merge results.'
    },
    trigger: {
      valid: true,
      requiresForEach: true,
      reason: 'Split outputs SplitDocument[] which requires forEach for processing.',
      note: 'forEach auto-unwraps SplitDocument.input for child flow.'
    },
    output: {
      valid: true,
      note: 'output node can follow any node to select or transform results'
    }
  },
  categorize: {
    parse: {
      valid: true,
      note: 'categorize outputs {input, category}. Conditional can unwrap this or use directly.'
    },
    split: {
      valid: false,
      reason: 'Split requires FlowInput, but categorize outputs {input, category}.',
      note: 'Use conditional to unwrap and pass input field to split.'
    },
    categorize: {
      valid: true,
      note: 'Can chain categorize nodes for multi-level classification.'
    },
    extract: {
      valid: true,
      note: 'extract can process the categorized document.'
    },
    chunk: {
      valid: false,
      reason: 'Categorize wraps input as {input, category}. Chunk needs unwrapped DocumentIR.',
      note: 'Use conditional to unwrap input field before chunk.'
    },
    combine: {
      valid: false,
      reason: 'Categorize outputs single result {input, category}, not an array. Combine requires array input.'
    },
    trigger: {
      valid: true,
      note: 'trigger accepts any input type, including {input, category}'
    },
    output: {
      valid: true,
      note: 'output node can follow any node to select or transform results'
    }
  },
  extract: {
    parse: {
      valid: false,
      reason: 'Extract outputs typed JSON (terminal node). Cannot pipe JSON to parse.',
      note: 'Extract should be one of the last steps in a flow. Use combine if extracting in parallel.'
    },
    split: {
      valid: false,
      reason: 'Extract outputs typed JSON (terminal node). Cannot pipe JSON to split.'
    },
    categorize: {
      valid: false,
      reason: 'Extract outputs typed JSON (terminal node). Cannot pipe JSON to categorize.'
    },
    extract: {
      valid: false,
      reason: 'Extract outputs typed JSON (terminal node). Cannot chain extractions on JSON output.',
      note: 'If you need multi-step extraction, extract from DocumentIR/ChunkOutput in parallel, then combine.'
    },
    chunk: {
      valid: false,
      reason: 'Extract outputs typed JSON, not DocumentIR. Chunk expects DocumentIR input.'
    },
    combine: {
      valid: true,
      note: 'Use combine to merge parallel extraction results from forEach.'
    },
    trigger: {
      valid: true,
      note: 'trigger accepts any input type, including extracted JSON'
    },
    output: {
      valid: true,
      note: 'output node can follow any node to select or transform results'
    }
  },
  chunk: {
    parse: {
      valid: false,
      reason: 'Chunk outputs ChunkOutput (specialized type), not FlowInput. Parse expects FlowInput as input.'
    },
    split: {
      valid: false,
      reason: 'Chunk outputs ChunkOutput, incompatible with Split input (FlowInput).'
    },
    categorize: {
      valid: false,
      reason: 'Chunk outputs ChunkOutput, incompatible with Categorize input (DocumentIR|FlowInput).',
      note: 'Categorize before chunking, not after.'
    },
    extract: {
      valid: true,
      note: 'extract has special handling for ChunkOutput - extracts data from chunks.'
    },
    chunk: {
      valid: false,
      reason: 'Cannot chain chunk operations. Chunk only once per document.',
      note: 'Different chunking strategies should be applied to the original DocumentIR, not to chunks.'
    },
    combine: {
      valid: false,
      reason: 'Chunk outputs ChunkOutput (specialized type), not an array type. Combine expects T[].',
      note: 'Use chunk on individual documents in forEach, then extract, then combine extractions.'
    },
    trigger: {
      valid: true,
      note: 'trigger accepts any input type, including ChunkOutput'
    },
    output: {
      valid: true,
      note: 'output node can follow any node to select or transform results'
    }
  },
  combine: {
    parse: {
      valid: true,
      note: 'After combining, result can be re-parsed if needed.'
    },
    split: {
      valid: false,
      reason: 'Combine output depends on strategy. Split requires FlowInput.',
      note: 'Most combine strategies output merged objects/arrays, not FlowInput.'
    },
    categorize: {
      valid: true,
      note: 'Can categorize combined results.'
    },
    extract: {
      valid: true,
      note: 'Can extract from combined results.'
    },
    chunk: {
      valid: true,
      note: 'Can chunk combined DocumentIR. Only valid if combine output is DocumentIR or DocumentIR[].'
    },
    combine: {
      valid: false,
      reason: 'Cannot chain combine nodes. Combine once per forEach operation.'
    },
    trigger: {
      valid: true,
      note: 'trigger accepts any input type'
    },
    output: {
      valid: true,
      note: 'output node can follow any node to select or transform results'
    }
  },
  trigger: {
    parse: {
      valid: true,
      requiresRuntimeValidation: true,
      note: 'Valid only if child flow returns FlowInput. Type safety cannot be guaranteed at build-time.'
    },
    split: {
      valid: true,
      requiresRuntimeValidation: true,
      note: 'Valid only if child flow returns FlowInput. Type safety cannot be guaranteed at build-time.'
    },
    categorize: {
      valid: true,
      requiresRuntimeValidation: true,
      note: 'Valid only if child flow returns DocumentIR or FlowInput. Type safety cannot be guaranteed at build-time.'
    },
    extract: {
      valid: true,
      requiresRuntimeValidation: true,
      note: 'Valid only if child flow returns DocumentIR, FlowInput, or ChunkOutput. Type safety cannot be guaranteed at build-time.'
    },
    chunk: {
      valid: true,
      requiresRuntimeValidation: true,
      note: 'Valid only if child flow returns DocumentIR or DocumentIR[]. Type safety cannot be guaranteed at build-time.'
    },
    combine: {
      valid: true,
      requiresRuntimeValidation: true,
      note: 'Valid only if child flow returns an array (T[]). Type safety cannot be guaranteed at build-time.'
    },
    trigger: {
      valid: true,
      requiresRuntimeValidation: true,
      note: 'Can nest trigger nodes (with circular dependency detection and max depth limits). Output type depends on nested child flow.'
    },
    output: {
      valid: true,
      note: 'output node can follow any node to select or transform results'
    }
  },
  output: {
    parse: {
      valid: false,
      reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'
    },
    split: {
      valid: false,
      reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'
    },
    categorize: {
      valid: false,
      reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'
    },
    extract: {
      valid: false,
      reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'
    },
    chunk: {
      valid: false,
      reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'
    },
    combine: {
      valid: false,
      reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'
    },
    trigger: {
      valid: false,
      reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'
    },
    output: {
      valid: true,
      note: 'Multiple output nodes are allowed to create multiple named outputs from a flow.'
    }
  }
};

/**
 * Get node type name from a NodeDef
 * @param node - Node definition
 * @returns Node type name (e.g., 'parse', 'extract'), or null for unknown keys
 */
export function getNodeTypeName(node: NodeDef<any, any>): NodeTypeName | null {
  if (!node || !node.key) return null;
  const key = node.key;

  // Check if it's a known node type
  const knownTypes: NodeTypeName[] = ['parse', 'split', 'categorize', 'extract', 'chunk', 'combine', 'trigger', 'output'];
  return knownTypes.includes(key as NodeTypeName) ? (key as NodeTypeName) : null;
}

/**
 * Get type information from a node
 * @param node - Node definition
 * @returns NodeTypeInfo if available
 */
export function getNodeTypeInfo(node: NodeDef<any, any>): NodeTypeInfo | null {
  return node.__meta || null;
}

/**
 * Get compatible target nodes for a given source node
 * @param sourceType - Source node type name
 * @param includeForEach - Include connections that require forEach
 * @returns Array of compatible target node types
 */
export function getCompatibleTargets(sourceType: NodeTypeName, includeForEach: boolean = false): NodeTypeName[] {
  const rules = NODE_COMPATIBILITY_MATRIX[sourceType];
  if (!rules) return [];

  return Object.entries(rules)
    .filter(([_, rule]) => {
      if (!rule.valid) return false;
      if (rule.requiresForEach && !includeForEach) return false;
      return true;
    })
    .map(([targetType, _]) => targetType as NodeTypeName);
}

/**
 * Get suggested connections when a connection is invalid
 * @param sourceType - Source node type name
 * @returns Array of suggestion strings
 */
export function getSuggestedConnections(sourceType: NodeTypeName): string[] {
  const compatibleTargets = getCompatibleTargets(sourceType, false);
  const forEachTargets = getCompatibleTargets(sourceType, true).filter(
    t =>
!compatibleTargets.includes(t)
  );

  if (compatibleTargets.length === 0 && forEachTargets.length === 0) {
    return [`${sourceType} has no standard outgoing connections (terminal node).`];
  }

  const suggestions: string[] = [];

  if (compatibleTargets.length > 0) {
    suggestions.push(`${sourceType} can connect to:`);
    compatibleTargets.forEach(target => {
      const rule = NODE_COMPATIBILITY_MATRIX[sourceType][target];
      suggestions.push(` • ${target}${rule.note ? ` - ${rule.note}` : ''}`);
    });
  }

  if (forEachTargets.length > 0) {
    suggestions.push(`${sourceType} can connect to (with forEach enabled):`);
    forEachTargets.forEach(target => {
      const rule = NODE_COMPATIBILITY_MATRIX[sourceType][target];
      suggestions.push(` • ${target}${rule.note ? ` - ${rule.note}` : ''}`);
    });
  }

  return suggestions;
}

/**
 * Validation result for node connections
 */
export type ValidationResult = {
  valid: boolean;
  reason?: string;
  suggestions?: string[];
  requiresForEach?: boolean;
  /** Warning message for connections that are valid but require runtime type checking */
  warning?: string;
};

/**
 * Validate if two node types can be connected
 * @param sourceType - Source node type name
 * @param targetType - Target node type name
 * @param forEachEnabled - Whether forEach is enabled on the source node
 * @returns Validation result with reason and suggestions
 */
export function validateNodeConnection(
  sourceType: NodeTypeName,
  targetType: NodeTypeName,
  forEachEnabled: boolean = false
): ValidationResult {
  const rule = NODE_COMPATIBILITY_MATRIX[sourceType]?.[targetType];

  if (!rule) {
    return {
      valid: false,
      reason: `Unknown node type combination: ${sourceType} → ${targetType}`,
      suggestions: ['Ensure both nodes are valid node types.']
    };
  }

  if (!rule.valid) {
    return {
      valid: false,
      reason: rule.reason,
      suggestions: getSuggestedConnections(sourceType)
    };
  }

  // Check forEach requirement
  if
(rule.requiresForEach && !forEachEnabled) {\n return {\n valid: false,\n reason: `Cannot connect ${sourceType} to ${targetType} without forEach enabled.`,\n suggestions: [\n `Enable forEach on the ${sourceType} node:`,\n ` 1. Click the ${sourceType} node`,\n ` 2. Enable \"forEach Processing\" in the configuration`,\n ` 3. Try connecting again`,\n '',\n ...getSuggestedConnections(sourceType)\n ],\n requiresForEach: true\n };\n }\n\n // Check if runtime validation is required\n if (rule.requiresRuntimeValidation) {\n return {\n valid: true,\n warning: `⚠️ ${sourceType} → ${targetType}: ${rule.note || 'Type compatibility depends on runtime values and cannot be validated at build-time.'}`\n };\n }\n\n return {\n valid: true\n };\n}\n\n/**\n * Get valid starting nodes for forEach itemFlow based on parent node type\n *\n * When a node outputs an array and uses forEach, the itemFlow receives individual\n * array items. This function returns which node types can accept those items.\n *\n * @param parentType - The node type that outputs the array (e.g., 'split', 'parse')\n * @returns Array of node types that can start the forEach itemFlow\n *\n * @example\n * ```typescript\n * // split outputs SplitDocument[], itemFlow gets SplitDocument\n * getValidForEachStarters('split') // ['parse', 'extract', 'categorize', 'trigger']\n *\n * // parse(chunked:true) outputs DocumentIR[], itemFlow gets DocumentIR\n * getValidForEachStarters('parse') // ['categorize', 'extract', 'chunk']\n * ```\n */\nexport function getValidForEachStarters(parentType: NodeTypeName): NodeTypeName[] {\n const rules = NODE_COMPATIBILITY_MATRIX[parentType];\n if (!rules) return [];\n\n // Get all targets that require forEach (these are valid itemFlow starters)\n return Object.entries(rules)\n .filter(([_, rule]) => rule.valid && rule.requiresForEach)\n .map(([targetType, _]) => targetType as NodeTypeName);\n}\n\n/**\n * Validate if a node type can start a forEach itemFlow for a given parent\n *\n * @param 
parentType - The node type that outputs the array (e.g., 'split')\n * @param starterType - The node type to validate as itemFlow starter\n * @returns ValidationResult with detailed error messages and suggestions\n *\n * @example\n * ```typescript\n * // Valid: split → forEach → parse\n * canStartForEachItemFlow('split', 'parse') // { valid: true }\n *\n * // Invalid: split → forEach → chunk\n * canStartForEachItemFlow('split', 'chunk')\n * // {\n * // valid: false,\n * // reason: 'chunk cannot start forEach itemFlow after split...',\n * // suggestions: ['Valid starters: parse, extract, categorize, trigger']\n * // }\n * ```\n */\nexport function canStartForEachItemFlow(\n parentType: NodeTypeName,\n starterType: NodeTypeName\n): ValidationResult {\n const rule = NODE_COMPATIBILITY_MATRIX[parentType]?.[starterType];\n\n if (!rule) {\n return {\n valid: false,\n reason: `Unknown node type combination: ${parentType} → forEach → ${starterType}`,\n suggestions: ['Ensure both nodes are valid node types.']\n };\n }\n\n // Check if this connection requires forEach (meaning it's valid in itemFlow)\n if (rule.valid && rule.requiresForEach) {\n return {\n valid: true\n };\n }\n\n // If the rule is invalid, provide error\n if (!rule.valid) {\n const validStarters = getValidForEachStarters(parentType);\n return {\n valid: false,\n reason: `${starterType} cannot start forEach itemFlow after ${parentType}. ${rule.reason || 'Type incompatible with forEach unwrapped item.'}`,\n suggestions: validStarters.length > 0\n ? [`Valid itemFlow starters for ${parentType}: ${validStarters.join(', ')}`]\n : [`${parentType} has no valid forEach itemFlow starters.`]\n };\n }\n\n // If valid but doesn't require forEach, it's not a valid itemFlow starter\n const validStarters = getValidForEachStarters(parentType);\n return {\n valid: false,\n reason: `${starterType} cannot start forEach itemFlow after ${parentType}. 
This connection does not require forEach, meaning it expects the full array, not individual items.`,\n suggestions: validStarters.length > 0\n ? [`Valid itemFlow starters for ${parentType}: ${validStarters.join(', ')}`]\n : [`${parentType} has no valid forEach itemFlow starters.`]\n };\n}\n\n/**\n * JSON Schema node structure for validation.\n * Represents a node in a JSON Schema definition.\n */\nexport interface JSONSchemaNode {\n type?: string | string[];\n properties?: Record<string, JSONSchemaNode>;\n items?: JSONSchemaNode | JSONSchemaNode[];\n required?: string[];\n enum?: (string | number | boolean | null)[];\n nullable?: boolean;\n anyOf?: JSONSchemaNode[];\n oneOf?: JSONSchemaNode[];\n allOf?: JSONSchemaNode[];\n const?: unknown;\n additionalProperties?: boolean | JSONSchemaNode;\n minLength?: number;\n maxLength?: number;\n minimum?: number;\n maximum?: number;\n minItems?: number;\n maxItems?: number;\n pattern?: string;\n format?: string;\n description?: string;\n default?: unknown;\n $ref?: string;\n}\n\n/**\n * Lightweight JSON Schema validator for Edge Runtime compatibility\n *\n * Validates data against a JSON Schema without using AJV's code generation.\n * This is fully Edge Runtime compatible with zero dependencies.\n *\n * @param data - The data to validate\n * @param schema - JSON Schema object (plain object, not AJV JSONSchemaType)\n * @returns The validated data cast to type T\n * @throws Error if validation fails\n */\nexport function validateJson<T>(data: unknown, schema: JSONSchemaNode): T {\n const errors: string[] = [];\n const MAX_DEPTH = 50; // Prevent DoS via deeply nested objects\n\n function validate(value: unknown, schema: JSONSchemaNode, path: string = '', depth: number = 0): void {\n // Check recursion depth to prevent DoS attacks\n if (depth > MAX_DEPTH) {\n errors.push(`${path || 'root'}: maximum nesting depth (${MAX_DEPTH}) exceeded`);\n return;\n }\n\n // Handle nullable values\n if (schema.nullable && (value === null || 
value === undefined)) {\n return;\n }\n\n if (value === null || value === undefined) {\n if (schema.nullable !== true) {\n errors.push(`${path || 'root'}: value is null or undefined`);\n }\n return;\n }\n\n // Validate type\n const actualType = Array.isArray(value) ? 'array' : typeof value;\n const expectedType = schema.type;\n\n if (expectedType) {\n // Handle type validation\n if (expectedType === 'integer') {\n if (typeof value !== 'number' || !Number.isInteger(value)) {\n errors.push(`${path || 'root'}: expected integer, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'number') {\n if (typeof value !== 'number') {\n errors.push(`${path || 'root'}: expected number, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'string') {\n if (typeof value !== 'string') {\n errors.push(`${path || 'root'}: expected string, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'boolean') {\n if (typeof value !== 'boolean') {\n errors.push(`${path || 'root'}: expected boolean, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'object') {\n if (typeof value !== 'object' || Array.isArray(value)) {\n errors.push(`${path || 'root'}: expected object, got ${actualType}`);\n return;\n }\n\n // Validate required properties\n if (schema.required && Array.isArray(schema.required)) {\n for (const reqProp of schema.required) {\n if (!(reqProp in value)) {\n errors.push(`${path}.${reqProp}: required property missing`);\n }\n }\n }\n\n // Validate additionalProperties and check for prototype pollution\n const dangerousProps = ['__proto__', 'constructor', 'prototype'];\n\n if (schema.additionalProperties === false && schema.properties) {\n const allowedProps = Object.keys(schema.properties);\n const requiredProps = schema.required || [];\n const allAllowedProps = new Set([...allowedProps, ...requiredProps]);\n\n // Check all keys including potentially dangerous ones\n for (const key of [...Object.keys(value), 
...Object.getOwnPropertyNames(value)]) {\n // Explicitly reject dangerous properties\n if (dangerousProps.includes(key)) {\n errors.push(`${path}.${key}: dangerous property not allowed`);\n continue;\n }\n\n if (!allAllowedProps.has(key)) {\n errors.push(`${path}.${key}: additional property not allowed`);\n }\n }\n } else {\n // Even without additionalProperties: false, reject dangerous properties\n for (const key of dangerousProps) {\n if (key in value && Object.prototype.hasOwnProperty.call(value, key)) {\n errors.push(`${path}.${key}: dangerous property not allowed`);\n }\n }\n }\n\n // Validate properties\n if (schema.properties) {\n const valueObj = value as Record<string, unknown>;\n for (const [propName, propSchema] of Object.entries(schema.properties)) {\n if (propName in valueObj) {\n validate(valueObj[propName], propSchema, path ? `${path}.${propName}` : propName, depth + 1);\n }\n }\n }\n } else if (expectedType === 'array') {\n if (!Array.isArray(value)) {\n errors.push(`${path || 'root'}: expected array, got ${actualType}`);\n return;\n }\n\n // Validate array items\n if (schema.items && !Array.isArray(schema.items)) {\n const itemSchema = schema.items;\n value.forEach((item, index) => {\n validate(item, itemSchema, `${path}[${index}]`, depth + 1);\n });\n }\n }\n }\n }\n\n validate(data, schema);\n\n if (errors.length > 0) {\n throw new Error(`Schema validation failed:\\n${errors.join('\\n')}`);\n }\n\n return data as T;\n}\n\n/**\n * Reserved variables that are auto-injected per node type.\n * These variables come from config or computed data and cannot be overridden by users.\n */\nexport const RESERVED_VARIABLES = {\n extract: ['schema', 'documentText', 'schemaTitle', 'schemaDescription', 'structuredFormat'],\n categorize: ['categories', 'documentText'],\n parse: ['format', 'schema', 'describeFigures', 'citationsEnabled']\n} as const;\n\n/**\n * Validates that user-provided promptVariables don't attempt to override reserved variables.\n * Emits 
console warnings if reserved variables are found in user variables and removes them.\n *\n * @param nodeType - The type of node (extract, categorize, parse)\n * @param userVariables - The user-provided promptVariables object\n * @param autoInjectedVariables - The auto-injected variables object\n * @returns A cleaned variables object with reserved variables protected\n */\nexport function protectReservedVariables(\n nodeType: 'extract' | 'categorize' | 'parse',\n userVariables: Record<string, any> | undefined,\n autoInjectedVariables: Record<string, any>\n): Record<string, any> {\n if (!userVariables || Object.keys(userVariables).length === 0) {\n return autoInjectedVariables;\n }\n\n const reserved = RESERVED_VARIABLES[nodeType];\n const warnings: string[] = [];\n\n // Check for reserved variable override attempts\n for (const key of reserved) {\n if (key in userVariables) {\n warnings.push(key);\n }\n }\n\n // Emit warnings if any reserved variables were attempted\n if (warnings.length > 0) {\n console.warn(\n `[doclo] Attempted to override reserved variables in ${nodeType} node: ${warnings.join(', ')}. ` +\n `These variables are auto-injected from config and cannot be overridden. 
` +\n `They will be ignored.`\n );\n }\n\n // Merge: auto-injected first, then user variables (but reserved vars take precedence)\n return {\n ...autoInjectedVariables,\n ...userVariables,\n // Restore reserved variables to ensure they can't be overridden\n ...Object.fromEntries(\n reserved.map(key => [key, autoInjectedVariables[key]])\n )\n };\n}\n"],"mappings":";AA04BO,SAAS,iBAAiB,SAA0C;AACzE,QAAM,aAKD,CAAC;AAEN,QAAM,SAAS,QAAQ,OAAO,CAAC,KAAK,MAAM;AACxC,QAAI,mBAAmB,EAAE;AACzB,QAAI,gBAAgB,EAAE,WAAW;AACjC,QAAI,oBAAoB,EAAE,eAAe;AACzC,QAAI,qBAAqB,EAAE,gBAAgB;AAC3C,QAAI,4BAA4B,EAAE,4BAA4B;AAC9D,QAAI,wBAAwB,EAAE,wBAAwB;AAGtD,QAAI,EAAE,UAAU;AACd,UAAI,CAAC,WAAW,EAAE,QAAQ,GAAG;AAC3B,mBAAW,EAAE,QAAQ,IAAI,EAAE,SAAS,GAAG,aAAa,GAAG,cAAc,GAAG,WAAW,EAAE;AAAA,MACvF;AACA,iBAAW,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW;AAC/C,iBAAW,EAAE,QAAQ,EAAE,eAAe,EAAE,eAAe;AACvD,iBAAW,EAAE,QAAQ,EAAE,gBAAgB,EAAE,gBAAgB;AACzD,iBAAW,EAAE,QAAQ,EAAE,aAAa;AAAA,IACtC;AAEA,WAAO;AAAA,EACT,GAAG;AAAA,IACD,iBAAiB;AAAA,IACjB,cAAc;AAAA,IACd,kBAAkB;AAAA,IAClB,mBAAmB;AAAA,IACnB,0BAA0B;AAAA,IAC1B,sBAAsB;AAAA,IACtB,WAAW,QAAQ;AAAA,IACnB;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAyFO,IAAM,OAAO,CAAO,KAAa,SAA8C,EAAE,KAAK,IAAI;AAEjG,eAAsB,YAEpB,OAEA,OACA,sBAEA,eACA;AAEA,QAAM,YAAqC,gBAAgB,EAAE,GAAG,cAAc,IAAI,CAAC;AACnF,QAAM,UAAwB,CAAC;AAC/B,QAAM,MAAe;AAAA,IACnB,QAAQ,sBAAsB;AAAA,IAC9B;AAAA,IACA,MAAM,CAAC,GAAG,MAAM;AAAE,gBAAU,CAAC,IAAI;AAAA,IAAG;AAAA,IACpC,SAAS,EAAE,MAAM,CAAC,MAAM,QAAQ,KAAK,CAAC,EAAE;AAAA,IACxC,eAAe;AAAA,EACjB;AACA,MAAI,MAAM;AACV,aAAW,KAAK,OAAO;AACrB,UAAM,MAAM,EAAE,IAAI,KAAK,GAAG;AAC1B,QAAI,KAAK,EAAE,KAAK,GAAG;AAAA,EACrB;AACA,SAAO,EAAE,QAAQ,KAAK,WAAW,QAAQ;AAC3C;AA+CO,SAAS,oBAAoB,WAA2B;AAE7D,MAAI,UAAU,SAAS,MAAM,CAAC,UAAU,KAAK,EAAE,WAAW,GAAG,GAAG;AAC9D,WAAO;AAAA,EACT;AAEA,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,SAAS;AAGnC,QAAI,OAAO,QAAQ;AACjB,aAAO,OAAO;AAAA,IAChB;AAGA,QAAI,OAAO,OAAO,SAAS;AACzB,aAAO,OAAO,MAAM;AAAA,IACtB;AAGA,QAAI,OAAO,OAAO,UAAU,UAAU;AACpC,aAAO,OAAO;AAAA,IAChB;AAGA,QAAI,OAAO,SAAS;AAClB,aAAO,OAAO;AAAA,IAChB;AAGA,QAAI,OAAO,OA
AO,UAAU,OAAO,OAAO,SAAS;AACjD,aAAO,GAAG,OAAO,MAAM,MAAM,KAAK,OAAO,MAAM,OAAO;AAAA,IACxD;AAGA,WAAO,UAAU,SAAS,MACtB,UAAU,UAAU,GAAG,GAAG,IAAI,QAC9B;AAAA,EACN,QAAQ;AAEN,WAAO,UAAU,SAAS,MACtB,UAAU,UAAU,GAAG,GAAG,IAAI,QAC9B;AAAA,EACN;AACF;AAmBO,IAAM,qBAAN,MAAM,4BAA2B,MAAM;AAAA,EAC5C,YACE,SAEgB,YAEA,iBAEA,gBAEA,gBAEA,eAEA,kBAEA,UAEA,mBAChB;AACA,UAAM,OAAO;AAhBG;AAEA;AAEA;AAEA;AAEA;AAEA;AAEA;AAEA;AAGhB,SAAK,OAAO;AAGZ,QAAI,MAAM,mBAAmB;AAC3B,YAAM,kBAAkB,MAAM,mBAAkB;AAAA,IAClD;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,mBAA2B;AACzB,QAAI,CAAC,KAAK,YAAY,KAAK,SAAS,WAAW,GAAG;AAChD,aAAO,KAAK;AAAA,IACd;AAEA,WAAO,KAAK,SAAS,IAAI,SAAO;AAC9B,UAAI,QAAQ,IAAI;AAChB,UAAI,IAAI,QAAQ;AACd,iBAAS,IAAI,IAAI,MAAM;AAAA,MACzB;AACA,UAAI,IAAI,cAAc,QAAW;AAC/B,iBAAS,IAAI,IAAI,SAAS;AAAA,MAC5B;AACA,aAAO;AAAA,IACT,CAAC,EAAE,KAAK,UAAK;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,eAAsB;AACpB,QAAI,QAAe,KAAK;AACxB,WAAO,iBAAiB,uBAAsB,MAAM,eAAe;AACjE,cAAQ,MAAM;AAAA,IAChB;AACA,WAAO;AAAA,EACT;AACF;AAwBO,IAAM,sBAAN,MAAM,6BAA4B,MAAM;AAAA,EAC7C,YACE,SACgB,QACA,aACA,YACA,YACA,kBACA,kBAChB;AACA,UAAM,OAAO;AAPG;AACA;AACA;AACA;AACA;AACA;AAGhB,SAAK,OAAO;AAGZ,QAAI,MAAM,mBAAmB;AAC3B,YAAM,kBAAkB,MAAM,oBAAmB;AAAA,IACnD;AAAA,EACF;AACF;AAmCO,IAAM,4BAA2F;AAAA,EACtG,OAAO;AAAA,IACL,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,OAAO;AAAA,IACL,OAAO;AAAA,MACL,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,IACV;AAA
A,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,YAAY;AAAA,IACV,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,SAAS;AAAA,IACP,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,OAAO;AAAA,IACL,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,SAAS;AAAA,IACP,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;
AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,SAAS;AAAA,IACP,OAAO;AAAA,MACL,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,QAAQ;AAAA,IACN,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AACF;AAOO,SAAS,gBAAgBA,OAA8C;AAC5E,MAAI,CAACA,SAAQ,CAACA,MAAK,IAAK,QAAO;AAC/B,QAAM,MAAMA,MAAK;AAGjB,QAAM,aAA6B,CAAC,SAAS,SAAS,cAAc,WAAW,SAAS,WAAW,WAAW,QAAQ;AACtH,SAAO,WAAW,SAAS,GAAmB,IAAK,MAAuB;AAC5E;AAOO,SAAS,gBAAgBA,OAA8C;AAC5E,SAAOA,MAAK,UAAU;AACxB;AAQO,SAAS,qBAAqB,YAA0B,iBAA0B,OAAuB;AAC9G,QAAM,QAAQ,0BAA0B,UAAU;AAClD,MAAI,CAAC,MAAO,QAAO,CAAC;AAEpB,SAAO,OAAO,QAAQ,KAAK,EACxB,OAAO,CAAC,CAAC,GAAG,IAAI,MAAM;AACrB,QAAI,CAAC,KAAK,MAAO,QAAO;AACxB,QAAI,KAAK,mBAAmB,CAAC,eAAgB,QAAO;AACpD,WAAO;AAAA,EACT,CAAC,EACA,IAAI,CAAC,CAAC,YAAY,CAAC,MAAM,UAA0B;AACxD;AAOO,SAAS,wBAAwB,YAAoC;AAC1E,QAAM,oBAAoB,qBAAqB,YAAY,KAAK;AAChE,QAAM,iBAAiB,qBAAqB,YAAY,IAAI,EAAE;AAAA,IAC5D,OAAK,CAAC,kBA
AkB,SAAS,CAAC;AAAA,EACpC;AAEA,MAAI,kBAAkB,WAAW,KAAK,eAAe,WAAW,GAAG;AACjE,WAAO,CAAC,GAAG,UAAU,wDAAwD;AAAA,EAC/E;AAEA,QAAM,cAAwB,CAAC;AAE/B,MAAI,kBAAkB,SAAS,GAAG;AAChC,gBAAY,KAAK,GAAG,UAAU,kBAAkB;AAChD,sBAAkB,QAAQ,YAAU;AAClC,YAAM,OAAO,0BAA0B,UAAU,EAAE,MAAM;AACzD,kBAAY,KAAK,YAAO,MAAM,GAAG,KAAK,OAAO,MAAM,KAAK,IAAI,KAAK,EAAE,EAAE;AAAA,IACvE,CAAC;AAAA,EACH;AAEA,MAAI,eAAe,SAAS,GAAG;AAC7B,gBAAY,KAAK,GAAG,UAAU,yCAAyC;AACvE,mBAAe,QAAQ,YAAU;AAC/B,YAAM,OAAO,0BAA0B,UAAU,EAAE,MAAM;AACzD,kBAAY,KAAK,YAAO,MAAM,GAAG,KAAK,OAAO,MAAM,KAAK,IAAI,KAAK,EAAE,EAAE;AAAA,IACvE,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAqBO,SAAS,uBACd,YACA,YACA,iBAA0B,OACR;AAClB,QAAM,OAAO,0BAA0B,UAAU,IAAI,UAAU;AAE/D,MAAI,CAAC,MAAM;AACT,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,kCAAkC,UAAU,WAAM,UAAU;AAAA,MACpE,aAAa,CAAC,yCAAyC;AAAA,IACzD;AAAA,EACF;AAEA,MAAI,CAAC,KAAK,OAAO;AACf,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,KAAK;AAAA,MACb,aAAa,wBAAwB,UAAU;AAAA,IACjD;AAAA,EACF;AAGA,MAAI,KAAK,mBAAmB,CAAC,gBAAgB;AAC3C,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,kBAAkB,UAAU,OAAO,UAAU;AAAA,MACrD,aAAa;AAAA,QACX,yBAAyB,UAAU;AAAA,QACnC,kBAAkB,UAAU;AAAA,QAC5B;AAAA,QACA;AAAA,QACA;AAAA,QACA,GAAG,wBAAwB,UAAU;AAAA,MACvC;AAAA,MACA,iBAAiB;AAAA,IACnB;AAAA,EACF;AAGA,MAAI,KAAK,2BAA2B;AAClC,WAAO;AAAA,MACL,OAAO;AAAA,MACP,SAAS,iBAAO,UAAU,WAAM,UAAU,KAAK,KAAK,QAAQ,qFAAqF;AAAA,IACnJ;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,EACT;AACF;AAoBO,SAAS,wBAAwB,YAA0C;AAChF,QAAM,QAAQ,0BAA0B,UAAU;AAClD,MAAI,CAAC,MAAO,QAAO,CAAC;AAGpB,SAAO,OAAO,QAAQ,KAAK,EACxB,OAAO,CAAC,CAAC,GAAG,IAAI,MAAM,KAAK,SAAS,KAAK,eAAe,EACxD,IAAI,CAAC,CAAC,YAAY,CAAC,MAAM,UAA0B;AACxD;AAuBO,SAAS,wBACd,YACA,aACkB;AAClB,QAAM,OAAO,0BAA0B,UAAU,IAAI,WAAW;AAEhE,MAAI,CAAC,MAAM;AACT,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,kCAAkC,UAAU,0BAAgB,WAAW;AAAA,MAC/E,aAAa,CAAC,yCAAyC;AAAA,IACzD;AAAA,EACF;AAGA,MAAI,KAAK,SAAS,KAAK,iBAAiB;AACtC,WAAO;AAAA,MACL,OAAO;AAAA,IACT;AAAA,EACF;AAGA,MAAI,CAAC,KAAK,OAAO;AACf,UAAMC,iBAAgB,wBAAwB,UAAU;AACxD,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,GAAG,WAAW,wCAAwC,UAAU,KAAK,KAAK,UAAU,gDAAgD;AAAA,MAC5I,aAAaA,eAAc,SAA
S,IAChC,CAAC,+BAA+B,UAAU,KAAKA,eAAc,KAAK,IAAI,CAAC,EAAE,IACzE,CAAC,GAAG,UAAU,0CAA0C;AAAA,IAC9D;AAAA,EACF;AAGA,QAAM,gBAAgB,wBAAwB,UAAU;AACxD,SAAO;AAAA,IACL,OAAO;AAAA,IACP,QAAQ,GAAG,WAAW,wCAAwC,UAAU;AAAA,IACxE,aAAa,cAAc,SAAS,IAChC,CAAC,+BAA+B,UAAU,KAAK,cAAc,KAAK,IAAI,CAAC,EAAE,IACzE,CAAC,GAAG,UAAU,0CAA0C;AAAA,EAC9D;AACF;AA0CO,SAAS,aAAgB,MAAe,QAA2B;AACxE,QAAM,SAAmB,CAAC;AAC1B,QAAM,YAAY;AAElB,WAAS,SAAS,OAAgBC,SAAwB,OAAe,IAAI,QAAgB,GAAS;AAEpG,QAAI,QAAQ,WAAW;AACrB,aAAO,KAAK,GAAG,QAAQ,MAAM,4BAA4B,SAAS,YAAY;AAC9E;AAAA,IACF;AAGA,QAAIA,QAAO,aAAa,UAAU,QAAQ,UAAU,SAAY;AAC9D;AAAA,IACF;AAEA,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,UAAIA,QAAO,aAAa,MAAM;AAC5B,eAAO,KAAK,GAAG,QAAQ,MAAM,8BAA8B;AAAA,MAC7D;AACA;AAAA,IACF;AAGA,UAAM,aAAa,MAAM,QAAQ,KAAK,IAAI,UAAU,OAAO;AAC3D,UAAM,eAAeA,QAAO;AAE5B,QAAI,cAAc;AAEhB,UAAI,iBAAiB,WAAW;AAC9B,YAAI,OAAO,UAAU,YAAY,CAAC,OAAO,UAAU,KAAK,GAAG;AACzD,iBAAO,KAAK,GAAG,QAAQ,MAAM,2BAA2B,UAAU,EAAE;AACpE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,UAAU;AACpC,YAAI,OAAO,UAAU,UAAU;AAC7B,iBAAO,KAAK,GAAG,QAAQ,MAAM,0BAA0B,UAAU,EAAE;AACnE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,UAAU;AACpC,YAAI,OAAO,UAAU,UAAU;AAC7B,iBAAO,KAAK,GAAG,QAAQ,MAAM,0BAA0B,UAAU,EAAE;AACnE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,WAAW;AACrC,YAAI,OAAO,UAAU,WAAW;AAC9B,iBAAO,KAAK,GAAG,QAAQ,MAAM,2BAA2B,UAAU,EAAE;AACpE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,UAAU;AACpC,YAAI,OAAO,UAAU,YAAY,MAAM,QAAQ,KAAK,GAAG;AACrD,iBAAO,KAAK,GAAG,QAAQ,MAAM,0BAA0B,UAAU,EAAE;AACnE;AAAA,QACF;AAGA,YAAIA,QAAO,YAAY,MAAM,QAAQA,QAAO,QAAQ,GAAG;AACrD,qBAAW,WAAWA,QAAO,UAAU;AACrC,gBAAI,EAAE,WAAW,QAAQ;AACvB,qBAAO,KAAK,GAAG,IAAI,IAAI,OAAO,6BAA6B;AAAA,YAC7D;AAAA,UACF;AAAA,QACF;AAGA,cAAM,iBAAiB,CAAC,aAAa,eAAe,WAAW;AAE/D,YAAIA,QAAO,yBAAyB,SAASA,QAAO,YAAY;AAC9D,gBAAM,eAAe,OAAO,KAAKA,QAAO,UAAU;AAClD,gBAAM,gBAAgBA,QAAO,YAAY,CAAC;AAC1C,gBAAM,kBAAkB,oBAAI,IAAI,CAAC,GAAG,cAAc,GAAG,aAAa,CAAC;AAGnE,qBAAW,OAAO,CAAC,GAAG,OAAO,KAAK,KAAK,GAAG,GAAG,OAAO,oBAAoB,KAAK,CAAC,GAAG;AAE/E,gBAAI,eAAe,SAAS,GAAG,GAAG;AAChC,qBAAO,KAAK,GAAG,IAAI,IAAI,GAAG,kCAAkC;AAC5D;AAAA,YACF;AAEA,gBAAI,CAAC,gBAAgB,IAAI,G
AAG,GAAG;AAC7B,qBAAO,KAAK,GAAG,IAAI,IAAI,GAAG,mCAAmC;AAAA,YAC/D;AAAA,UACF;AAAA,QACF,OAAO;AAEL,qBAAW,OAAO,gBAAgB;AAChC,gBAAI,OAAO,SAAS,OAAO,UAAU,eAAe,KAAK,OAAO,GAAG,GAAG;AACpE,qBAAO,KAAK,GAAG,IAAI,IAAI,GAAG,kCAAkC;AAAA,YAC9D;AAAA,UACF;AAAA,QACF;AAGA,YAAIA,QAAO,YAAY;AACrB,gBAAM,WAAW;AACjB,qBAAW,CAAC,UAAU,UAAU,KAAK,OAAO,QAAQA,QAAO,UAAU,GAAG;AACtE,gBAAI,YAAY,UAAU;AACxB,uBAAS,SAAS,QAAQ,GAAG,YAAY,OAAO,GAAG,IAAI,IAAI,QAAQ,KAAK,UAAU,QAAQ,CAAC;AAAA,YAC7F;AAAA,UACF;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,SAAS;AACnC,YAAI,CAAC,MAAM,QAAQ,KAAK,GAAG;AACzB,iBAAO,KAAK,GAAG,QAAQ,MAAM,yBAAyB,UAAU,EAAE;AAClE;AAAA,QACF;AAGA,YAAIA,QAAO,SAAS,CAAC,MAAM,QAAQA,QAAO,KAAK,GAAG;AAChD,gBAAM,aAAaA,QAAO;AAC1B,gBAAM,QAAQ,CAAC,MAAM,UAAU;AAC7B,qBAAS,MAAM,YAAY,GAAG,IAAI,IAAI,KAAK,KAAK,QAAQ,CAAC;AAAA,UAC3D,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,WAAS,MAAM,MAAM;AAErB,MAAI,OAAO,SAAS,GAAG;AACrB,UAAM,IAAI,MAAM;AAAA,EAA8B,OAAO,KAAK,IAAI,CAAC,EAAE;AAAA,EACnE;AAEA,SAAO;AACT;AAMO,IAAM,qBAAqB;AAAA,EAChC,SAAS,CAAC,UAAU,gBAAgB,eAAe,qBAAqB,kBAAkB;AAAA,EAC1F,YAAY,CAAC,cAAc,cAAc;AAAA,EACzC,OAAO,CAAC,UAAU,UAAU,mBAAmB,kBAAkB;AACnE;AAWO,SAAS,yBACd,UACA,eACA,uBACqB;AACrB,MAAI,CAAC,iBAAiB,OAAO,KAAK,aAAa,EAAE,WAAW,GAAG;AAC7D,WAAO;AAAA,EACT;AAEA,QAAM,WAAW,mBAAmB,QAAQ;AAC5C,QAAM,WAAqB,CAAC;AAG5B,aAAW,OAAO,UAAU;AAC1B,QAAI,OAAO,eAAe;AACxB,eAAS,KAAK,GAAG;AAAA,IACnB;AAAA,EACF;AAGA,MAAI,SAAS,SAAS,GAAG;AACvB,YAAQ;AAAA,MACN,uDAAuD,QAAQ,UAAU,SAAS,KAAK,IAAI,CAAC;AAAA,IAG9F;AAAA,EACF;AAGA,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAG;AAAA;AAAA,IAEH,GAAG,OAAO;AAAA,MACR,SAAS,IAAI,SAAO,CAAC,KAAK,sBAAsB,GAAG,CAAC,CAAC;AAAA,IACvD;AAAA,EACF;AACF;","names":["node","validStarters","schema"]}
1
+ {"version":3,"sources":["../../src/internal/validation-utils.ts"],"sourcesContent":["/**\n * Browser-safe validation utilities\n *\n * This module contains all validation code with ZERO Node.js dependencies.\n * It can be safely bundled for browser environments.\n */\n\n// Edge Runtime compatible - no AJV dependency\n\n// Re-export all types and constants from the validation section of index.ts\n// This file has NO fs imports and is completely browser-safe\n\n/** Page-centric IR */\nexport type BBox = { x: number; y: number; w: number; h: number };\nexport type IRLine = {\n text: string;\n bbox?: BBox;\n startChar?: number; // Character offset in full document text\n endChar?: number; // Character offset in full document text\n lineId?: string; // Unique line identifier (e.g., \"p1_l5\" for page 1, line 5)\n};\nexport type IRPage = {\n pageNumber?: number; // Explicit 1-indexed page number (for chunked documents)\n width: number;\n height: number;\n lines: IRLine[];\n markdown?: string; // Rich markdown preserving layout (tables, headers, lists)\n html?: string; // Rich HTML preserving layout (tables, headers, lists)\n extras?: Record<string, unknown>\n};\n\n/** Standard extras fields for DocumentIR */\nexport type DocumentIRExtras = {\n /** Total number of pages in the original document (for PDFs, DOCX, etc.) 
*/\n pageCount?: number;\n /** Cost in USD for processing this document */\n costUSD?: number;\n /** Provider-specific raw response */\n raw?: unknown;\n /** For chunked documents: which chunk this is (0-indexed) */\n chunkIndex?: number;\n /** For chunked documents: total number of chunks */\n totalChunks?: number;\n /** For chunked documents: page range [startPage, endPage] (1-indexed, inclusive) */\n pageRange?: [number, number];\n /** For Unsiloed: total semantic chunks (not traditional pages) */\n totalSemanticChunks?: number;\n /** Allow arbitrary additional fields */\n [key: string]: unknown;\n};\n\nexport type DocumentIR = {\n pages: IRPage[];\n extras?: DocumentIRExtras;\n};\n\n/** Provider identity for 3-layer hierarchy (provider/model/method) */\nimport type { ProviderIdentity } from '../provider-identity.js';\n\n/** Provider capability contracts */\nexport type OCRProvider = {\n /** Full 3-layer identity (provider/model/method) */\n identity?: ProviderIdentity;\n /** Canonical name in \"provider:model\" format */\n name: string;\n parseToIR: (input: { url?: string; base64?: string }) => Promise<DocumentIR>;\n};\n\n/** Multimodal input for VLM providers */\nexport type MultimodalInput = {\n text?: string;\n images?: Array<{ url?: string; base64?: string; mimeType: string }>;\n pdfs?: Array<{ url?: string; base64?: string; fileId?: string }>;\n /** Optional system prompt (text-only, prepended to conversation) */\n systemPrompt?: string;\n};\n\n/** Effort level type for reasoning configuration */\nexport type ReasoningEffort = 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';\n\n/** Reasoning configuration (normalized across providers) */\nexport type ReasoningConfig = {\n /** Effort level - normalized across providers (xhigh: 95%, high: 80%, medium: 50%, low: 20%, minimal: 10%, none: 0%) */\n effort?: ReasoningEffort;\n /** Direct token budget - used by Anthropic/Google/Qwen models */\n max_tokens?: number;\n /** Exclude reasoning tokens from 
response (only use for accuracy, not visible) */\n exclude?: boolean;\n /** Enable reasoning with default (medium) effort. Set to false to explicitly disable. */\n enabled?: boolean;\n};\n\n/** Base LLM provider (text-only) */\nexport type LLMProvider = {\n /** Full 3-layer identity (provider/model/method) */\n identity?: ProviderIdentity;\n /** Canonical name in \"provider:model\" format */\n name: string;\n completeJson: (input: { prompt: string; schema: object; max_tokens?: number; reasoning?: ReasoningConfig }) =>\n Promise<{ json: unknown; rawText?: string; costUSD?: number; inputTokens?: number; outputTokens?: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number }>;\n};\n\n/** Text completion response (for non-JSON outputs like JSX/code) */\nexport type TextResponse = {\n text: string;\n rawText?: string;\n inputTokens?: number;\n outputTokens?: number;\n costUSD?: number;\n};\n\n/** Vision-capable LLM provider */\nexport type VLMProvider = {\n /** Full 3-layer identity (provider/model/method) */\n identity?: ProviderIdentity;\n /** Canonical name in \"provider:model\" format */\n name: string;\n completeJson: (input: { prompt: string | MultimodalInput; schema: object; max_tokens?: number; reasoning?: ReasoningConfig }) =>\n Promise<{ json: unknown; rawText?: string; costUSD?: number; inputTokens?: number; outputTokens?: number; cacheCreationInputTokens?: number; cacheReadInputTokens?: number }>;\n /**\n * Complete a text prompt without JSON mode (optional).\n * Use this when you need raw text output (JSX, code, markdown, etc.)\n */\n completeText?: (input: { input: MultimodalInput; max_tokens?: number; reasoning?: ReasoningConfig }) =>\n Promise<TextResponse>;\n capabilities: {\n supportsImages: true;\n supportsPDFs: boolean;\n maxPDFPages?: number;\n };\n};\n\n/** Legacy alias for backward compatibility */\nexport type LLMJsonProvider = VLMProvider;\n\n// ============================================================================\n// 
Processing Options - Normalized types for provider-agnostic configuration
// ============================================================================

/**
 * Processing quality/speed tradeoff modes
 * Providers map their specific modes to these normalized values
 */
export type ProcessingMode = 'fast' | 'balanced' | 'high_accuracy';

/**
 * Page range specification for partial document processing
 * Allows processing a subset of pages for cost savings
 */
export type PageRangeOptions = {
  /** Process only the first N pages */
  maxPages?: number;
  /** Specific page range (0-indexed), e.g., "0,2-4,10" */
  pageRange?: string;
};

/**
 * Language hints for OCR processing
 */
export type LanguageOptions = {
  /** ISO language codes for OCR, e.g., ['en', 'de', 'fr'] */
  langs?: string[];
};

/**
 * Document segmentation result for splitting "stapled" PDFs
 * Returns page boundaries for each detected document type
 */
export type SegmentationResult = {
  segments: Array<{
    /** Document type name (e.g., 'invoice', 'contract') */
    name: string;
    /** Page indices (0-indexed) belonging to this segment */
    pages: number[];
    /** Confidence level of segmentation */
    confidence: 'high' | 'medium' | 'low';
  }>;
  metadata: {
    /** Total pages in the original document */
    totalPages: number;
    /** How segmentation was performed */
    segmentationMethod: 'auto' | 'schema' | 'manual';
  };
};

/**
 * Extracted image from a document
 * Represents figures, charts, or embedded images
 */
export type ExtractedImage = {
  /** Block ID or reference (provider-specific) */
  id: string;
  /** Page number where image appears (0-indexed) */
  pageNumber: number;
  /** Base64-encoded image data */
  base64: string;
  /** MIME type of the image */
  mimeType: string;
  /** Location on page (normalized 0-1 coordinates) */
  bbox?: NormalizedBBox;
  /** Caption text if detected */
  caption?: string;
};

/**
 * Extended OCR provider options (beyond basic parseToIR)
 * These options are normalized across different OCR providers
 */
export type OCRProviderOptions = PageRangeOptions & LanguageOptions & {
  /** Processing quality/speed tradeoff */
  mode?: ProcessingMode;
  /** Force OCR even on text-based PDFs */
  forceOCR?: boolean;
  /** Extract embedded images from document */
  extractImages?: boolean;
  /** Add page delimiters to output */
  paginate?: boolean;
  /** Remove and redo existing OCR */
  stripExistingOCR?: boolean;
};

/**
 * Output format options for LLM-based text fields
 * Controls how text content is formatted in the response
 */
export type OutputFormat = 'markdown' | 'html' | 'json' | 'text';

/**
 * Table format options for tabular data in responses
 */
export type TableFormat = 'markdown' | 'html' | 'csv';

/**
 * Chunking strategy options for document segmentation
 */
export type ChunkingStrategy = 'page' | 'section' | 'paragraph' | 'semantic';

/**
 * LLM-derived feature options
 * These features are implemented via prompting rather than native API support
 */
export type LLMDerivedOptions = {
  /** Format for text output in string fields */
  outputFormat?: OutputFormat;
  /** Format for tables within text fields */
  tableFormat?: TableFormat;
  /** Add page break markers (---) between pages */
  pageMarkers?: boolean;
  /** Include per-field confidence scores (attached to result, not in JSON) */
  includeConfidence?: boolean;
  /** Include source citations with bounding boxes (attached to result, not in JSON) */
  includeSources?: boolean;
  /** Include block type classification for each extracted element */
  includeBlockTypes?: boolean;
  /** Extract document headers (repeated content at top of pages) */
  extractHeaders?: boolean;
  /** Extract document footers (repeated content at bottom of pages) */
  extractFooters?: boolean;
  /** Document chunking strategy */
  chunkingStrategy?: ChunkingStrategy;
  /** Maximum chunk size in characters (when using chunking) */
  maxChunkSize?: number;
  /** Language hints for the document (e.g., ['English', 'German']) */
  languageHints?: string[];
};

/**
 * Extended VLM provider options for document extraction
 * These options are normalized across different VLM providers
 */
export type VLMProviderOptions = PageRangeOptions & LanguageOptions & LLMDerivedOptions & {
  /** Processing quality/speed tradeoff */
  mode?: ProcessingMode;
  /** Force OCR even on text-based PDFs */
  forceOCR?: boolean;
  /** Additional prompt/instructions for extraction */
  prompt?: string;
  /** Schema for auto-segmentation of multi-document PDFs */
  segmentationSchema?: object;
};

/**
 * Provider citation from source document
 * Maps extracted fields to their source locations
 */
export type ProviderCitation = {
  /** JSON path to extracted field (e.g., "invoice.total") */
  fieldPath: string;
  /** Source block IDs from the provider */
  blockIds: string[];
  /** Confidence score (0-1) */
  confidence?: number;
};

/** Consensus configuration for any node */
export type ConsensusConfig = {
  runs: number; // Number of times to run
  strategy?: 'majority' | 'unanimous'; // Default: majority
  onTie?: 'random' | 'fail' | 'retry'; // Default: random
  parallel?: boolean; // Run consensus in parallel (default: true)
  includeMetadata?: boolean; // Include detailed consensus metadata (default: false)
  level?: 'object' | 'field'; // Voting level: object (default) or per-field
  retryOnFailure?: boolean; // Retry failed/empty runs (default: false)
  maxRetries?: number; // Max retries per run (default: 1)
};

/** Individual consensus run result */
export type ConsensusRunResult<T = any> = {
  runIndex: number;
  value: T | null; // null when the run failed or produced no result
  success: boolean;
  error?: string;
  startTime: number; // NOTE(review): units not stated here — presumably epoch ms like StepMetric.startMs; confirm
  endTime: number;
  duration: number;
  attempts?: number; // Number of attempts (1 = no retry, >1 = retried)
};

/** Field-level voting details */
export type 
FieldVotingDetails = {
  fieldPath: string;
  values: Array<{
    /** The actual value for this voting option - can be any JSON-serializable type */
    value: unknown;
    count: number;
    percentage: number;
    runIndices: number[];
  }>;
  /** The winning value from consensus - can be any JSON-serializable type */
  winner: unknown;
  isTie: boolean;
  agreementScore: number; // 0.0 to 1.0
};

/** Consensus execution metadata */
export type ConsensusMetadata<T = unknown> = {
  totalRuns: number;
  successfulRuns: number;
  failedRuns: number;
  strategy: 'majority' | 'unanimous';
  selectedResult: T;
  selectedRunIndex: number;
  confidence: 'high' | 'medium' | 'low';
  overallAgreement: number; // 0.0 to 1.0
  fieldAgreement: Record<string, number>; // Field path -> agreement score
  votingDetails: FieldVotingDetails[];
  runs: ConsensusRunResult<T>[];
  executionTime: number;
  wasRetry: boolean;
  tieBreakerUsed?: 'random' | 'retry' | 'fail' | null;
  // New fields for enhanced consensus features
  votingLevel?: 'object' | 'field';
  isSyntheticResult?: boolean; // true if field-level voting composed a new object
  totalRetries?: number; // Total retry attempts across all runs
  emptyResultsFiltered?: number; // Number of empty results filtered out
};

/** Output with consensus metadata wrapper */
export type OutputWithConsensus<T = unknown> = {
  data: T;
  consensus: ConsensusMetadata<T>;
};

/**
 * Conditional type helper for consensus metadata.
 * Resolves to OutputWithConsensus<T> only when Config carries the literal
 * `includeMetadata: true`; otherwise the bare T is returned unchanged.
 */
export type MaybeWithConsensusMetadata<T, Config> = Config extends { includeMetadata: true }
  ? OutputWithConsensus<T>
  : T;

/** Flow input/output types */
export type FlowInput = {
  url?: string;
  base64?: string; // NOTE(review): url/base64 are both optional; at-least-one is not enforced by this type — confirm upstream validation
  pages?: number[]; // For post-split runs
  bounds?: BBox; // For post-split runs
};

/**
 * All MIME types supported by at least one provider.
 * This is the union of all provider capabilities.
 */
export type SupportedMimeType =
  // PDF
  | 'application/pdf'
  // Images - common
  | 'image/jpeg'
  | 'image/png'
  | 'image/gif'
  | 'image/webp'
  // Images - additional
  | 'image/tiff'
  | 'image/bmp'
  | 'image/heic'
  | 'image/heif'
  | 'image/vnd.adobe.photoshop' // PSD
  // Microsoft Office
  | 'application/msword' // DOC
  | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' // DOCX
  | 'application/vnd.ms-excel' // XLS
  | 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' // XLSX
  | 'application/vnd.ms-powerpoint' // PPT
  | 'application/vnd.openxmlformats-officedocument.presentationml.presentation' // PPTX
  // OpenDocument formats (Datalab)
  | 'application/vnd.oasis.opendocument.text' // ODT
  | 'application/vnd.oasis.opendocument.spreadsheet' // ODS
  | 'application/vnd.oasis.opendocument.presentation' // ODP
  // Text formats
  | 'text/plain' // TXT
  | 'text/csv' // CSV
  | 'text/html' // HTML
  | 'application/rtf' // RTF
  // Other
  | 'application/epub+zip'; // EPUB

/**
 * Flow-level input validation configuration
 *
 * Allows specifying accepted MIME types for early validation
 * before flow execution begins.
 */
export type FlowInputValidation = {
  /**
   * List of accepted MIME types.
   * If specified, input must match one of these types or validation fails.
   * If empty/undefined, all supported types are accepted.
   */
  acceptedFormats?: SupportedMimeType[];
  /**
   * Whether to throw on validation failure.
   * @default true
   */
  throwOnInvalid?: boolean;
};

export type FlowResult<T = any> = {
  output: T;
  metrics: StepMetric[];
  aggregated: 
AggregatedMetrics;
  artifacts: Record<string, any>;
  error?: Error;
};

export type SplitDocument = {
  type: string; // 'invoice', 'bunker', 'other'
  schema?: object; // Matched schema (optional - only present when schemas provided)
  pages: number[]; // Page numbers
  bounds?: BBox; // Bounding box
  input: FlowInput; // Original input for re-processing
};

/** Citation and source tracking types */

/** Citation source type indicating data provenance */
export type CitationSourceType = 'ocr' | 'vlm' | 'llm' | 'inferred';

/** Normalized bounding box (0-1 coordinates relative to page dimensions) */
export type NormalizedBBox = {
  x: number; // Left edge (0-1)
  y: number; // Top edge (0-1)
  w: number; // Width (0-1)
  h: number; // Height (0-1)
};

/** Line-level citation reference with spatial information */
export type LineCitation = {
  pageNumber: number; // 1-indexed page number
  lineIndex: number; // 0-indexed line position on page
  bbox?: NormalizedBBox; // Normalized bounding box (0-1 coordinates)
  text: string; // Text snippet for verification
  confidence?: number; // 0-1 confidence score
  sourceType: CitationSourceType;
  startChar?: number; // Character offset in full document
  endChar?: number; // Character offset in full document
};

/** Field-level citation mapping extracted values to sources */
export type FieldCitation = {
  fieldPath: string; // JSON path to field (e.g., "invoice.lineItems[0].amount")
  /** Extracted value - can be any JSON-serializable type */
  value: unknown;
  citations: LineCitation[]; // Source lines supporting this value
  reasoning?: string; // LLM explanation for inferred values
  confidence?: number; // Overall confidence (0-1)
};

/** Citation configuration for nodes */
export type CitationConfig = {
  enabled: boolean; // Enable citation tracking (default: false)
  includeTextSnippets?: boolean; // Include text snippets in citations (default: true)
  includeBoundingBoxes?: boolean; // Include bboxes when available (default: true)
  includeConfidence?: boolean; // Include confidence scores (default: true)
  minConfidence?: number; // Minimum confidence threshold (0-1, default: 0.0)
  detectInferred?: boolean; // Use LLM to detect inferred values (default: false)
};

/** Extended output with citations */
export type OutputWithCitations<T> = {
  data: T; // Extracted data
  citations: FieldCitation[]; // Field-level citations
  metadata: {
    totalPages?: number; // Total pages processed
    sourceType: CitationSourceType; // Primary source type
    hasInferredValues?: boolean; // Whether any values were inferred
    processingTime?: number; // Processing time in ms
  };
};

/** Node configuration types */
export type ParseNodeConfig = {
  provider: OCRProvider | VLMProvider;
  consensus?: ConsensusConfig;
  chunked?: {
    maxPagesPerChunk: number;
    overlap?: number; // Default: 0
    parallel?: boolean; // Default: true - process chunks in parallel for speed
  };
  format?: 'text' | 'markdown' | 'html'; // Output format: text (default, line-level citations), markdown/html (page-level citations, preserves structure)
  describeFigures?: boolean; // When true, VLM providers describe charts/figures/diagrams in text. Default: false
  includeImages?: boolean; // When true, providers extract images (figures/tables/charts) from documents. Supported by Surya/Marker. Default: false
  additionalPrompt?: string; // Custom OCR guidance or instructions
  citations?: CitationConfig; // Citation tracking config

  // NEW: Prompt asset support
  promptRef?: string; // Reference to prompt asset (e.g., "default-parse@1.0.0")
  /**
   * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).
   *
   * Auto-injected variables (no need to pass manually):
   * - format: From config.format
   * - schema: Constructed schema (if applicable)
   * - describeFigures: From config.describeFigures
   * - citationsEnabled: From config.citations?.enabled
   *
   * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).
   */
  promptVariables?: Record<string, any>;

  /**
   * Additional instructions to append to the default prompt.
   * This provides a simple way to customize the prompt without creating a custom prompt asset.
   * The instructions will be added after the main prompt content.
   *
   * @example
   * ```typescript
   * parse({
   *   provider: vlmProvider,
   *   format: 'markdown',
   *   additionalInstructions: "Pay special attention to preserving table structures and footnotes."
   * })
   * ```
   */
  additionalInstructions?: string;

  /**
   * When using promptRef, automatically inject format instruction if {{format}} placeholder is not present.
   * This ensures the UI format selection always takes effect.
   * Default: true
   *
   * @example
   * ```typescript
   * parse({
   *   provider: vlmProvider,
   *   format: 'markdown',
   *   promptRef: 'my-custom-prompt',
   *   autoInjectFormat: false // Disable auto-injection
   * })
   * ```
   */
  autoInjectFormat?: boolean;

  /**
   * Enable extended reasoning/thinking for VLM providers that support it.
   * Only applies when using a VLM provider (not OCR).
   *
   * @example
   * ```typescript
   * parse({
   *   provider: vlmProvider,
   *   format: 'markdown',
   *   reasoning: { enabled: true, effort: 'medium' }
   * })
   * ```
   */
  reasoning?: {
    effort?: 
'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
    max_tokens?: number;
    exclude?: boolean;
    enabled?: boolean;
  };

  /**
   * Maximum tokens for the LLM response.
   * If not specified, the provider's default will be used (typically 4096).
   */
  maxTokens?: number;
};

export type SplitNodeConfig = {
  provider: VLMProvider;

  /**
   * Simple category definitions (recommended).
   * Each category can be a string or an object with name and optional description.
   *
   * @example
   * ```typescript
   * split({
   *   provider: vlmProvider,
   *   categories: [
   *     'invoice',
   *     { name: 'cover_letter', description: 'Cover letter or transmittal pages' },
   *     { name: 'contract', description: 'Legal agreements with terms and signatures' }
   *   ]
   * })
   * ```
   */
  categories?: (string | { name: string; description?: string })[];

  /**
   * @deprecated Use `categories` instead. Full schema definitions for backwards compatibility.
   * Schema names are used as category names, but schemas are no longer attached to output.
   */
  schemas?: Record<string, object>; // { invoice: Schema, bunker: Schema }

  includeOther?: boolean; // Default: true
  consensus?: ConsensusConfig;
  schemaRef?: string; // Reference to schema asset (e.g., "document-split@2.0.0")

  /**
   * Enable extended reasoning/thinking for providers that support it.
   *
   * @example
   * ```typescript
   * split({
   *   provider: vlmProvider,
   *   categories: ['invoice', 'receipt', 'contract'],
   *   reasoning: { enabled: true, effort: 'high' }
   * })
   * ```
   */
  reasoning?: {
    effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
    max_tokens?: number;
    exclude?: boolean;
    enabled?: boolean;
  };

  /**
   * Maximum tokens for the LLM response.
   * If not specified, the provider's default will be used (typically 4096).
   */
  maxTokens?: number;
};

export type CategorizeNodeConfig = {
  provider: LLMProvider | VLMProvider;
  categories: (string | { name: string; description?: string })[]
  consensus?: ConsensusConfig;
  additionalPrompt?: string; // Custom categorization instructions

  // NEW: Prompt asset support
  promptRef?: string; // Reference to prompt asset (e.g., "default-categorize@1.0.0")
  /**
   * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).
   *
   * Auto-injected variables (no need to pass manually):
   * - categories: From config.categories
   * - documentText: Computed from DocumentIR input
   *
   * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).
   */
  promptVariables?: Record<string, any>;

  /**
   * Additional instructions to append to the default prompt.
   * This provides a simple way to customize the prompt without creating a custom prompt asset.
   * The instructions will be added after the main prompt content.
   *
   * @example
   * ```typescript
   * categorize({
   *   provider: llmProvider,
   *   categories: ['invoice', 'receipt', 'contract'],
   *   additionalInstructions: "Consider the document's header and footer when categorizing."
   * })
   * ```
   */
  additionalInstructions?: string;

  /**
   * Enable extended reasoning/thinking for providers that support it.
   *
   * @example
   * ```typescript
   * categorize({
   *   provider: vlmProvider,
   *   categories: ['invoice', 'receipt', 'contract'],
   *   reasoning: { enabled: true, effort: 'low' }
   * })
   * ```
   */
  reasoning?: {
    effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
    max_tokens?: number;
    exclude?: boolean;
    enabled?: boolean;
  };

  /**
   * Maximum tokens for the LLM response.
   * If not specified, the provider's default will be used (typically 4096).
   */
  maxTokens?: number;
};

/**
 * Controls what inputs the extract node ingests.
 * - 'auto': Automatically detect input type and route appropriately (default)
 * - 'ir': Only DocumentIR from previous step (text-only extraction)
 * - 'ir+source': Both DocumentIR AND source document (multimodal with parsed text)
 * - 'source': Only raw source document (direct VLM extraction, no parsed text)
 *
 * Auto mode logic:
 * - If DocumentIR available AND source available AND VLM provider -> 'ir+source'
 * - If only DocumentIR available -> 'ir'
 * - If only FlowInput available AND VLM provider -> 'source'
 */
export type ExtractInputMode = 'auto' | 'ir' | 'ir+source' | 'source';

export type ExtractNodeConfig<T = any> = {
  provider: LLMProvider | VLMProvider;
  schema: object | EnhancedExtractionSchema<T> | { ref: string }; // Accept plain, enhanced, or reference
  consensus?: ConsensusConfig;
  reasoning?: {
    effort?: 'xhigh' | 'high' | 'medium' | 'low' | 'minimal' | 'none';
    max_tokens?: number;
    exclude?: boolean;
    enabled?: boolean;
  };
  additionalPrompt?: string; // Custom extraction instructions (appended after schema)
  citations?: CitationConfig; // Citation tracking config

  // NEW: Prompt asset support
  promptRef?: string; // Reference to prompt asset (e.g., "default-extraction@1.0.0")
  /**
   * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).
   *
   * Auto-injected variables (no need to pass manually):
   * - schema: From config.schema
   * - documentText: Computed from DocumentIR or FlowInput
   * - schemaTitle: From schema.title or default "the provided schema"
   * - schemaDescription: From schema.description or empty string
   * - structuredFormat: Generated formatting instructions (for markdown/html)
   *
   * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).
   */
  promptVariables?: Record<string, any>;

  /**
   * Additional instructions to append to the default prompt.
   * This provides a simple way to customize the prompt without creating a custom prompt asset.
   * The instructions will be added after the main prompt content.
   *
   * @example
   * ```typescript
   * extract({
   *   provider: llmProvider,
   *   schema: mySchema,
   * 
additionalInstructions: "Be strict with date formats. Use YYYY-MM-DD format only."
   * })
   * ```
   */
  additionalInstructions?: string;

  /**
   * Controls what inputs the extract node ingests.
   * - 'auto': Automatically detect input type and route appropriately (default)
   * - 'ir': Only DocumentIR from previous step (text-only extraction)
   * - 'ir+source': Both DocumentIR AND source document (multimodal with parsed text)
   * - 'source': Only raw source document (direct VLM extraction, no parsed text)
   * @default 'auto'
   */
  inputMode?: ExtractInputMode;

  /**
   * In split/forEach contexts, use the original unsplit document instead of the segment.
   * Only applies when inputMode includes source ('ir+source' or 'source').
   * @default false (uses split segment source)
   */
  useOriginalSource?: boolean;

  /**
   * When auto mode has both IR and source available with VLM provider:
   * - true: use 'ir+source' for maximum context (hybrid multimodal)
   * - false: use 'ir' for text-only extraction (lower cost)
   * Only applies when inputMode='auto'.
   * @default true
   */
  preferVisual?: boolean;

  /**
   * Maximum tokens for the LLM response.
   * If not specified, the provider's default will be used (typically 4096).
   */
  maxTokens?: number;
};

/** Chunk output structure */
export type ChunkMetadata = {
  // Core content
  content: string;
  id: string; // Unique chunk identifier

  // Position metadata
  index: number; // Chunk position in sequence
  startChar: number;
  endChar: number;

  // Document context
  pageNumbers: number[]; // Pages this chunk spans
  section?: string; // Section/chapter title
  headers?: string[]; // Hierarchy of headers above this chunk

  // Chunking metadata
  strategy: string; // Which strategy created this chunk
  tokenCount?: number; // For LLM context planning
  wordCount: number;
  charCount: number;
};

export type ChunkOutput = {
  chunks: ChunkMetadata[];
  totalChunks: number;
  averageChunkSize: number;
  sourceMetadata?: {
    providerType?: string; // 'ocr' | 'vlm' - original provider type
  };
  sourceDocument?: DocumentIR; // Original DocumentIR for citation mapping
};

export type ChunkNodeConfig = {
  strategy: 'recursive' | 'section' | 'page' | 'fixed';
  maxSize?: number; // Max characters per chunk (recursive, section)
  minSize?: number; // Min characters per chunk (default: 100)
  overlap?: number; // Character overlap between chunks (default: 0)
  separators?: string[]; // Hierarchical separators (recursive)
  pagesPerChunk?: number; // Pages per chunk (page strategy)
  combineShortPages?: boolean; // Combine short pages (page strategy)
  minPageContent?: number; // Min content length to keep page (page strategy)
  size?: number; // Fixed size for fixed strategy
  unit?: 'tokens' | 'characters'; // Unit for fixed strategy
};

export type CombineNodeConfig = {
  strategy: 'merge' | 'concatenate' | 'first' | 'last';
};

export type OutputNodeConfig = {
  source?: string | string[];
  transform?: 'first' | 'last' | 'merge' | 'pick' | 'custom';
  fields?: string[];
  name?: string;
  /**
   * Custom transform function for 'custom' transform mode.
   * @param inputs - The input value(s) from the source step(s)
   * @param artifacts - All artifacts from the flow execution
   * @returns The transformed output value
   */
  customTransform?: (inputs: unknown | unknown[], artifacts: Record<string, unknown>) => unknown;
};

/** Enhanced extraction schema with examples and guidance */
export type EnhancedExtractionSchema<T = unknown> = {
  // Core schema (JSON Schema or Zod schema)
  schema: object;

  // Optional extraction enhancements
  examples?: Array<{
    description: string; // Description of this example
    input: string; // Sample input text
    output: T; // Expected output matching schema
  }>;

  extractionRules?: string; // Extraction guidelines (e.g., "Focus on tables in appendix")
  contextPrompt?: string; // Document context (e.g., "This is a legal document")
  hints?: string[]; // Additional hints for the extractor
};

/** Node & runner */
export type StepMetric = {
  step: string;
  configStepId?: string; // Flow-level step ID for config lookups (schemaRef, promptRef)
  startMs: number; // Absolute timestamp when step started (Date.now())
  provider?: string;
  model?: string;
  ms: number; // Total duration; for wrappers with rollup=true, includes child work
  costUSD?: number;
  inputTokens?: number;
  outputTokens?: number;
  cacheCreationInputTokens?: number;
  cacheReadInputTokens?: number;
  attemptNumber?: number; // Retry attempt number (1 = first attempt, 2+ = retries)
  metadata?: {
    kind?: 'leaf' | 'wrapper' | 'prep'; // 'leaf' = actual LLM call, 'wrapper' = composite overhead, 'prep' = preparation step
    rollup?: boolean; // True if ms includes child work (for wrappers with children)
    overheadMs?: number; // Pure overhead time excluding child work (for wrappers with children)
    /** Additional metadata fields */
    [key: string]: string | number | boolean | undefined;
  };
};

/** Aggregated metrics for multi-step flows */
export interface AggregatedMetrics {
  totalDurationMs: number;
  totalCostUSD: number;
  totalInputTokens: number;
  totalOutputTokens: number;
  totalCacheCreationTokens: number;
  totalCacheReadTokens: number;
  stepCount: number;
  byProvider: Record<string, {
    costUSD: number;
    inputTokens: number;
    outputTokens: number;
    callCount: number;
  }>;
}

/**
 * Aggregate metrics from multiple steps
 * @param metrics - Array of step metrics
 * @returns Aggregated totals and per-provider breakdowns
 */
export function aggregateMetrics(metrics: StepMetric[]): AggregatedMetrics {
  // Per-provider rollup; the same object is referenced from the reduce seed
  // below, so it fills in during the single pass over `metrics`.
  const byProvider: Record<string, {
    costUSD: number;
    inputTokens: number;
    outputTokens: number;
    callCount: number;
  }> = {};

  // Single pass: grand totals accumulate on `acc`, per-provider totals in `byProvider`.
  // Optional numeric fields default to 0 via `|| 0`.
  const result = metrics.reduce((acc, m) => {
    acc.totalDurationMs += m.ms;
    acc.totalCostUSD += m.costUSD || 0;
    acc.totalInputTokens += m.inputTokens || 0;
    acc.totalOutputTokens += m.outputTokens || 0;
    acc.totalCacheCreationTokens += m.cacheCreationInputTokens || 0;
    acc.totalCacheReadTokens += m.cacheReadInputTokens || 0;

    // Group by provider (steps without a provider are only counted in totals)
    if (m.provider) {
      if (!byProvider[m.provider]) {
        byProvider[m.provider] = { costUSD: 0, inputTokens: 0, outputTokens: 0, callCount: 0 };
      }
      byProvider[m.provider].costUSD += m.costUSD || 0;
      byProvider[m.provider].inputTokens += m.inputTokens || 0;
      byProvider[m.provider].outputTokens += m.outputTokens || 0;
      byProvider[m.provider].callCount += 1;
    }

    return acc;
  }, {
    totalDurationMs: 0,
    totalCostUSD: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheCreationTokens: 0,
    totalCacheReadTokens: 0,
    stepCount: metrics.length,
    byProvider
  });

  return result;
}

/**
 * Execution context passed to conditional functions and trigger nodes
 * Provides access to artifacts and metrics from all previous steps
 */
export interface FlowContext {
  /** Outputs from all completed steps, indexed by step ID */
  artifacts: Record<string, any>;
  /** Performance metrics from all completed steps */
  metrics: StepMetric[];
  /** Call stack for tracking nested flow execution (for circular dependency detection) */
  callStack?: string[];
  /** Maximum nesting depth for flow triggers (default: 10) */
  maxDepth?: number;
}

/**
 * W3C Trace Context for distributed tracing.
 * Compatible with observability module's TraceContext.
 */
export interface TraceContextLite {
  traceId: string;
  spanId: string;
  parentSpanId?: string;
  traceFlags: number; // W3C trace flags (0x01 = sampled), required for compatibility
  traceState?: string;
}

/**
 * Observability context passed to node executions.
 * Uses 'any' for config and traceContext to avoid circular imports and
 * maintain compatibility with the full observability types.
 */
export type NodeObservabilityContext = {
  /** Observability 
configuration - full type in observability module */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  config?: any;
  flowId?: string;
  executionId?: string;
  stepId?: string;
  stepIndex?: number;
  /** W3C Trace Context - compatible with TraceContext from observability module */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  traceContext?: any;
  metadata?: Record<string, unknown>;
};

export type NodeCtx = {
  stepId?: string; // Flow-level step ID for metrics tracking
  artifacts: Record<string, unknown>;
  emit: (key: string, value: unknown) => void;
  metrics: { push: (m: StepMetric) => void };
  /** Observability context for hooks (optional) */
  observability?: NodeObservabilityContext;
};

/** Node type metadata for runtime validation */
export type NodeTypeInfo = {
  /** Input types this node accepts (e.g., ['FlowInput', 'DocumentIR']) */
  inputTypes: string[];
  /**
   * Output type this node produces - can be string or function for config-dependent types.
   * When a function, it receives the node's specific config and returns the output type string.
   * Uses 'any' parameter to allow nodes to use their specific config types.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  outputType: string | ((config: any) => string);
  /** Provider types this node requires (if any) */
  requiresProvider?: ('OCR' | 'VLM' | 'LLM')[];
  /** Whether this node can accept array input */
  acceptsArray?: boolean;
  /**
   * Whether this node always outputs an array (or function for config-dependent).
   * Uses 'any' parameter to allow nodes to use their specific config types.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  outputsArray?: boolean | ((config: any) => boolean);
  /** Human-readable description of what this node does */
  description?: string;
};

export type NodeDef<I, O> = {
  key: string;
  run: (input: I, ctx: NodeCtx) => Promise<O>;
  /** Optional type metadata for validation */
  __meta?: NodeTypeInfo;
};

/** Build a NodeDef from a key and a run function (no __meta attached). */
export const node = <I, O>(key: string, run: NodeDef<I, O>["run"]): NodeDef<I, O> => ({ key, run });

/**
 * Run a list of nodes sequentially: each step's output becomes the next step's
 * input, and is also recorded in `artifacts` under the step's key.
 * Returns the final output together with all artifacts and collected metrics.
 */
export async function runPipeline(
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  steps: NodeDef<any, any>[],
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  input: any,
  observabilityContext?: NodeObservabilityContext,
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  flowArtifacts?: Record<string, any>
) {
  // Merge flow artifacts with local (flow artifacts as read-only base for source access)
  // Note: shallow copy, so the caller's flowArtifacts object itself is never mutated.
  const artifacts: Record<string, unknown> = flowArtifacts ? { ...flowArtifacts } : {};
  const metrics: StepMetric[] = [];
  const ctx: NodeCtx = {
    stepId: observabilityContext?.stepId,
    artifacts,
    emit: (k, v) => { artifacts[k] = v; },
    metrics: { push: (m) => metrics.push(m) },
    observability: observabilityContext
  };
  let acc = input;
  for (const s of steps) {
    acc = await s.run(acc, ctx);
    ctx.emit(s.key, acc);
  }
  return { output: acc, artifacts, metrics };
}

/**
 * Flow execution error with step context
 *
 * Thrown when a flow step fails during execution. 
Includes:\n * - Which step failed (ID, index, type)\n * - Which steps completed successfully\n * - Partial artifacts from completed steps (for debugging)\n * - The original error that caused the failure\n *\n * This makes debugging flow failures much easier by showing exactly where the error occurred\n * and what data was produced before the failure.\n *\n * @example\n * ```typescript\n * try {\n * await flow.run(input);\n * } catch (error) {\n * if (error instanceof FlowExecutionError) {\n * console.error(`Failed at step ${error.failedStepIndex}: ${error.failedStepType}`);\n * console.error(`Step ID: ${error.failedStep}`);\n * console.error(`Completed: ${error.completedSteps.join(', ')}`);\n * console.error(`Original error: ${error.originalError.message}`);\n *\n * // Access partial results from completed steps\n * if (error.partialArtifacts?.qualify) {\n * console.log('Quality assessment completed:', error.partialArtifacts.qualify);\n * }\n * }\n * }\n * ```\n */\n\n/**\n * Extracts a human-readable error message from potentially JSON error responses.\n *\n * Handles common API error formats:\n * - { \"detail\": \"...\" } (Surya-style)\n * - { \"error\": { \"message\": \"...\" } } (OpenAI, Anthropic)\n * - { \"error\": \"...\" } (Simple format)\n * - { \"message\": \"...\" } (Direct format)\n * - Plain text (returned as-is)\n *\n * @param errorText - The error text which may contain JSON\n * @returns A human-readable error message\n */\nexport function extractErrorMessage(errorText: string): string {\n // If it's short or doesn't look like JSON, return as-is\n if (errorText.length < 10 || !errorText.trim().startsWith('{')) {\n return errorText;\n }\n\n try {\n const parsed = JSON.parse(errorText);\n\n // Surya-style: { \"detail\": \"...\" }\n if (parsed.detail) {\n return parsed.detail;\n }\n\n // OpenAI/Anthropic style: { error: { message: \"...\" } }\n if (parsed.error?.message) {\n return parsed.error.message;\n }\n\n // Simple style: { error: \"...\" }\n if 
(typeof parsed.error === 'string') {\n return parsed.error;\n }\n\n // Direct style: { message: \"...\" }\n if (parsed.message) {\n return parsed.message;\n }\n\n // Google style: { error: { status: \"...\", message: \"...\" } }\n if (parsed.error?.status && parsed.error?.message) {\n return `${parsed.error.status}: ${parsed.error.message}`;\n }\n\n // Fallback: return original but truncated if very long\n return errorText.length > 200\n ? errorText.substring(0, 200) + '...'\n : errorText;\n } catch {\n // Not valid JSON, return as-is (truncated if needed)\n return errorText.length > 500\n ? errorText.substring(0, 500) + '...'\n : errorText;\n }\n}\n\n/**\n * Represents a step location in a flow hierarchy.\n * Used to track the execution path through nested flows.\n */\nexport interface FlowStepLocation {\n /** Step ID */\n stepId: string;\n /** Step index within this flow (0-based) */\n stepIndex: number;\n /** Step type (e.g., 'parse', 'conditional', 'forEach') */\n stepType: string;\n /** Branch name if within a conditional (e.g., \"Invoice\", \"Receipt\") */\n branch?: string;\n /** Item index if within a forEach iteration */\n itemIndex?: number;\n}\n\nexport class FlowExecutionError extends Error {\n constructor(\n message: string,\n /** The ID of the step that failed (e.g., 'parse_node123') */\n public readonly failedStep: string,\n /** The index of the failed step in the flow (0-based) */\n public readonly failedStepIndex: number,\n /** The type of the failed step (e.g., 'parse', 'extract', 'step', 'conditional', 'forEach') */\n public readonly failedStepType: string,\n /** Array of step IDs that completed successfully before the failure */\n public readonly completedSteps: string[],\n /** The original error that caused the failure */\n public readonly originalError: Error,\n /** Partial artifacts from steps that completed before the failure */\n public readonly partialArtifacts?: Record<string, any>,\n /** Execution path through nested flows (for 
hierarchical context) */\n public readonly flowPath?: FlowStepLocation[],\n /** All completed steps aggregated across flow boundaries */\n public readonly allCompletedSteps?: string[]\n ) {\n super(message);\n this.name = 'FlowExecutionError';\n\n // Maintain proper stack trace for V8 engines\n if (Error.captureStackTrace) {\n Error.captureStackTrace(this, FlowExecutionError);\n }\n }\n\n /**\n * Returns a formatted string showing the execution path.\n * Example: \"parse → conditional:Invoice → extract\"\n */\n getFormattedPath(): string {\n if (!this.flowPath || this.flowPath.length === 0) {\n return this.failedStep;\n }\n\n return this.flowPath.map(loc => {\n let label = loc.stepId;\n if (loc.branch) {\n label += `:${loc.branch}`;\n }\n if (loc.itemIndex !== undefined) {\n label += `[${loc.itemIndex}]`;\n }\n return label;\n }).join(' → ');\n }\n\n /**\n * Returns the root cause error (innermost originalError).\n * Useful when errors are nested multiple levels deep.\n */\n getRootCause(): Error {\n let cause: Error = this.originalError;\n while (cause instanceof FlowExecutionError && cause.originalError) {\n cause = cause.originalError;\n }\n return cause;\n }\n}\n\n/**\n * Flow validation error for invalid node connections\n *\n * Thrown when building a flow with incompatible node connections.\n * Provides helpful error messages and suggestions for fixing the issue.\n *\n * @example\n * ```typescript\n * try {\n * const flow = createFlow()\n * .step('parse', parse({ provider: ocrProvider }))\n * .step('combine', combine()) // Invalid: combine needs array input\n * .build();\n * } catch (error) {\n * if (error instanceof FlowValidationError) {\n * console.error(error.message);\n * console.error('Reason:', error.reason);\n * console.log('Suggestions:', error.suggestions?.join('\\n'));\n * }\n * }\n * ```\n */\nexport class FlowValidationError extends Error {\n constructor(\n message: string,\n public readonly reason?: string,\n public readonly suggestions?: 
string[],\n public readonly sourceNode?: string,\n public readonly targetNode?: string,\n public readonly sourceOutputType?: string,\n public readonly targetInputTypes?: string[]\n ) {\n super(message);\n this.name = 'FlowValidationError';\n\n // Maintain proper stack trace for V8 engines\n if (Error.captureStackTrace) {\n Error.captureStackTrace(this, FlowValidationError);\n }\n }\n}\n\n/** Node type names for validation */\nexport type NodeTypeName = 'parse' | 'split' | 'categorize' | 'extract' | 'chunk' | 'combine' | 'trigger' | 'output';\n\n/** Compatibility rule for node connections */\nexport type CompatibilityRule = {\n valid: boolean;\n requiresForEach?: boolean;\n /** Indicates this connection cannot be fully validated at build-time and requires runtime type checking */\n requiresRuntimeValidation?: boolean;\n reason?: string;\n note?: string;\n};\n\n/**\n * Node Compatibility Matrix\n *\n * Defines which nodes can connect to which other nodes.\n * This is the single source of truth for node connection validation.\n *\n * Rules based on input/output type compatibility:\n * - parse: FlowInput → DocumentIR (or DocumentIR[] if chunked)\n * - split: FlowInput → SplitDocument[] (requires forEach)\n * - categorize: DocumentIR|FlowInput → {input, category}\n * - extract: DocumentIR|FlowInput|ChunkOutput → T (typed JSON)\n * - chunk: DocumentIR|DocumentIR[] → ChunkOutput\n * - combine: T[] → T|T[] (merges forEach results)\n * - trigger: any → TOutput (depends on child flow)\n *\n * Special behaviors:\n * - forEach auto-unwraps SplitDocument.input → FlowInput\n * - Conditional auto-unwraps {input, category} → input\n * - parse with chunked:true outputs DocumentIR[] instead of DocumentIR\n */\nexport const NODE_COMPATIBILITY_MATRIX: Record<NodeTypeName, Record<NodeTypeName, CompatibilityRule>> = {\n parse: {\n parse: {\n valid: false,\n reason: 'Cannot chain parse nodes. 
Parse is typically the starting node.'\n },\n split: {\n valid: false,\n reason: 'Split requires FlowInput, but parse outputs DocumentIR. Use split directly on input instead.',\n note: 'If you need to re-split after parsing, use trigger to invoke a child flow with FlowInput.'\n },\n categorize: {\n valid: true,\n note: 'categorize accepts DocumentIR and wraps it with {input, category}'\n },\n extract: {\n valid: true,\n note: 'extract accepts DocumentIR and produces typed JSON'\n },\n chunk: {\n valid: true,\n note: 'chunk accepts DocumentIR and produces ChunkOutput for RAG'\n },\n combine: {\n valid: false,\n reason: 'Parse outputs DocumentIR (single document), not an array. Combine requires array input from forEach.',\n note: 'Use parse with chunked:true to output DocumentIR[], then use combine.'\n },\n trigger: {\n valid: true,\n note: 'trigger accepts any input type'\n },\n output: {\n valid: true,\n note: 'output node can follow any node to select or transform results'\n }\n },\n split: {\n parse: {\n valid: true,\n requiresForEach: true,\n reason: 'Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input → FlowInput for parse.',\n note: 'Enable forEach on split node before connecting to parse.'\n },\n split: {\n valid: false,\n reason: 'Cannot nest split operations. Split nodes cannot appear in forEach itemFlow.'\n },\n categorize: {\n valid: true,\n requiresForEach: true,\n reason: 'Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input for categorize.'\n },\n extract: {\n valid: true,\n requiresForEach: true,\n reason: 'Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input for extract.'\n },\n chunk: {\n valid: false,\n reason: 'SplitDocument output is incompatible with Chunk input. 
Chunk expects DocumentIR or DocumentIR[].',\n note: 'Use parse in forEach after split to convert SplitDocument → DocumentIR, then chunk.'\n },\n combine: {\n valid: false,\n reason: 'Combine should appear AFTER forEach completes, not as a forEach itemFlow step.',\n note: 'Place combine after the forEach block to merge results.'\n },\n trigger: {\n valid: true,\n requiresForEach: true,\n reason: 'Split outputs SplitDocument[] which requires forEach for processing.',\n note: 'forEach auto-unwraps SplitDocument.input for child flow.'\n },\n output: {\n valid: true,\n note: 'output node can follow any node to select or transform results'\n }\n },\n categorize: {\n parse: {\n valid: true,\n note: 'categorize outputs {input, category}. Conditional can unwrap this or use directly.'\n },\n split: {\n valid: false,\n reason: 'Split requires FlowInput, but categorize outputs {input, category}.',\n note: 'Use conditional to unwrap and pass input field to split.'\n },\n categorize: {\n valid: true,\n note: 'Can chain categorize nodes for multi-level classification.'\n },\n extract: {\n valid: true,\n note: 'extract can process the categorized document.'\n },\n chunk: {\n valid: false,\n reason: 'Categorize wraps input as {input, category}. Chunk needs unwrapped DocumentIR.',\n note: 'Use conditional to unwrap input field before chunk.'\n },\n combine: {\n valid: false,\n reason: 'Categorize outputs single result {input, category}, not an array. Combine requires array input.'\n },\n trigger: {\n valid: true,\n note: 'trigger accepts any input type, including {input, category}'\n },\n output: {\n valid: true,\n note: 'output node can follow any node to select or transform results'\n }\n },\n extract: {\n parse: {\n valid: false,\n reason: 'Extract outputs typed JSON (terminal node). Cannot pipe JSON to parse.',\n note: 'Extract should be one of the last steps in a flow. 
Use combine if extracting in parallel.'\n },\n split: {\n valid: false,\n reason: 'Extract outputs typed JSON (terminal node). Cannot pipe JSON to split.'\n },\n categorize: {\n valid: false,\n reason: 'Extract outputs typed JSON (terminal node). Cannot pipe JSON to categorize.'\n },\n extract: {\n valid: false,\n reason: 'Extract outputs typed JSON (terminal node). Cannot chain extractions on JSON output.',\n note: 'If you need multi-step extraction, extract from DocumentIR/ChunkOutput in parallel, then combine.'\n },\n chunk: {\n valid: false,\n reason: 'Extract outputs typed JSON, not DocumentIR. Chunk expects DocumentIR input.'\n },\n combine: {\n valid: true,\n note: 'Use combine to merge parallel extraction results from forEach.'\n },\n trigger: {\n valid: true,\n note: 'trigger accepts any input type, including extracted JSON'\n },\n output: {\n valid: true,\n note: 'output node can follow any node to select or transform results'\n }\n },\n chunk: {\n parse: {\n valid: false,\n reason: 'Chunk outputs ChunkOutput (specialized type), not FlowInput. Parse expects FlowInput as input.'\n },\n split: {\n valid: false,\n reason: 'Chunk outputs ChunkOutput, incompatible with Split input (FlowInput).'\n },\n categorize: {\n valid: false,\n reason: 'Chunk outputs ChunkOutput, incompatible with Categorize input (DocumentIR|FlowInput).',\n note: 'Categorize before chunking, not after.'\n },\n extract: {\n valid: true,\n note: 'extract has special handling for ChunkOutput - extracts data from chunks.'\n },\n chunk: {\n valid: false,\n reason: 'Cannot chain chunk operations. Chunk only once per document.',\n note: 'Different chunking strategies should be applied to the original DocumentIR, not to chunks.'\n },\n combine: {\n valid: false,\n reason: 'Chunk outputs ChunkOutput (specialized type), not an array type. 
Combine expects T[].',\n note: 'Use chunk on individual documents in forEach, then extract, then combine extractions.'\n },\n trigger: {\n valid: true,\n note: 'trigger accepts any input type, including ChunkOutput'\n },\n output: {\n valid: true,\n note: 'output node can follow any node to select or transform results'\n }\n },\n combine: {\n parse: {\n valid: true,\n note: 'After combining, result can be re-parsed if needed.'\n },\n split: {\n valid: false,\n reason: 'Combine output depends on strategy. Split requires FlowInput.',\n note: 'Most combine strategies output merged objects/arrays, not FlowInput.'\n },\n categorize: {\n valid: true,\n note: 'Can categorize combined results.'\n },\n extract: {\n valid: true,\n note: 'Can extract from combined results.'\n },\n chunk: {\n valid: true,\n note: 'Can chunk combined DocumentIR. Only valid if combine output is DocumentIR or DocumentIR[].'\n },\n combine: {\n valid: false,\n reason: 'Cannot chain combine nodes. Combine once per forEach operation.'\n },\n trigger: {\n valid: true,\n note: 'trigger accepts any input type'\n },\n output: {\n valid: true,\n note: 'output node can follow any node to select or transform results'\n }\n },\n trigger: {\n parse: {\n valid: true,\n requiresRuntimeValidation: true,\n note: 'Valid only if child flow returns FlowInput. Type safety cannot be guaranteed at build-time.'\n },\n split: {\n valid: true,\n requiresRuntimeValidation: true,\n note: 'Valid only if child flow returns FlowInput. Type safety cannot be guaranteed at build-time.'\n },\n categorize: {\n valid: true,\n requiresRuntimeValidation: true,\n note: 'Valid only if child flow returns DocumentIR or FlowInput. Type safety cannot be guaranteed at build-time.'\n },\n extract: {\n valid: true,\n requiresRuntimeValidation: true,\n note: 'Valid only if child flow returns DocumentIR, FlowInput, or ChunkOutput. 
Type safety cannot be guaranteed at build-time.'\n },\n chunk: {\n valid: true,\n requiresRuntimeValidation: true,\n note: 'Valid only if child flow returns DocumentIR or DocumentIR[]. Type safety cannot be guaranteed at build-time.'\n },\n combine: {\n valid: true,\n requiresRuntimeValidation: true,\n note: 'Valid only if child flow returns an array (T[]). Type safety cannot be guaranteed at build-time.'\n },\n trigger: {\n valid: true,\n requiresRuntimeValidation: true,\n note: 'Can nest trigger nodes (with circular dependency detection and max depth limits). Output type depends on nested child flow.'\n },\n output: {\n valid: true,\n note: 'output node can follow any node to select or transform results'\n }\n },\n output: {\n parse: {\n valid: false,\n reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'\n },\n split: {\n valid: false,\n reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'\n },\n categorize: {\n valid: false,\n reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'\n },\n extract: {\n valid: false,\n reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'\n },\n chunk: {\n valid: false,\n reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'\n },\n combine: {\n valid: false,\n reason: 'Output is a terminal node that selects/transforms results. Cannot chain to other nodes.'\n },\n trigger: {\n valid: false,\n reason: 'Output is a terminal node that selects/transforms results. 
Cannot chain to other nodes.'\n },\n output: {\n valid: true,\n note: 'Multiple output nodes are allowed to create multiple named outputs from a flow.'\n }\n }\n};\n\n/**\n * Get node type name from a NodeDef\n * @param node - Node definition\n * @returns Node type name (e.g., 'parse', 'extract')\n */\nexport function getNodeTypeName(node: NodeDef<any, any>): NodeTypeName | null {\n if (!node || !node.key) return null;\n const key = node.key;\n\n // Check if it's a known node type\n const knownTypes: NodeTypeName[] = ['parse', 'split', 'categorize', 'extract', 'chunk', 'combine', 'trigger', 'output'];\n return knownTypes.includes(key as NodeTypeName) ? (key as NodeTypeName) : null;\n}\n\n/**\n * Get type information from a node\n * @param node - Node definition\n * @returns NodeTypeInfo if available\n */\nexport function getNodeTypeInfo(node: NodeDef<any, any>): NodeTypeInfo | null {\n return node.__meta || null;\n}\n\n/**\n * Get compatible target nodes for a given source node\n * @param sourceType - Source node type name\n * @param includeForEach - Include connections that require forEach\n * @returns Array of compatible target node types\n */\nexport function getCompatibleTargets(sourceType: NodeTypeName, includeForEach: boolean = false): NodeTypeName[] {\n const rules = NODE_COMPATIBILITY_MATRIX[sourceType];\n if (!rules) return [];\n\n return Object.entries(rules)\n .filter(([_, rule]) => {\n if (!rule.valid) return false;\n if (rule.requiresForEach && !includeForEach) return false;\n return true;\n })\n .map(([targetType, _]) => targetType as NodeTypeName);\n}\n\n/**\n * Get suggested connections when a connection is invalid\n * @param sourceType - Source node type name\n * @returns Array of suggestion strings\n */\nexport function getSuggestedConnections(sourceType: NodeTypeName): string[] {\n const compatibleTargets = getCompatibleTargets(sourceType, false);\n const forEachTargets = getCompatibleTargets(sourceType, true).filter(\n t => 
!compatibleTargets.includes(t)\n );\n\n if (compatibleTargets.length === 0 && forEachTargets.length === 0) {\n return [`${sourceType} has no standard outgoing connections (terminal node).`];\n }\n\n const suggestions: string[] = [];\n\n if (compatibleTargets.length > 0) {\n suggestions.push(`${sourceType} can connect to:`);\n compatibleTargets.forEach(target => {\n const rule = NODE_COMPATIBILITY_MATRIX[sourceType][target];\n suggestions.push(` • ${target}${rule.note ? ` - ${rule.note}` : ''}`);\n });\n }\n\n if (forEachTargets.length > 0) {\n suggestions.push(`${sourceType} can connect to (with forEach enabled):`);\n forEachTargets.forEach(target => {\n const rule = NODE_COMPATIBILITY_MATRIX[sourceType][target];\n suggestions.push(` • ${target}${rule.note ? ` - ${rule.note}` : ''}`);\n });\n }\n\n return suggestions;\n}\n\n/**\n * Validation result for node connections\n */\nexport type ValidationResult = {\n valid: boolean;\n reason?: string;\n suggestions?: string[];\n requiresForEach?: boolean;\n /** Warning message for connections that are valid but require runtime type checking */\n warning?: string;\n};\n\n/**\n * Validate if two node types can be connected\n * @param sourceType - Source node type name\n * @param targetType - Target node type name\n * @param forEachEnabled - Whether forEach is enabled on the source node\n * @returns Validation result with reason and suggestions\n */\nexport function validateNodeConnection(\n sourceType: NodeTypeName,\n targetType: NodeTypeName,\n forEachEnabled: boolean = false\n): ValidationResult {\n const rule = NODE_COMPATIBILITY_MATRIX[sourceType]?.[targetType];\n\n if (!rule) {\n return {\n valid: false,\n reason: `Unknown node type combination: ${sourceType} → ${targetType}`,\n suggestions: ['Ensure both nodes are valid node types.']\n };\n }\n\n if (!rule.valid) {\n return {\n valid: false,\n reason: rule.reason,\n suggestions: getSuggestedConnections(sourceType)\n };\n }\n\n // Check forEach requirement\n if 
(rule.requiresForEach && !forEachEnabled) {\n return {\n valid: false,\n reason: `Cannot connect ${sourceType} to ${targetType} without forEach enabled.`,\n suggestions: [\n `Enable forEach on the ${sourceType} node:`,\n ` 1. Click the ${sourceType} node`,\n ` 2. Enable \"forEach Processing\" in the configuration`,\n ` 3. Try connecting again`,\n '',\n ...getSuggestedConnections(sourceType)\n ],\n requiresForEach: true\n };\n }\n\n // Check if runtime validation is required\n if (rule.requiresRuntimeValidation) {\n return {\n valid: true,\n warning: `⚠️ ${sourceType} → ${targetType}: ${rule.note || 'Type compatibility depends on runtime values and cannot be validated at build-time.'}`\n };\n }\n\n return {\n valid: true\n };\n}\n\n/**\n * Get valid starting nodes for forEach itemFlow based on parent node type\n *\n * When a node outputs an array and uses forEach, the itemFlow receives individual\n * array items. This function returns which node types can accept those items.\n *\n * @param parentType - The node type that outputs the array (e.g., 'split', 'parse')\n * @returns Array of node types that can start the forEach itemFlow\n *\n * @example\n * ```typescript\n * // split outputs SplitDocument[], itemFlow gets SplitDocument\n * getValidForEachStarters('split') // ['parse', 'extract', 'categorize', 'trigger']\n *\n * // parse(chunked:true) outputs DocumentIR[], itemFlow gets DocumentIR\n * getValidForEachStarters('parse') // ['categorize', 'extract', 'chunk']\n * ```\n */\nexport function getValidForEachStarters(parentType: NodeTypeName): NodeTypeName[] {\n const rules = NODE_COMPATIBILITY_MATRIX[parentType];\n if (!rules) return [];\n\n // Get all targets that require forEach (these are valid itemFlow starters)\n return Object.entries(rules)\n .filter(([_, rule]) => rule.valid && rule.requiresForEach)\n .map(([targetType, _]) => targetType as NodeTypeName);\n}\n\n/**\n * Validate if a node type can start a forEach itemFlow for a given parent\n *\n * @param 
parentType - The node type that outputs the array (e.g., 'split')\n * @param starterType - The node type to validate as itemFlow starter\n * @returns ValidationResult with detailed error messages and suggestions\n *\n * @example\n * ```typescript\n * // Valid: split → forEach → parse\n * canStartForEachItemFlow('split', 'parse') // { valid: true }\n *\n * // Invalid: split → forEach → chunk\n * canStartForEachItemFlow('split', 'chunk')\n * // {\n * // valid: false,\n * // reason: 'chunk cannot start forEach itemFlow after split...',\n * // suggestions: ['Valid starters: parse, extract, categorize, trigger']\n * // }\n * ```\n */\nexport function canStartForEachItemFlow(\n parentType: NodeTypeName,\n starterType: NodeTypeName\n): ValidationResult {\n const rule = NODE_COMPATIBILITY_MATRIX[parentType]?.[starterType];\n\n if (!rule) {\n return {\n valid: false,\n reason: `Unknown node type combination: ${parentType} → forEach → ${starterType}`,\n suggestions: ['Ensure both nodes are valid node types.']\n };\n }\n\n // Check if this connection requires forEach (meaning it's valid in itemFlow)\n if (rule.valid && rule.requiresForEach) {\n return {\n valid: true\n };\n }\n\n // If the rule is invalid, provide error\n if (!rule.valid) {\n const validStarters = getValidForEachStarters(parentType);\n return {\n valid: false,\n reason: `${starterType} cannot start forEach itemFlow after ${parentType}. ${rule.reason || 'Type incompatible with forEach unwrapped item.'}`,\n suggestions: validStarters.length > 0\n ? [`Valid itemFlow starters for ${parentType}: ${validStarters.join(', ')}`]\n : [`${parentType} has no valid forEach itemFlow starters.`]\n };\n }\n\n // If valid but doesn't require forEach, it's not a valid itemFlow starter\n const validStarters = getValidForEachStarters(parentType);\n return {\n valid: false,\n reason: `${starterType} cannot start forEach itemFlow after ${parentType}. 
This connection does not require forEach, meaning it expects the full array, not individual items.`,\n suggestions: validStarters.length > 0\n ? [`Valid itemFlow starters for ${parentType}: ${validStarters.join(', ')}`]\n : [`${parentType} has no valid forEach itemFlow starters.`]\n };\n}\n\n/**\n * JSON Schema node structure for validation.\n * Represents a node in a JSON Schema definition.\n */\nexport interface JSONSchemaNode {\n type?: string | string[];\n properties?: Record<string, JSONSchemaNode>;\n items?: JSONSchemaNode | JSONSchemaNode[];\n required?: string[];\n enum?: (string | number | boolean | null)[];\n nullable?: boolean;\n anyOf?: JSONSchemaNode[];\n oneOf?: JSONSchemaNode[];\n allOf?: JSONSchemaNode[];\n const?: unknown;\n additionalProperties?: boolean | JSONSchemaNode;\n minLength?: number;\n maxLength?: number;\n minimum?: number;\n maximum?: number;\n minItems?: number;\n maxItems?: number;\n pattern?: string;\n format?: string;\n description?: string;\n default?: unknown;\n $ref?: string;\n}\n\n/**\n * Lightweight JSON Schema validator for Edge Runtime compatibility\n *\n * Validates data against a JSON Schema without using AJV's code generation.\n * This is fully Edge Runtime compatible with zero dependencies.\n *\n * @param data - The data to validate\n * @param schema - JSON Schema object (plain object, not AJV JSONSchemaType)\n * @returns The validated data cast to type T\n * @throws Error if validation fails\n */\nexport function validateJson<T>(data: unknown, schema: JSONSchemaNode): T {\n const errors: string[] = [];\n const MAX_DEPTH = 50; // Prevent DoS via deeply nested objects\n\n function validate(value: unknown, schema: JSONSchemaNode, path: string = '', depth: number = 0): void {\n // Check recursion depth to prevent DoS attacks\n if (depth > MAX_DEPTH) {\n errors.push(`${path || 'root'}: maximum nesting depth (${MAX_DEPTH}) exceeded`);\n return;\n }\n\n // Handle nullable values\n if (schema.nullable && (value === null || 
value === undefined)) {\n return;\n }\n\n if (value === null || value === undefined) {\n if (schema.nullable !== true) {\n errors.push(`${path || 'root'}: value is null or undefined`);\n }\n return;\n }\n\n // Validate type\n const actualType = Array.isArray(value) ? 'array' : typeof value;\n const expectedType = schema.type;\n\n if (expectedType) {\n // Handle type validation\n if (expectedType === 'integer') {\n if (typeof value !== 'number' || !Number.isInteger(value)) {\n errors.push(`${path || 'root'}: expected integer, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'number') {\n if (typeof value !== 'number') {\n errors.push(`${path || 'root'}: expected number, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'string') {\n if (typeof value !== 'string') {\n errors.push(`${path || 'root'}: expected string, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'boolean') {\n if (typeof value !== 'boolean') {\n errors.push(`${path || 'root'}: expected boolean, got ${actualType}`);\n return;\n }\n } else if (expectedType === 'object') {\n if (typeof value !== 'object' || Array.isArray(value)) {\n errors.push(`${path || 'root'}: expected object, got ${actualType}`);\n return;\n }\n\n // Validate required properties\n if (schema.required && Array.isArray(schema.required)) {\n for (const reqProp of schema.required) {\n if (!(reqProp in value)) {\n errors.push(`${path}.${reqProp}: required property missing`);\n }\n }\n }\n\n // Validate additionalProperties and check for prototype pollution\n const dangerousProps = ['__proto__', 'constructor', 'prototype'];\n\n if (schema.additionalProperties === false && schema.properties) {\n const allowedProps = Object.keys(schema.properties);\n // required can be boolean in malformed schemas - must check Array.isArray\n const requiredProps = Array.isArray(schema.required) ? 
schema.required : [];\n const allAllowedProps = new Set([...allowedProps, ...requiredProps]);\n\n // Check all keys including potentially dangerous ones\n for (const key of [...Object.keys(value), ...Object.getOwnPropertyNames(value)]) {\n // Explicitly reject dangerous properties\n if (dangerousProps.includes(key)) {\n errors.push(`${path}.${key}: dangerous property not allowed`);\n continue;\n }\n\n if (!allAllowedProps.has(key)) {\n errors.push(`${path}.${key}: additional property not allowed`);\n }\n }\n } else {\n // Even without additionalProperties: false, reject dangerous properties\n for (const key of dangerousProps) {\n if (key in value && Object.prototype.hasOwnProperty.call(value, key)) {\n errors.push(`${path}.${key}: dangerous property not allowed`);\n }\n }\n }\n\n // Validate properties\n if (schema.properties) {\n const valueObj = value as Record<string, unknown>;\n for (const [propName, propSchema] of Object.entries(schema.properties)) {\n if (propName in valueObj) {\n validate(valueObj[propName], propSchema, path ? 
`${path}.${propName}` : propName, depth + 1);\n }\n }\n }\n } else if (expectedType === 'array') {\n if (!Array.isArray(value)) {\n errors.push(`${path || 'root'}: expected array, got ${actualType}`);\n return;\n }\n\n // Validate array items\n if (schema.items && !Array.isArray(schema.items)) {\n const itemSchema = schema.items;\n value.forEach((item, index) => {\n validate(item, itemSchema, `${path}[${index}]`, depth + 1);\n });\n }\n }\n }\n }\n\n validate(data, schema);\n\n if (errors.length > 0) {\n throw new Error(`Schema validation failed:\\n${errors.join('\\n')}`);\n }\n\n return data as T;\n}\n\n/**\n * Reserved variables that are auto-injected per node type.\n * These variables come from config or computed data and cannot be overridden by users.\n */\nexport const RESERVED_VARIABLES = {\n extract: ['schema', 'documentText', 'schemaTitle', 'schemaDescription', 'structuredFormat'],\n categorize: ['categories', 'documentText'],\n parse: ['format', 'schema', 'describeFigures', 'citationsEnabled']\n} as const;\n\n/**\n * Validates that user-provided promptVariables don't attempt to override reserved variables.\n * Emits console warnings if reserved variables are found in user variables and removes them.\n *\n * @param nodeType - The type of node (extract, categorize, parse)\n * @param userVariables - The user-provided promptVariables object\n * @param autoInjectedVariables - The auto-injected variables object\n * @returns A cleaned variables object with reserved variables protected\n */\nexport function protectReservedVariables(\n nodeType: 'extract' | 'categorize' | 'parse',\n userVariables: Record<string, any> | undefined,\n autoInjectedVariables: Record<string, any>\n): Record<string, any> {\n if (!userVariables || Object.keys(userVariables).length === 0) {\n return autoInjectedVariables;\n }\n\n const reserved = RESERVED_VARIABLES[nodeType];\n const warnings: string[] = [];\n\n // Check for reserved variable override attempts\n for (const key of reserved) 
{\n if (key in userVariables) {\n warnings.push(key);\n }\n }\n\n // Emit warnings if any reserved variables were attempted\n if (warnings.length > 0) {\n console.warn(\n `[doclo] Attempted to override reserved variables in ${nodeType} node: ${warnings.join(', ')}. ` +\n `These variables are auto-injected from config and cannot be overridden. ` +\n `They will be ignored.`\n );\n }\n\n // Merge: auto-injected first, then user variables (but reserved vars take precedence)\n return {\n ...autoInjectedVariables,\n ...userVariables,\n // Restore reserved variables to ensure they can't be overridden\n ...Object.fromEntries(\n reserved.map(key => [key, autoInjectedVariables[key]])\n )\n };\n}\n"],"mappings":";AAo6BO,SAAS,iBAAiB,SAA0C;AACzE,QAAM,aAKD,CAAC;AAEN,QAAM,SAAS,QAAQ,OAAO,CAAC,KAAK,MAAM;AACxC,QAAI,mBAAmB,EAAE;AACzB,QAAI,gBAAgB,EAAE,WAAW;AACjC,QAAI,oBAAoB,EAAE,eAAe;AACzC,QAAI,qBAAqB,EAAE,gBAAgB;AAC3C,QAAI,4BAA4B,EAAE,4BAA4B;AAC9D,QAAI,wBAAwB,EAAE,wBAAwB;AAGtD,QAAI,EAAE,UAAU;AACd,UAAI,CAAC,WAAW,EAAE,QAAQ,GAAG;AAC3B,mBAAW,EAAE,QAAQ,IAAI,EAAE,SAAS,GAAG,aAAa,GAAG,cAAc,GAAG,WAAW,EAAE;AAAA,MACvF;AACA,iBAAW,EAAE,QAAQ,EAAE,WAAW,EAAE,WAAW;AAC/C,iBAAW,EAAE,QAAQ,EAAE,eAAe,EAAE,eAAe;AACvD,iBAAW,EAAE,QAAQ,EAAE,gBAAgB,EAAE,gBAAgB;AACzD,iBAAW,EAAE,QAAQ,EAAE,aAAa;AAAA,IACtC;AAEA,WAAO;AAAA,EACT,GAAG;AAAA,IACD,iBAAiB;AAAA,IACjB,cAAc;AAAA,IACd,kBAAkB;AAAA,IAClB,mBAAmB;AAAA,IACnB,0BAA0B;AAAA,IAC1B,sBAAsB;AAAA,IACtB,WAAW,QAAQ;AAAA,IACnB;AAAA,EACF,CAAC;AAED,SAAO;AACT;AAyFO,IAAM,OAAO,CAAO,KAAa,SAA8C,EAAE,KAAK,IAAI;AAEjG,eAAsB,YAEpB,OAEA,OACA,sBAEA,eACA;AAEA,QAAM,YAAqC,gBAAgB,EAAE,GAAG,cAAc,IAAI,CAAC;AACnF,QAAM,UAAwB,CAAC;AAC/B,QAAM,MAAe;AAAA,IACnB,QAAQ,sBAAsB;AAAA,IAC9B;AAAA,IACA,MAAM,CAAC,GAAG,MAAM;AAAE,gBAAU,CAAC,IAAI;AAAA,IAAG;AAAA,IACpC,SAAS,EAAE,MAAM,CAAC,MAAM,QAAQ,KAAK,CAAC,EAAE;AAAA,IACxC,eAAe;AAAA,EACjB;AACA,MAAI,MAAM;AACV,aAAW,KAAK,OAAO;AACrB,UAAM,MAAM,EAAE,IAAI,KAAK,GAAG;AAC1B,QAAI,KAAK,EAAE,KAAK,GAAG;AAAA,EACrB;AACA,SAAO,EAAE,QAAQ,KAAK,WAAW,QAAQ;AAC3C;AA+CO,SAAS,oBAAoB,WAA2B;AAE
7D,MAAI,UAAU,SAAS,MAAM,CAAC,UAAU,KAAK,EAAE,WAAW,GAAG,GAAG;AAC9D,WAAO;AAAA,EACT;AAEA,MAAI;AACF,UAAM,SAAS,KAAK,MAAM,SAAS;AAGnC,QAAI,OAAO,QAAQ;AACjB,aAAO,OAAO;AAAA,IAChB;AAGA,QAAI,OAAO,OAAO,SAAS;AACzB,aAAO,OAAO,MAAM;AAAA,IACtB;AAGA,QAAI,OAAO,OAAO,UAAU,UAAU;AACpC,aAAO,OAAO;AAAA,IAChB;AAGA,QAAI,OAAO,SAAS;AAClB,aAAO,OAAO;AAAA,IAChB;AAGA,QAAI,OAAO,OAAO,UAAU,OAAO,OAAO,SAAS;AACjD,aAAO,GAAG,OAAO,MAAM,MAAM,KAAK,OAAO,MAAM,OAAO;AAAA,IACxD;AAGA,WAAO,UAAU,SAAS,MACtB,UAAU,UAAU,GAAG,GAAG,IAAI,QAC9B;AAAA,EACN,QAAQ;AAEN,WAAO,UAAU,SAAS,MACtB,UAAU,UAAU,GAAG,GAAG,IAAI,QAC9B;AAAA,EACN;AACF;AAmBO,IAAM,qBAAN,MAAM,4BAA2B,MAAM;AAAA,EAC5C,YACE,SAEgB,YAEA,iBAEA,gBAEA,gBAEA,eAEA,kBAEA,UAEA,mBAChB;AACA,UAAM,OAAO;AAhBG;AAEA;AAEA;AAEA;AAEA;AAEA;AAEA;AAEA;AAGhB,SAAK,OAAO;AAGZ,QAAI,MAAM,mBAAmB;AAC3B,YAAM,kBAAkB,MAAM,mBAAkB;AAAA,IAClD;AAAA,EACF;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,mBAA2B;AACzB,QAAI,CAAC,KAAK,YAAY,KAAK,SAAS,WAAW,GAAG;AAChD,aAAO,KAAK;AAAA,IACd;AAEA,WAAO,KAAK,SAAS,IAAI,SAAO;AAC9B,UAAI,QAAQ,IAAI;AAChB,UAAI,IAAI,QAAQ;AACd,iBAAS,IAAI,IAAI,MAAM;AAAA,MACzB;AACA,UAAI,IAAI,cAAc,QAAW;AAC/B,iBAAS,IAAI,IAAI,SAAS;AAAA,MAC5B;AACA,aAAO;AAAA,IACT,CAAC,EAAE,KAAK,UAAK;AAAA,EACf;AAAA;AAAA;AAAA;AAAA;AAAA,EAMA,eAAsB;AACpB,QAAI,QAAe,KAAK;AACxB,WAAO,iBAAiB,uBAAsB,MAAM,eAAe;AACjE,cAAQ,MAAM;AAAA,IAChB;AACA,WAAO;AAAA,EACT;AACF;AAwBO,IAAM,sBAAN,MAAM,6BAA4B,MAAM;AAAA,EAC7C,YACE,SACgB,QACA,aACA,YACA,YACA,kBACA,kBAChB;AACA,UAAM,OAAO;AAPG;AACA;AACA;AACA;AACA;AACA;AAGhB,SAAK,OAAO;AAGZ,QAAI,MAAM,mBAAmB;AAC3B,YAAM,kBAAkB,MAAM,oBAAmB;AAAA,IACnD;AAAA,EACF;AACF;AAmCO,IAAM,4BAA2F;AAAA,EACtG,OAAO;AAAA,IACL,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OA
AO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,OAAO;AAAA,IACL,OAAO;AAAA,MACL,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,iBAAiB;AAAA,MACjB,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,YAAY;AAAA,IACV,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,SAAS;AAAA,IACP,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,OAAO;AAAA,IACL,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,
MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,SAAS;AAAA,IACP,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,MACR,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,SAAS;AAAA,IACP,OAAO;AAAA,MACL,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,2BAA2B;AAAA,MAC3B,MAAM;AAAA,IACR;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AAAA,EACA,QAAQ;AAAA,IACN,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,YAAY;AAAA,MACV,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,OAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,SAAS;AAAA,MACP,OAAO;AAAA,MACP,QAAQ;AAAA,IACV;AAAA,IACA,QAAQ;AAAA,MACN,OAAO;AAAA,MACP,MAAM;AAAA,IACR;AAAA,EACF;AACF;AAOO,SAAS,gBAAgBA,OAA8C;AAC5E,MAAI,CAACA,SAAQ,CAACA,MAAK,IAAK,QAAO;AAC/B,QAAM,MAAMA,MAAK;AAGjB,QAAM,aAA6B,CAAC,SAAS,SAAS,cAAc,WAAW,SAAS,WAAW,WAAW,QAAQ;AACtH,SAAO,WAAW,SAAS,GAAmB,IAAK,MAAuB;AAC5E;AAOO,SAAS,gBAAgBA,OAA8C;AAC5E,SAAOA,MAAK,UAAU;AACxB;AAQO,SAAS,qBAAqB,YAA0B,iBAA0B,OAAuB;AAC9G,QAAM,QAAQ,0BAA0B,UAAU;AAClD,MAAI,CA
AC,MAAO,QAAO,CAAC;AAEpB,SAAO,OAAO,QAAQ,KAAK,EACxB,OAAO,CAAC,CAAC,GAAG,IAAI,MAAM;AACrB,QAAI,CAAC,KAAK,MAAO,QAAO;AACxB,QAAI,KAAK,mBAAmB,CAAC,eAAgB,QAAO;AACpD,WAAO;AAAA,EACT,CAAC,EACA,IAAI,CAAC,CAAC,YAAY,CAAC,MAAM,UAA0B;AACxD;AAOO,SAAS,wBAAwB,YAAoC;AAC1E,QAAM,oBAAoB,qBAAqB,YAAY,KAAK;AAChE,QAAM,iBAAiB,qBAAqB,YAAY,IAAI,EAAE;AAAA,IAC5D,OAAK,CAAC,kBAAkB,SAAS,CAAC;AAAA,EACpC;AAEA,MAAI,kBAAkB,WAAW,KAAK,eAAe,WAAW,GAAG;AACjE,WAAO,CAAC,GAAG,UAAU,wDAAwD;AAAA,EAC/E;AAEA,QAAM,cAAwB,CAAC;AAE/B,MAAI,kBAAkB,SAAS,GAAG;AAChC,gBAAY,KAAK,GAAG,UAAU,kBAAkB;AAChD,sBAAkB,QAAQ,YAAU;AAClC,YAAM,OAAO,0BAA0B,UAAU,EAAE,MAAM;AACzD,kBAAY,KAAK,YAAO,MAAM,GAAG,KAAK,OAAO,MAAM,KAAK,IAAI,KAAK,EAAE,EAAE;AAAA,IACvE,CAAC;AAAA,EACH;AAEA,MAAI,eAAe,SAAS,GAAG;AAC7B,gBAAY,KAAK,GAAG,UAAU,yCAAyC;AACvE,mBAAe,QAAQ,YAAU;AAC/B,YAAM,OAAO,0BAA0B,UAAU,EAAE,MAAM;AACzD,kBAAY,KAAK,YAAO,MAAM,GAAG,KAAK,OAAO,MAAM,KAAK,IAAI,KAAK,EAAE,EAAE;AAAA,IACvE,CAAC;AAAA,EACH;AAEA,SAAO;AACT;AAqBO,SAAS,uBACd,YACA,YACA,iBAA0B,OACR;AAClB,QAAM,OAAO,0BAA0B,UAAU,IAAI,UAAU;AAE/D,MAAI,CAAC,MAAM;AACT,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,kCAAkC,UAAU,WAAM,UAAU;AAAA,MACpE,aAAa,CAAC,yCAAyC;AAAA,IACzD;AAAA,EACF;AAEA,MAAI,CAAC,KAAK,OAAO;AACf,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,KAAK;AAAA,MACb,aAAa,wBAAwB,UAAU;AAAA,IACjD;AAAA,EACF;AAGA,MAAI,KAAK,mBAAmB,CAAC,gBAAgB;AAC3C,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,kBAAkB,UAAU,OAAO,UAAU;AAAA,MACrD,aAAa;AAAA,QACX,yBAAyB,UAAU;AAAA,QACnC,kBAAkB,UAAU;AAAA,QAC5B;AAAA,QACA;AAAA,QACA;AAAA,QACA,GAAG,wBAAwB,UAAU;AAAA,MACvC;AAAA,MACA,iBAAiB;AAAA,IACnB;AAAA,EACF;AAGA,MAAI,KAAK,2BAA2B;AAClC,WAAO;AAAA,MACL,OAAO;AAAA,MACP,SAAS,iBAAO,UAAU,WAAM,UAAU,KAAK,KAAK,QAAQ,qFAAqF;AAAA,IACnJ;AAAA,EACF;AAEA,SAAO;AAAA,IACL,OAAO;AAAA,EACT;AACF;AAoBO,SAAS,wBAAwB,YAA0C;AAChF,QAAM,QAAQ,0BAA0B,UAAU;AAClD,MAAI,CAAC,MAAO,QAAO,CAAC;AAGpB,SAAO,OAAO,QAAQ,KAAK,EACxB,OAAO,CAAC,CAAC,GAAG,IAAI,MAAM,KAAK,SAAS,KAAK,eAAe,EACxD,IAAI,CAAC,CAAC,YAAY,CAAC,MAAM,UAA0B;AACxD;AAuBO,SAAS,wBACd,YACA,aACkB;AAClB,QAAM,OAAO,0BAA0B,UAAU,IAAI,WAAW;AAEhE,MAAI,CAAC,MAAM;AACT,WAAO;A
AAA,MACL,OAAO;AAAA,MACP,QAAQ,kCAAkC,UAAU,0BAAgB,WAAW;AAAA,MAC/E,aAAa,CAAC,yCAAyC;AAAA,IACzD;AAAA,EACF;AAGA,MAAI,KAAK,SAAS,KAAK,iBAAiB;AACtC,WAAO;AAAA,MACL,OAAO;AAAA,IACT;AAAA,EACF;AAGA,MAAI,CAAC,KAAK,OAAO;AACf,UAAMC,iBAAgB,wBAAwB,UAAU;AACxD,WAAO;AAAA,MACL,OAAO;AAAA,MACP,QAAQ,GAAG,WAAW,wCAAwC,UAAU,KAAK,KAAK,UAAU,gDAAgD;AAAA,MAC5I,aAAaA,eAAc,SAAS,IAChC,CAAC,+BAA+B,UAAU,KAAKA,eAAc,KAAK,IAAI,CAAC,EAAE,IACzE,CAAC,GAAG,UAAU,0CAA0C;AAAA,IAC9D;AAAA,EACF;AAGA,QAAM,gBAAgB,wBAAwB,UAAU;AACxD,SAAO;AAAA,IACL,OAAO;AAAA,IACP,QAAQ,GAAG,WAAW,wCAAwC,UAAU;AAAA,IACxE,aAAa,cAAc,SAAS,IAChC,CAAC,+BAA+B,UAAU,KAAK,cAAc,KAAK,IAAI,CAAC,EAAE,IACzE,CAAC,GAAG,UAAU,0CAA0C;AAAA,EAC9D;AACF;AA0CO,SAAS,aAAgB,MAAe,QAA2B;AACxE,QAAM,SAAmB,CAAC;AAC1B,QAAM,YAAY;AAElB,WAAS,SAAS,OAAgBC,SAAwB,OAAe,IAAI,QAAgB,GAAS;AAEpG,QAAI,QAAQ,WAAW;AACrB,aAAO,KAAK,GAAG,QAAQ,MAAM,4BAA4B,SAAS,YAAY;AAC9E;AAAA,IACF;AAGA,QAAIA,QAAO,aAAa,UAAU,QAAQ,UAAU,SAAY;AAC9D;AAAA,IACF;AAEA,QAAI,UAAU,QAAQ,UAAU,QAAW;AACzC,UAAIA,QAAO,aAAa,MAAM;AAC5B,eAAO,KAAK,GAAG,QAAQ,MAAM,8BAA8B;AAAA,MAC7D;AACA;AAAA,IACF;AAGA,UAAM,aAAa,MAAM,QAAQ,KAAK,IAAI,UAAU,OAAO;AAC3D,UAAM,eAAeA,QAAO;AAE5B,QAAI,cAAc;AAEhB,UAAI,iBAAiB,WAAW;AAC9B,YAAI,OAAO,UAAU,YAAY,CAAC,OAAO,UAAU,KAAK,GAAG;AACzD,iBAAO,KAAK,GAAG,QAAQ,MAAM,2BAA2B,UAAU,EAAE;AACpE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,UAAU;AACpC,YAAI,OAAO,UAAU,UAAU;AAC7B,iBAAO,KAAK,GAAG,QAAQ,MAAM,0BAA0B,UAAU,EAAE;AACnE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,UAAU;AACpC,YAAI,OAAO,UAAU,UAAU;AAC7B,iBAAO,KAAK,GAAG,QAAQ,MAAM,0BAA0B,UAAU,EAAE;AACnE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,WAAW;AACrC,YAAI,OAAO,UAAU,WAAW;AAC9B,iBAAO,KAAK,GAAG,QAAQ,MAAM,2BAA2B,UAAU,EAAE;AACpE;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,UAAU;AACpC,YAAI,OAAO,UAAU,YAAY,MAAM,QAAQ,KAAK,GAAG;AACrD,iBAAO,KAAK,GAAG,QAAQ,MAAM,0BAA0B,UAAU,EAAE;AACnE;AAAA,QACF;AAGA,YAAIA,QAAO,YAAY,MAAM,QAAQA,QAAO,QAAQ,GAAG;AACrD,qBAAW,WAAWA,QAAO,UAAU;AACrC,gBAAI,EAAE,WAAW,QAAQ;AACvB,qBAAO,KAAK,GAAG,IAAI,IAAI,OAAO,6BAA6B;AAAA,YAC7D;AAAA,UACF;AAAA,QACF;AAGA,cAAM,iBAAiB,CAAC,aAAa,eAAe,WAAW;AAE/D,YAAIA,QAAO,yBAAyB,SAAS
A,QAAO,YAAY;AAC9D,gBAAM,eAAe,OAAO,KAAKA,QAAO,UAAU;AAElD,gBAAM,gBAAgB,MAAM,QAAQA,QAAO,QAAQ,IAAIA,QAAO,WAAW,CAAC;AAC1E,gBAAM,kBAAkB,oBAAI,IAAI,CAAC,GAAG,cAAc,GAAG,aAAa,CAAC;AAGnE,qBAAW,OAAO,CAAC,GAAG,OAAO,KAAK,KAAK,GAAG,GAAG,OAAO,oBAAoB,KAAK,CAAC,GAAG;AAE/E,gBAAI,eAAe,SAAS,GAAG,GAAG;AAChC,qBAAO,KAAK,GAAG,IAAI,IAAI,GAAG,kCAAkC;AAC5D;AAAA,YACF;AAEA,gBAAI,CAAC,gBAAgB,IAAI,GAAG,GAAG;AAC7B,qBAAO,KAAK,GAAG,IAAI,IAAI,GAAG,mCAAmC;AAAA,YAC/D;AAAA,UACF;AAAA,QACF,OAAO;AAEL,qBAAW,OAAO,gBAAgB;AAChC,gBAAI,OAAO,SAAS,OAAO,UAAU,eAAe,KAAK,OAAO,GAAG,GAAG;AACpE,qBAAO,KAAK,GAAG,IAAI,IAAI,GAAG,kCAAkC;AAAA,YAC9D;AAAA,UACF;AAAA,QACF;AAGA,YAAIA,QAAO,YAAY;AACrB,gBAAM,WAAW;AACjB,qBAAW,CAAC,UAAU,UAAU,KAAK,OAAO,QAAQA,QAAO,UAAU,GAAG;AACtE,gBAAI,YAAY,UAAU;AACxB,uBAAS,SAAS,QAAQ,GAAG,YAAY,OAAO,GAAG,IAAI,IAAI,QAAQ,KAAK,UAAU,QAAQ,CAAC;AAAA,YAC7F;AAAA,UACF;AAAA,QACF;AAAA,MACF,WAAW,iBAAiB,SAAS;AACnC,YAAI,CAAC,MAAM,QAAQ,KAAK,GAAG;AACzB,iBAAO,KAAK,GAAG,QAAQ,MAAM,yBAAyB,UAAU,EAAE;AAClE;AAAA,QACF;AAGA,YAAIA,QAAO,SAAS,CAAC,MAAM,QAAQA,QAAO,KAAK,GAAG;AAChD,gBAAM,aAAaA,QAAO;AAC1B,gBAAM,QAAQ,CAAC,MAAM,UAAU;AAC7B,qBAAS,MAAM,YAAY,GAAG,IAAI,IAAI,KAAK,KAAK,QAAQ,CAAC;AAAA,UAC3D,CAAC;AAAA,QACH;AAAA,MACF;AAAA,IACF;AAAA,EACF;AAEA,WAAS,MAAM,MAAM;AAErB,MAAI,OAAO,SAAS,GAAG;AACrB,UAAM,IAAI,MAAM;AAAA,EAA8B,OAAO,KAAK,IAAI,CAAC,EAAE;AAAA,EACnE;AAEA,SAAO;AACT;AAMO,IAAM,qBAAqB;AAAA,EAChC,SAAS,CAAC,UAAU,gBAAgB,eAAe,qBAAqB,kBAAkB;AAAA,EAC1F,YAAY,CAAC,cAAc,cAAc;AAAA,EACzC,OAAO,CAAC,UAAU,UAAU,mBAAmB,kBAAkB;AACnE;AAWO,SAAS,yBACd,UACA,eACA,uBACqB;AACrB,MAAI,CAAC,iBAAiB,OAAO,KAAK,aAAa,EAAE,WAAW,GAAG;AAC7D,WAAO;AAAA,EACT;AAEA,QAAM,WAAW,mBAAmB,QAAQ;AAC5C,QAAM,WAAqB,CAAC;AAG5B,aAAW,OAAO,UAAU;AAC1B,QAAI,OAAO,eAAe;AACxB,eAAS,KAAK,GAAG;AAAA,IACnB;AAAA,EACF;AAGA,MAAI,SAAS,SAAS,GAAG;AACvB,YAAQ;AAAA,MACN,uDAAuD,QAAQ,UAAU,SAAS,KAAK,IAAI,CAAC;AAAA,IAG9F;AAAA,EACF;AAGA,SAAO;AAAA,IACL,GAAG;AAAA,IACH,GAAG;AAAA;AAAA,IAEH,GAAG,OAAO;AAAA,MACR,SAAS,IAAI,SAAO,CAAC,KAAK,sBAAsB,GAAG,CAAC,CAAC;AAAA,IACvD;AAAA,EACF;AACF;","names":["node","validStarters","sche
ma"]}
@@ -1,4 +1,4 @@
1
- import { D as DocumentIR } from './validation-BQO54qAY.js';
1
+ import { D as DocumentIR } from './validation-C_RN-Xqr.js';
2
2
 
3
3
  /**
4
4
  * PDF Utilities
@@ -10,24 +10,29 @@ import { D as DocumentIR } from './validation-BQO54qAY.js';
10
10
  /**
11
11
  * Get the total number of pages in a PDF document
12
12
  *
13
- * @param dataUrl - PDF data URI in format: data:application/pdf;base64,{base64data}
13
+ * @param input - PDF data URL (data:application/pdf;base64,...) or raw base64 string
14
14
  * @returns Total page count
15
- * @throws {Error} If the input is not a valid PDF data URL
15
+ * @throws {Error} If the input is not valid PDF data
16
16
  *
17
17
  * @example
18
18
  * ```typescript
19
+ * // With data URL
19
20
  * const pageCount = await getPDFPageCount('data:application/pdf;base64,JVBERi0...');
21
+ *
22
+ * // With raw base64
23
+ * const pageCount = await getPDFPageCount('JVBERi0xLjQK...');
24
+ *
20
25
  * console.log(`PDF has ${pageCount} pages`);
21
26
  * ```
22
27
  */
23
- declare function getPDFPageCount(dataUrl: string): Promise<number>;
28
+ declare function getPDFPageCount(input: string): Promise<number>;
24
29
  /**
25
30
  * Split a PDF into multiple smaller PDFs based on page ranges
26
31
  *
27
- * @param dataUrl - PDF data URI in format: data:application/pdf;base64,{base64data}
32
+ * @param input - PDF data URL (data:application/pdf;base64,...) or raw base64 string
28
33
  * @param pageRanges - Array of [startPage, endPage] tuples (1-indexed, inclusive)
29
34
  * @returns Array of PDF data URLs, one for each page range
30
- * @throws {Error} If the input is not a valid PDF data URL or page ranges are invalid
35
+ * @throws {Error} If the input is not valid PDF data or page ranges are invalid
31
36
  *
32
37
  * @example
33
38
  * ```typescript
@@ -40,7 +45,7 @@ declare function getPDFPageCount(dataUrl: string): Promise<number>;
40
45
  * console.log(`Created ${chunks.length} PDF chunks`);
41
46
  * ```
42
47
  */
43
- declare function splitPDFIntoChunks(dataUrl: string, pageRanges: Array<[number, number]>): Promise<string[]>;
48
+ declare function splitPDFIntoChunks(input: string, pageRanges: Array<[number, number]>): Promise<string[]>;
44
49
  /**
45
50
  * Get the page count from a DocumentIR, with fallback logic
46
51
  *
package/dist/pdf-utils.js CHANGED
@@ -31,22 +31,21 @@ function uint8ArrayToBase64(bytes) {
31
31
  }
32
32
 
33
33
  // src/pdf-utils.ts
34
- async function getPDFPageCount(dataUrl) {
35
- const base64Match = dataUrl.match(/^data:application\/pdf;base64,(.+)$/);
36
- if (!base64Match) {
37
- throw new Error("Invalid PDF data URL format. Expected: data:application/pdf;base64,{base64data}");
34
+ function extractPDFBase64(input) {
35
+ const dataUrlMatch = input.match(/^data:application\/pdf;base64,(.+)$/);
36
+ if (dataUrlMatch) {
37
+ return dataUrlMatch[1];
38
38
  }
39
- const base64Data = base64Match[1];
39
+ return input;
40
+ }
41
+ async function getPDFPageCount(input) {
42
+ const base64Data = extractPDFBase64(input);
40
43
  const pdfBytes = base64ToArrayBuffer(base64Data);
41
44
  const pdfDoc = await PDFDocument.load(pdfBytes);
42
45
  return pdfDoc.getPageCount();
43
46
  }
44
- async function splitPDFIntoChunks(dataUrl, pageRanges) {
45
- const base64Match = dataUrl.match(/^data:application\/pdf;base64,(.+)$/);
46
- if (!base64Match) {
47
- throw new Error("Invalid PDF data URL format. Expected: data:application/pdf;base64,{base64data}");
48
- }
49
- const base64Data = base64Match[1];
47
+ async function splitPDFIntoChunks(input, pageRanges) {
48
+ const base64Data = extractPDFBase64(input);
50
49
  const pdfBytes = base64ToArrayBuffer(base64Data);
51
50
  const pdfDoc = await PDFDocument.load(pdfBytes);
52
51
  const totalPages = pdfDoc.getPageCount();