@doclo/core 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1126 @@
1
+ /**
2
+ * Provider Identity Types
3
+ *
4
+ * Implements the 3-layer hierarchy for provider identification:
5
+ * 1. Provider (Company/Vendor) - e.g., datalab, openai, anthropic
6
+ * 2. Model - e.g., surya, marker-ocr, claude-sonnet-4.5
7
+ * 3. Method - e.g., native, openrouter, self-hosted
8
+ */
9
+ /**
10
+ * Provider vendors (companies)
11
+ * These represent the company or organization providing the service
12
+ */
13
+ type ProviderVendor = 'datalab' | 'reducto' | 'unsiloed' | 'openai' | 'anthropic' | 'google' | 'xai';
14
+ /**
15
+ * Access methods for providers
16
+ * - native: Direct API call to provider's official endpoint
17
+ * - openrouter: Via OpenRouter aggregator (LLM only)
18
+ * - self-hosted: Self-hosted instance (e.g., pip install surya-ocr)
19
+ */
20
+ type AccessMethod = 'native' | 'openrouter' | 'self-hosted';
21
+ /**
22
+ * Complete provider identity combining all three layers
23
+ */
24
+ interface ProviderIdentity {
25
+ /** The company/vendor (e.g., 'datalab') */
26
+ readonly provider: ProviderVendor;
27
+ /** The specific model/version (e.g., 'surya', 'marker-vlm') */
28
+ readonly model: string;
29
+ /** How the provider is accessed (e.g., 'native', 'self-hosted') */
30
+ readonly method: AccessMethod;
31
+ }
32
+ /**
33
+ * Convert provider identity to canonical string format
34
+ * Format: "provider:model" (e.g., "datalab:surya")
35
+ *
36
+ * @example
37
+ * ```typescript
38
+ * toProviderString({ provider: 'datalab', model: 'surya', method: 'native' })
39
+ * // => "datalab:surya"
40
+ * ```
41
+ */
42
+ declare function toProviderString(identity: ProviderIdentity): string;
43
+ /**
44
+ * Parse canonical provider string back to partial identity
45
+ * Note: method cannot be determined from string alone
46
+ *
47
+ * @example
48
+ * ```typescript
49
+ * parseProviderString("datalab:surya")
50
+ * // => { provider: 'datalab', model: 'surya' }
51
+ * ```
52
+ */
53
+ declare function parseProviderString(str: string): {
54
+ provider: string;
55
+ model: string;
56
+ };
57
+ /**
58
+ * Check if an endpoint appears to be self-hosted
59
+ * Used to determine the access method for OCR providers
60
+ */
61
+ declare function isLocalEndpoint(endpoint?: string): boolean;
62
+ /**
63
+ * Create a provider identity with inferred method
64
+ *
65
+ * @param provider - The vendor/company
66
+ * @param model - The model name
67
+ * @param opts - Options including endpoint for method inference
68
+ */
69
+ declare function createIdentity(provider: ProviderVendor, model: string, opts?: {
70
+ endpoint?: string;
71
+ via?: 'openrouter' | 'native';
72
+ }): ProviderIdentity;
73
+
74
+ /**
75
+ * Browser-safe validation utilities
76
+ *
77
+ * This module contains all validation code with ZERO Node.js dependencies.
78
+ * It can be safely bundled for browser environments.
79
+ */
80
+ /** Page-centric IR */
81
+ type BBox = {
82
+ x: number;
83
+ y: number;
84
+ w: number;
85
+ h: number;
86
+ };
87
+ type IRLine = {
88
+ text: string;
89
+ bbox?: BBox;
90
+ startChar?: number;
91
+ endChar?: number;
92
+ lineId?: string;
93
+ };
94
+ type IRPage = {
95
+ pageNumber?: number;
96
+ width: number;
97
+ height: number;
98
+ lines: IRLine[];
99
+ markdown?: string;
100
+ html?: string;
101
+ extras?: Record<string, unknown>;
102
+ };
103
+ /** Standard extras fields for DocumentIR */
104
+ type DocumentIRExtras = {
105
+ /** Total number of pages in the original document (for PDFs, DOCX, etc.) */
106
+ pageCount?: number;
107
+ /** Cost in USD for processing this document */
108
+ costUSD?: number;
109
+ /** Provider-specific raw response */
110
+ raw?: unknown;
111
+ /** For chunked documents: which chunk this is (0-indexed) */
112
+ chunkIndex?: number;
113
+ /** For chunked documents: total number of chunks */
114
+ totalChunks?: number;
115
+ /** For chunked documents: page range [startPage, endPage] (1-indexed, inclusive) */
116
+ pageRange?: [number, number];
117
+ /** For Unsiloed: total semantic chunks (not traditional pages) */
118
+ totalSemanticChunks?: number;
119
+ /** Allow arbitrary additional fields */
120
+ [key: string]: unknown;
121
+ };
122
+ type DocumentIR = {
123
+ pages: IRPage[];
124
+ extras?: DocumentIRExtras;
125
+ };
126
+
127
+ /** Provider capability contracts */
128
+ type OCRProvider = {
129
+ /** Full 3-layer identity (provider/model/method) */
130
+ identity?: ProviderIdentity;
131
+ /** Canonical name in "provider:model" format */
132
+ name: string;
133
+ parseToIR: (input: {
134
+ url?: string;
135
+ base64?: string;
136
+ }) => Promise<DocumentIR>;
137
+ };
138
+ /** Multimodal input for VLM providers */
139
+ type MultimodalInput = {
140
+ text?: string;
141
+ images?: Array<{
142
+ url?: string;
143
+ base64?: string;
144
+ mimeType: string;
145
+ }>;
146
+ pdfs?: Array<{
147
+ url?: string;
148
+ base64?: string;
149
+ fileId?: string;
150
+ }>;
151
+ };
152
+ /** Reasoning configuration (normalized across providers) */
153
+ type ReasoningConfig = {
154
+ /** Reasoning effort level: low (20% budget), medium (50%), high (80%) */
155
+ effort?: 'low' | 'medium' | 'high';
156
+ /** Exclude reasoning tokens from response (only use for accuracy, not visible) */
157
+ exclude?: boolean;
158
+ /** Enable reasoning with default (medium) effort */
159
+ enabled?: boolean;
160
+ };
161
+ /** Base LLM provider (text-only) */
162
+ type LLMProvider = {
163
+ /** Full 3-layer identity (provider/model/method) */
164
+ identity?: ProviderIdentity;
165
+ /** Canonical name in "provider:model" format */
166
+ name: string;
167
+ completeJson: (input: {
168
+ prompt: string;
169
+ schema: object;
170
+ max_tokens?: number;
171
+ reasoning?: ReasoningConfig;
172
+ }) => Promise<{
173
+ json: unknown;
174
+ rawText?: string;
175
+ costUSD?: number;
176
+ inputTokens?: number;
177
+ outputTokens?: number;
178
+ cacheCreationInputTokens?: number;
179
+ cacheReadInputTokens?: number;
180
+ }>;
181
+ };
182
+ /** Vision-capable LLM provider */
183
+ type VLMProvider = {
184
+ /** Full 3-layer identity (provider/model/method) */
185
+ identity?: ProviderIdentity;
186
+ /** Canonical name in "provider:model" format */
187
+ name: string;
188
+ completeJson: (input: {
189
+ prompt: string | MultimodalInput;
190
+ schema: object;
191
+ max_tokens?: number;
192
+ reasoning?: ReasoningConfig;
193
+ }) => Promise<{
194
+ json: unknown;
195
+ rawText?: string;
196
+ costUSD?: number;
197
+ inputTokens?: number;
198
+ outputTokens?: number;
199
+ cacheCreationInputTokens?: number;
200
+ cacheReadInputTokens?: number;
201
+ }>;
202
+ capabilities: {
203
+ supportsImages: true;
204
+ supportsPDFs: boolean;
205
+ maxPDFPages?: number;
206
+ };
207
+ };
208
+ /** Legacy alias for backward compatibility */
209
+ type LLMJsonProvider = VLMProvider;
210
+ /**
211
+ * Processing quality/speed tradeoff modes
212
+ * Providers map their specific modes to these normalized values
213
+ */
214
+ type ProcessingMode = 'fast' | 'balanced' | 'high_accuracy';
215
+ /**
216
+ * Page range specification for partial document processing
217
+ * Allows processing a subset of pages for cost savings
218
+ */
219
+ type PageRangeOptions = {
220
+ /** Process only the first N pages */
221
+ maxPages?: number;
222
+ /** Specific page range (0-indexed), e.g., "0,2-4,10" */
223
+ pageRange?: string;
224
+ };
225
+ /**
226
+ * Language hints for OCR processing
227
+ */
228
+ type LanguageOptions = {
229
+ /** ISO language codes for OCR, e.g., ['en', 'de', 'fr'] */
230
+ langs?: string[];
231
+ };
232
+ /**
233
+ * Document segmentation result for splitting "stapled" PDFs
234
+ * Returns page boundaries for each detected document type
235
+ */
236
+ type SegmentationResult = {
237
+ segments: Array<{
238
+ /** Document type name (e.g., 'invoice', 'contract') */
239
+ name: string;
240
+ /** Page indices (0-indexed) belonging to this segment */
241
+ pages: number[];
242
+ /** Confidence level of segmentation */
243
+ confidence: 'high' | 'medium' | 'low';
244
+ }>;
245
+ metadata: {
246
+ /** Total pages in the original document */
247
+ totalPages: number;
248
+ /** How segmentation was performed */
249
+ segmentationMethod: 'auto' | 'schema' | 'manual';
250
+ };
251
+ };
252
+ /**
253
+ * Extracted image from a document
254
+ * Represents figures, charts, or embedded images
255
+ */
256
+ type ExtractedImage = {
257
+ /** Block ID or reference (provider-specific) */
258
+ id: string;
259
+ /** Page number where image appears (0-indexed) */
260
+ pageNumber: number;
261
+ /** Base64-encoded image data */
262
+ base64: string;
263
+ /** MIME type of the image */
264
+ mimeType: string;
265
+ /** Location on page (normalized 0-1 coordinates) */
266
+ bbox?: NormalizedBBox;
267
+ /** Caption text if detected */
268
+ caption?: string;
269
+ };
270
+ /**
271
+ * Extended OCR provider options (beyond basic parseToIR)
272
+ * These options are normalized across different OCR providers
273
+ */
274
+ type OCRProviderOptions = PageRangeOptions & LanguageOptions & {
275
+ /** Processing quality/speed tradeoff */
276
+ mode?: ProcessingMode;
277
+ /** Force OCR even on text-based PDFs */
278
+ forceOCR?: boolean;
279
+ /** Extract embedded images from document */
280
+ extractImages?: boolean;
281
+ /** Add page delimiters to output */
282
+ paginate?: boolean;
283
+ /** Remove and redo existing OCR */
284
+ stripExistingOCR?: boolean;
285
+ };
286
+ /**
287
+ * Extended VLM provider options for document extraction
288
+ * These options are normalized across different VLM providers
289
+ */
290
+ type VLMProviderOptions = PageRangeOptions & LanguageOptions & {
291
+ /** Processing quality/speed tradeoff */
292
+ mode?: ProcessingMode;
293
+ /** Force OCR even on text-based PDFs */
294
+ forceOCR?: boolean;
295
+ /** Additional prompt/instructions for extraction */
296
+ prompt?: string;
297
+ /** Schema for auto-segmentation of multi-document PDFs */
298
+ segmentationSchema?: object;
299
+ };
300
+ /**
301
+ * Provider citation from source document
302
+ * Maps extracted fields to their source locations
303
+ */
304
+ type ProviderCitation = {
305
+ /** JSON path to extracted field (e.g., "invoice.total") */
306
+ fieldPath: string;
307
+ /** Source block IDs from the provider */
308
+ blockIds: string[];
309
+ /** Confidence score (0-1) */
310
+ confidence?: number;
311
+ };
312
+ /** Consensus configuration for any node */
313
+ type ConsensusConfig = {
314
+ runs: number;
315
+ strategy?: 'majority' | 'unanimous';
316
+ onTie?: 'random' | 'fail' | 'retry';
317
+ parallel?: boolean;
318
+ includeMetadata?: boolean;
319
+ level?: 'object' | 'field';
320
+ retryOnFailure?: boolean;
321
+ maxRetries?: number;
322
+ };
323
+ /** Individual consensus run result */
324
+ type ConsensusRunResult<T = any> = {
325
+ runIndex: number;
326
+ value: T | null;
327
+ success: boolean;
328
+ error?: string;
329
+ startTime: number;
330
+ endTime: number;
331
+ duration: number;
332
+ attempts?: number;
333
+ };
334
+ /** Field-level voting details */
335
+ type FieldVotingDetails = {
336
+ fieldPath: string;
337
+ values: Array<{
338
+ /** The actual value for this voting option - can be any JSON-serializable type */
339
+ value: unknown;
340
+ count: number;
341
+ percentage: number;
342
+ runIndices: number[];
343
+ }>;
344
+ /** The winning value from consensus - can be any JSON-serializable type */
345
+ winner: unknown;
346
+ isTie: boolean;
347
+ agreementScore: number;
348
+ };
349
+ /** Consensus execution metadata */
350
+ type ConsensusMetadata<T = unknown> = {
351
+ totalRuns: number;
352
+ successfulRuns: number;
353
+ failedRuns: number;
354
+ strategy: 'majority' | 'unanimous';
355
+ selectedResult: T;
356
+ selectedRunIndex: number;
357
+ confidence: 'high' | 'medium' | 'low';
358
+ overallAgreement: number;
359
+ fieldAgreement: Record<string, number>;
360
+ votingDetails: FieldVotingDetails[];
361
+ runs: ConsensusRunResult<T>[];
362
+ executionTime: number;
363
+ wasRetry: boolean;
364
+ tieBreakerUsed?: 'random' | 'retry' | 'fail' | null;
365
+ votingLevel?: 'object' | 'field';
366
+ isSyntheticResult?: boolean;
367
+ totalRetries?: number;
368
+ emptyResultsFiltered?: number;
369
+ };
370
+ /** Output with consensus metadata wrapper */
371
+ type OutputWithConsensus<T = unknown> = {
372
+ data: T;
373
+ consensus: ConsensusMetadata<T>;
374
+ };
375
+ /** Conditional type helper for consensus metadata */
376
+ type MaybeWithConsensusMetadata<T, Config> = Config extends {
377
+ includeMetadata: true;
378
+ } ? OutputWithConsensus<T> : T;
379
+ /** Flow input/output types */
380
+ type FlowInput = {
381
+ url?: string;
382
+ base64?: string;
383
+ pages?: number[];
384
+ bounds?: BBox;
385
+ };
386
+ /**
387
+ * All MIME types supported by at least one provider.
388
+ * This is the union of all provider capabilities.
389
+ */
390
+ type SupportedMimeType = 'application/pdf' | 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp' | 'image/tiff' | 'image/bmp' | 'image/heic' | 'image/heif' | 'image/vnd.adobe.photoshop' | 'application/msword' | 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' | 'application/vnd.ms-excel' | 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet' | 'application/vnd.ms-powerpoint' | 'application/vnd.openxmlformats-officedocument.presentationml.presentation' | 'application/vnd.oasis.opendocument.text' | 'application/vnd.oasis.opendocument.spreadsheet' | 'application/vnd.oasis.opendocument.presentation' | 'text/plain' | 'text/csv' | 'text/html' | 'application/rtf' | 'application/epub+zip';
391
+ /**
392
+ * Flow-level input validation configuration
393
+ *
394
+ * Allows specifying accepted MIME types for early validation
395
+ * before flow execution begins.
396
+ */
397
+ type FlowInputValidation = {
398
+ /**
399
+ * List of accepted MIME types.
400
+ * If specified, input must match one of these types or validation fails.
401
+ * If empty/undefined, all supported types are accepted.
402
+ */
403
+ acceptedFormats?: SupportedMimeType[];
404
+ /**
405
+ * Whether to throw on validation failure.
406
+ * @default true
407
+ */
408
+ throwOnInvalid?: boolean;
409
+ };
410
+ type FlowResult<T = any> = {
411
+ output: T;
412
+ metrics: StepMetric[];
413
+ aggregated: AggregatedMetrics;
414
+ artifacts: Record<string, any>;
415
+ error?: Error;
416
+ };
417
+ type SplitDocument = {
418
+ type: string;
419
+ schema: object;
420
+ pages: number[];
421
+ bounds?: BBox;
422
+ input: FlowInput;
423
+ };
424
+ /** Citation and source tracking types */
425
+ /** Citation source type indicating data provenance */
426
+ type CitationSourceType = 'ocr' | 'vlm' | 'llm' | 'inferred';
427
+ /** Normalized bounding box (0-1 coordinates relative to page dimensions) */
428
+ type NormalizedBBox = {
429
+ x: number;
430
+ y: number;
431
+ w: number;
432
+ h: number;
433
+ };
434
+ /** Line-level citation reference with spatial information */
435
+ type LineCitation = {
436
+ pageNumber: number;
437
+ lineIndex: number;
438
+ bbox?: NormalizedBBox;
439
+ text: string;
440
+ confidence?: number;
441
+ sourceType: CitationSourceType;
442
+ startChar?: number;
443
+ endChar?: number;
444
+ };
445
+ /** Field-level citation mapping extracted values to sources */
446
+ type FieldCitation = {
447
+ fieldPath: string;
448
+ /** Extracted value - can be any JSON-serializable type */
449
+ value: unknown;
450
+ citations: LineCitation[];
451
+ reasoning?: string;
452
+ confidence?: number;
453
+ };
454
+ /** Citation configuration for nodes */
455
+ type CitationConfig = {
456
+ enabled: boolean;
457
+ includeTextSnippets?: boolean;
458
+ includeBoundingBoxes?: boolean;
459
+ includeConfidence?: boolean;
460
+ minConfidence?: number;
461
+ detectInferred?: boolean;
462
+ };
463
+ /** Extended output with citations */
464
+ type OutputWithCitations<T> = {
465
+ data: T;
466
+ citations: FieldCitation[];
467
+ metadata: {
468
+ totalPages?: number;
469
+ sourceType: CitationSourceType;
470
+ hasInferredValues?: boolean;
471
+ processingTime?: number;
472
+ };
473
+ };
474
+ /** Node configuration types */
475
+ type ParseNodeConfig = {
476
+ provider: OCRProvider | VLMProvider;
477
+ consensus?: ConsensusConfig;
478
+ chunked?: {
479
+ maxPagesPerChunk: number;
480
+ overlap?: number;
481
+ parallel?: boolean;
482
+ };
483
+ format?: 'text' | 'markdown' | 'html';
484
+ describeFigures?: boolean;
485
+ includeImages?: boolean;
486
+ additionalPrompt?: string;
487
+ citations?: CitationConfig;
488
+ promptRef?: string;
489
+ /**
490
+ * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).
491
+ *
492
+ * Auto-injected variables (no need to pass manually):
493
+ * - format: From config.format
494
+ * - schema: Constructed schema (if applicable)
495
+ * - describeFigures: From config.describeFigures
496
+ * - citationsEnabled: From config.citations?.enabled
497
+ *
498
+ * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).
499
+ */
500
+ promptVariables?: Record<string, any>;
501
+ /**
502
+ * Additional instructions to append to the default prompt.
503
+ * This provides a simple way to customize the prompt without creating a custom prompt asset.
504
+ * The instructions will be added after the main prompt content.
505
+ *
506
+ * @example
507
+ * ```typescript
508
+ * parse({
509
+ * provider: vlmProvider,
510
+ * format: 'markdown',
511
+ * additionalInstructions: "Pay special attention to preserving table structures and footnotes."
512
+ * })
513
+ * ```
514
+ */
515
+ additionalInstructions?: string;
516
+ /**
517
+ * When using promptRef, automatically inject format instruction if {{format}} placeholder is not present.
518
+ * This ensures the UI format selection always takes effect.
519
+ * Default: true
520
+ *
521
+ * @example
522
+ * ```typescript
523
+ * parse({
524
+ * provider: vlmProvider,
525
+ * format: 'markdown',
526
+ * promptRef: 'my-custom-prompt',
527
+ * autoInjectFormat: false // Disable auto-injection
528
+ * })
529
+ * ```
530
+ */
531
+ autoInjectFormat?: boolean;
532
+ /**
533
+ * Enable extended reasoning/thinking for VLM providers that support it.
534
+ * Only applies when using a VLM provider (not OCR).
535
+ *
536
+ * @example
537
+ * ```typescript
538
+ * parse({
539
+ * provider: vlmProvider,
540
+ * format: 'markdown',
541
+ * reasoning: { enabled: true, effort: 'medium' }
542
+ * })
543
+ * ```
544
+ */
545
+ reasoning?: {
546
+ effort?: 'low' | 'medium' | 'high';
547
+ exclude?: boolean;
548
+ enabled?: boolean;
549
+ };
550
+ };
551
+ type SplitNodeConfig = {
552
+ provider: VLMProvider;
553
+ schemas: Record<string, object>;
554
+ includeOther?: boolean;
555
+ consensus?: ConsensusConfig;
556
+ schemaRef?: string;
557
+ /**
558
+ * Enable extended reasoning/thinking for providers that support it.
559
+ *
560
+ * @example
561
+ * ```typescript
562
+ * split({
563
+ * provider: vlmProvider,
564
+ * schemas: { invoice: invoiceSchema, receipt: receiptSchema },
565
+ * reasoning: { enabled: true, effort: 'high' }
566
+ * })
567
+ * ```
568
+ */
569
+ reasoning?: {
570
+ effort?: 'low' | 'medium' | 'high';
571
+ exclude?: boolean;
572
+ enabled?: boolean;
573
+ };
574
+ };
575
+ type CategorizeNodeConfig = {
576
+ provider: LLMProvider | VLMProvider;
577
+ categories: string[];
578
+ consensus?: ConsensusConfig;
579
+ additionalPrompt?: string;
580
+ promptRef?: string;
581
+ /**
582
+ * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).
583
+ *
584
+ * Auto-injected variables (no need to pass manually):
585
+ * - categories: From config.categories
586
+ * - documentText: Computed from DocumentIR input
587
+ *
588
+ * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).
589
+ */
590
+ promptVariables?: Record<string, any>;
591
+ /**
592
+ * Additional instructions to append to the default prompt.
593
+ * This provides a simple way to customize the prompt without creating a custom prompt asset.
594
+ * The instructions will be added after the main prompt content.
595
+ *
596
+ * @example
597
+ * ```typescript
598
+ * categorize({
599
+ * provider: llmProvider,
600
+ * categories: ['invoice', 'receipt', 'contract'],
601
+ * additionalInstructions: "Consider the document's header and footer when categorizing."
602
+ * })
603
+ * ```
604
+ */
605
+ additionalInstructions?: string;
606
+ /**
607
+ * Enable extended reasoning/thinking for providers that support it.
608
+ *
609
+ * @example
610
+ * ```typescript
611
+ * categorize({
612
+ * provider: vlmProvider,
613
+ * categories: ['invoice', 'receipt', 'contract'],
614
+ * reasoning: { enabled: true, effort: 'low' }
615
+ * })
616
+ * ```
617
+ */
618
+ reasoning?: {
619
+ effort?: 'low' | 'medium' | 'high';
620
+ exclude?: boolean;
621
+ enabled?: boolean;
622
+ };
623
+ };
624
+ type ExtractNodeConfig<T = any> = {
625
+ provider: LLMProvider | VLMProvider;
626
+ schema: object | EnhancedExtractionSchema<T> | {
627
+ ref: string;
628
+ };
629
+ consensus?: ConsensusConfig;
630
+ reasoning?: {
631
+ effort?: 'low' | 'medium' | 'high';
632
+ exclude?: boolean;
633
+ enabled?: boolean;
634
+ };
635
+ additionalPrompt?: string;
636
+ citations?: CitationConfig;
637
+ promptRef?: string;
638
+ /**
639
+ * Optional custom variables for prompt rendering (e.g., language, strictMode, tenantId).
640
+ *
641
+ * Auto-injected variables (no need to pass manually):
642
+ * - schema: From config.schema
643
+ * - documentText: Computed from DocumentIR or FlowInput
644
+ * - schemaTitle: From schema.title or default "the provided schema"
645
+ * - schemaDescription: From schema.description or empty string
646
+ * - structuredFormat: Generated formatting instructions (for markdown/html)
647
+ *
648
+ * Use promptVariables only for runtime context (localization, multi-tenancy, behavioral flags).
649
+ */
650
+ promptVariables?: Record<string, any>;
651
+ /**
652
+ * Additional instructions to append to the default prompt.
653
+ * This provides a simple way to customize the prompt without creating a custom prompt asset.
654
+ * The instructions will be added after the main prompt content.
655
+ *
656
+ * @example
657
+ * ```typescript
658
+ * extract({
659
+ * provider: llmProvider,
660
+ * schema: mySchema,
661
+ * additionalInstructions: "Be strict with date formats. Use YYYY-MM-DD format only."
662
+ * })
663
+ * ```
664
+ */
665
+ additionalInstructions?: string;
666
+ };
667
+ /** Chunk output structure */
668
+ type ChunkMetadata = {
669
+ content: string;
670
+ id: string;
671
+ index: number;
672
+ startChar: number;
673
+ endChar: number;
674
+ pageNumbers: number[];
675
+ section?: string;
676
+ headers?: string[];
677
+ strategy: string;
678
+ tokenCount?: number;
679
+ wordCount: number;
680
+ charCount: number;
681
+ };
682
+ type ChunkOutput = {
683
+ chunks: ChunkMetadata[];
684
+ totalChunks: number;
685
+ averageChunkSize: number;
686
+ sourceMetadata?: {
687
+ providerType?: string;
688
+ };
689
+ sourceDocument?: DocumentIR;
690
+ };
691
+ type ChunkNodeConfig = {
692
+ strategy: 'recursive' | 'section' | 'page' | 'fixed';
693
+ maxSize?: number;
694
+ minSize?: number;
695
+ overlap?: number;
696
+ separators?: string[];
697
+ pagesPerChunk?: number;
698
+ combineShortPages?: boolean;
699
+ minPageContent?: number;
700
+ size?: number;
701
+ unit?: 'tokens' | 'characters';
702
+ };
703
+ type CombineNodeConfig = {
704
+ strategy: 'merge' | 'concatenate' | 'first' | 'last';
705
+ };
706
+ type OutputNodeConfig = {
707
+ source?: string | string[];
708
+ transform?: 'first' | 'last' | 'merge' | 'pick' | 'custom';
709
+ fields?: string[];
710
+ name?: string;
711
+ /**
712
+ * Custom transform function for 'custom' transform mode.
713
+ * @param inputs - The input value(s) from the source step(s)
714
+ * @param artifacts - All artifacts from the flow execution
715
+ * @returns The transformed output value
716
+ */
717
+ customTransform?: (inputs: unknown | unknown[], artifacts: Record<string, unknown>) => unknown;
718
+ };
719
+ /** Enhanced extraction schema with examples and guidance */
720
+ type EnhancedExtractionSchema<T = unknown> = {
721
+ schema: object;
722
+ examples?: Array<{
723
+ description: string;
724
+ input: string;
725
+ output: T;
726
+ }>;
727
+ extractionRules?: string;
728
+ contextPrompt?: string;
729
+ hints?: string[];
730
+ };
731
+ /** Node & runner */
732
+ type StepMetric = {
733
+ step: string;
734
+ configStepId?: string;
735
+ startMs: number;
736
+ provider?: string;
737
+ model?: string;
738
+ ms: number;
739
+ costUSD?: number;
740
+ inputTokens?: number;
741
+ outputTokens?: number;
742
+ cacheCreationInputTokens?: number;
743
+ cacheReadInputTokens?: number;
744
+ attemptNumber?: number;
745
+ metadata?: {
746
+ kind?: 'leaf' | 'wrapper' | 'prep';
747
+ rollup?: boolean;
748
+ overheadMs?: number;
749
+ /** Additional metadata fields */
750
+ [key: string]: string | number | boolean | undefined;
751
+ };
752
+ };
753
+ /** Aggregated metrics for multi-step flows */
754
+ interface AggregatedMetrics {
755
+ totalDurationMs: number;
756
+ totalCostUSD: number;
757
+ totalInputTokens: number;
758
+ totalOutputTokens: number;
759
+ totalCacheCreationTokens: number;
760
+ totalCacheReadTokens: number;
761
+ stepCount: number;
762
+ byProvider: Record<string, {
763
+ costUSD: number;
764
+ inputTokens: number;
765
+ outputTokens: number;
766
+ callCount: number;
767
+ }>;
768
+ }
769
+ /**
770
+ * Aggregate metrics from multiple steps
771
+ * @param metrics - Array of step metrics
772
+ * @returns Aggregated totals and per-provider breakdowns
773
+ */
774
+ declare function aggregateMetrics(metrics: StepMetric[]): AggregatedMetrics;
775
+ /**
776
+ * Execution context passed to conditional functions and trigger nodes
777
+ * Provides access to artifacts and metrics from all previous steps
778
+ */
779
+ interface FlowContext {
780
+ /** Outputs from all completed steps, indexed by step ID */
781
+ artifacts: Record<string, any>;
782
+ /** Performance metrics from all completed steps */
783
+ metrics: StepMetric[];
784
+ /** Call stack for tracking nested flow execution (for circular dependency detection) */
785
+ callStack?: string[];
786
+ /** Maximum nesting depth for flow triggers (default: 10) */
787
+ maxDepth?: number;
788
+ }
789
+ /**
790
+ * W3C Trace Context for distributed tracing.
791
+ * Compatible with observability module's TraceContext.
792
+ */
793
+ interface TraceContextLite {
794
+ traceId: string;
795
+ spanId: string;
796
+ parentSpanId?: string;
797
+ traceFlags: number;
798
+ traceState?: string;
799
+ }
800
+ /**
801
+ * Observability context passed to node executions.
802
+ * Uses 'any' for config and traceContext to avoid circular imports and
803
+ * maintain compatibility with the full observability types.
804
+ */
805
+ type NodeObservabilityContext = {
806
+ /** Observability configuration - full type in observability module */
807
+ config?: any;
808
+ flowId?: string;
809
+ executionId?: string;
810
+ stepId?: string;
811
+ stepIndex?: number;
812
+ /** W3C Trace Context - compatible with TraceContext from observability module */
813
+ traceContext?: any;
814
+ metadata?: Record<string, unknown>;
815
+ };
816
+ type NodeCtx = {
817
+ stepId?: string;
818
+ artifacts: Record<string, unknown>;
819
+ emit: (key: string, value: unknown) => void;
820
+ metrics: {
821
+ push: (m: StepMetric) => void;
822
+ };
823
+ /** Observability context for hooks (optional) */
824
+ observability?: NodeObservabilityContext;
825
+ };
826
+ /** Node type metadata for runtime validation */
827
+ type NodeTypeInfo = {
828
+ /** Input types this node accepts (e.g., ['FlowInput', 'DocumentIR']) */
829
+ inputTypes: string[];
830
+ /**
831
+ * Output type this node produces - can be string or function for config-dependent types.
832
+ * When a function, it receives the node's specific config and returns the output type string.
833
+ * Uses 'any' parameter to allow nodes to use their specific config types.
834
+ */
835
+ outputType: string | ((config: any) => string);
836
+ /** Provider types this node requires (if any) */
837
+ requiresProvider?: ('OCR' | 'VLM' | 'LLM')[];
838
+ /** Whether this node can accept array input */
839
+ acceptsArray?: boolean;
840
+ /**
841
+ * Whether this node always outputs an array (or function for config-dependent).
842
+ * Uses 'any' parameter to allow nodes to use their specific config types.
843
+ */
844
+ outputsArray?: boolean | ((config: any) => boolean);
845
+ /** Human-readable description of what this node does */
846
+ description?: string;
847
+ };
848
+ type NodeDef<I, O> = {
849
+ key: string;
850
+ run: (input: I, ctx: NodeCtx) => Promise<O>;
851
+ /** Optional type metadata for validation */
852
+ __meta?: NodeTypeInfo;
853
+ };
854
+ declare const node: <I, O>(key: string, run: NodeDef<I, O>["run"]) => NodeDef<I, O>;
855
+ declare function runPipeline(steps: NodeDef<any, any>[], input: any, observabilityContext?: NodeObservabilityContext): Promise<{
856
+ output: any;
857
+ artifacts: Record<string, unknown>;
858
+ metrics: StepMetric[];
859
+ }>;
860
+ /**
861
+ * Flow execution error with step context
862
+ *
863
+ * Thrown when a flow step fails during execution. Includes:
864
+ * - Which step failed (ID, index, type)
865
+ * - Which steps completed successfully
866
+ * - Partial artifacts from completed steps (for debugging)
867
+ * - The original error that caused the failure
868
+ *
869
+ * This makes debugging flow failures much easier by showing exactly where the error occurred
870
+ * and what data was produced before the failure.
871
+ *
872
+ * @example
873
+ * ```typescript
874
+ * try {
875
+ * await flow.run(input);
876
+ * } catch (error) {
877
+ * if (error instanceof FlowExecutionError) {
878
+ * console.error(`Failed at step ${error.failedStepIndex}: ${error.failedStepType}`);
879
+ * console.error(`Step ID: ${error.failedStep}`);
880
+ * console.error(`Completed: ${error.completedSteps.join(', ')}`);
881
+ * console.error(`Original error: ${error.originalError.message}`);
882
+ *
883
+ * // Access partial results from completed steps
884
+ * if (error.partialArtifacts?.qualify) {
885
+ * console.log('Quality assessment completed:', error.partialArtifacts.qualify);
886
+ * }
887
+ * }
888
+ * }
889
+ * ```
890
+ */
891
declare class FlowExecutionError extends Error {
    /** The ID of the step that failed (e.g., 'parse_node123') */
    readonly failedStep: string;
    /** The index of the failed step in the flow (0-based) */
    readonly failedStepIndex: number;
    /** The type of the failed step (e.g., 'parse', 'extract', 'step', 'conditional', 'forEach') */
    readonly failedStepType: string;
    /** Array of step IDs that completed successfully before the failure */
    readonly completedSteps: string[];
    /** The original error that caused the failure */
    readonly originalError: Error;
    /** Partial artifacts from steps that completed before the failure */
    readonly partialArtifacts?: Record<string, any> | undefined;
    constructor(message: string,
    /** The ID of the step that failed (e.g., 'parse_node123') */
    failedStep: string,
    /** The index of the failed step in the flow (0-based) */
    failedStepIndex: number,
    /** The type of the failed step (e.g., 'parse', 'extract', 'step', 'conditional', 'forEach') */
    failedStepType: string,
    /** Array of step IDs that completed successfully before the failure */
    completedSteps: string[],
    /** The original error that caused the failure */
    originalError: Error,
    /** Partial artifacts from steps that completed before the failure */
    partialArtifacts?: Record<string, any> | undefined);
}
918
/**
 * Flow validation error for invalid node connections
 *
 * Thrown when building a flow with incompatible node connections.
 * Provides helpful error messages and suggestions for fixing the issue.
 *
 * @example
 * ```typescript
 * try {
 *   const flow = createFlow()
 *     .step('parse', parse({ provider: ocrProvider }))
 *     .step('combine', combine()) // Invalid: combine needs array input
 *     .build();
 * } catch (error) {
 *   if (error instanceof FlowValidationError) {
 *     console.error(error.message);
 *     console.error('Reason:', error.reason);
 *     console.log('Suggestions:', error.suggestions?.join('\n'));
 *   }
 * }
 * ```
 */
declare class FlowValidationError extends Error {
    /** Explanation of why the connection is invalid */
    readonly reason?: string | undefined;
    /** Actionable suggestions for fixing the invalid connection */
    readonly suggestions?: string[] | undefined;
    /** Identifier of the source node of the invalid connection — presumably a step/node ID; confirm against builder */
    readonly sourceNode?: string | undefined;
    /** Identifier of the target node of the invalid connection */
    readonly targetNode?: string | undefined;
    /** Output type name produced by the source node (e.g., 'DocumentIR') */
    readonly sourceOutputType?: string | undefined;
    /** Input type names accepted by the target node */
    readonly targetInputTypes?: string[] | undefined;
    constructor(message: string, reason?: string | undefined, suggestions?: string[] | undefined, sourceNode?: string | undefined, targetNode?: string | undefined, sourceOutputType?: string | undefined, targetInputTypes?: string[] | undefined);
}
949
/** Node type names for validation */
type NodeTypeName = 'parse' | 'split' | 'categorize' | 'extract' | 'chunk' | 'combine' | 'trigger' | 'output';
/** Compatibility rule for node connections */
type CompatibilityRule = {
    /** Whether the connection is allowed */
    valid: boolean;
    /** Connection is only valid when the source node is wrapped in forEach */
    requiresForEach?: boolean;
    /** Indicates this connection cannot be fully validated at build-time and requires runtime type checking */
    requiresRuntimeValidation?: boolean;
    /** Explanation of why the connection is invalid */
    reason?: string;
    /** Additional informational note about the rule */
    note?: string;
};
960
/**
 * Node Compatibility Matrix
 *
 * Defines which nodes can connect to which other nodes.
 * This is the single source of truth for node connection validation.
 *
 * Rules based on input/output type compatibility:
 * - parse: FlowInput → DocumentIR (or DocumentIR[] if chunked)
 * - split: FlowInput → SplitDocument[] (requires forEach)
 * - categorize: DocumentIR|FlowInput → {input, category}
 * - extract: DocumentIR|FlowInput|ChunkOutput → T (typed JSON)
 * - chunk: DocumentIR|DocumentIR[] → ChunkOutput
 * - combine: T[] → T|T[] (merges forEach results)
 * - trigger: any → TOutput (depends on child flow)
 *
 * Special behaviors:
 * - forEach auto-unwraps SplitDocument.input → FlowInput
 * - Conditional auto-unwraps {input, category} → input
 * - parse with chunked:true outputs DocumentIR[] instead of DocumentIR
 */
declare const NODE_COMPATIBILITY_MATRIX: Record<NodeTypeName, Record<NodeTypeName, CompatibilityRule>>;
981
/**
 * Get node type name from a NodeDef
 * @param node - Node definition
 * @returns Node type name (e.g., 'parse', 'extract'), or null when the node's
 *   type cannot be determined
 */
declare function getNodeTypeName(node: NodeDef<any, any>): NodeTypeName | null;
/**
 * Get type information from a node
 * @param node - Node definition
 * @returns NodeTypeInfo if available, otherwise null
 */
declare function getNodeTypeInfo(node: NodeDef<any, any>): NodeTypeInfo | null;
993
/**
 * Get compatible target nodes for a given source node
 * @param sourceType - Source node type name
 * @param includeForEach - Include connections that require forEach
 * @returns Array of compatible target node types
 */
declare function getCompatibleTargets(sourceType: NodeTypeName, includeForEach?: boolean): NodeTypeName[];
/**
 * Get suggested connections when a connection is invalid
 * @param sourceType - Source node type name
 * @returns Array of suggestion strings
 */
declare function getSuggestedConnections(sourceType: NodeTypeName): string[];
1006
/**
 * Validation result for node connections
 */
type ValidationResult = {
    /** Whether the connection is valid */
    valid: boolean;
    /** Explanation of why the connection is invalid */
    reason?: string;
    /** Actionable suggestions for fixing an invalid connection */
    suggestions?: string[];
    /** Connection would be valid if the source node used forEach */
    requiresForEach?: boolean;
    /** Warning message for connections that are valid but require runtime type checking */
    warning?: string;
};
/**
 * Validate if two node types can be connected
 * @param sourceType - Source node type name
 * @param targetType - Target node type name
 * @param forEachEnabled - Whether forEach is enabled on the source node
 * @returns Validation result with reason and suggestions
 */
declare function validateNodeConnection(sourceType: NodeTypeName, targetType: NodeTypeName, forEachEnabled?: boolean): ValidationResult;
1025
/**
 * Get valid starting nodes for forEach itemFlow based on parent node type
 *
 * When a node outputs an array and uses forEach, the itemFlow receives individual
 * array items. This function returns which node types can accept those items.
 *
 * @param parentType - The node type that outputs the array (e.g., 'split', 'parse')
 * @returns Array of node types that can start the forEach itemFlow
 *
 * @example
 * ```typescript
 * // split outputs SplitDocument[], itemFlow gets SplitDocument
 * getValidForEachStarters('split') // ['parse', 'extract', 'categorize', 'trigger']
 *
 * // parse(chunked:true) outputs DocumentIR[], itemFlow gets DocumentIR
 * getValidForEachStarters('parse') // ['categorize', 'extract', 'chunk']
 * ```
 */
declare function getValidForEachStarters(parentType: NodeTypeName): NodeTypeName[];
/**
 * Validate if a node type can start a forEach itemFlow for a given parent
 *
 * @param parentType - The node type that outputs the array (e.g., 'split')
 * @param starterType - The node type to validate as itemFlow starter
 * @returns ValidationResult with detailed error messages and suggestions
 *
 * @example
 * ```typescript
 * // Valid: split → forEach → parse
 * canStartForEachItemFlow('split', 'parse') // { valid: true }
 *
 * // Invalid: split → forEach → chunk
 * canStartForEachItemFlow('split', 'chunk')
 * // {
 * //   valid: false,
 * //   reason: 'chunk cannot start forEach itemFlow after split...',
 * //   suggestions: ['Valid starters: parse, extract, categorize, trigger']
 * // }
 * ```
 */
declare function canStartForEachItemFlow(parentType: NodeTypeName, starterType: NodeTypeName): ValidationResult;
1066
/**
 * JSON Schema node structure for validation.
 * Represents a node in a JSON Schema definition.
 *
 * NOTE(review): `nullable` looks like the OpenAPI 3.0 convention rather than a
 * standard JSON Schema keyword — confirm against the validator implementation.
 */
interface JSONSchemaNode {
    type?: string | string[];
    properties?: Record<string, JSONSchemaNode>;
    items?: JSONSchemaNode | JSONSchemaNode[];
    required?: string[];
    enum?: (string | number | boolean | null)[];
    nullable?: boolean;
    anyOf?: JSONSchemaNode[];
    oneOf?: JSONSchemaNode[];
    allOf?: JSONSchemaNode[];
    const?: unknown;
    additionalProperties?: boolean | JSONSchemaNode;
    minLength?: number;
    maxLength?: number;
    minimum?: number;
    maximum?: number;
    minItems?: number;
    maxItems?: number;
    pattern?: string;
    format?: string;
    description?: string;
    default?: unknown;
    $ref?: string;
}
1094
/**
 * Lightweight JSON Schema validator for Edge Runtime compatibility
 *
 * Validates data against a JSON Schema without using AJV's code generation.
 * This is fully Edge Runtime compatible with zero dependencies.
 *
 * @param data - The data to validate
 * @param schema - JSON Schema object (plain object, not AJV JSONSchemaType)
 * @returns The validated data cast to type T
 * @throws Error if validation fails
 */
declare function validateJson<T>(data: unknown, schema: JSONSchemaNode): T;
1106
/**
 * Reserved variables that are auto-injected per node type.
 * These variables come from config or computed data and cannot be overridden by users.
 */
declare const RESERVED_VARIABLES: {
    readonly extract: readonly ["schema", "documentText", "schemaTitle", "schemaDescription", "structuredFormat"];
    readonly categorize: readonly ["categories", "documentText"];
    readonly parse: readonly ["format", "schema", "describeFigures", "citationsEnabled"];
};
/**
 * Validates that user-provided promptVariables don't attempt to override reserved variables.
 * Emits console warnings if reserved variables are found in user variables and removes them.
 *
 * @param nodeType - The type of node (extract, categorize, parse)
 * @param userVariables - The user-provided promptVariables object
 * @param autoInjectedVariables - The auto-injected variables object
 * @returns A cleaned variables object with reserved variables protected
 */
declare function protectReservedVariables(nodeType: 'extract' | 'categorize' | 'parse', userVariables: Record<string, any> | undefined, autoInjectedVariables: Record<string, any>): Record<string, any>;
1125
+
1126
// Bundler-generated export map: the package's public symbols are re-exported
// under minified single/double-character aliases for the barrel entry point.
// Do not edit by hand — aliases are assigned by the build tooling.
export { type ExtractedImage as $, type AccessMethod as A, type BBox as B, type ConsensusConfig as C, type DocumentIR as D, type ExtractNodeConfig as E, type FieldVotingDetails as F, type FlowContext as G, type NodeCtx as H, type IRLine as I, type NodeTypeInfo as J, type NodeDef as K, type LLMProvider as L, type MultimodalInput as M, type NormalizedBBox as N, type OCRProvider as O, type ProviderVendor as P, type NodeTypeName as Q, type ReasoningConfig as R, type SplitDocument as S, type CompatibilityRule as T, type ValidationResult as U, type VLMProvider as V, type JSONSchemaNode as W, type ProcessingMode as X, type PageRangeOptions as Y, type LanguageOptions as Z, type SegmentationResult as _, type IRPage as a, type OCRProviderOptions as a0, type VLMProviderOptions as a1, type ProviderCitation as a2, aggregateMetrics as a3, node as a4, runPipeline as a5, FlowExecutionError as a6, FlowValidationError as a7, NODE_COMPATIBILITY_MATRIX as a8, getNodeTypeName as a9, getNodeTypeInfo as aa, getCompatibleTargets as ab, getSuggestedConnections as ac, validateNodeConnection as ad, getValidForEachStarters as ae, canStartForEachItemFlow as af, validateJson as ag, RESERVED_VARIABLES as ah, protectReservedVariables as ai, type ProviderIdentity as aj, toProviderString as ak, parseProviderString as al, isLocalEndpoint as am, createIdentity as an, type SupportedMimeType as ao, type TraceContextLite as ap, type NodeObservabilityContext as aq, type DocumentIRExtras as b, type LLMJsonProvider as c, type ConsensusRunResult as d, type ConsensusMetadata as e, type OutputWithConsensus as f, type MaybeWithConsensusMetadata as g, type FlowInput as h, type FlowInputValidation as i, type FlowResult as j, type CitationSourceType as k, type LineCitation as l, type FieldCitation as m, type CitationConfig as n, type OutputWithCitations as o, type ParseNodeConfig as p, type SplitNodeConfig as q, type CategorizeNodeConfig as r, type ChunkMetadata as s, type ChunkOutput as t, type ChunkNodeConfig as u, type CombineNodeConfig as v, type OutputNodeConfig as w, type EnhancedExtractionSchema as x, type StepMetric as y, type AggregatedMetrics as z };