@doclo/flows 0.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,932 @@
1
+ import * as _doclo_core from '@doclo/core';
2
+ import { FlowInput, FlowInputValidation, AcceptedMimeType, NodeDef, FlowContext, FlowResult, OutputNodeConfig, JSONSchemaNode, VLMProvider, OCRProvider, DocumentIR, LLMJsonProvider } from '@doclo/core';
3
+ export { FlowContext, bufferToBase64, bufferToDataUri } from '@doclo/core';
4
+ export { SimpleOut, simpleSchema } from './schemas.js';
5
+ import { ObservabilityConfig, ExecutionContext, TraceContext } from '@doclo/core/observability';
6
+ export { BatchEndContext, BatchItemContext, BatchItemEndContext, BatchStartContext, CircuitBreakerContext, ConsensusCompleteContext, ConsensusRunContext, ConsensusStartContext, CustomMetric, ExecutionContext, FlowEndContext, FlowErrorContext, FlowStartContext, FlowStats, ObservabilityConfig, ProviderRequestContext, ProviderResponseContext, ProviderRetryContext, StepEndContext, StepErrorContext, StepStartContext, TraceContext } from '@doclo/core/observability';
7
+ import { ProviderRegistry as ProviderRegistry$1 } from '@doclo/nodes';
8
+ export { categorize, chunk, combine, extract, parse, split, trigger } from '@doclo/nodes';
9
+
10
+ /**
11
+ * Progress callback options for flow execution
12
+ */
13
+ interface FlowProgressCallbacks {
14
+ /** Called when a step starts execution */
15
+ onStepStart?: (stepId: string, stepIndex: number, stepType: string) => void;
16
+ /** Called when a step completes successfully */
17
+ onStepComplete?: (stepId: string, stepIndex: number, stepType: string, durationMs: number) => void;
18
+ /** Called when a step fails with an error */
19
+ onStepError?: (stepId: string, stepIndex: number, stepType: string, error: Error) => void;
20
+ }
21
+ /**
22
+ * Validation error for a flow step
23
+ */
24
+ interface FlowValidationError {
25
+ stepId: string;
26
+ stepIndex: number;
27
+ stepType: string;
28
+ message: string;
29
+ }
30
+ /**
31
+ * Result of flow validation
32
+ */
33
+ interface FlowValidationResult {
34
+ valid: boolean;
35
+ errors: FlowValidationError[];
36
+ warnings: string[];
37
+ }
38
+
39
+ /**
40
+ * Batch result type returned when flow has multiple outputs
41
+ */
42
+ type BatchFlowResult = {
43
+ results: FlowResult<any>[];
44
+ };
45
+ /**
46
+ * Type representing the built flow object returned by Flow.build()
47
+ */
48
+ type BuiltFlow<TInput = any, TOutput = any> = {
49
+ run: (input: TInput, callbacks?: FlowProgressCallbacks) => Promise<FlowResult<TOutput> | BatchFlowResult>;
50
+ validate: () => FlowValidationResult;
51
+ };
52
+ /**
53
+ * Type helper to extract the unwrapped input type from a wrapped type.
54
+ * If T has an 'input' property, returns the type of that property.
55
+ * Otherwise returns T unchanged.
56
+ *
57
+ * This matches the runtime behavior where conditionals receive wrapped data
58
+ * but pass unwrapped data to the selected node.
59
+ */
60
+ type UnwrapInput<T> = T extends {
61
+ input: infer I;
62
+ } ? I : T;
63
+ /**
64
+ * Options for creating a flow
65
+ */
66
+ interface FlowOptions {
67
+ /** Observability configuration */
68
+ observability?: ObservabilityConfig;
69
+ /** User metadata to include in all observability contexts */
70
+ metadata?: Record<string, unknown>;
71
+ /**
72
+ * Input format validation configuration.
73
+ * Allows specifying accepted MIME types for early validation
74
+ * before flow execution begins.
75
+ */
76
+ inputValidation?: FlowInputValidation;
77
+ }
78
+ /**
79
+ * Flow builder class for creating document processing pipelines.
80
+ * @template TInput - The input type for the flow
81
+ * @template TOutput - The output type for the flow
82
+ */
83
+ declare class Flow<TInput = any, TOutput = any> {
84
+ private steps;
85
+ private observability?;
86
+ private metadata?;
87
+ private inputValidation?;
88
+ private traceContextManager?;
89
+ private currentExecution?;
90
+ constructor(options?: FlowOptions);
91
+ /**
92
+ * Set accepted input formats for this flow (fluent API).
93
+ * Validates input format before flow execution begins.
94
+ *
95
+ * @param formats - List of accepted MIME types (e.g., ['application/pdf', 'image/jpeg'])
96
+ * @returns This flow instance for chaining
97
+ *
98
+ * @example
99
+ * ```typescript
100
+ * const pdfOnlyFlow = createFlow()
101
+ * .acceptFormats(['application/pdf'])
102
+ * .step('parse', parse({ provider }))
103
+ * .build();
104
+ *
105
+ * // Throws FlowInputValidationError if input is not a PDF
106
+ * await pdfOnlyFlow.run({ base64: jpegBase64 });
107
+ * ```
108
+ */
109
+ acceptFormats(formats: AcceptedMimeType[]): Flow<TInput, TOutput>;
110
+ /**
111
+ * Add a sequential step to the flow
112
+ */
113
+ step<TStepOutput>(id: string, node: NodeDef<TOutput, TStepOutput>, name?: string): Flow<TInput, TStepOutput>;
114
+ /**
115
+ * Add a conditional step that chooses a node based on input data
116
+ *
117
+ * IMPORTANT: Conditionals must return a NODE, not a promise or executed flow.
118
+ * The SDK will execute the returned node for you.
119
+ *
120
+ * The condition function receives the full wrapped data (e.g., { input, quality })
121
+ * but the returned node should accept the unwrapped input (e.g., just FlowInput).
122
+ * The SDK automatically unwraps the data before passing it to the selected node.
123
+ *
124
+ * ✅ CORRECT - Return a node (declarative):
125
+ * ```typescript
126
+ * .step('qualify', qualify({ provider, levels: ['low', 'medium', 'high'] }))
127
+ * .conditional('parse', (data) => {
128
+ * // data is { input: FlowInput, quality: string }
129
+ * if (data.quality === 'high') {
130
+ * return parse({ provider: fastProvider }); // Return the node
131
+ * }
132
+ * return parse({ provider: accurateProvider }); // Return the node
133
+ * })
134
+ * ```
135
+ *
136
+ * ❌ INCORRECT - Do NOT return a promise (imperative):
137
+ * ```typescript
138
+ * .conditional('parse', (data) => {
139
+ * // This will throw an error!
140
+ * return createFlow()
141
+ * .step('parse', parse({ provider }))
142
+ * .build()
143
+ * .run(data.input) // ❌ Don't call .run() here!
144
+ * .then(r => r.output);
145
+ * })
146
+ * ```
147
+ *
148
+ * 🆕 NEW - Access previous step outputs via context:
149
+ * ```typescript
150
+ * .step('categorize', categorize({ provider, categories }))
151
+ * .conditional('parse', (data) => parse({ provider }))
152
+ * .conditional('extract', (data, context) => {
153
+ * // Access category from earlier step via context.artifacts
154
+ * const category = context?.artifacts.categorize?.category;
155
+ * return extract({ provider, schema: SCHEMAS[category] });
156
+ * })
157
+ * ```
158
+ *
159
+ * Use the declarative pattern (return nodes) for consistent flow execution,
160
+ * proper error tracking, and accurate metrics collection.
161
+ */
162
+ conditional<TConditionalOutput>(id: string, condition: (data: TOutput, context?: FlowContext) => NodeDef<UnwrapInput<TOutput>, TConditionalOutput>, name?: string): Flow<TInput, TConditionalOutput>;
163
+ /**
164
+ * Process each item from previous step (which must return an array) with a child flow
165
+ * Each item is processed in parallel as its own isolated run
166
+ */
167
+ forEach<TItem, TForEachOutput>(id: string, childFlow: (item: TItem) => Flow<TItem, TForEachOutput>, name?: string): Flow<TInput, FlowResult<TForEachOutput>[]>;
168
+ /**
169
+ * Add an explicit output node to mark which data to return from the flow
170
+ *
171
+ * By default, flows return the output of the last step. Use output nodes to:
172
+ * - Return data from earlier steps
173
+ * - Return multiple named outputs
174
+ * - Transform outputs before returning
175
+ *
176
+ * @param config - Output configuration
177
+ * @returns Flow with output node added
178
+ *
179
+ * @example
180
+ * // Single output
181
+ * .output({ name: 'invoice_data' })
182
+ *
183
+ * // Select specific source
184
+ * .output({ name: 'result', source: 'step2' })
185
+ *
186
+ * // Multiple outputs
187
+ * .step('extract1', extract({ provider, schema1 }))
188
+ * .output({ name: 'summary', source: 'extract1' })
189
+ * .step('extract2', extract({ provider, schema2 }))
190
+ * .output({ name: 'details', source: 'extract2' })
191
+ */
192
+ output<TOutputShape = TOutput>(config?: OutputNodeConfig): Flow<TInput, TOutputShape>;
193
+ /**
194
+ * Get current execution context
195
+ *
196
+ * Returns null if not currently executing.
197
+ */
198
+ getExecutionContext(): ExecutionContext | null;
199
+ /**
200
+ * Get current trace context
201
+ *
202
+ * Returns null if not currently executing or observability not configured.
203
+ */
204
+ getTraceContext(): TraceContext | null;
205
+ /**
206
+ * Set a custom attribute on the current execution
207
+ *
208
+ * Custom attributes appear in execution context and can be accessed by hooks.
209
+ */
210
+ setCustomAttribute(key: string, value: unknown): void;
211
+ /**
212
+ * Record a custom metric for the current execution
213
+ *
214
+ * Custom metrics appear in execution context and can be accessed by hooks.
215
+ */
216
+ recordMetric(name: string, value: number, unit?: string): void;
217
+ /**
218
+ * Build and return the executable flow
219
+ */
220
+ build(): BuiltFlow<TInput, TOutput>;
221
+ /**
222
+ * Generate a unique step ID for unnamed output nodes
223
+ * Prevents duplicate IDs when multiple .output() calls without names
224
+ */
225
+ private generateOutputStepId;
226
+ /**
227
+ * Validate the flow configuration
228
+ */
229
+ private validate;
230
+ /**
231
+ * Validate type compatibility between consecutive steps
232
+ */
233
+ private validateTypeCompatibility;
234
+ /**
235
+ * Check for inefficient flow patterns and add warnings.
236
+ *
237
+ * Detects patterns like:
238
+ * - parse() → extract(raw-document-provider): The extract provider ignores parse output
239
+ */
240
+ private checkEfficiencyPatterns;
241
+ /**
242
+ * Extract provider ID from a node definition.
243
+ * Returns undefined if provider cannot be determined.
244
+ */
245
+ private getProviderFromNode;
246
+ /**
247
+ * Execute the flow with optional progress callbacks
248
+ */
249
+ private execute;
250
+ }
251
+ /**
252
+ * Create a new flow builder
253
+ *
254
+ * @param options - Flow configuration options including observability and metadata
255
+ * @example
256
+ * ```typescript
257
+ * const flow = createFlow({
258
+ * observability: {
259
+ * onFlowStart: (ctx) => console.log('Flow started:', ctx.flowId),
260
+ * onStepEnd: (ctx) => console.log('Step done:', ctx.stepId, ctx.duration),
261
+ * },
262
+ * metadata: { environment: 'production', userId: 'user_123' }
263
+ * });
264
+ * ```
265
+ */
266
+ declare function createFlow<TInput = FlowInput>(options?: FlowOptions): Flow<TInput, TInput>;
267
+
268
+ /**
269
+ * Flow Registry for Serializable Trigger Nodes
270
+ *
271
+ * This registry allows flows to be referenced by string IDs in serialized configs.
272
+ * Used by the config API (serializable version) of trigger nodes.
273
+ *
274
+ * ## Usage
275
+ *
276
+ * ### Registration
277
+ * ```typescript
278
+ * import { registerFlow } from '@doclo/flows';
279
+ * import { createFlow } from '@doclo/flows';
280
+ * import { parse, extract } from '@doclo/nodes';
281
+ *
282
+ * // Register a flow builder
283
+ * registerFlow('invoice-processing-v2', (providers) =>
284
+ * createFlow()
285
+ * .step('parse', parse({ provider: providers.ocr }))
286
+ * .step('extract', extract({ provider: providers.vlm, schema: invoiceSchema }))
287
+ * );
288
+ * ```
289
+ *
290
+ * ### Retrieval
291
+ * ```typescript
292
+ * import { getFlow } from '@doclo/flows';
293
+ *
294
+ * const flowBuilder = getFlow('invoice-processing-v2');
295
+ * if (flowBuilder) {
296
+ * const flow = flowBuilder(myProviders);
297
+ * const result = await flow.build().run(input);
298
+ * }
299
+ * ```
300
+ *
301
+ * ### Serialization
302
+ * ```typescript
303
+ * import { buildFlowFromConfig } from '@doclo/flows';
304
+ *
305
+ * const flowDef = {
306
+ * version: '1.0.0',
307
+ * steps: [
308
+ * {
309
+ * type: 'step', id: 'invoice', // `id` is required by SerializableStandardStep
310
+ * nodeType: 'trigger',
311
+ * config: {
312
+ * type: 'trigger',
313
+ * flowRef: 'invoice-processing-v2' // References registered flow
314
+ * }
315
+ * }
316
+ * ]
317
+ * };
318
+ *
319
+ * const flow = buildFlowFromConfig(flowDef, providers); // args are positional; the optional 3rd arg is a Record<string, SerializableFlow>, not FLOW_REGISTRY
320
+ * ```
321
+ */
322
+
323
+ /**
324
+ * Flow builder function signature
325
+ * Takes optional provider registry and returns a Flow instance with build() method
326
+ *
327
+ * A FlowBuilder is a function that:
328
+ * 1. Accepts an optional ProviderRegistry (for provider injection/override)
329
+ * 2. Returns a Flow instance (from createFlow()) that has a build() method
330
+ * 3. The build() method returns a BuiltFlow with run() and validate()
331
+ */
332
+ type FlowBuilder<TInput = any, TOutput = any> = (providers?: ProviderRegistry$1) => {
333
+ build: () => BuiltFlow<TInput, TOutput>;
334
+ };
335
+ /**
336
+ * Global flow registry
337
+ * Maps flow IDs to flow builder functions
338
+ */
339
+ declare const FLOW_REGISTRY: Map<string, FlowBuilder<any, any>>;
340
+ /**
341
+ * Register a flow builder in the global registry
342
+ *
343
+ * @param id - Unique identifier for the flow
344
+ * @param builder - Flow builder function that accepts providers
345
+ *
346
+ * @example
347
+ * ```typescript
348
+ * registerFlow('invoice-processing', (providers) =>
349
+ * createFlow()
350
+ * .step('parse', parse({ provider: providers.ocr }))
351
+ * .step('extract', extract({ provider: providers.vlm, schema }))
352
+ * );
353
+ * ```
354
+ */
355
+ declare function registerFlow<TInput = any, TOutput = any>(id: string, builder: FlowBuilder<TInput, TOutput>): void;
356
+ /**
357
+ * Get a flow builder from the registry
358
+ *
359
+ * @param id - Flow identifier
360
+ * @returns Flow builder function or undefined if not found
361
+ *
362
+ * @example
363
+ * ```typescript
364
+ * const builder = getFlow('invoice-processing');
365
+ * if (builder) {
366
+ * const flow = builder(providers);
367
+ * const result = await flow.build().run(input);
368
+ * }
369
+ * ```
370
+ */
371
+ declare function getFlow<TInput = any, TOutput = any>(id: string): FlowBuilder<TInput, TOutput> | undefined;
372
+ /**
373
+ * Check if a flow is registered
374
+ *
375
+ * @param id - Flow identifier
376
+ * @returns true if flow is registered
377
+ */
378
+ declare function hasFlow(id: string): boolean;
379
+ /**
380
+ * Unregister a flow from the registry
381
+ *
382
+ * @param id - Flow identifier
383
+ * @returns true if flow was removed, false if it didn't exist
384
+ */
385
+ declare function unregisterFlow(id: string): boolean;
386
+ /**
387
+ * Clear all registered flows
388
+ * Useful for testing or resetting state
389
+ */
390
+ declare function clearRegistry(): void;
391
+ /**
392
+ * Get all registered flow IDs
393
+ *
394
+ * @returns Array of flow identifiers
395
+ */
396
+ declare function listFlows(): string[];
397
+ /**
398
+ * Get the number of registered flows
399
+ *
400
+ * @returns Number of flows in registry
401
+ */
402
+ declare function getFlowCount(): number;
403
+
404
+ /**
405
+ * Flow Serialization
406
+ *
407
+ * Provides serialization/deserialization for doclo-sdk flows.
408
+ * Supports all flow types: sequential steps, conditional branches, and forEach loops.
409
+ *
410
+ * Limitations:
411
+ * - Provider instances must be reconstructed at runtime
412
+ */
413
+
414
+ /**
415
+ * Union type for providers used in flow serialization
416
+ */
417
+ type FlowProvider = VLMProvider | OCRProvider;
418
+ /**
419
+ * JSON value type for literal field mappings
420
+ */
421
+ type JsonValue = string | number | boolean | null | JsonValue[] | {
422
+ [key: string]: JsonValue;
423
+ };
424
+ /**
425
+ * Serializable input validation configuration
426
+ */
427
+ type SerializableInputValidation = {
428
+ /**
429
+ * List of accepted MIME types.
430
+ * If specified, input must match one of these types or validation fails.
431
+ */
432
+ acceptedFormats?: Array<'application/pdf' | 'image/jpeg' | 'image/png' | 'image/gif' | 'image/webp'>;
433
+ /**
434
+ * Whether to throw on validation failure.
435
+ * @default true
436
+ */
437
+ throwOnInvalid?: boolean;
438
+ };
439
+ /**
440
+ * Serializable flow definition
441
+ */
442
+ type SerializableFlow = {
443
+ version: string;
444
+ steps: SerializableStep[];
445
+ /**
446
+ * Optional input format validation configuration.
447
+ * Allows specifying accepted MIME types for early validation.
448
+ */
449
+ inputValidation?: SerializableInputValidation;
450
+ };
451
+ /**
452
+ * Serializable step definition
453
+ */
454
+ type SerializableStep = SerializableStandardStep | SerializableConditionalStep | SerializableForEachStep;
455
+ /**
456
+ * Standard sequential step
457
+ */
458
+ type SerializableStandardStep = {
459
+ type: 'step';
460
+ id: string;
461
+ name?: string;
462
+ nodeType: 'parse' | 'extract' | 'split' | 'categorize' | 'trigger' | 'output';
463
+ config: NodeConfig;
464
+ };
465
+ /**
466
+ * Flow reference (alternative to inline SerializableFlow)
467
+ * Used to reduce JSON nesting depth for complex flows
468
+ */
469
+ type FlowReference = {
470
+ flowRef: string;
471
+ };
472
+ /**
473
+ * Conditional step (categorize + branches)
474
+ *
475
+ * Branches can be either inline flows or references to separate flows.
476
+ * Use references to avoid hitting database JSON nesting limits (e.g., Convex's 16-level limit).
477
+ */
478
+ type SerializableConditionalStep = {
479
+ type: 'conditional';
480
+ id: string;
481
+ name?: string;
482
+ nodeType: 'categorize';
483
+ config: CategorizeConfig;
484
+ branches: Record<string, SerializableFlow | FlowReference>;
485
+ };
486
+ /**
487
+ * ForEach step (split + item flow)
488
+ *
489
+ * itemFlow can be either an inline flow or a reference to a separate flow.
490
+ * Use references to avoid hitting database JSON nesting limits.
491
+ */
492
+ type SerializableForEachStep = {
493
+ type: 'forEach';
494
+ id: string;
495
+ name?: string;
496
+ nodeType: 'split';
497
+ config: SplitConfig;
498
+ itemFlow: SerializableFlow | FlowReference;
499
+ };
500
+ /**
501
+ * Input mapping configuration for trigger nodes
502
+ * Declarative alternatives to mapInput functions (for serialization)
503
+ */
504
+ type InputMappingConfig = {
505
+ type: 'passthrough';
506
+ } | {
507
+ type: 'unwrap';
508
+ } | {
509
+ type: 'artifact';
510
+ path: string;
511
+ } | {
512
+ type: 'merge';
513
+ artifactPath: string;
514
+ } | {
515
+ type: 'construct';
516
+ fields: Record<string, FieldMapping>;
517
+ };
518
+ type FieldMapping = {
519
+ source: 'input';
520
+ path?: string;
521
+ } | {
522
+ source: 'artifact';
523
+ path: string;
524
+ } | {
525
+ source: 'literal';
526
+ value: JsonValue;
527
+ };
528
+ /**
529
+ * Node configuration (without provider instances)
530
+ */
531
+ type NodeConfig = ParseConfig | ExtractConfig | SplitConfig | CategorizeConfig | TriggerConfig | OutputConfig;
532
+ type ParseConfig = {
533
+ type: 'parse';
534
+ providerRef: string;
535
+ consensus?: {
536
+ runs: number;
537
+ strategy?: 'majority' | 'unanimous';
538
+ onTie?: 'random' | 'fail' | 'retry';
539
+ };
540
+ };
541
+ type ExtractConfig = {
542
+ type: 'extract';
543
+ providerRef: string;
544
+ schema: JSONSchemaNode;
545
+ consensus?: {
546
+ runs: number;
547
+ strategy?: 'majority' | 'unanimous';
548
+ onTie?: 'random' | 'fail' | 'retry';
549
+ };
550
+ reasoning?: {
551
+ enabled?: boolean;
552
+ effort?: 'low' | 'medium' | 'high';
553
+ max_tokens?: number;
554
+ };
555
+ };
556
+ type SplitConfig = {
557
+ type: 'split';
558
+ providerRef: string;
559
+ schemas: Record<string, JSONSchemaNode>;
560
+ includeOther?: boolean;
561
+ consensus?: {
562
+ runs: number;
563
+ strategy?: 'majority' | 'unanimous';
564
+ onTie?: 'random' | 'fail' | 'retry';
565
+ };
566
+ schemaRef?: string;
567
+ };
568
+ type CategorizeConfig = {
569
+ type: 'categorize';
570
+ providerRef: string;
571
+ categories: string[];
572
+ consensus?: {
573
+ runs: number;
574
+ strategy?: 'majority' | 'unanimous';
575
+ onTie?: 'random' | 'fail' | 'retry';
576
+ };
577
+ promptRef?: string;
578
+ };
579
+ type TriggerConfig = {
580
+ type: 'trigger';
581
+ flowRef: string;
582
+ providerOverrides?: Record<string, string>;
583
+ inputMapping?: InputMappingConfig;
584
+ mergeMetrics?: boolean;
585
+ timeout?: number;
586
+ };
587
+ type OutputConfig = {
588
+ type: 'output';
589
+ name?: string;
590
+ source?: string | string[];
591
+ transform?: 'first' | 'last' | 'merge' | 'pick';
592
+ fields?: string[];
593
+ };
594
+ /**
595
+ * Provider registry for deserialization
596
+ */
597
+ type ProviderRegistry = Record<string, FlowProvider>;
598
+ /**
599
+ * Extract node metadata from a node (if available)
600
+ * Note: This is a best-effort extraction since nodes don't currently
601
+ * expose their config. Returns null for nodes without metadata.
602
+ */
603
+ declare function extractNodeMetadata(node: NodeDef<unknown, unknown>): {
604
+ nodeType: string;
605
+ config: NodeConfig;
606
+ } | null;
607
+ /**
608
+ * Validation error for flow serialization
609
+ */
610
+ declare class FlowSerializationError extends Error {
611
+ constructor(message: string);
612
+ }
613
+ /**
614
+ * Flow registry type
615
+ * Maps flow IDs to SerializableFlow objects (from database/Convex)
616
+ */
617
+ type FlowRegistry$1 = Record<string, SerializableFlow>;
618
+ /**
619
+ * Type guard to check if a value is a FlowReference
620
+ */
621
+ declare function isFlowReference(value: SerializableFlow | FlowReference): value is FlowReference;
622
+ /**
623
+ * Resolve a flow reference to a SerializableFlow
624
+ *
625
+ * @param flowOrRef - Either an inline flow or a flow reference
626
+ * @param flows - Flow registry to resolve references from
627
+ * @returns SerializableFlow
628
+ * @throws FlowSerializationError if reference cannot be resolved
629
+ */
630
+ declare function resolveFlowReference(flowOrRef: SerializableFlow | FlowReference, flows?: FlowRegistry$1): SerializableFlow;
631
+ /**
632
+ * Build a flow from a serializable definition
633
+ *
634
+ * @param flowDef - Serializable flow definition
635
+ * @param providers - Provider registry (map of provider refs to provider instances)
636
+ * @param flows - Optional flow registry for:
637
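+ * - Trigger nodes (NOTE(review): previously described as a map of flow refs to flow builders, but the declared type is Record<string, SerializableFlow> — confirm)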
638
+ * - Conditional branches (when using flowRef instead of inline SerializableFlow)
639
+ * - ForEach itemFlow (when using flowRef instead of inline SerializableFlow)
640
+ * @returns Executable flow
641
+ *
642
+ * @example
643
+ * ```typescript
644
+ * const flowDef: SerializableFlow = {
645
+ * version: '1.0.0',
646
+ * steps: [
647
+ * {
648
+ * type: 'step',
649
+ * id: 'parse',
650
+ * nodeType: 'parse',
651
+ * config: { type: 'parse', providerRef: 'ocr' }
652
+ * },
653
+ * {
654
+ * type: 'step',
655
+ * id: 'extract',
656
+ * nodeType: 'extract',
657
+ * config: {
658
+ * type: 'extract',
659
+ * providerRef: 'llm',
660
+ * schema: { ... }
661
+ * }
662
+ * }
663
+ * ]
664
+ * };
665
+ *
666
+ * const providers = {
667
+ * ocr: suryaProvider,
668
+ * llm: geminiProvider
669
+ * };
670
+ *
671
+ * const flow = buildFlowFromConfig(flowDef, providers);
672
+ * ```
673
+ */
674
+ declare function buildFlowFromConfig(flowDef: SerializableFlow, providers: ProviderRegistry, flows?: FlowRegistry$1, options?: FlowOptions): BuiltFlow<FlowInput, unknown>;
675
+ /**
676
+ * Helper to create a serializable flow definition
677
+ *
678
+ * @example
679
+ * ```typescript
680
+ * const flowDef = defineFlowConfig({
681
+ * // no `version` here: the parameter type is Omit<SerializableFlow, 'version'>
682
+ * steps: [
683
+ * {
684
+ * type: 'step',
685
+ * id: 'parse',
686
+ * nodeType: 'parse',
687
+ * config: { type: 'parse', providerRef: 'ocr' }
688
+ * }
689
+ * ]
690
+ * });
691
+ *
692
+ * // Save to database
693
+ * await db.flows.create({ definition: JSON.stringify(flowDef) });
694
+ *
695
+ * // Later, load and build
696
+ * const loaded = JSON.parse(row.definition);
697
+ * const flow = buildFlowFromConfig(loaded, providers);
698
+ * ```
699
+ */
700
+ declare function defineFlowConfig(config: Omit<SerializableFlow, 'version'>): SerializableFlow;
701
+
702
+ /**
703
+ * Composite nodes for conditional and forEach execution
704
+ *
705
+ * These nodes wrap complex multi-step operations (categorize + branch, split + forEach)
706
+ * into single logical steps with proper observability, metrics, and error handling.
707
+ */
708
+
709
+ /**
710
+ * Flow registry type
711
+ * Maps flow IDs to SerializableFlow objects (from database/Convex)
712
+ */
713
+ type FlowRegistry = Record<string, SerializableFlow>;
714
+ /**
715
+ * Configuration for conditional composite node
716
+ */
717
+ interface ConditionalCompositeConfig {
718
+ stepId: string;
719
+ categorizeConfig: CategorizeConfig;
720
+ branches: Record<string, SerializableFlow | FlowReference>;
721
+ providers: ProviderRegistry;
722
+ flows: FlowRegistry;
723
+ }
724
+ /**
725
+ * Creates a composite node that:
726
+ * 1. Executes a categorize node to determine the category
727
+ * 2. Selects and executes the appropriate branch flow
728
+ * 3. Returns the branch flow's output
729
+ *
730
+ * Includes full observability, metrics merging, and error context.
731
+ */
732
+ declare function createConditionalCompositeNode(config: ConditionalCompositeConfig): NodeDef<FlowInput, unknown>;
733
+ /**
734
+ * Configuration for forEach composite node
735
+ */
736
+ interface ForEachCompositeConfig {
737
+ stepId: string;
738
+ splitConfig: SplitConfig;
739
+ itemFlow: SerializableFlow | FlowReference;
740
+ providers: ProviderRegistry;
741
+ flows: FlowRegistry;
742
+ }
743
+ /**
744
+ * Creates a composite node that:
745
+ * 1. Executes a split node to get an array of items
746
+ * 2. Executes the item flow for each item in parallel
747
+ * 3. Returns aggregated results
748
+ *
749
+ * Includes full observability, metrics merging, and error context.
750
+ */
751
+ declare function createForEachCompositeNode(config: ForEachCompositeConfig): NodeDef<FlowInput, unknown[]>;
752
+
753
+ /**
754
+ * Flow Validation
755
+ *
756
+ * Provides validation for flow configurations before execution.
757
+ */
758
+
759
+ /**
760
+ * Validation result
761
+ */
762
+ type ValidationResult = {
763
+ valid: boolean;
764
+ errors: ValidationError[];
765
+ warnings: ValidationWarning[];
766
+ };
767
+ /**
768
+ * Validation error
769
+ */
770
+ type ValidationError = {
771
+ type: 'missing_provider' | 'invalid_schema' | 'invalid_config' | 'version_mismatch';
772
+ stepId?: string;
773
+ message: string;
774
+ details?: Record<string, unknown>;
775
+ };
776
+ /**
777
+ * Validation warning
778
+ */
779
+ type ValidationWarning = {
780
+ type: 'deprecated' | 'performance' | 'best_practice';
781
+ stepId?: string;
782
+ message: string;
783
+ details?: Record<string, unknown>;
784
+ };
785
+ /**
786
+ * Provider instance used for validation (minimal interface)
787
+ */
788
+ interface ValidationProviderInstance {
789
+ name?: string;
790
+ [key: string]: unknown;
791
+ }
792
+ /**
793
+ * Validation options
794
+ */
795
+ type ValidationOptions = {
796
+ checkProviders?: boolean;
797
+ checkSchemas?: boolean;
798
+ checkVersion?: boolean;
799
+ providers?: Record<string, ValidationProviderInstance>;
800
+ };
801
+ /**
802
+ * Validate a serializable flow definition
803
+ *
804
+ * @param flowDef - Flow definition to validate
805
+ * @param options - Validation options
806
+ * @returns Validation result with errors and warnings
807
+ *
808
+ * @example
809
+ * ```typescript
810
+ * const result = validateFlow(flowDef, {
811
+ * checkProviders: true,
812
+ * checkSchemas: true,
813
+ * providers: { ocr: suryaProvider, llm: geminiProvider }
814
+ * });
815
+ *
816
+ * if (!result.valid) {
817
+ * console.error('Flow validation failed:', result.errors);
818
+ * }
819
+ * ```
820
+ */
821
+ declare function validateFlow(flowDef: SerializableFlow, options?: ValidationOptions): ValidationResult;
822
+ /**
823
+ * Validate and throw if invalid
824
+ *
825
+ * @param flowDef - Flow definition to validate
826
+ * @param options - Validation options
827
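+ * @throws if the flow is invalid. NOTE(review): `ValidationError` as declared in this module is a plain object type, not an Error subclass — confirm what is actually thrown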
828
+ */
829
+ declare function validateFlowOrThrow(flowDef: SerializableFlow, options?: ValidationOptions): void;
830
+
831
+ /**
832
+ * Build a flow with automatic fallback between multiple LLM providers
833
+ *
834
+ * Example usage:
835
+ * ```typescript
836
+ * const flow = buildMultiProviderFlow({
837
+ * ocr: suryaProvider({ endpoint, apiKey }),
838
+ * llmConfigs: [
839
+ * { provider: 'openai', model: 'gpt-4.1', apiKey: process.env.OPENAI_KEY },
840
+ * { provider: 'anthropic', model: 'claude-haiku-4.5', apiKey: process.env.ANTHROPIC_KEY, via: 'openrouter' },
841
+ * { provider: 'google', model: 'gemini-2.5-flash', apiKey: process.env.GOOGLE_KEY }
842
+ * ],
843
+ * maxRetries: 2
844
+ * });
845
+ * ```
846
+ */
847
+ declare function buildMultiProviderFlow(opts: {
848
+ ocr: OCRProvider;
849
+ llmConfigs: Array<{
850
+ provider: 'openai' | 'anthropic' | 'google' | 'xai';
851
+ model: string;
852
+ apiKey: string;
853
+ via?: 'openrouter' | 'native';
854
+ baseUrl?: string;
855
+ }>;
856
+ maxRetries?: number;
857
+ retryDelay?: number;
858
+ circuitBreakerThreshold?: number;
859
+ }): {
860
+ run(input: {
861
+ url?: string;
862
+ base64?: string;
863
+ }): Promise<{
864
+ ir: DocumentIR;
865
+ output: any;
866
+ metrics: _doclo_core.StepMetric[];
867
+ artifacts: {
868
+ parse: unknown;
869
+ extract: unknown;
870
+ };
871
+ }>;
872
+ };
873
+
874
+ /**
875
+ * Build a flow that uses VLM (Vision Language Model) for direct extraction
876
+ * Skips OCR entirely - sends image/PDF directly to the vision model
877
+ *
878
+ * Pros:
879
+ * - Faster (one API call instead of two)
880
+ * - Can understand layout, tables, charts visually
881
+ * - No OCR errors/artifacts
882
+ *
883
+ * Cons:
884
+ * - More expensive (vision tokens cost more)
885
+ * - Limited to models with vision capabilities
886
+ */
887
+ declare function buildVLMDirectFlow(opts: {
888
+ llmConfigs: Array<{
889
+ provider: 'openai' | 'anthropic' | 'google' | 'xai';
890
+ model: string;
891
+ apiKey: string;
892
+ via?: 'openrouter' | 'native';
893
+ baseUrl?: string;
894
+ }>;
895
+ maxRetries?: number;
896
+ retryDelay?: number;
897
+ circuitBreakerThreshold?: number;
898
+ }): {
899
+ run(input: {
900
+ url?: string;
901
+ base64?: string;
902
+ }): Promise<{
903
+ output: any;
904
+ metrics: _doclo_core.StepMetric[];
905
+ artifacts: {
906
+ vlm_extract: unknown;
907
+ };
908
+ }>;
909
+ };
910
+ /** Builds a runner from one OCR provider (`ocr`) and two LLM JSON providers (`llmA`, `llmB`). `run()` accepts an input with an optional `url` and/or `base64` and resolves with the `DocumentIR` (`ir`), one output per LLM (`outputA`, `outputB`), the collected step metrics, and the `parse` / `extractA` / `extractB` artifacts. NOTE(review): presumably parses once with `ocr` and then extracts with each LLM so the results can be compared — confirm against the implementation. */
911
+ declare function buildTwoProviderFlow(opts: {
912
+ ocr: OCRProvider;
913
+ llmA: LLMJsonProvider;
914
+ llmB: LLMJsonProvider;
915
+ }): {
916
+ run(input: {
917
+ url?: string;
918
+ base64?: string;
919
+ }): Promise<{
920
+ ir: DocumentIR;
921
+ outputA: any;
922
+ outputB: any;
923
+ metrics: _doclo_core.StepMetric[];
924
+ artifacts: {
925
+ parse: unknown;
926
+ extractA: unknown;
927
+ extractB: unknown;
928
+ };
929
+ }>;
930
+ };
931
+
932
+ export { type BuiltFlow, type CategorizeConfig, type ConditionalCompositeConfig, type ExtractConfig, FLOW_REGISTRY, type FieldMapping, type FlowBuilder, type FlowOptions, type FlowProgressCallbacks, type FlowReference, type FlowRegistry$1 as FlowRegistry, FlowSerializationError, type FlowValidationResult, type ForEachCompositeConfig, type InputMappingConfig, type NodeConfig, type OutputConfig, type ParseConfig, type ProviderRegistry, type SerializableConditionalStep, type SerializableFlow, type SerializableForEachStep, type SerializableInputValidation, type SerializableStandardStep, type SerializableStep, type SplitConfig, type TriggerConfig, type ValidationError, type ValidationOptions, type ValidationResult, type ValidationWarning, buildFlowFromConfig, buildMultiProviderFlow, buildTwoProviderFlow, buildVLMDirectFlow, clearRegistry, createConditionalCompositeNode, createFlow, createForEachCompositeNode, defineFlowConfig, extractNodeMetadata, getFlow, getFlowCount, hasFlow, isFlowReference, listFlows, registerFlow, resolveFlowReference, unregisterFlow, validateFlow, validateFlowOrThrow };