@doclo/core 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js ADDED
@@ -0,0 +1,2293 @@
1
+ // src/internal/validation-utils.ts
2
// Aggregates per-step pipeline metrics into flow-level totals.
//
// Sums duration, cost, and token counts across all entries and builds a
// per-provider breakdown (cost, tokens, call count) for every entry that
// carries a provider name. Missing numeric fields count as 0; `ms` is
// assumed present on every entry.
function aggregateMetrics(metrics) {
  const byProvider = {};
  const totals = {
    totalDurationMs: 0,
    totalCostUSD: 0,
    totalInputTokens: 0,
    totalOutputTokens: 0,
    totalCacheCreationTokens: 0,
    totalCacheReadTokens: 0,
    stepCount: metrics.length,
    byProvider
  };
  for (const entry of metrics) {
    totals.totalDurationMs += entry.ms;
    totals.totalCostUSD += entry.costUSD || 0;
    totals.totalInputTokens += entry.inputTokens || 0;
    totals.totalOutputTokens += entry.outputTokens || 0;
    totals.totalCacheCreationTokens += entry.cacheCreationInputTokens || 0;
    totals.totalCacheReadTokens += entry.cacheReadInputTokens || 0;
    if (!entry.provider) continue;
    let bucket = byProvider[entry.provider];
    if (!bucket) {
      bucket = { costUSD: 0, inputTokens: 0, outputTokens: 0, callCount: 0 };
      byProvider[entry.provider] = bucket;
    }
    bucket.costUSD += entry.costUSD || 0;
    bucket.inputTokens += entry.inputTokens || 0;
    bucket.outputTokens += entry.outputTokens || 0;
    bucket.callCount += 1;
  }
  return totals;
}
33
// Constructs a pipeline step: pairs an artifact key with its run function.
var node = (key, run) => {
  return { key, run };
};
34
// Executes steps sequentially, threading each step's output into the next.
// Each step's result is also recorded under its key in `artifacts`, and
// steps may push timing/cost entries through `ctx.metrics`.
// Returns { output, artifacts, metrics }.
async function runPipeline(steps, input, observabilityContext) {
  const artifacts = {};
  const metrics = [];
  const ctx = {
    stepId: observabilityContext?.stepId,
    artifacts,
    emit(k, v) {
      artifacts[k] = v;
    },
    metrics: {
      push(m) {
        metrics.push(m);
      }
    },
    observability: observabilityContext
  };
  let current = input;
  for (const step of steps) {
    current = await step.run(current, ctx);
    ctx.emit(step.key, current);
  }
  return { output: current, artifacts, metrics };
}
53
// Error thrown when a flow step fails at runtime. Carries enough context
// (which step failed, its index/type, what already completed, and any
// partial artifacts) for callers to report or resume precisely.
var FlowExecutionError = class _FlowExecutionError extends Error {
  constructor(message, failedStep, failedStepIndex, failedStepType, completedSteps, originalError, partialArtifacts) {
    super(message);
    this.name = "FlowExecutionError";
    Object.assign(this, {
      failedStep,
      failedStepIndex,
      failedStepType,
      completedSteps,
      originalError,
      partialArtifacts
    });
    // Omit the constructor frame from stack traces on V8 runtimes.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _FlowExecutionError);
    }
  }
};
68
// Error thrown when a flow graph fails build-time validation. Carries the
// offending connection (source/target nodes and their IO types) plus
// actionable suggestions for the user.
var FlowValidationError = class _FlowValidationError extends Error {
  constructor(message, reason, suggestions, sourceNode, targetNode, sourceOutputType, targetInputTypes) {
    super(message);
    this.name = "FlowValidationError";
    Object.assign(this, {
      reason,
      suggestions,
      sourceNode,
      targetNode,
      sourceOutputType,
      targetInputTypes
    });
    // Omit the constructor frame from stack traces on V8 runtimes.
    if (Error.captureStackTrace) {
      Error.captureStackTrace(this, _FlowValidationError);
    }
  }
};
83
// Build-time compatibility rules for connecting flow nodes, keyed as
// NODE_COMPATIBILITY_MATRIX[sourceType][targetType]. Each rule carries:
//   valid                     - whether source -> target is ever allowed
//   reason                    - user-facing explanation when invalid
//   note                      - extra guidance surfaced in suggestions/warnings
//   requiresForEach           - connection only valid with forEach on source
//   requiresRuntimeValidation - allowed, but types only checkable at runtime
// Consumed by validateNodeConnection, getCompatibleTargets,
// getSuggestedConnections, and the forEach helpers below.
var NODE_COMPATIBILITY_MATRIX = {
  // parse outputs DocumentIR (or DocumentIR[] with chunked:true).
  parse: {
    parse: {
      valid: false,
      reason: "Cannot chain parse nodes. Parse is typically the starting node."
    },
    split: {
      valid: false,
      reason: "Split requires FlowInput, but parse outputs DocumentIR. Use split directly on input instead.",
      note: "If you need to re-split after parsing, use trigger to invoke a child flow with FlowInput."
    },
    categorize: {
      valid: true,
      note: "categorize accepts DocumentIR and wraps it with {input, category}"
    },
    extract: {
      valid: true,
      note: "extract accepts DocumentIR and produces typed JSON"
    },
    chunk: {
      valid: true,
      note: "chunk accepts DocumentIR and produces ChunkOutput for RAG"
    },
    combine: {
      valid: false,
      reason: "Parse outputs DocumentIR (single document), not an array. Combine requires array input from forEach.",
      note: "Use parse with chunked:true to output DocumentIR[], then use combine."
    },
    trigger: {
      valid: true,
      note: "trigger accepts any input type"
    },
    output: {
      valid: true,
      note: "output node can follow any node to select or transform results"
    }
  },
  // split outputs SplitDocument[], so most targets are forEach-gated.
  split: {
    parse: {
      valid: true,
      requiresForEach: true,
      reason: "Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input \u2192 FlowInput for parse.",
      note: "Enable forEach on split node before connecting to parse."
    },
    split: {
      valid: false,
      reason: "Cannot nest split operations. Split nodes cannot appear in forEach itemFlow."
    },
    categorize: {
      valid: true,
      requiresForEach: true,
      reason: "Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input for categorize."
    },
    extract: {
      valid: true,
      requiresForEach: true,
      reason: "Split outputs SplitDocument[] which requires forEach. forEach auto-unwraps SplitDocument.input for extract."
    },
    chunk: {
      valid: false,
      reason: "SplitDocument output is incompatible with Chunk input. Chunk expects DocumentIR or DocumentIR[].",
      note: "Use parse in forEach after split to convert SplitDocument \u2192 DocumentIR, then chunk."
    },
    combine: {
      valid: false,
      reason: "Combine should appear AFTER forEach completes, not as a forEach itemFlow step.",
      note: "Place combine after the forEach block to merge results."
    },
    trigger: {
      valid: true,
      requiresForEach: true,
      reason: "Split outputs SplitDocument[] which requires forEach for processing.",
      note: "forEach auto-unwraps SplitDocument.input for child flow."
    },
    output: {
      valid: true,
      note: "output node can follow any node to select or transform results"
    }
  },
  // categorize outputs {input, category}.
  categorize: {
    parse: {
      valid: true,
      note: "categorize outputs {input, category}. Conditional can unwrap this or use directly."
    },
    split: {
      valid: false,
      reason: "Split requires FlowInput, but categorize outputs {input, category}.",
      note: "Use conditional to unwrap and pass input field to split."
    },
    categorize: {
      valid: true,
      note: "Can chain categorize nodes for multi-level classification."
    },
    extract: {
      valid: true,
      note: "extract can process the categorized document."
    },
    chunk: {
      valid: false,
      reason: "Categorize wraps input as {input, category}. Chunk needs unwrapped DocumentIR.",
      note: "Use conditional to unwrap input field before chunk."
    },
    combine: {
      valid: false,
      reason: "Categorize outputs single result {input, category}, not an array. Combine requires array input."
    },
    trigger: {
      valid: true,
      note: "trigger accepts any input type, including {input, category}"
    },
    output: {
      valid: true,
      note: "output node can follow any node to select or transform results"
    }
  },
  // extract outputs typed JSON and is effectively terminal.
  extract: {
    parse: {
      valid: false,
      reason: "Extract outputs typed JSON (terminal node). Cannot pipe JSON to parse.",
      note: "Extract should be one of the last steps in a flow. Use combine if extracting in parallel."
    },
    split: {
      valid: false,
      reason: "Extract outputs typed JSON (terminal node). Cannot pipe JSON to split."
    },
    categorize: {
      valid: false,
      reason: "Extract outputs typed JSON (terminal node). Cannot pipe JSON to categorize."
    },
    extract: {
      valid: false,
      reason: "Extract outputs typed JSON (terminal node). Cannot chain extractions on JSON output.",
      note: "If you need multi-step extraction, extract from DocumentIR/ChunkOutput in parallel, then combine."
    },
    chunk: {
      valid: false,
      reason: "Extract outputs typed JSON, not DocumentIR. Chunk expects DocumentIR input."
    },
    combine: {
      valid: true,
      note: "Use combine to merge parallel extraction results from forEach."
    },
    trigger: {
      valid: true,
      note: "trigger accepts any input type, including extracted JSON"
    },
    output: {
      valid: true,
      note: "output node can follow any node to select or transform results"
    }
  },
  // chunk outputs ChunkOutput, a specialized RAG type.
  chunk: {
    parse: {
      valid: false,
      reason: "Chunk outputs ChunkOutput (specialized type), not FlowInput. Parse expects FlowInput as input."
    },
    split: {
      valid: false,
      reason: "Chunk outputs ChunkOutput, incompatible with Split input (FlowInput)."
    },
    categorize: {
      valid: false,
      reason: "Chunk outputs ChunkOutput, incompatible with Categorize input (DocumentIR|FlowInput).",
      note: "Categorize before chunking, not after."
    },
    extract: {
      valid: true,
      note: "extract has special handling for ChunkOutput - extracts data from chunks."
    },
    chunk: {
      valid: false,
      reason: "Cannot chain chunk operations. Chunk only once per document.",
      note: "Different chunking strategies should be applied to the original DocumentIR, not to chunks."
    },
    combine: {
      valid: false,
      reason: "Chunk outputs ChunkOutput (specialized type), not an array type. Combine expects T[].",
      note: "Use chunk on individual documents in forEach, then extract, then combine extractions."
    },
    trigger: {
      valid: true,
      note: "trigger accepts any input type, including ChunkOutput"
    },
    output: {
      valid: true,
      note: "output node can follow any node to select or transform results"
    }
  },
  // combine's output type depends on its merge strategy.
  combine: {
    parse: {
      valid: true,
      note: "After combining, result can be re-parsed if needed."
    },
    split: {
      valid: false,
      reason: "Combine output depends on strategy. Split requires FlowInput.",
      note: "Most combine strategies output merged objects/arrays, not FlowInput."
    },
    categorize: {
      valid: true,
      note: "Can categorize combined results."
    },
    extract: {
      valid: true,
      note: "Can extract from combined results."
    },
    chunk: {
      valid: true,
      note: "Can chunk combined DocumentIR. Only valid if combine output is DocumentIR or DocumentIR[]."
    },
    combine: {
      valid: false,
      reason: "Cannot chain combine nodes. Combine once per forEach operation."
    },
    trigger: {
      valid: true,
      note: "trigger accepts any input type"
    },
    output: {
      valid: true,
      note: "output node can follow any node to select or transform results"
    }
  },
  // trigger invokes a child flow; its output type is only known at runtime,
  // so every connection is allowed but flagged for runtime validation.
  trigger: {
    parse: {
      valid: true,
      requiresRuntimeValidation: true,
      note: "Valid only if child flow returns FlowInput. Type safety cannot be guaranteed at build-time."
    },
    split: {
      valid: true,
      requiresRuntimeValidation: true,
      note: "Valid only if child flow returns FlowInput. Type safety cannot be guaranteed at build-time."
    },
    categorize: {
      valid: true,
      requiresRuntimeValidation: true,
      note: "Valid only if child flow returns DocumentIR or FlowInput. Type safety cannot be guaranteed at build-time."
    },
    extract: {
      valid: true,
      requiresRuntimeValidation: true,
      note: "Valid only if child flow returns DocumentIR, FlowInput, or ChunkOutput. Type safety cannot be guaranteed at build-time."
    },
    chunk: {
      valid: true,
      requiresRuntimeValidation: true,
      note: "Valid only if child flow returns DocumentIR or DocumentIR[]. Type safety cannot be guaranteed at build-time."
    },
    combine: {
      valid: true,
      requiresRuntimeValidation: true,
      note: "Valid only if child flow returns an array (T[]). Type safety cannot be guaranteed at build-time."
    },
    trigger: {
      valid: true,
      requiresRuntimeValidation: true,
      note: "Can nest trigger nodes (with circular dependency detection and max depth limits). Output type depends on nested child flow."
    },
    output: {
      valid: true,
      note: "output node can follow any node to select or transform results"
    }
  },
  // output is terminal; only additional output nodes may follow it.
  output: {
    parse: {
      valid: false,
      reason: "Output is a terminal node that selects/transforms results. Cannot chain to other nodes."
    },
    split: {
      valid: false,
      reason: "Output is a terminal node that selects/transforms results. Cannot chain to other nodes."
    },
    categorize: {
      valid: false,
      reason: "Output is a terminal node that selects/transforms results. Cannot chain to other nodes."
    },
    extract: {
      valid: false,
      reason: "Output is a terminal node that selects/transforms results. Cannot chain to other nodes."
    },
    chunk: {
      valid: false,
      reason: "Output is a terminal node that selects/transforms results. Cannot chain to other nodes."
    },
    combine: {
      valid: false,
      reason: "Output is a terminal node that selects/transforms results. Cannot chain to other nodes."
    },
    trigger: {
      valid: false,
      reason: "Output is a terminal node that selects/transforms results. Cannot chain to other nodes."
    },
    output: {
      valid: true,
      note: "Multiple output nodes are allowed to create multiple named outputs from a flow."
    }
  }
};
382
// Returns the built-in node type for a node (its `key`), or null when the
// node is missing, keyless, or uses a key outside the known type set.
function getNodeTypeName(node2) {
  if (!node2 || !node2.key) return null;
  const builtinTypes = new Set(["parse", "split", "categorize", "extract", "chunk", "combine", "trigger", "output"]);
  return builtinTypes.has(node2.key) ? node2.key : null;
}
388
// Returns the metadata object attached to a node at build time (`__meta`),
// or null when none is present.
function getNodeTypeInfo(node2) {
  const meta = node2.__meta;
  return meta || null;
}
391
// Lists the target node types that `sourceType` may connect to. Targets
// requiring forEach are excluded unless `includeForEach` is true. Returns
// [] for unknown source types.
function getCompatibleTargets(sourceType, includeForEach = false) {
  const rules = NODE_COMPATIBILITY_MATRIX[sourceType];
  if (!rules) return [];
  const targets = [];
  for (const [targetType, rule] of Object.entries(rules)) {
    if (rule.valid && (includeForEach || !rule.requiresForEach)) {
      targets.push(targetType);
    }
  }
  return targets;
}
400
// Builds human-readable suggestions describing where `sourceType` can
// connect, split into plain connections and forEach-only connections.
function getSuggestedConnections(sourceType) {
  const direct = getCompatibleTargets(sourceType, false);
  const forEachOnly = getCompatibleTargets(sourceType, true).filter((t) => !direct.includes(t));
  if (direct.length === 0 && forEachOnly.length === 0) {
    return [`${sourceType} has no standard outgoing connections (terminal node).`];
  }
  // Bullet line for one target, with its matrix note when available.
  const describe = (target) => {
    const rule = NODE_COMPATIBILITY_MATRIX[sourceType][target];
    return ` \u2022 ${target}${rule.note ? ` - ${rule.note}` : ""}`;
  };
  const suggestions = [];
  if (direct.length > 0) {
    suggestions.push(`${sourceType} can connect to:`, ...direct.map(describe));
  }
  if (forEachOnly.length > 0) {
    suggestions.push(`${sourceType} can connect to (with forEach enabled):`, ...forEachOnly.map(describe));
  }
  return suggestions;
}
425
// Validates a direct source -> target connection against the compatibility
// matrix. Returns { valid: true } (optionally with a runtime-validation
// warning), or { valid: false, reason, suggestions } on failure, plus a
// requiresForEach flag when enabling forEach would make the link valid.
function validateNodeConnection(sourceType, targetType, forEachEnabled = false) {
  const rule = NODE_COMPATIBILITY_MATRIX[sourceType]?.[targetType];
  if (!rule) {
    return {
      valid: false,
      reason: `Unknown node type combination: ${sourceType} \u2192 ${targetType}`,
      suggestions: ["Ensure both nodes are valid node types."]
    };
  }
  if (!rule.valid) {
    return {
      valid: false,
      reason: rule.reason,
      suggestions: getSuggestedConnections(sourceType)
    };
  }
  if (rule.requiresForEach && !forEachEnabled) {
    // Walk the user through enabling forEach, then list alternatives.
    const howTo = [
      `Enable forEach on the ${sourceType} node:`,
      ` 1. Click the ${sourceType} node`,
      ` 2. Enable "forEach Processing" in the configuration`,
      ` 3. Try connecting again`,
      "",
      ...getSuggestedConnections(sourceType)
    ];
    return {
      valid: false,
      reason: `Cannot connect ${sourceType} to ${targetType} without forEach enabled.`,
      suggestions: howTo,
      requiresForEach: true
    };
  }
  if (rule.requiresRuntimeValidation) {
    const detail = rule.note || "Type compatibility depends on runtime values and cannot be validated at build-time.";
    return {
      valid: true,
      warning: `\u26A0\uFE0F ${sourceType} \u2192 ${targetType}: ${detail}`
    };
  }
  return { valid: true };
}
466
// Lists node types that may start a forEach itemFlow after `parentType`,
// i.e. valid connections that are explicitly forEach-gated.
function getValidForEachStarters(parentType) {
  const rules = NODE_COMPATIBILITY_MATRIX[parentType];
  if (!rules) return [];
  const starters = [];
  for (const [targetType, rule] of Object.entries(rules)) {
    if (rule.valid && rule.requiresForEach) {
      starters.push(targetType);
    }
  }
  return starters;
}
471
// Checks whether `starterType` may be the first node of a forEach itemFlow
// under `parentType`. Only connections the matrix marks as forEach-gated
// qualify; everything else returns { valid: false } with suggestions.
function canStartForEachItemFlow(parentType, starterType) {
  const rule = NODE_COMPATIBILITY_MATRIX[parentType]?.[starterType];
  if (!rule) {
    return {
      valid: false,
      reason: `Unknown node type combination: ${parentType} \u2192 forEach \u2192 ${starterType}`,
      suggestions: ["Ensure both nodes are valid node types."]
    };
  }
  if (rule.valid && rule.requiresForEach) {
    return { valid: true };
  }
  const starters = getValidForEachStarters(parentType);
  const suggestions = starters.length > 0 ? [`Valid itemFlow starters for ${parentType}: ${starters.join(", ")}`] : [`${parentType} has no valid forEach itemFlow starters.`];
  if (!rule.valid) {
    return {
      valid: false,
      reason: `${starterType} cannot start forEach itemFlow after ${parentType}. ${rule.reason || "Type incompatible with forEach unwrapped item."}`,
      suggestions
    };
  }
  // Valid connection, but not forEach-gated: it expects the whole array.
  return {
    valid: false,
    reason: `${starterType} cannot start forEach itemFlow after ${parentType}. This connection does not require forEach, meaning it expects the full array, not individual items.`,
    suggestions
  };
}
500
// Validates `data` against a minimal JSON-Schema-like definition and
// returns it unchanged on success; on failure throws one Error listing
// every collected violation.
//
// Supported keywords: type ("integer" | "number" | "string" | "boolean" |
// "object" | "array"), nullable, required, properties,
// additionalProperties: false, and items (single-schema form only; the
// tuple/array form of `items` is ignored). Values with no `type` in the
// schema pass the null check only. Object validation also rejects
// prototype-pollution vectors (__proto__, constructor, prototype).
function validateJson(data, schema) {
  const errors = [];
  // Hard cap on recursion to guard against pathologically nested input.
  const MAX_DEPTH = 50;
  function validate(value, schema2, path = "", depth = 0) {
    if (depth > MAX_DEPTH) {
      errors.push(`${path || "root"}: maximum nesting depth (${MAX_DEPTH}) exceeded`);
      return;
    }
    // Nullable schemas accept null/undefined outright.
    if (schema2.nullable && (value === null || value === void 0)) {
      return;
    }
    if (value === null || value === void 0) {
      if (schema2.nullable !== true) {
        errors.push(`${path || "root"}: value is null or undefined`);
      }
      return;
    }
    // typeof [] is "object", so arrays are reported distinctly.
    const actualType = Array.isArray(value) ? "array" : typeof value;
    const expectedType = schema2.type;
    if (expectedType) {
      if (expectedType === "integer") {
        if (typeof value !== "number" || !Number.isInteger(value)) {
          errors.push(`${path || "root"}: expected integer, got ${actualType}`);
          return;
        }
      } else if (expectedType === "number") {
        if (typeof value !== "number") {
          errors.push(`${path || "root"}: expected number, got ${actualType}`);
          return;
        }
      } else if (expectedType === "string") {
        if (typeof value !== "string") {
          errors.push(`${path || "root"}: expected string, got ${actualType}`);
          return;
        }
      } else if (expectedType === "boolean") {
        if (typeof value !== "boolean") {
          errors.push(`${path || "root"}: expected boolean, got ${actualType}`);
          return;
        }
      } else if (expectedType === "object") {
        if (typeof value !== "object" || Array.isArray(value)) {
          errors.push(`${path || "root"}: expected object, got ${actualType}`);
          return;
        }
        // Required properties must be present (inherited keys count via `in`).
        // NOTE(review): at root level this renders as ".prop" (leading dot).
        if (schema2.required && Array.isArray(schema2.required)) {
          for (const reqProp of schema2.required) {
            if (!(reqProp in value)) {
              errors.push(`${path}.${reqProp}: required property missing`);
            }
          }
        }
        // Prototype-pollution vectors are rejected regardless of schema.
        const dangerousProps = ["__proto__", "constructor", "prototype"];
        if (schema2.additionalProperties === false && schema2.properties) {
          const allowedProps = Object.keys(schema2.properties);
          const requiredProps = schema2.required || [];
          const allAllowedProps = /* @__PURE__ */ new Set([...allowedProps, ...requiredProps]);
          // getOwnPropertyNames also catches non-enumerable keys; keys
          // present in both lists are checked twice (duplicate errors possible).
          for (const key of [...Object.keys(value), ...Object.getOwnPropertyNames(value)]) {
            if (dangerousProps.includes(key)) {
              errors.push(`${path}.${key}: dangerous property not allowed`);
              continue;
            }
            if (!allAllowedProps.has(key)) {
              errors.push(`${path}.${key}: additional property not allowed`);
            }
          }
        } else {
          // Even when extra properties are allowed, own dangerous keys are not.
          for (const key of dangerousProps) {
            if (key in value && Object.prototype.hasOwnProperty.call(value, key)) {
              errors.push(`${path}.${key}: dangerous property not allowed`);
            }
          }
        }
        // Recurse into declared properties that are actually present.
        if (schema2.properties) {
          const valueObj = value;
          for (const [propName, propSchema] of Object.entries(schema2.properties)) {
            if (propName in valueObj) {
              validate(valueObj[propName], propSchema, path ? `${path}.${propName}` : propName, depth + 1);
            }
          }
        }
      } else if (expectedType === "array") {
        if (!Array.isArray(value)) {
          errors.push(`${path || "root"}: expected array, got ${actualType}`);
          return;
        }
        // Only the single-schema form of `items` is supported.
        if (schema2.items && !Array.isArray(schema2.items)) {
          const itemSchema = schema2.items;
          value.forEach((item, index) => {
            validate(item, itemSchema, `${path}[${index}]`, depth + 1);
          });
        }
      }
    }
  }
  validate(data, schema);
  if (errors.length > 0) {
    throw new Error(`Schema validation failed:
${errors.join("\n")}`);
  }
  return data;
}
602
// Prompt variables each node type injects automatically from its config.
// protectReservedVariables prevents user-supplied prompt variables from
// overriding these. Only the node types listed here reserve anything.
var RESERVED_VARIABLES = {
  extract: ["schema", "documentText", "schemaTitle", "schemaDescription", "structuredFormat"],
  categorize: ["categories", "documentText"],
  parse: ["format", "schema", "describeFigures", "citationsEnabled"]
};
607
// Merges user prompt variables over auto-injected variables while
// guaranteeing that reserved (config-derived) variables win. Logs a
// console warning when the user attempts to override a reserved variable;
// the user's value for such keys is ignored.
//
// Returns `autoInjectedVariables` unchanged when there are no user
// variables, otherwise a new merged object.
function protectReservedVariables(nodeType, userVariables, autoInjectedVariables) {
  if (!userVariables || Object.keys(userVariables).length === 0) {
    return autoInjectedVariables;
  }
  // Not every node type has an entry in RESERVED_VARIABLES; default to an
  // empty list so unlisted node types (e.g. "chunk") don't throw a
  // TypeError when iterated below.
  const reserved = RESERVED_VARIABLES[nodeType] ?? [];
  const overridden = reserved.filter((key) => key in userVariables);
  if (overridden.length > 0) {
    console.warn(
      `[doclo] Attempted to override reserved variables in ${nodeType} node: ${overridden.join(", ")}. These variables are auto-injected from config and cannot be overridden. They will be ignored.`
    );
  }
  return {
    ...autoInjectedVariables,
    ...userVariables,
    // Restore reserved variables to ensure they can't be overridden
    ...Object.fromEntries(
      reserved.map((key) => [key, autoInjectedVariables[key]])
    )
  };
}
632
+
633
+ // src/security/url-validator.ts
634
// IPv4 ranges rejected by the SSRF guard (see isIpInBlockedRange):
// loopback, RFC 1918 private space, and link-local.
var BLOCKED_IP_RANGES = [
  // Loopback (127.0.0.0/8)
  { start: "127.0.0.0", end: "127.255.255.255" },
  // Private Class A (10.0.0.0/8)
  { start: "10.0.0.0", end: "10.255.255.255" },
  // Private Class B (172.16.0.0/12)
  { start: "172.16.0.0", end: "172.31.255.255" },
  // Private Class C (192.168.0.0/16)
  { start: "192.168.0.0", end: "192.168.255.255" },
  // Link Local (169.254.0.0/16)
  { start: "169.254.0.0", end: "169.254.255.255" }
];
646
// Hostnames of cloud instance-metadata services, blocked outright by
// validateUrl to prevent credential exfiltration via SSRF.
var BLOCKED_METADATA_HOSTS = [
  // AWS metadata service
  "169.254.169.254",
  // AWS metadata service (Windows)
  "169.254.169.253",
  // GCP metadata service
  "metadata.google.internal",
  // GCP alias
  "metadata",
  // Aliyun metadata service
  "100.100.100.200",
  // OpenStack alias
  "instance-data"
];
660
// Regexes matched against a bracket-stripped IPv6 host (see isIPv6Blocked)
// to reject loopback, mapped-IPv4, link/site-local, unique-local, and
// multicast addresses. Patterns are prefix checks, not full parsers.
var BLOCKED_IPV6_PATTERNS = [
  /^::1$/,
  // Loopback (::1)
  /^::$/,
  // Any address (::)
  /^::ffff:/i,
  // IPv4-mapped IPv6 (::ffff:0:0/96) - matches ::ffff:127.0.0.1
  /^::ffff:0:/i,
  // IPv4-mapped IPv6 alternative
  /^fe80:/i,
  // Link-local (fe80::/10)
  /^fec0:/i,
  // Site-local deprecated (fec0::/10)
  /^fc00:/i,
  // Unique local address (fc00::/7)
  /^fd00:/i,
  // Unique local address (fd00::/8)
  /^ff00:/i,
  // Multicast (ff00::/8)
  /^0:0:0:0:0:0:0:1$/i
  // Loopback expanded form
];
682
// Converts a dotted-quad IPv4 string into its unsigned 32-bit integer
// value (0 .. 4294967295). Returns -1 for anything that is not a
// well-formed IPv4 address.
function ipToNumber(ip) {
  const parts = ip.split(".").map(Number);
  // Number.isInteger also rejects NaN from non-numeric octets, which a
  // plain `p < 0 || p > 255` check lets through (NaN compares false).
  if (parts.length !== 4 || parts.some((p) => !Number.isInteger(p) || p < 0 || p > 255)) {
    return -1;
  }
  // Use multiplication rather than << so addresses >= 128.0.0.0 stay
  // positive: bitwise shifts operate on SIGNED 32-bit ints in JS, which
  // made e.g. 192.168.x.x negative and range checks fragile.
  return parts[0] * 2 ** 24 + parts[1] * 2 ** 16 + parts[2] * 2 ** 8 + parts[3];
}
689
// True when `ip` parses as an IPv4 address that falls inside one of the
// BLOCKED_IP_RANGES; false for non-IPv4 strings (e.g. DNS hostnames).
function isIpInBlockedRange(ip) {
  const candidate = ipToNumber(ip);
  if (candidate === -1) return false;
  for (const range of BLOCKED_IP_RANGES) {
    const low = ipToNumber(range.start);
    const high = ipToNumber(range.end);
    if (candidate >= low && candidate <= high) {
      return true;
    }
  }
  return false;
}
698
// Strips URL-style brackets from an IPv6 host and checks it against the
// blocked-pattern list (loopback, mapped-IPv4, link/unique-local, multicast).
function isIPv6Blocked(hostname) {
  const bareAddress = hostname.replace(/^\[|\]$/g, "");
  for (const pattern of BLOCKED_IPV6_PATTERNS) {
    if (pattern.test(bareAddress)) {
      return true;
    }
  }
  return false;
}
702
// Parses and validates a URL for outbound fetching, with SSRF protection.
//
// Throws for malformed URLs and disallowed protocols; when blockInternal
// is true (the default) it also rejects cloud metadata hosts, blocked
// IPv6 literals, private/loopback/link-local IPv4 ranges, and
// "localhost". Returns the parsed URL object on success.
function validateUrl(urlString, options = {}) {
  const {
    blockInternal = true,
    allowedProtocols = ["http:", "https:"]
  } = options;
  let url;
  try {
    url = new URL(urlString);
  } catch (error) {
    throw new Error(`Invalid URL: ${urlString}`);
  }
  if (!allowedProtocols.includes(url.protocol)) {
    throw new Error(
      `Blocked protocol: ${url.protocol}. Allowed: ${allowedProtocols.join(", ")}`
    );
  }
  if (blockInternal) {
    const { hostname } = url;
    if (BLOCKED_METADATA_HOSTS.includes(hostname)) {
      throw new Error(`Blocked metadata service: ${hostname}`);
    }
    // IPv6 literals show up bracketed (or containing colons) in url.hostname.
    const looksLikeIPv6 = hostname.includes(":") || hostname.startsWith("[");
    if (looksLikeIPv6 && isIPv6Blocked(hostname)) {
      throw new Error(`Blocked IPv6 address: ${hostname}`);
    }
    if (isIpInBlockedRange(hostname)) {
      throw new Error(`Blocked internal IP address: ${hostname}`);
    }
    if (hostname === "localhost") {
      throw new Error("Blocked localhost access");
    }
  }
  return url;
}
737
+
738
+ // src/security/resource-limits.ts
739
// Default hard resource limits applied to document fetching and parsing.
var DEFAULT_LIMITS = {
  // Maximum file size: 100MB
  MAX_FILE_SIZE: 100 * 1024 * 1024,
  // Request timeout: 30 seconds (30000 ms)
  REQUEST_TIMEOUT: 3e4,
  // Maximum JSON parse depth
  MAX_JSON_DEPTH: 100
};
747
// Throws when `size` (bytes) exceeds `maxSize` (default: the global
// MAX_FILE_SIZE limit); otherwise returns nothing. Sizes in the error
// message are rounded to whole megabytes.
function validateFileSize(size, maxSize = DEFAULT_LIMITS.MAX_FILE_SIZE) {
  if (size <= maxSize) {
    return;
  }
  const toMB = (bytes) => Math.round(bytes / 1024 / 1024);
  throw new Error(
    `File size ${toMB(size)}MB exceeds maximum allowed size of ${toMB(maxSize)}MB`
  );
}
756
// Creates an AbortController that aborts after `timeoutMs`. The timer id
// is stashed on the controller (as __timeoutId) so cleanupFetchController
// can cancel it once the request settles.
function createFetchController(timeoutMs = DEFAULT_LIMITS.REQUEST_TIMEOUT) {
  const controller = new AbortController();
  controller.__timeoutId = setTimeout(() => controller.abort(), timeoutMs);
  return controller;
}
762
// Cancels the abort timer attached by createFetchController, if any.
// Safe to call with a controller that has no timer.
function cleanupFetchController(controller) {
  const pending = controller.__timeoutId;
  if (pending) {
    clearTimeout(pending);
  }
}
768
// fetch() wrapper that aborts after `timeoutMs` and always clears the
// timeout timer, whether the request resolves or throws.
async function fetchWithTimeout(url, options = {}, timeoutMs = DEFAULT_LIMITS.REQUEST_TIMEOUT) {
  const controller = createFetchController(timeoutMs);
  try {
    return await fetch(url, {
      ...options,
      signal: controller.signal,
      // Prevent Next.js cache revalidation which can cause AbortError (see: github.com/vercel/next.js/issues/54045)
      cache: "no-store"
    });
  } finally {
    cleanupFetchController(controller);
  }
}
782
+
783
+ // src/runtime/base64.ts
784
// Encodes an ArrayBuffer as base64, preferring Node's Buffer and falling
// back to a btoa-based path for edge/browser runtimes.
function arrayBufferToBase64(buffer) {
  if (typeof Buffer !== "undefined") {
    return Buffer.from(buffer).toString("base64");
  }
  const view = new Uint8Array(buffer);
  const chars = [];
  for (let i = 0; i < view.byteLength; i++) {
    chars.push(String.fromCharCode(view[i]));
  }
  return btoa(chars.join(""));
}
795
// Decodes base64 (optionally wrapped in a data: URI) into an ArrayBuffer.
function base64ToArrayBuffer(base64) {
  // Strip a leading "data:<mime>;base64," prefix if one is present.
  const cleanBase64 = base64.replace(/^data:[^;]+;base64,/, "");
  if (typeof Buffer !== "undefined") {
    const decoded = Buffer.from(cleanBase64, "base64");
    // Slice so the result holds exactly this data, not the shared pool buffer.
    return decoded.buffer.slice(decoded.byteOffset, decoded.byteOffset + decoded.byteLength);
  }
  const binaryString = atob(cleanBase64);
  const out = new Uint8Array(binaryString.length);
  for (let i = 0; i < binaryString.length; i++) {
    out[i] = binaryString.charCodeAt(i);
  }
  return out.buffer;
}
808
// Base64-encodes a Uint8Array, honoring its offset/length window into the
// underlying buffer.
function uint8ArrayToBase64(bytes) {
  const { buffer, byteOffset, byteLength } = bytes;
  return arrayBufferToBase64(buffer.slice(byteOffset, byteOffset + byteLength));
}
811
// Wraps a buffer as a base64 data: URI with the given MIME type.
function createDataUri(buffer, mimeType = "application/octet-stream") {
  return `data:${mimeType};base64,${arrayBufferToBase64(buffer)}`;
}
815
+
816
+ // src/mime-detection.ts
817
+ import { fileTypeFromBuffer } from "file-type";
818
// Decodes base64 content (optionally a full data: URI) and identifies its
// MIME type via file-type's magic-byte sniffing. Throws an error listing
// the leading bytes in hex when the format is not recognized.
async function detectMimeTypeFromBase64Async(base64Data) {
  const base64Only = base64Data.includes(",") ? base64Data.split(",")[1] : base64Data;
  const binaryString = atob(base64Only);
  const bytes = new Uint8Array(binaryString.length);
  for (let i = 0; i < binaryString.length; i++) {
    bytes[i] = binaryString.charCodeAt(i);
  }
  const result = await fileTypeFromBuffer(bytes);
  if (!result) {
    throw new Error(
      `Unsupported file format. Magic bytes: ${Array.from(bytes.slice(0, 4)).map((b) => b.toString(16).padStart(2, "0")).join(" ")}`
    );
  }
  return result.mime;
}
833
// Synchronous magic-byte MIME sniffing for base64 content. Only the first
// 24 base64 chars (18 decoded bytes) are examined — enough for every
// signature that detectMimeTypeFromBytes checks.
function detectMimeTypeFromBase64(base64Data) {
  const base64Only = base64Data.includes(",") ? base64Data.split(",")[1] : base64Data;
  const header = atob(base64Only.substring(0, 24));
  const bytes = Uint8Array.from(header, (ch) => ch.charCodeAt(0));
  return detectMimeTypeFromBytes(bytes);
}
842
// Identifies a document/image MIME type from its leading magic bytes.
// Requires at least 4 bytes (12 for WebP, 8 for CFB); throws with a hex
// dump of the first bytes when no known signature matches.
function detectMimeTypeFromBytes(bytes) {
  if (bytes.length < 4) {
    throw new Error("Insufficient data to detect MIME type (need at least 4 bytes)");
  }
  const [b0, b1, b2, b3] = bytes;
  // JPEG: FF D8 FF
  if (b0 === 0xff && b1 === 0xd8 && b2 === 0xff) {
    return "image/jpeg";
  }
  // PNG: 89 "PNG"
  if (b0 === 0x89 && b1 === 0x50 && b2 === 0x4e && b3 === 0x47) {
    return "image/png";
  }
  // GIF: "GIF8"
  if (b0 === 0x47 && b1 === 0x49 && b2 === 0x46 && b3 === 0x38) {
    return "image/gif";
  }
  // WebP: "RIFF" .... "WEBP"
  if (bytes.length >= 12 && b0 === 0x52 && b1 === 0x49 && b2 === 0x46 && b3 === 0x46 && bytes[8] === 0x57 && bytes[9] === 0x45 && bytes[10] === 0x42 && bytes[11] === 0x50) {
    return "image/webp";
  }
  // PDF: "%PDF"
  if (b0 === 0x25 && b1 === 0x50 && b2 === 0x44 && b3 === 0x46) {
    return "application/pdf";
  }
  // TIFF: "II*\0" (little-endian) or "MM\0*" (big-endian)
  if (b0 === 0x49 && b1 === 0x49 && b2 === 0x2a && b3 === 0x00 || b0 === 0x4d && b1 === 0x4d && b2 === 0x00 && b3 === 0x2a) {
    return "image/tiff";
  }
  // BMP: "BM"
  if (b0 === 0x42 && b1 === 0x4d) {
    return "image/bmp";
  }
  // RTF: "{\rtf"
  if (b0 === 0x7b && b1 === 0x5c && b2 === 0x72 && b3 === 0x74 && bytes[4] === 0x66) {
    return "application/rtf";
  }
  // ZIP (also docx/xlsx/pptx containers): "PK" 03 04
  if (b0 === 0x50 && b1 === 0x4b && b2 === 0x03 && b3 === 0x04) {
    return "application/zip";
  }
  // CFB (legacy MS Office): D0 CF 11 E0 A1 B1 1A E1
  if (bytes.length >= 8 && b0 === 0xd0 && b1 === 0xcf && b2 === 0x11 && b3 === 0xe0 && bytes[4] === 0xa1 && bytes[5] === 0xb1 && bytes[6] === 0x1a && bytes[7] === 0xe1) {
    return "application/x-cfb";
  }
  throw new Error(
    `Unsupported file format. Magic bytes: ${Array.from(bytes.slice(0, 4)).map((b) => b.toString(16).padStart(2, "0")).join(" ")}`
  );
}
880
// Compare a declared MIME type against the type sniffed from the base64 payload.
// Returns { isValid, actualMimeType, declaredMimeType }.
function validateMimeType(base64Data, declaredMimeType) {
  const actualMimeType = detectMimeTypeFromBase64(base64Data);
  const isValid = actualMimeType === declaredMimeType;
  return { isValid, actualMimeType, declaredMimeType };
}
888
// Async variant of validateMimeType; uses the async (library-backed) detector.
async function validateMimeTypeAsync(base64Data, declaredMimeType) {
  const actualMimeType = await detectMimeTypeFromBase64Async(base64Data);
  const isValid = actualMimeType === declaredMimeType;
  return { isValid, actualMimeType, declaredMimeType };
}
896
// Strip the "data:<mime>;base64," prefix from a data URI; a non-data-URI
// string is assumed to already be bare base64 and is returned unchanged.
function extractBase64(data) {
  if (!data.startsWith("data:")) {
    return data;
  }
  const commaIndex = data.indexOf(",");
  if (commaIndex === -1) {
    throw new Error("Invalid data URI: missing comma separator");
  }
  return data.slice(commaIndex + 1);
}
906
+
907
+ // src/internal/file-utils.ts
908
// Classify an input string as "data-uri" or "url"; anything else (e.g. a file
// path) is rejected because this build targets the Edge Runtime.
function detectInputType(input) {
  if (input.startsWith("data:")) {
    return "data-uri";
  }
  const isHttpUrl = input.startsWith("http://") || input.startsWith("https://");
  if (isHttpUrl) {
    return "url";
  }
  throw new Error(
    'Edge Runtime does not support file paths. Use HTTP URLs, data URIs, or pass ArrayBuffer/base64 data directly.\nExample: await resolveDocument("https://example.com/doc.pdf") or resolveDocument("data:application/pdf;base64,...")'
  );
}
915
// Resolve a MIME type for an input using, in priority order: the data-URI
// header, the HTTP Content-Type header, then the file extension found in the
// input string itself. Falls back to application/octet-stream.
function detectMimeType(input, contentType) {
  if (input.startsWith("data:")) {
    const match = input.match(/^data:([^;,]+)/);
    if (match) return match[1];
  }
  if (contentType) {
    const match = contentType.match(/^([^;]+)/);
    if (match) return match[1].trim();
  }
  // Checked in this exact order; "<ext>?" also catches URLs with query strings.
  const extensionOrder = [
    [".pdf", "application/pdf"],
    [".png", "image/png"],
    [".webp", "image/webp"],
    [".jpg", "image/jpeg"],
    [".jpeg", "image/jpeg"],
    [".gif", "image/gif"],
    [".tiff", "image/tiff"],
    [".tif", "image/tiff"],
    [".bmp", "image/bmp"],
    [".heic", "image/heic"],
    [".heif", "image/heif"],
    [".psd", "image/vnd.adobe.photoshop"],
    [".doc", "application/msword"],
    [".docx", "application/vnd.openxmlformats-officedocument.wordprocessingml.document"],
    [".xls", "application/vnd.ms-excel"],
    [".xlsx", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet"],
    [".ppt", "application/vnd.ms-powerpoint"],
    [".pptx", "application/vnd.openxmlformats-officedocument.presentationml.presentation"],
    [".odt", "application/vnd.oasis.opendocument.text"],
    [".ods", "application/vnd.oasis.opendocument.spreadsheet"],
    [".odp", "application/vnd.oasis.opendocument.presentation"],
    [".txt", "text/plain"],
    [".csv", "text/csv"],
    [".html", "text/html"],
    [".htm", "text/html"],
    [".rtf", "application/rtf"],
    [".epub", "application/epub+zip"]
  ];
  const lower = input.toLowerCase();
  for (const [ext, mime] of extensionOrder) {
    if (lower.endsWith(ext) || lower.includes(`${ext}?`)) return mime;
  }
  return "application/octet-stream";
}
954
// MIME types this library recognizes as processable documents. Used by
// detectDocumentType() to decide whether a declared/sniffed type is "known";
// anything outside this list resolves to "unknown".
var KNOWN_MIME_TYPES = [
  // PDF
  "application/pdf",
  // Images - common
  "image/jpeg",
  "image/png",
  "image/gif",
  "image/webp",
  // Images - additional
  "image/tiff",
  "image/bmp",
  "image/heic",
  "image/heif",
  "image/vnd.adobe.photoshop",
  // Microsoft Office
  "application/msword",
  "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  "application/vnd.ms-excel",
  "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  "application/vnd.ms-powerpoint",
  "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  // OpenDocument formats
  "application/vnd.oasis.opendocument.text",
  "application/vnd.oasis.opendocument.spreadsheet",
  "application/vnd.oasis.opendocument.presentation",
  // Text formats
  "text/plain",
  "text/csv",
  "text/html",
  "application/rtf",
  // Other
  "application/epub+zip"
];
987
// File-extension (lowercase, dot included) -> MIME type lookup table, used by
// detectDocumentType() for URL/path inputs. Keep in sync with KNOWN_MIME_TYPES.
var EXTENSION_TO_MIME = {
  // PDF
  ".pdf": "application/pdf",
  // Images - common
  ".jpg": "image/jpeg",
  ".jpeg": "image/jpeg",
  ".png": "image/png",
  ".gif": "image/gif",
  ".webp": "image/webp",
  // Images - additional
  ".tiff": "image/tiff",
  ".tif": "image/tiff",
  ".bmp": "image/bmp",
  ".heic": "image/heic",
  ".heif": "image/heif",
  ".psd": "image/vnd.adobe.photoshop",
  // Microsoft Office
  ".doc": "application/msword",
  ".docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
  ".xls": "application/vnd.ms-excel",
  ".xlsx": "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet",
  ".ppt": "application/vnd.ms-powerpoint",
  ".pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation",
  // OpenDocument formats
  ".odt": "application/vnd.oasis.opendocument.text",
  ".ods": "application/vnd.oasis.opendocument.spreadsheet",
  ".odp": "application/vnd.oasis.opendocument.presentation",
  // Text formats
  ".txt": "text/plain",
  ".csv": "text/csv",
  ".html": "text/html",
  ".htm": "text/html",
  ".rtf": "application/rtf",
  // Other
  ".epub": "application/epub+zip"
};
1023
// Extract the lowercase file extension (dot included) from a path, ignoring
// any query string. Returns null when the path has no extension.
function getExtensionFromPath(path) {
  const [cleanPath] = path.split("?");
  const dotIndex = cleanPath.lastIndexOf(".");
  return dotIndex === -1 ? null : cleanPath.slice(dotIndex).toLowerCase();
}
1029
// Best-effort document type detection. Tries, in order: the data-URI header,
// the file extension (for URLs/paths), then magic-byte sniffing of the input
// as base64. Returns "unknown" when nothing matches a known MIME type.
function detectDocumentType(input) {
  if (!input) return "unknown";
  const isDataUri = input.startsWith("data:");
  if (isDataUri) {
    const match = input.match(/^data:([^;,]+)/);
    const declared = match?.[1];
    if (declared && KNOWN_MIME_TYPES.includes(declared)) {
      return declared;
    }
  } else {
    let ext = null;
    try {
      // Prefer the parsed URL pathname so query strings/fragments are ignored.
      ext = getExtensionFromPath(new URL(input).pathname);
    } catch {
      // Not a valid URL — treat the whole string as a path.
      ext = getExtensionFromPath(input);
    }
    if (ext && ext in EXTENSION_TO_MIME) {
      return EXTENSION_TO_MIME[ext];
    }
  }
  try {
    const sniffed = detectMimeTypeFromBase64(input);
    if (KNOWN_MIME_TYPES.includes(sniffed)) {
      return sniffed;
    }
  } catch {
    // Not decodable base64 or unrecognized signature — fall through.
  }
  return "unknown";
}
1061
// True when the input (data URI, URL, path, or raw base64) is detected as a PDF.
function isPDFDocument(input) {
  const detected = detectDocumentType(input);
  return detected === "application/pdf";
}
1064
/**
 * Normalize a document input to a base64 data URI.
 * - data URIs are validated for the "data:<mime>;base64," shape and returned as-is
 * - http(s) URLs are fetched (with timeout + size limits) and re-encoded
 * Any other input kind is rejected inside detectInputType().
 * @param input - data URI or http(s) URL
 * @param limits - optional { requestTimeout, maxFileSize } overrides
 */
async function resolveDocument(input, limits) {
  const inputType = detectInputType(input);
  switch (inputType) {
    case "data-uri":
      if (!input.match(/^data:[^;,]+;base64,/)) {
        throw new Error("Invalid data URI format. Expected: data:<mimetype>;base64,<data>");
      }
      return input;
    case "url":
      try {
        validateUrl(input);
        const timeout = limits?.requestTimeout ?? DEFAULT_LIMITS.REQUEST_TIMEOUT;
        const response = await fetchWithTimeout(input, {}, timeout);
        if (!response.ok) {
          throw new Error(`HTTP ${response.status}: ${response.statusText}`);
        }
        // Pre-check Content-Length (when present) so oversized downloads can be
        // rejected before buffering the whole body.
        const contentLength = response.headers.get("content-length");
        if (contentLength) {
          const maxSize2 = limits?.maxFileSize ?? DEFAULT_LIMITS.MAX_FILE_SIZE;
          validateFileSize(parseInt(contentLength, 10), maxSize2);
        }
        const arrayBuffer = await response.arrayBuffer();
        // Re-validate against the actual byte count (Content-Length may be
        // absent or inaccurate).
        const maxSize = limits?.maxFileSize ?? DEFAULT_LIMITS.MAX_FILE_SIZE;
        validateFileSize(arrayBuffer.byteLength, maxSize);
        const base64 = arrayBufferToBase64(arrayBuffer);
        const mimeType = detectMimeType(input, response.headers.get("content-type") || void 0);
        return `data:${mimeType};base64,${base64}`;
      } catch (error) {
        // NOTE(review): this catch also rewraps URL-validation and size-limit
        // errors as fetch failures — confirm that is intended.
        throw new Error(`Failed to fetch URL ${input}: ${error.message}`);
      }
  }
}
1096
// Convert an ArrayBuffer or Uint8Array into a data URI with the given MIME
// type. A Uint8Array view is first copied to a standalone ArrayBuffer so that
// only the viewed region (byteOffset .. byteOffset + byteLength) is encoded.
function bufferToDataUri(buffer, mimeType) {
  if (!(buffer instanceof Uint8Array)) {
    return createDataUri(buffer, mimeType);
  }
  const start = buffer.byteOffset;
  const end = start + buffer.byteLength;
  return createDataUri(buffer.buffer.slice(start, end), mimeType);
}
1103
// Alias for bufferToDataUri. NOTE(review): despite the name, this returns
// whatever bufferToDataUri produces (a data URI via createDataUri), not bare
// base64 — confirm callers expect the full "data:" string.
function bufferToBase64(buffer, mimeType) {
  return bufferToDataUri(buffer, mimeType);
}
1106
/**
 * Error thrown when a flow input fails format validation. Carries the detected
 * MIME type and the accepted-type list so callers can build actionable messages.
 */
var FlowInputValidationError = class _FlowInputValidationError extends Error {
  /**
   * @param message - Human-readable error message
   * @param detectedType - The actual MIME type detected from the input
   * @param acceptedTypes - List of MIME types that would have been accepted
   */
  constructor(message, detectedType, acceptedTypes) {
    super(message);
    this.name = "FlowInputValidationError";
    this.detectedType = detectedType;
    this.acceptedTypes = acceptedTypes;
    // Restore the prototype chain for targets where subclassing built-ins
    // resets it (e.g. down-leveled transpilation).
    Object.setPrototypeOf(this, _FlowInputValidationError.prototype);
  }
};
1120
// Validate that a flow input matches one of the accepted document formats.
// Returns the detected MIME type on success. Throws FlowInputValidationError
// for empty input, an undetectable format, or a type outside acceptedFormats
// (an empty acceptedFormats list accepts any detectable format).
function validateFlowInputFormat(input, acceptedFormats) {
  if (!input) {
    throw new FlowInputValidationError(
      "Flow input is empty or undefined",
      "undefined",
      acceptedFormats
    );
  }
  const detected = detectDocumentType(input);
  if (detected === "unknown") {
    const acceptedList = acceptedFormats.length > 0 ? `Expected one of: ${acceptedFormats.join(", ")}` : "Unable to determine document format";
    throw new FlowInputValidationError(
      `Unable to detect document format. ${acceptedList}. Ensure the input is a valid document (PDF, JPEG, PNG, GIF, or WebP).`,
      "unknown",
      acceptedFormats
    );
  }
  const isAccepted = acceptedFormats.length === 0 || acceptedFormats.includes(detected);
  if (!isAccepted) {
    throw new FlowInputValidationError(
      `Document format '${detected}' is not accepted. Expected one of: ${acceptedFormats.join(", ")}`,
      detected,
      acceptedFormats
    );
  }
  return detected;
}
1146
+
1147
+ // src/pdf-utils.ts
1148
+ import { PDFDocument } from "pdf-lib";
1149
// Count the pages of a PDF supplied as a base64 data URL.
async function getPDFPageCount(dataUrl) {
  const base64Match = dataUrl.match(/^data:application\/pdf;base64,(.+)$/);
  if (!base64Match) {
    throw new Error("Invalid PDF data URL format. Expected: data:application/pdf;base64,{base64data}");
  }
  const pdfDoc = await PDFDocument.load(base64ToArrayBuffer(base64Match[1]));
  return pdfDoc.getPageCount();
}
1159
// Split a PDF (base64 data URL) into multiple PDFs, one per inclusive,
// 1-indexed [startPage, endPage] range. Returns the chunks as data URLs.
async function splitPDFIntoChunks(dataUrl, pageRanges) {
  const base64Match = dataUrl.match(/^data:application\/pdf;base64,(.+)$/);
  if (!base64Match) {
    throw new Error("Invalid PDF data URL format. Expected: data:application/pdf;base64,{base64data}");
  }
  const sourceDoc = await PDFDocument.load(base64ToArrayBuffer(base64Match[1]));
  const totalPages = sourceDoc.getPageCount();
  const chunks = [];
  for (const [startPage, endPage] of pageRanges) {
    const rangeIsValid = startPage >= 1 && endPage <= totalPages && startPage <= endPage;
    if (!rangeIsValid) {
      throw new Error(
        `Invalid page range [${startPage}, ${endPage}] for PDF with ${totalPages} pages. Page numbers must be 1-indexed and within bounds.`
      );
    }
    const chunkDoc = await PDFDocument.create();
    // pdf-lib addresses pages 0-indexed, so shift the whole range down by one.
    const pageIndices = [];
    for (let page = startPage - 1; page < endPage; page++) {
      pageIndices.push(page);
    }
    const copiedPages = await chunkDoc.copyPages(sourceDoc, pageIndices);
    for (const page of copiedPages) {
      chunkDoc.addPage(page);
    }
    const chunkBase64 = uint8ArrayToBase64(await chunkDoc.save());
    chunks.push(`data:application/pdf;base64,${chunkBase64}`);
  }
  return chunks;
}
1189
// Page count for a document IR: an explicitly recorded extras.pageCount takes
// precedence (even if null); otherwise fall back to the pages actually present.
function getDocumentPageCount(ir) {
  const explicitCount = ir.extras?.pageCount;
  return explicitCount !== void 0 ? explicitCount : ir.pages.length;
}
1195
// Sum of page counts across an array of document IRs.
function getTotalPageCount(irArray) {
  let total = 0;
  for (const ir of irArray) {
    total += getDocumentPageCount(ir);
  }
  return total;
}
1198
// Summarize the page/chunk bookkeeping carried on an IR's optional extras.
function getPageCountMetadata(ir) {
  const extras = ir.extras;
  const pagesInIR = ir.pages.length;
  const hasChunkInfo = extras?.chunkIndex !== void 0 && extras?.totalChunks !== void 0;
  return {
    pageCount: extras?.pageCount ?? pagesInIR,
    pagesInIR,
    isChunked: hasChunkInfo,
    chunkIndex: extras?.chunkIndex,
    totalChunks: extras?.totalChunks,
    pageRange: extras?.pageRange,
    totalSemanticChunks: extras?.totalSemanticChunks,
    isSemanticChunking: extras?.totalSemanticChunks !== void 0
  };
}
1214
+
1215
+ // src/provider-config.ts
1216
// Tag a VLM provider config with its discriminant type ("vlm"). Properties on
// config are applied after the tag, so an explicit config.type still wins.
function defineVLMProvider(config) {
  return Object.assign({ type: "vlm" }, config);
}
1222
// Tag a Surya OCR provider config with its discriminant type ("ocr").
function defineSuryaProvider(config) {
  return Object.assign({ type: "ocr" }, config);
}
1228
// Tag a Marker OCR provider config with its discriminant type ("ocr").
function defineMarkerProvider(config) {
  return Object.assign({ type: "ocr" }, config);
}
1234
/**
 * Instantiate a single provider from its declarative config, resolving the
 * API key from `secrets` (keyed by config.id) and dynamically importing the
 * matching provider package ("vlm" -> @doclo/providers-llm,
 * "ocr" -> @doclo/providers-datalab).
 * @throws when the API key is missing, the package cannot be imported or is
 *         missing the expected export, or the config type/provider is unknown.
 */
async function buildProviderFromConfig(config, secrets) {
  const secret = secrets[config.id];
  if (!secret || !secret.apiKey) {
    throw new Error(`API key not found for provider "${config.id}"`);
  }
  if (config.type === "vlm") {
    try {
      const module = await import(
        /* webpackIgnore: true */
        "@doclo/providers-llm"
      );
      // Support both named and default-namespace export shapes.
      const createVLMProvider = module.createVLMProvider || module.default?.createVLMProvider;
      if (!createVLMProvider) {
        throw new Error("@doclo/providers-llm does not export createVLMProvider");
      }
      return createVLMProvider({
        provider: config.provider,
        model: config.model,
        apiKey: secret.apiKey,
        via: config.via === "openrouter" ? "openrouter" : void 0,
        baseUrl: config.baseUrl
      });
    } catch (error) {
      // NOTE(review): also rewraps the "does not export" error above as an
      // install failure — confirm that is intended.
      throw new Error(
        `Failed to create VLM provider: ${error.message}. Make sure @doclo/providers-llm is installed.`
      );
    }
  } else if (config.type === "ocr") {
    try {
      const module = await import(
        /* webpackIgnore: true */
        "@doclo/providers-datalab"
      );
      if (config.provider === "surya") {
        const suryaProvider = module.suryaProvider || module.default?.suryaProvider;
        if (!suryaProvider) {
          throw new Error("@doclo/providers-datalab does not export suryaProvider");
        }
        return suryaProvider({
          endpoint: config.endpoint,
          apiKey: secret.apiKey
        });
      } else if (config.provider === "marker") {
        const markerProvider = module.markerProvider || module.default?.markerProvider;
        if (!markerProvider) {
          throw new Error("@doclo/providers-datalab does not export markerProvider");
        }
        return markerProvider({
          apiKey: secret.apiKey,
          force_ocr: config.force_ocr,
          use_llm: config.use_llm
        });
      } else {
        // Exhaustiveness guard carried over from the TypeScript source.
        const exhaustiveCheck = config;
        throw new Error(`Unknown OCR provider: ${exhaustiveCheck.provider}`);
      }
    } catch (error) {
      throw new Error(
        `Failed to create OCR provider: ${error.message}. Make sure @doclo/providers-datalab is installed.`
      );
    }
  } else {
    const exhaustiveCheck = config;
    throw new Error(`Unknown provider type: ${exhaustiveCheck.type}`);
  }
}
1300
// Build a registry of provider instances keyed by config id, wrapping any
// per-provider failure with the offending id for easier diagnosis.
async function buildProvidersFromConfigs(configs, secrets) {
  const registry = {};
  for (const config of configs) {
    let provider;
    try {
      provider = await buildProviderFromConfig(config, secrets);
    } catch (error) {
      throw new Error(`Failed to build provider "${config.id}": ${error.message}`);
    }
    registry[config.id] = provider;
  }
  return registry;
}
1313
+
1314
+ // src/provider-identity.ts
1315
// Serialize a provider identity as "provider:model".
function toProviderString(identity) {
  return [identity.provider, identity.model].join(":");
}
1318
// Parse "provider:model" back into an identity. A string without a colon is
// treated as both the provider and the model; only the first colon splits.
function parseProviderString(str) {
  const separator = str.indexOf(":");
  if (separator < 0) {
    return { provider: str, model: str };
  }
  const provider = str.slice(0, separator);
  const model = str.slice(separator + 1);
  return { provider, model };
}
1328
// Heuristic: does the endpoint point at a local / private-network host?
// (Substring-based, so any URL containing "localhost" etc. qualifies.)
function isLocalEndpoint(endpoint) {
  if (!endpoint) return false;
  const localMarkers = ["localhost", "127.0.0.1", "0.0.0.0"];
  if (localMarkers.some((marker) => endpoint.includes(marker))) {
    return true;
  }
  return endpoint.startsWith("http://192.168.") || endpoint.startsWith("http://10.");
}
1332
// Build a provider identity, deriving the access method: explicit openrouter
// routing wins, then "self-hosted" for local endpoints, else "native".
function createIdentity(provider, model, opts) {
  let method;
  if (opts?.via === "openrouter") {
    method = "openrouter";
  } else if (isLocalEndpoint(opts?.endpoint)) {
    method = "self-hosted";
  } else {
    method = "native";
  }
  return { provider, model, method };
}
1341
+
1342
+ // src/provider-query.ts
1343
// Module-level registry: source -> Map<providerId, normalized provider metadata>.
var providerRegistry = /* @__PURE__ */ new Map();
1344
// Normalize and store a batch of provider metadata under `source`, replacing
// any previously registered batch for that source. An optional custom
// normalizer overrides the built-in defaultNormalizer.
function registerProviderMetadata(source, metadata, normalizer) {
  const normalize = normalizer || defaultNormalizer;
  const normalized = new Map();
  for (const [id, data] of Object.entries(metadata)) {
    normalized.set(id, normalize(id, data, source));
  }
  providerRegistry.set(source, normalized);
}
1355
// Flatten every registered source's providers into a single array.
function getAllProviders() {
  const sources = [...providerRegistry.values()];
  return sources.flatMap((providers) => [...providers.values()]);
}
1362
/**
 * Query registered providers with a declarative filter. All supplied criteria
 * are applied conjunctively (AND); omitted criteria are ignored. Scalar
 * criteria also accept arrays (any-of semantics).
 *
 * Improvement over the previous version: the 13 hand-written, near-identical
 * `filter.supports.*` branches are replaced by one data-driven loop over a
 * flag -> capability-name map, and array-normalization is hoisted into a
 * single helper. Filtering semantics are unchanged.
 */
function queryProviders(filter = {}) {
  // Normalize a scalar-or-array criterion to an array.
  const asArray = (value) => Array.isArray(value) ? value : [value];
  let providers = getAllProviders();
  if (filter.source) {
    const sources = asArray(filter.source);
    providers = providers.filter((p) => sources.includes(p.source));
  }
  if (filter.type) {
    const types = asArray(filter.type);
    providers = providers.filter((p) => types.includes(p.type));
  }
  if (filter.provider) {
    const providerVendors = asArray(filter.provider);
    providers = providers.filter((p) => p.identity?.provider && providerVendors.includes(p.identity.provider));
  }
  if (filter.model) {
    const models = asArray(filter.model);
    providers = providers.filter((p) => p.identity?.model && models.includes(p.identity.model));
  }
  if (filter.method) {
    const methods = asArray(filter.method);
    providers = providers.filter((p) => p.identity?.method && methods.includes(p.identity.method));
  }
  if (filter.supports) {
    // Each `supports` flag maps onto exactly one boolean capability field;
    // apply only the flags the caller actually set.
    const capabilityByFlag = {
      images: "supportsImages",
      pdfs: "supportsPDFs",
      documents: "supportsDocuments",
      reasoning: "supportsReasoning",
      structuredOutput: "supportsStructuredOutput",
      prompts: "supportsPrompts",
      citations: "supportsCitations",
      chunking: "supportsChunking",
      imageExtraction: "supportsImageExtraction",
      pageMarkers: "supportsPageMarkers",
      languageHints: "supportsLanguageHints",
      processingModes: "supportsProcessingModes",
      segmentation: "supportsSegmentation"
    };
    for (const [flag, capability] of Object.entries(capabilityByFlag)) {
      const wanted = filter.supports[flag];
      if (wanted !== void 0) {
        providers = providers.filter((p) => p.capabilities[capability] === wanted);
      }
    }
  }
  if (filter.hasFeatures && filter.hasFeatures.length > 0) {
    providers = providers.filter(
      (p) => filter.hasFeatures.every((feature) => p.features[feature] === true)
    );
  }
  if (filter.outputFormat) {
    providers = providers.filter(
      (p) => p.capabilities.outputFormats[filter.outputFormat] === true
    );
  }
  if (filter.inputRequirements?.inputType !== void 0) {
    const inputTypes = asArray(filter.inputRequirements.inputType);
    providers = providers.filter((p) => inputTypes.includes(p.inputRequirements.inputType));
  }
  if (filter.compatibleWith && filter.compatibleWith.length > 0) {
    providers = providers.filter(
      (p) => filter.compatibleWith.every((node2) => p.compatibleNodes[node2])
    );
  }
  if (filter.mimeType) {
    const mimeTypes = asArray(filter.mimeType);
    providers = providers.filter((p) => {
      const allMimes = [...p.inputFormats.imageMimeTypes, ...p.inputFormats.documentMimeTypes];
      return mimeTypes.every((mime) => allMimes.includes(mime));
    });
  }
  if (filter.minFileSize !== void 0) {
    // No explicit overall cap -> larger of the per-format caps (0 when neither
    // is set, so such providers are excluded).
    providers = providers.filter((p) => {
      const maxSize = p.inputFormats.maxFileSize ?? Math.max(p.inputFormats.maxImageSize ?? 0, p.inputFormats.maxPdfSize ?? 0);
      return maxSize >= filter.minFileSize;
    });
  }
  if (filter.maxFileSize !== void 0) {
    // Here missing caps default to Infinity, so uncapped providers are excluded.
    providers = providers.filter((p) => {
      const maxSize = p.inputFormats.maxFileSize ?? Math.max(p.inputFormats.maxImageSize ?? Infinity, p.inputFormats.maxPdfSize ?? Infinity);
      return maxSize <= filter.maxFileSize;
    });
  }
  if (filter.maxCostPerPage !== void 0) {
    providers = providers.filter(
      (p) => p.pricing.perPage !== void 0 && p.pricing.perPage <= filter.maxCostPerPage
    );
  }
  if (filter.maxCostPer1kTokens !== void 0) {
    providers = providers.filter(
      (p) => p.pricing.inputPer1kTokens !== void 0 && p.pricing.inputPer1kTokens <= filter.maxCostPer1kTokens
    );
  }
  if (filter.filter) {
    providers = providers.filter(filter.filter);
  }
  return providers;
}
1478
// Look up a provider by id across all registered sources; undefined if absent.
function getProviderById(id) {
  for (const providers of providerRegistry.values()) {
    if (providers.has(id)) return providers.get(id);
  }
  return void 0;
}
1486
// All providers registered under a single source; empty array when none.
function getProvidersBySource(source) {
  const providers = providerRegistry.get(source);
  if (!providers) {
    return [];
  }
  return Array.from(providers.values());
}
1490
// Remove every registered source's provider metadata from the registry.
function clearProviderRegistry() {
  providerRegistry.clear();
}
1493
/**
 * Default normalizer for raw provider metadata. Known sources delegate to
 * their dedicated normalizers; any other source gets a conservative record
 * where unspecified capabilities/features default to false and input formats
 * default to base64-only.
 * @param id - provider id (registry key)
 * @param data - raw, source-specific metadata object
 * @param source - metadata source tag ("llm", "datalab", "reducto", ...)
 */
function defaultNormalizer(id, data, source) {
  const d = data;
  // Delegate to the source-specific normalizers when one exists.
  if (source === "llm") {
    return normalizeLLMProvider(id, d);
  } else if (source === "datalab") {
    return normalizeDatalabProvider(id, d);
  } else if (source === "reducto") {
    return normalizeReductoProvider(id, d);
  } else if (source === "unsiloed") {
    return normalizeUnsiloedProvider(id, d);
  }
  // Fallback: everything not explicitly declared is assumed unsupported.
  const defaultOutputFormats = { text: true, markdown: false, html: false, json: false };
  const defaultFeatures = {
    maxPages: false,
    pageRange: false,
    languageHints: false,
    processingModes: false,
    agenticMode: false,
    customPrompts: false,
    imageExtraction: false,
    pageMarkers: false,
    citations: false,
    chunking: false,
    segmentation: false,
    stripExistingOCR: false,
    formatLines: false,
    forceOCR: false,
    tableOutputFormats: false,
    tableMerging: false,
    confidence: false,
    boundingBoxes: false,
    schemaValidation: false,
    handwrittenText: false,
    outputFormats: defaultOutputFormats
  };
  return {
    id,
    name: d.name ?? id,
    source,
    type: d.type ?? "LLM",
    capabilities: {
      supportsImages: d.capabilities?.supportsImages ?? false,
      supportsPDFs: d.capabilities?.supportsPDFs ?? false,
      supportsDocuments: d.capabilities?.supportsDocuments ?? false,
      supportsReasoning: d.capabilities?.supportsReasoning ?? false,
      supportsStructuredOutput: d.capabilities?.supportsStructuredOutput ?? false,
      supportsPrompts: false,
      supportsCitations: false,
      supportsChunking: false,
      supportsImageExtraction: false,
      supportsPageMarkers: false,
      supportsLanguageHints: false,
      supportsProcessingModes: false,
      supportsSegmentation: false,
      outputFormats: defaultOutputFormats
    },
    features: defaultFeatures,
    inputRequirements: {
      inputType: d.inputRequirements?.inputType ?? "any",
      acceptedMethods: d.inputRequirements?.acceptedMethods ?? d.inputFormats?.inputMethods ?? ["base64"]
    },
    compatibleNodes: {
      parse: d.compatibleNodes?.parse ?? false,
      extract: d.compatibleNodes?.extract ?? false,
      categorize: d.compatibleNodes?.categorize ?? false,
      qualify: d.compatibleNodes?.qualify ?? false,
      split: d.compatibleNodes?.split ?? false
    },
    inputFormats: {
      imageMimeTypes: [],
      documentMimeTypes: [],
      inputMethods: ["base64"]
    },
    pricing: {
      model: "per-token",
      currency: "USD"
    },
    raw: data
  };
}
1573
/**
 * Normalize raw LLM-source provider metadata into the common registry shape.
 * LLM providers are modeled as prompt-driven (customPrompts/supportsPrompts
 * true), token-priced, PDF+image capable where declared, and without
 * OCR-style features (chunking, citations, bounding boxes, ...).
 */
function normalizeLLMProvider(id, d) {
  const outputFormats = {
    text: true,
    markdown: true,
    html: true,
    json: d.capabilities?.supportsStructuredOutput ?? true
  };
  const features = {
    maxPages: d.inputFormats?.pdfs?.maxPages !== void 0,
    pageRange: true,
    // LLMs can handle page ranges
    languageHints: false,
    // Not applicable to LLMs
    processingModes: false,
    // Not applicable to LLMs
    agenticMode: false,
    // Not applicable to LLMs
    customPrompts: true,
    // All LLMs support prompts
    imageExtraction: false,
    // LLMs don't extract images
    pageMarkers: false,
    // LLMs don't add page markers
    citations: false,
    // Most LLMs don't have native citations (Anthropic has different API)
    chunking: false,
    // LLMs don't do chunking
    segmentation: false,
    // LLMs don't do segmentation
    stripExistingOCR: false,
    formatLines: false,
    forceOCR: false,
    tableOutputFormats: false,
    tableMerging: false,
    confidence: false,
    // LLMs don't provide confidence scores
    boundingBoxes: false,
    // LLMs don't provide bounding boxes
    schemaValidation: d.capabilities?.supportsStructuredOutput ?? false,
    // Some LLMs support schema validation
    handwrittenText: false,
    // Not specific to LLMs
    outputFormats
  };
  // Vendor falls back to the registry id when not declared.
  const vendor = d.vendor ?? id;
  return {
    id,
    name: d.name ?? id,
    source: "llm",
    type: "LLM",
    // NEW: 3-layer identity
    identity: {
      provider: vendor,
      model: d.defaultModel ?? id,
      method: "native"
    },
    capabilities: {
      supportsImages: d.capabilities?.supportsImages ?? false,
      supportsPDFs: d.capabilities?.supportsPDFs ?? false,
      supportsDocuments: false,
      // LLM providers don't support Office docs directly
      supportsReasoning: d.capabilities?.supportsReasoning ?? false,
      supportsStructuredOutput: d.capabilities?.supportsStructuredOutput ?? false,
      // NEW capabilities
      supportsPrompts: true,
      supportsCitations: false,
      supportsChunking: false,
      supportsImageExtraction: false,
      supportsPageMarkers: false,
      supportsLanguageHints: false,
      supportsProcessingModes: false,
      supportsSegmentation: false,
      outputFormats
    },
    features,
    // LLM providers with vision can work with either raw documents or parsed text
    inputRequirements: {
      inputType: d.inputRequirements?.inputType ?? "any",
      acceptedMethods: d.inputRequirements?.acceptedMethods ?? d.inputFormats?.images?.methods ?? ["base64", "url"]
    },
    compatibleNodes: {
      parse: d.compatibleNodes?.parse ?? false,
      extract: d.compatibleNodes?.extract ?? false,
      categorize: d.compatibleNodes?.categorize ?? false,
      qualify: d.compatibleNodes?.qualify ?? false,
      split: d.compatibleNodes?.split ?? false
    },
    inputFormats: {
      imageMimeTypes: d.inputFormats?.images?.mimeTypes ?? [],
      documentMimeTypes: ["application/pdf"],
      // PDFs only for LLM
      inputMethods: d.inputFormats?.images?.methods ?? ["base64"],
      maxImageSize: d.inputFormats?.images?.maxSize,
      maxPdfSize: d.inputFormats?.pdfs?.maxSize,
      maxPages: d.inputFormats?.pdfs?.maxPages
    },
    pricing: {
      model: "per-token",
      inputPer1kTokens: d.pricing?.inputPer1k,
      outputPer1kTokens: d.pricing?.outputPer1k,
      currency: "USD",
      notes: d.pricing?.notes
    },
    rateLimits: {
      requestsPerMinute: d.limits?.requestsPerMinute
    },
    raw: d
  };
}
1682
// Normalizes a Datalab provider definition into the unified provider record.
// Defaults reflect Datalab's API surface: force-OCR and bounding boxes are
// available by default, and only VLM-typed entries support structured output.
function normalizeDatalabProvider(id, d) {
  const supported = d.supportedOptions ?? {};
  const outFeatures = d.outputFormat?.features ?? {};
  const vlm = d.type === "VLM";
  const model = d.model ?? id;
  // Partition the advertised MIME types into image vs. document buckets.
  const imageMimeTypes = [];
  const documentMimeTypes = [];
  for (const mime of d.inputFormats?.mimeTypes ?? []) {
    (mime.startsWith("image/") ? imageMimeTypes : documentMimeTypes).push(mime);
  }
  const outputFormats = {
    text: true,
    markdown: outFeatures.markdown ?? false,
    html: false,
    json: outFeatures.structuredJSON ?? vlm
  };
  const features = {
    maxPages: supported.maxPages ?? false,
    pageRange: supported.pageRange ?? false,
    languageHints: supported.langs ?? false, // Datalab calls this 'langs'
    processingModes: supported.mode ?? false,
    agenticMode: false, // Datalab has no agentic mode
    customPrompts: supported.blockCorrectionPrompt ?? false,
    imageExtraction: supported.extractImages ?? false,
    pageMarkers: supported.paginate ?? false, // Datalab calls this 'paginate'
    citations: supported.citations ?? false,
    chunking: false, // Datalab has no chunking
    segmentation: supported.segmentation ?? false,
    stripExistingOCR: supported.stripExistingOCR ?? false,
    formatLines: supported.formatLines ?? false,
    forceOCR: true, // force_ocr is always supported
    tableOutputFormats: false,
    tableMerging: false,
    confidence: false, // Datalab does not provide confidence scores
    boundingBoxes: outFeatures.boundingBoxes ?? true, // provided by default
    schemaValidation: vlm, // only VLM entries validate schemas
    handwrittenText: true, // Datalab handles handwritten text
    outputFormats
  };
  return {
    id,
    name: d.name ?? id,
    source: "datalab",
    type: d.type ?? "OCR",
    // 3-layer identity; method defaults to native and may be overridden
    // when the service is self-hosted.
    identity: {
      provider: "datalab",
      model,
      method: "native"
    },
    capabilities: {
      supportsImages: d.capabilities?.supportsImages ?? true,
      supportsPDFs: d.capabilities?.supportsPDFs ?? true,
      supportsDocuments: d.capabilities?.supportsDocuments ?? true,
      supportsReasoning: false, // Datalab doesn't do reasoning
      supportsStructuredOutput: vlm,
      supportsPrompts: supported.blockCorrectionPrompt ?? false,
      supportsCitations: supported.citations ?? false,
      supportsChunking: false,
      supportsImageExtraction: supported.extractImages ?? false,
      supportsPageMarkers: supported.paginate ?? false,
      supportsLanguageHints: supported.langs ?? false,
      supportsProcessingModes: supported.mode ?? false,
      supportsSegmentation: supported.segmentation ?? false,
      outputFormats
    },
    features,
    // Datalab providers always consume raw document input.
    inputRequirements: {
      inputType: d.inputRequirements?.inputType ?? "raw-document",
      acceptedMethods: d.inputRequirements?.acceptedMethods ?? d.inputFormats?.inputMethods ?? ["base64", "url"]
    },
    compatibleNodes: {
      parse: d.compatibleNodes?.parse ?? false,
      extract: d.compatibleNodes?.extract ?? false,
      categorize: d.compatibleNodes?.categorize ?? false,
      qualify: d.compatibleNodes?.qualify ?? false,
      split: d.compatibleNodes?.split ?? false
    },
    inputFormats: {
      imageMimeTypes,
      documentMimeTypes,
      inputMethods: d.inputFormats?.inputMethods ?? ["base64"],
      maxFileSize: d.inputFormats?.maxFileSize,
      maxPages: d.inputFormats?.maxPages
    },
    pricing: {
      model: "per-page",
      perPage: d.pricing?.perPage,
      currency: "USD",
      notes: d.pricing?.notes
    },
    rateLimits: {
      docsPerMinute: d.apiConfig?.rateLimit?.docsPerMinute
    },
    raw: d
  };
}
1786
// Normalizes a Reducto provider definition into the unified provider record.
// Credit-based pricing ('standard' credits) is converted to USD per page at
// the provided usdPerCredit rate, defaulting to 0.004 USD per credit.
function normalizeReductoProvider(id, d) {
  const supported = d.supportedOptions ?? {};
  const nodes = d.compatibleNodes ?? {};
  const outFeatures = d.outputFormat?.features ?? {};
  const vlm = d.type === "VLM";
  const extractCapable = nodes.extract === true;
  const parseCapable = nodes.parse ?? false;
  const model = d.model ?? "v1";
  // Partition the advertised MIME types into image vs. document buckets.
  const imageMimeTypes = [];
  const documentMimeTypes = [];
  for (const mime of d.inputFormats?.mimeTypes ?? []) {
    (mime.startsWith("image/") ? imageMimeTypes : documentMimeTypes).push(mime);
  }
  // Credit pricing takes precedence over a flat per-page price.
  const credits = d.pricing?.standard;
  const perPage = credits ? credits * (d.pricing.usdPerCredit ?? 0.004) : d.pricing?.perPage;
  const outputFormats = {
    text: outFeatures.textLines ?? true,
    markdown: outFeatures.markdown ?? parseCapable,
    html: supported.tableOutputFormat ?? false, // Reducto can emit HTML tables
    json: outFeatures.structuredJSON ?? extractCapable
  };
  const features = {
    maxPages: supported.maxPages ?? false,
    pageRange: supported.pageRange ?? false,
    languageHints: supported.langs ?? false, // Reducto doesn't support langs
    processingModes: false, // Reducto uses agentic mode instead
    agenticMode: supported.mode ?? false, // maps from 'mode' (agentic)
    customPrompts: supported.additionalPrompt ?? false,
    imageExtraction: supported.extractImages ?? false,
    pageMarkers: true, // Reducto has addPageMarkers
    citations: supported.citations ?? false,
    chunking: supported.chunking ?? false,
    segmentation: supported.segmentation ?? false, // via Split endpoint
    stripExistingOCR: false,
    formatLines: false,
    forceOCR: false,
    tableOutputFormats: supported.tableOutputFormat ?? false,
    tableMerging: parseCapable, // Parse has mergeTables
    confidence: supported.confidence ?? outFeatures.confidence ?? false,
    boundingBoxes: outFeatures.boundingBoxes ?? parseCapable,
    schemaValidation: outFeatures.schemaValidation ?? extractCapable,
    handwrittenText: false, // not specifically advertised
    outputFormats
  };
  return {
    id,
    name: d.name ?? id,
    source: "reducto",
    type: d.type ?? "OCR",
    // 3-layer identity
    identity: {
      provider: "reducto",
      model,
      method: "native"
    },
    capabilities: {
      supportsImages: d.capabilities?.supportsImages ?? true,
      supportsPDFs: d.capabilities?.supportsPDFs ?? true,
      supportsDocuments: d.capabilities?.supportsDocuments ?? true,
      supportsReasoning: false, // Reducto doesn't do reasoning
      supportsStructuredOutput: vlm || extractCapable,
      supportsPrompts: supported.additionalPrompt ?? false,
      supportsCitations: supported.citations ?? false,
      supportsChunking: supported.chunking ?? false,
      supportsImageExtraction: supported.extractImages ?? false,
      supportsPageMarkers: true,
      supportsLanguageHints: false,
      supportsProcessingModes: supported.mode ?? false, // agentic mode
      supportsSegmentation: supported.segmentation ?? false,
      outputFormats
    },
    features,
    // Reducto providers always consume raw document input.
    inputRequirements: {
      inputType: d.inputRequirements?.inputType ?? "raw-document",
      acceptedMethods: d.inputRequirements?.acceptedMethods ?? d.inputFormats?.inputMethods ?? ["base64", "url"]
    },
    compatibleNodes: {
      parse: nodes.parse ?? false,
      extract: nodes.extract ?? false,
      categorize: nodes.categorize ?? false,
      qualify: nodes.qualify ?? false,
      split: nodes.split ?? false
    },
    inputFormats: {
      imageMimeTypes,
      documentMimeTypes,
      inputMethods: d.inputFormats?.inputMethods ?? ["base64"],
      maxFileSize: d.inputFormats?.maxFileSize,
      maxPages: d.inputFormats?.maxPages
    },
    pricing: {
      model: "per-page",
      perPage,
      currency: "USD",
      notes: d.pricing?.notes
    },
    rateLimits: {
      docsPerMinute: d.apiConfig?.rateLimit?.docsPerMinute
    },
    raw: d
  };
}
1895
// Normalizes an Unsiloed provider definition into the unified provider record.
// Most feature flags derive from which pipeline nodes the provider supports
// (parse -> chunking, extract -> citations/schema validation, split -> segmentation).
function normalizeUnsiloedProvider(id, d) {
  const nodes = d.compatibleNodes ?? {};
  const outFeatures = d.outputFormat?.features ?? {};
  const vlm = d.type === "VLM";
  const extractCapable = nodes.extract === true;
  const parseCapable = nodes.parse === true;
  const splitCapable = nodes.split === true;
  const categorizeCapable = nodes.categorize === true;
  const model = d.model ?? "v1";
  // Flag derived from the advertised special features list.
  const hasYolo = d.capabilities?.specialFeatures?.includes("YOLO segmentation") ?? false;
  // Partition the advertised MIME types into image vs. document buckets.
  const imageMimeTypes = [];
  const documentMimeTypes = [];
  for (const mime of d.inputFormats?.mimeTypes ?? []) {
    (mime.startsWith("image/") ? imageMimeTypes : documentMimeTypes).push(mime);
  }
  const outputFormats = {
    text: outFeatures.textLines ?? parseCapable,
    markdown: outFeatures.markdown ?? parseCapable,
    html: false, // Unsiloed doesn't output HTML
    json: outFeatures.structuredJSON ?? (vlm || extractCapable)
  };
  const features = {
    maxPages: false, // no max-pages option
    pageRange: false, // no page-range option
    languageHints: false, // no language hints
    processingModes: hasYolo,
    agenticMode: false, // no agentic mode
    customPrompts: false, // no custom prompts
    imageExtraction: false, // doesn't extract images
    pageMarkers: false, // doesn't add page markers
    citations: outFeatures.citations ?? extractCapable, // Extract has citations
    chunking: outFeatures.semanticChunking ?? parseCapable, // Parse has semantic chunking
    segmentation: splitCapable, // Split provider does segmentation
    stripExistingOCR: false,
    formatLines: false,
    forceOCR: false,
    tableOutputFormats: false,
    tableMerging: false,
    confidence: outFeatures.confidence ?? false,
    boundingBoxes: outFeatures.boundingBoxes ?? false,
    schemaValidation: extractCapable, // Extract supports schema validation
    handwrittenText: false, // not specifically advertised
    outputFormats
  };
  return {
    id,
    name: d.name ?? id,
    source: "unsiloed",
    type: d.type ?? "OCR",
    // 3-layer identity
    identity: {
      provider: "unsiloed",
      model,
      method: "native"
    },
    capabilities: {
      supportsImages: d.capabilities?.supportsImages ?? true,
      supportsPDFs: d.capabilities?.supportsPDFs ?? true,
      supportsDocuments: d.capabilities?.supportsDocuments ?? false,
      supportsReasoning: false, // Unsiloed doesn't do reasoning
      supportsStructuredOutput: vlm || extractCapable,
      supportsPrompts: false, // no custom prompts
      supportsCitations: outFeatures.citations ?? extractCapable,
      supportsChunking: outFeatures.semanticChunking ?? parseCapable,
      supportsImageExtraction: false,
      supportsPageMarkers: false,
      supportsLanguageHints: false,
      supportsProcessingModes: hasYolo,
      supportsSegmentation: splitCapable || categorizeCapable,
      outputFormats
    },
    features,
    // Unsiloed providers always consume raw document input.
    inputRequirements: {
      inputType: d.inputRequirements?.inputType ?? "raw-document",
      acceptedMethods: d.inputRequirements?.acceptedMethods ?? d.inputFormats?.inputMethods ?? ["base64", "url"]
    },
    compatibleNodes: {
      parse: nodes.parse ?? false,
      extract: nodes.extract ?? false,
      categorize: nodes.categorize ?? false,
      qualify: nodes.qualify ?? false,
      split: nodes.split ?? false
    },
    inputFormats: {
      imageMimeTypes,
      documentMimeTypes,
      inputMethods: d.inputFormats?.inputMethods ?? ["base64"],
      maxFileSize: d.inputFormats?.maxFileSize,
      maxPages: d.inputFormats?.maxPages
    },
    pricing: {
      model: "per-page",
      perPage: d.pricing?.standardUSD ?? d.pricing?.perPage,
      currency: "USD",
      notes: d.pricing?.notes
    },
    rateLimits: {
      docsPerMinute: d.apiConfig?.rateLimit?.docsPerMinute
    },
    raw: d
  };
}
2008
// Convenience wrapper: all registered providers that accept the given MIME type.
function getProvidersForMimeType(mimeType) {
  const criteria = { mimeType };
  return queryProviders(criteria);
}
2011
/**
 * Returns the cheapest provider able to perform the given capability.
 *
 * @param {"ocr"|"parse"|"extraction"} capability - The capability to look up;
 *   "ocr" and "parse" both resolve to parse-compatible providers.
 * @returns {object|undefined} The cheapest matching provider, or undefined
 *   when the capability is unknown or no provider matches.
 */
function getCheapestProviderFor(capability) {
  let providers;
  switch (capability) {
    case "ocr":
    case "parse":
      providers = queryProviders({ compatibleWith: ["parse"] });
      break;
    case "extraction":
      providers = queryProviders({ compatibleWith: ["extract"] });
      break;
    default:
      // Previously an unrecognized capability left `providers` undefined and
      // crashed on `providers.sort`; treat it as "no provider found" instead.
      return void 0;
  }
  // Per-page pricing takes precedence; token pricing is the fallback, and
  // providers with no pricing information sort last.
  const unitCost = (p) => p.pricing.perPage ?? p.pricing.inputPer1kTokens ?? Infinity;
  // Sort a copy: Array.prototype.sort mutates in place, and the array may be
  // shared registry state.
  return [...providers].sort((a, b) => unitCost(a) - unitCost(b))[0];
}
2028
// Providers whose file-size limit accommodates files of at least
// `minSizeMB` megabytes (default 100).
function getProvidersForLargeFiles(minSizeMB = 100) {
  const criteria = { minFileSize: minSizeMB };
  return queryProviders(criteria);
}
2031
// Registry of providers that expose per-model metadata, keyed by provider id.
var modelRegistry = /* @__PURE__ */ new Map();
// Registers (or replaces) the model-level metadata for a provider.
function registerProviderWithModels(providerId, metadata) {
  modelRegistry.set(providerId, metadata);
}
2035
// Resolves the effective metadata for (providerId, modelId).
// The per-model registry takes precedence; otherwise plain provider metadata
// is used. Returns undefined when the provider id is unknown.
function resolveModelMetadata(providerId, modelId) {
  const withModels = modelRegistry.get(providerId);
  if (withModels) {
    return resolveFromProviderWithModels(withModels, modelId);
  }
  const provider = getProviderById(providerId);
  if (!provider) return void 0;
  // Shallow-copy nested objects so callers cannot mutate registry state.
  const clone = (obj) => ({ ...obj });
  return {
    modelId: modelId ?? providerId,
    modelName: modelId ?? provider.name,
    providerId: provider.id,
    providerName: provider.name,
    providerSource: provider.source,
    capabilities: clone(provider.capabilities),
    features: clone(provider.features),
    inputRequirements: clone(provider.inputRequirements),
    compatibleNodes: clone(provider.compatibleNodes),
    pricing: clone(provider.pricing)
  };
}
2055
// Builds resolved metadata by overlaying a model entry on its provider:
// model-level values win, provider-level values fill the gaps.
// `modelId` may be undefined, in which case only provider data is used.
function resolveFromProviderWithModels(provider, modelId) {
  const model = modelId ? provider.models?.find((entry) => entry.id === modelId) : void 0;
  // Capability fields merged uniformly (model overrides provider).
  const capabilityKeys = [
    "supportsImages",
    "supportsPDFs",
    "supportsDocuments",
    "supportsReasoning",
    "supportsStructuredOutput",
    "supportsPrompts",
    "supportsCitations",
    "supportsChunking",
    "supportsImageExtraction",
    "supportsPageMarkers",
    "supportsLanguageHints",
    "supportsProcessingModes",
    "supportsSegmentation",
    "outputFormats"
  ];
  const capabilities = {};
  for (const key of capabilityKeys) {
    capabilities[key] = model?.capabilities?.[key] ?? provider.capabilities[key];
  }
  // Node compatibility merged the same way.
  const nodeKeys = ["parse", "extract", "categorize", "qualify", "split"];
  const compatibleNodes = {};
  for (const key of nodeKeys) {
    compatibleNodes[key] = model?.compatibleNodes?.[key] ?? provider.compatibleNodes[key];
  }
  return {
    modelId: model?.id ?? modelId ?? provider.id,
    modelName: model?.name ?? model?.id ?? modelId ?? provider.name,
    openRouterId: model?.openRouterId,
    providerId: provider.id,
    providerName: provider.name,
    providerSource: provider.source,
    capabilities,
    // Merged input requirements.
    inputRequirements: {
      inputType: model?.inputRequirements?.inputType ?? provider.inputRequirements.inputType,
      acceptedMethods: model?.inputRequirements?.acceptedMethods ?? provider.inputRequirements.acceptedMethods
    },
    compatibleNodes,
    // Features are inherited from the provider; models don't override them.
    features: { ...provider.features },
    // Merged pricing: per-model rates override provider defaults.
    pricing: {
      model: provider.pricing.model,
      inputPer1kTokens: model?.pricing?.inputPer1kTokens ?? provider.pricing.inputPer1kTokens,
      outputPer1kTokens: model?.pricing?.outputPer1kTokens ?? provider.pricing.outputPer1kTokens,
      perPage: model?.pricing?.perPage ?? provider.pricing.perPage,
      currency: provider.pricing.currency,
      notes: provider.pricing.notes
    },
    // Limits exist only at the model level.
    limits: model?.limits
  };
}
2110
// Queries all known models across both registries:
// pass 1 covers providers registered with per-model metadata,
// pass 2 covers plain providers without model-level entries.
function queryModels(filter = {}) {
  // Normalizes a scalar-or-array filter value and tests membership.
  const matchesList = (value, candidate) => {
    const list = Array.isArray(value) ? value : [value];
    return list.includes(candidate);
  };
  const results = [];
  for (const [providerId, provider] of modelRegistry) {
    if (filter.providerId && !matchesList(filter.providerId, providerId)) continue;
    if (filter.source && !matchesList(filter.source, provider.source)) continue;
    // Providers without explicit models are treated as a single implicit model.
    const models = provider.models ?? [{ id: provider.id }];
    for (const model of models) {
      const resolved = resolveFromProviderWithModels(provider, model.id);
      if (matchesModelFilter(resolved, filter)) {
        results.push(resolved);
      }
    }
  }
  for (const provider of getAllProviders()) {
    // Skip providers already handled via the model registry.
    if (modelRegistry.has(provider.id)) continue;
    if (filter.providerId && !matchesList(filter.providerId, provider.id)) continue;
    if (filter.source && !matchesList(filter.source, provider.source)) continue;
    const resolved = resolveModelMetadata(provider.id);
    if (resolved && matchesModelFilter(resolved, filter)) {
      results.push(resolved);
    }
  }
  return results;
}
2146
// Tests a resolved model against a filter. Every specified criterion must
// match; unspecified criteria are ignored.
function matchesModelFilter(model, filter) {
  if (filter.supports) {
    // Maps each `filter.supports` flag to the capability field it constrains.
    const flagToCapability = {
      images: "supportsImages",
      pdfs: "supportsPDFs",
      documents: "supportsDocuments",
      reasoning: "supportsReasoning",
      structuredOutput: "supportsStructuredOutput",
      prompts: "supportsPrompts",
      citations: "supportsCitations",
      chunking: "supportsChunking",
      imageExtraction: "supportsImageExtraction",
      pageMarkers: "supportsPageMarkers",
      languageHints: "supportsLanguageHints",
      processingModes: "supportsProcessingModes",
      segmentation: "supportsSegmentation"
    };
    for (const [flag, capability] of Object.entries(flagToCapability)) {
      const wanted = filter.supports[flag];
      if (wanted !== void 0 && model.capabilities[capability] !== wanted) {
        return false;
      }
    }
  }
  // Every requested feature flag must be exactly true on the model.
  if (filter.hasFeatures && filter.hasFeatures.length > 0) {
    for (const feature of filter.hasFeatures) {
      if (model.features[feature] !== true) {
        return false;
      }
    }
  }
  if (filter.outputFormat) {
    if (model.capabilities.outputFormats[filter.outputFormat] !== true) {
      return false;
    }
  }
  // Input type may be given as a scalar or a list of acceptable types.
  if (filter.inputRequirements?.inputType !== void 0) {
    const requested = filter.inputRequirements.inputType;
    const accepted = Array.isArray(requested) ? requested : [requested];
    if (!accepted.includes(model.inputRequirements.inputType)) {
      return false;
    }
  }
  if (filter.compatibleWith && filter.compatibleWith.length > 0) {
    for (const nodeType of filter.compatibleWith) {
      if (!model.compatibleNodes[nodeType]) {
        return false;
      }
    }
  }
  // Missing limits count as a zero-token context window.
  if (filter.minContextTokens !== void 0) {
    if ((model.limits?.maxContextTokens ?? 0) < filter.minContextTokens) {
      return false;
    }
  }
  // User-supplied predicate gets the final say.
  if (filter.filter && !filter.filter(model)) {
    return false;
  }
  return true;
}
2224
// All models that can back a pipeline node of the given type.
function getModelsForNode(nodeType) {
  const criteria = { compatibleWith: [nodeType] };
  return queryModels(criteria);
}
2227
// Every known model, with no filtering applied.
function getAllModels() {
  const noFilter = {};
  return queryModels(noFilter);
}
2230
// Empties the per-model registry (useful for tests and reconfiguration).
function clearModelRegistry() {
  modelRegistry.clear();
}
2233
+ export {
2234
+ FlowExecutionError,
2235
+ FlowInputValidationError,
2236
+ FlowValidationError,
2237
+ NODE_COMPATIBILITY_MATRIX,
2238
+ RESERVED_VARIABLES,
2239
+ aggregateMetrics,
2240
+ bufferToBase64,
2241
+ bufferToDataUri,
2242
+ buildProviderFromConfig,
2243
+ buildProvidersFromConfigs,
2244
+ canStartForEachItemFlow,
2245
+ clearModelRegistry,
2246
+ clearProviderRegistry,
2247
+ createIdentity,
2248
+ defineMarkerProvider,
2249
+ defineSuryaProvider,
2250
+ defineVLMProvider,
2251
+ detectDocumentType,
2252
+ detectMimeTypeFromBase64,
2253
+ detectMimeTypeFromBase64Async,
2254
+ detectMimeTypeFromBytes,
2255
+ extractBase64,
2256
+ getAllModels,
2257
+ getAllProviders,
2258
+ getCheapestProviderFor,
2259
+ getCompatibleTargets,
2260
+ getDocumentPageCount,
2261
+ getModelsForNode,
2262
+ getNodeTypeInfo,
2263
+ getNodeTypeName,
2264
+ getPDFPageCount,
2265
+ getPageCountMetadata,
2266
+ getProviderById,
2267
+ getProvidersBySource,
2268
+ getProvidersForLargeFiles,
2269
+ getProvidersForMimeType,
2270
+ getSuggestedConnections,
2271
+ getTotalPageCount,
2272
+ getValidForEachStarters,
2273
+ isLocalEndpoint,
2274
+ isPDFDocument,
2275
+ node,
2276
+ parseProviderString,
2277
+ protectReservedVariables,
2278
+ queryModels,
2279
+ queryProviders,
2280
+ registerProviderMetadata,
2281
+ registerProviderWithModels,
2282
+ resolveDocument,
2283
+ resolveModelMetadata,
2284
+ runPipeline,
2285
+ splitPDFIntoChunks,
2286
+ toProviderString,
2287
+ validateFlowInputFormat,
2288
+ validateJson,
2289
+ validateMimeType,
2290
+ validateMimeTypeAsync,
2291
+ validateNodeConnection
2292
+ };
2293
+ //# sourceMappingURL=index.js.map