@doclo/core 0.1.10 → 0.1.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +165 -3
- package/dist/index.js +317 -1
- package/dist/index.js.map +1 -1
- package/dist/internal/validation-utils.d.ts +1 -1
- package/dist/pdf-utils.d.ts +1 -1
- package/dist/{validation-D_EcHqPl.d.ts → validation-wlK06puw.d.ts} +1 -1
- package/dist/validation.d.ts +1 -1
- package/package.json +1 -1
package/dist/index.d.ts
CHANGED
|
@@ -1,5 +1,5 @@
|
|
|
1
|
-
import { P as ProviderVendor, A as AccessMethod } from './validation-
|
|
2
|
-
export { G as AggregatedMetrics, B as BBox, r as CategorizeNodeConfig, t as ChunkMetadata, v as ChunkNodeConfig, u as ChunkOutput, n as CitationConfig, k as CitationSourceType, w as CombineNodeConfig, U as CompatibilityRule, C as ConsensusConfig, e as ConsensusMetadata, d as ConsensusRunResult, D as DocumentIR, b as DocumentIRExtras, y as EnhancedExtractionSchema, s as ExtractInputMode, E as ExtractNodeConfig, a0 as ExtractedImage, m as FieldCitation, F as FieldVotingDetails, H as FlowContext, a8 as FlowExecutionError, h as FlowInput, i as FlowInputValidation, j as FlowResult, a4 as FlowStepLocation, a9 as FlowValidationError, I as IRLine, a as IRPage, X as JSONSchemaNode, c as LLMJsonProvider, L as LLMProvider, _ as LanguageOptions, l as LineCitation, g as MaybeWithConsensusMetadata, M as MultimodalInput, aa as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, K as NodeTypeInfo, T as NodeTypeName, N as NormalizedBBox, O as OCRProvider, a1 as OCRProviderOptions, x as OutputNodeConfig, o as OutputWithCitations, f as OutputWithConsensus, Z as PageRangeOptions, p as ParseNodeConfig, Y as ProcessingMode, a3 as ProviderCitation, am as ProviderIdentity, aj as RESERVED_VARIABLES, R as ReasoningConfig, $ as SegmentationResult, S as SplitDocument, q as SplitNodeConfig, z as StepMetric, V as VLMProvider, a2 as VLMProviderOptions, W as ValidationResult, a5 as aggregateMetrics, ah as canStartForEachItemFlow, aq as createIdentity, al as extractErrorMessage, ad as getCompatibleTargets, ac as getNodeTypeInfo, ab as getNodeTypeName, ae as getSuggestedConnections, ag as getValidForEachStarters, ap as isLocalEndpoint, a6 as node, ao as parseProviderString, ak as protectReservedVariables, a7 as runPipeline, an as toProviderString, ai as validateJson, af as validateNodeConnection } from './validation-
|
|
1
|
+
import { P as ProviderVendor, A as AccessMethod } from './validation-wlK06puw.js';
|
|
2
|
+
export { G as AggregatedMetrics, B as BBox, r as CategorizeNodeConfig, t as ChunkMetadata, v as ChunkNodeConfig, u as ChunkOutput, n as CitationConfig, k as CitationSourceType, w as CombineNodeConfig, U as CompatibilityRule, C as ConsensusConfig, e as ConsensusMetadata, d as ConsensusRunResult, D as DocumentIR, b as DocumentIRExtras, y as EnhancedExtractionSchema, s as ExtractInputMode, E as ExtractNodeConfig, a0 as ExtractedImage, m as FieldCitation, F as FieldVotingDetails, H as FlowContext, a8 as FlowExecutionError, h as FlowInput, i as FlowInputValidation, j as FlowResult, a4 as FlowStepLocation, a9 as FlowValidationError, I as IRLine, a as IRPage, X as JSONSchemaNode, c as LLMJsonProvider, L as LLMProvider, _ as LanguageOptions, l as LineCitation, g as MaybeWithConsensusMetadata, M as MultimodalInput, aa as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, K as NodeTypeInfo, T as NodeTypeName, N as NormalizedBBox, O as OCRProvider, a1 as OCRProviderOptions, x as OutputNodeConfig, o as OutputWithCitations, f as OutputWithConsensus, Z as PageRangeOptions, p as ParseNodeConfig, Y as ProcessingMode, a3 as ProviderCitation, am as ProviderIdentity, aj as RESERVED_VARIABLES, R as ReasoningConfig, $ as SegmentationResult, S as SplitDocument, q as SplitNodeConfig, z as StepMetric, V as VLMProvider, a2 as VLMProviderOptions, W as ValidationResult, a5 as aggregateMetrics, ah as canStartForEachItemFlow, aq as createIdentity, al as extractErrorMessage, ad as getCompatibleTargets, ac as getNodeTypeInfo, ab as getNodeTypeName, ae as getSuggestedConnections, ag as getValidForEachStarters, ap as isLocalEndpoint, a6 as node, ao as parseProviderString, ak as protectReservedVariables, a7 as runPipeline, an as toProviderString, ai as validateJson, af as validateNodeConnection } from './validation-wlK06puw.js';
|
|
3
3
|
export { getDocumentPageCount, getPDFPageCount, getPageCountMetadata, getTotalPageCount, splitPDFIntoChunks } from './pdf-utils.js';
|
|
4
4
|
|
|
5
5
|
/**
|
|
@@ -559,6 +559,8 @@ type NormalizedFeatures = {
|
|
|
559
559
|
schemaValidation: boolean;
|
|
560
560
|
/** Handwritten text recognition support */
|
|
561
561
|
handwrittenText: boolean;
|
|
562
|
+
/** Separate header/footer extraction from main content */
|
|
563
|
+
headerFooterExtraction: boolean;
|
|
562
564
|
/** Supported output formats */
|
|
563
565
|
outputFormats: OutputFormatSupport;
|
|
564
566
|
};
|
|
@@ -928,4 +930,164 @@ declare function getAllModels(): ResolvedModelMetadata[];
|
|
|
928
930
|
*/
|
|
929
931
|
declare function clearModelRegistry(): void;
|
|
930
932
|
|
|
931
|
-
|
|
933
|
+
/**
|
|
934
|
+
* @doclo/core - Retry Utilities
|
|
935
|
+
*
|
|
936
|
+
* Shared retry infrastructure for LLM and OCR providers.
|
|
937
|
+
* Includes exponential backoff, circuit breaker pattern, and error classification.
|
|
938
|
+
*/
|
|
939
|
+
/**
 * Tuning knobs for retry behaviour. Every field is optional; omitted fields
 * fall back to DEFAULT_RETRY_CONFIG.
 */
interface RetryConfig {
    /**
     * Maximum number of retries after the initial attempt (default: 2).
     * The wrapped function may therefore run up to `maxRetries + 1` times.
     */
    maxRetries?: number;
    /** Base delay in milliseconds between retries (default: 1000). */
    retryDelay?: number;
    /**
     * Enable exponential backoff (default: true). When enabled the base
     * delay doubles with each attempt and random jitter is added.
     */
    useExponentialBackoff?: boolean;
    /**
     * Upper bound in milliseconds for the backoff delay (default: 30000).
     * withRetry also applies it to delays taken from Retry-After.
     */
    maxDelay?: number;
}
|
|
952
|
+
/**
 * Tuning knobs for a circuit breaker. Omitted fields fall back to
 * DEFAULT_CIRCUIT_BREAKER_CONFIG.
 */
interface CircuitBreakerConfig {
    /** Number of consecutive recorded failures that opens the circuit (default: 3). */
    threshold?: number;
    /**
     * Time in milliseconds, measured from the last recorded failure, after
     * which an open circuit lets requests through again (default: 30000).
     */
    resetTimeout?: number;
}
|
|
961
|
+
/**
 * Snapshot of a circuit breaker's internal counters, as returned by
 * CircuitBreaker.getState().
 */
interface CircuitBreakerState {
    /** Failures recorded since the last success or reset. */
    consecutiveFailures: number;
    /**
     * `Date.now()` timestamp (ms) of the most recent recorded failure.
     * Absent until a failure has been recorded, and cleared again on reset.
     */
    lastFailureTime?: number;
    /** Whether the circuit is currently open (requests should be skipped). */
    isOpen: boolean;
}
|
|
969
|
+
/**
 * Contract implemented by the circuit breakers that createCircuitBreaker
 * returns and that withRetry accepts through its `circuitBreaker` option.
 */
interface CircuitBreaker {
    /**
     * Check if the circuit is currently open (callers should skip requests).
     * The breakers built by createCircuitBreaker also use this call to close
     * the circuit again once the reset timeout has elapsed.
     */
    isOpen(): boolean;
    /** Record a successful request (resets the failure count and closes the circuit). */
    recordSuccess(): void;
    /** Record a failed request (opens the circuit once the threshold is reached). */
    recordFailure(): void;
    /** Get a snapshot of the current state for inspection. */
    getState(): CircuitBreakerState;
}
|
|
982
|
+
/**
 * Options for the withRetry wrapper: the RetryConfig fields plus hooks.
 *
 * The type parameter `T` is not referenced by any member; it only mirrors
 * the result type of the wrapped function.
 */
interface WithRetryOptions<T> extends RetryConfig {
    /**
     * Called before each retry (for logging/observability) with the number of
     * the attempt that just failed, the error, and the delay in milliseconds
     * that will be waited. A returned promise is awaited before sleeping.
     */
    onRetry?: (attempt: number, error: Error, delay: number) => void | Promise<void>;
    /**
     * Override for extracting a Retry-After delay (in milliseconds) from an
     * error. When it returns undefined, withRetry falls back to
     * parseRetryAfter. The resulting delay is capped at `maxDelay`.
     */
    getRetryAfter?: (error: Error) => number | undefined;
    /**
     * Circuit breaker to use (optional). When it is open, withRetry throws
     * without calling the function; otherwise the overall success or failure
     * of the call is recorded on it.
     */
    circuitBreaker?: CircuitBreaker;
}
|
|
993
|
+
/** Default retry configuration */
|
|
994
|
+
declare const DEFAULT_RETRY_CONFIG: Required<RetryConfig>;
|
|
995
|
+
/** Default circuit breaker configuration */
|
|
996
|
+
declare const DEFAULT_CIRCUIT_BREAKER_CONFIG: Required<CircuitBreakerConfig>;
|
|
997
|
+
/**
|
|
998
|
+
* Determines if an error is retryable based on its message content.
|
|
999
|
+
*
|
|
1000
|
+
* Retryable errors include:
|
|
1001
|
+
* - HTTP 408, 429, 500, 502, 503, 504
|
|
1002
|
+
* - Timeout errors
|
|
1003
|
+
* - Rate limit errors
|
|
1004
|
+
* - Network errors (ECONNRESET, ETIMEDOUT, etc.)
|
|
1005
|
+
*
|
|
1006
|
+
* Non-retryable errors include:
|
|
1007
|
+
* - HTTP 400, 401, 403, 404 (client errors)
|
|
1008
|
+
* - Business logic failures
|
|
1009
|
+
*
|
|
1010
|
+
* @param error - The error to classify
|
|
1011
|
+
* @returns true if the error is retryable
|
|
1012
|
+
*/
|
|
1013
|
+
declare function isRetryableError(error: Error): boolean;
|
|
1014
|
+
/**
|
|
1015
|
+
* Extracts HTTP status code from an error message if present.
|
|
1016
|
+
*
|
|
1017
|
+
* @param error - The error to extract status from
|
|
1018
|
+
* @returns The HTTP status code or undefined
|
|
1019
|
+
*/
|
|
1020
|
+
declare function extractStatusCode(error: Error): number | undefined;
|
|
1021
|
+
/**
|
|
1022
|
+
* Parses a Retry-After value found in the error's message text.
|
|
1023
|
+
* Supports both seconds (integer) and HTTP-date formats.
|
|
1024
|
+
*
|
|
1025
|
+
* @param error - Error that may contain Retry-After information
|
|
1026
|
+
* @returns Delay in milliseconds, or undefined if not found
|
|
1027
|
+
*/
|
|
1028
|
+
declare function parseRetryAfter(error: Error): number | undefined;
|
|
1029
|
+
/**
|
|
1030
|
+
* Calculates the delay before the next retry attempt.
|
|
1031
|
+
*
|
|
1032
|
+
* With exponential backoff enabled (default):
|
|
1033
|
+
* - Attempt 1: baseDelay * 2^0 = 1x baseDelay
|
|
1034
|
+
* - Attempt 2: baseDelay * 2^1 = 2x baseDelay
|
|
1035
|
+
* - Attempt 3: baseDelay * 2^2 = 4x baseDelay
|
|
1036
|
+
* Plus random jitter (0-1000ms) to prevent thundering herd.
|
|
1037
|
+
*
|
|
1038
|
+
* @param attempt - Current attempt number (1-indexed)
|
|
1039
|
+
* @param config - Retry configuration
|
|
1040
|
+
* @returns Delay in milliseconds
|
|
1041
|
+
*/
|
|
1042
|
+
declare function calculateRetryDelay(attempt: number, config?: RetryConfig): number;
|
|
1043
|
+
/**
|
|
1044
|
+
* Creates or retrieves a circuit breaker for a given key.
|
|
1045
|
+
*
|
|
1046
|
+
* Circuit breakers prevent cascading failures by:
|
|
1047
|
+
* 1. Tracking consecutive failures per provider/endpoint
|
|
1048
|
+
* 2. "Opening" the circuit after threshold failures (skipping requests)
|
|
1049
|
+
* 3. Closing the circuit again, with the failure count reset to 0, once resetTimeout has elapsed since the last failure (checked when isOpen() is called)
|
|
1050
|
+
* 4. Closing the circuit on success
|
|
1051
|
+
*
|
|
1052
|
+
* @param key - Unique identifier (e.g., "datalab:surya" or "openai:gpt-4")
|
|
1053
|
+
* @param config - Circuit breaker configuration
|
|
1054
|
+
* @returns CircuitBreaker instance
|
|
1055
|
+
*/
|
|
1056
|
+
declare function createCircuitBreaker(key: string, config?: CircuitBreakerConfig): CircuitBreaker;
|
|
1057
|
+
/**
|
|
1058
|
+
* Clears all circuit breakers. Useful for testing.
|
|
1059
|
+
*/
|
|
1060
|
+
declare function clearCircuitBreakers(): void;
|
|
1061
|
+
/**
|
|
1062
|
+
* Gets the circuit breaker for a key without creating one.
|
|
1063
|
+
*
|
|
1064
|
+
* @param key - Unique identifier
|
|
1065
|
+
* @returns CircuitBreaker or undefined if not found
|
|
1066
|
+
*/
|
|
1067
|
+
declare function getCircuitBreaker(key: string): CircuitBreaker | undefined;
|
|
1068
|
+
/**
|
|
1069
|
+
* Wraps an async function with retry logic.
|
|
1070
|
+
*
|
|
1071
|
+
* @example
|
|
1072
|
+
* ```typescript
|
|
1073
|
+
* const result = await withRetry(
|
|
1074
|
+
* () => fetchWithTimeout(url, options),
|
|
1075
|
+
* {
|
|
1076
|
+
* maxRetries: 3,
|
|
1077
|
+
* retryDelay: 1000,
|
|
1078
|
+
* useExponentialBackoff: true,
|
|
1079
|
+
* onRetry: (attempt, error, delay) => {
|
|
1080
|
+
* console.log(`Retry ${attempt} after ${delay}ms: ${error.message}`);
|
|
1081
|
+
* }
|
|
1082
|
+
* }
|
|
1083
|
+
* );
|
|
1084
|
+
* ```
|
|
1085
|
+
*
|
|
1086
|
+
* @param fn - Async function to retry
|
|
1087
|
+
* @param options - Retry options
|
|
1088
|
+
* @returns Result of the function
|
|
1089
|
+
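* @throws The last error from fn once retries are exhausted or the error is not retryable; an Error("Circuit breaker is open") without calling fn when the supplied circuit breaker is open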
|
|
1090
|
+
*/
|
|
1091
|
+
declare function withRetry<T>(fn: () => Promise<T>, options?: WithRetryOptions<T>): Promise<T>;
|
|
1092
|
+
|
|
1093
|
+
export { type AcceptedMimeType, AccessMethod, type AllAutoVariables, type AutoVariablesForNode, type BaseProviderConfig, type CategorizeAutoVariables, type CircuitBreaker, type CircuitBreakerConfig, type CircuitBreakerState, DEFAULT_CIRCUIT_BREAKER_CONFIG, DEFAULT_RETRY_CONFIG, type DocumentMimeType, type ExtractAutoVariables, type FeatureName, FlowInputValidationError, type InputRequirements, type ModelMetadata, type ModelQueryFilter, type NormalizedCapabilities, type NormalizedFeatures, type NormalizedProviderMetadata, type OCRProviderConfig, type OutputFormatSupport, type ParseAutoVariables, type PromptVariables, type ProviderConfig, type ProviderInputType, type ProviderInstance, type ProviderMetadataWithModels, type ProviderQueryFilter, type ProviderRegistry, type ProviderSecrets, ProviderVendor, type ResolvedModelMetadata, type RetryConfig, type VLMProviderConfig, type WithRetryOptions, bufferToBase64, bufferToDataUri, buildProviderFromConfig, buildProvidersFromConfigs, calculateRetryDelay, clearCircuitBreakers, clearModelRegistry, clearProviderRegistry, createCircuitBreaker, defineMarkerProvider, defineSuryaProvider, defineVLMProvider, detectDocumentType, detectMimeTypeFromBase64, detectMimeTypeFromBase64Async, detectMimeTypeFromBytes, extractBase64, extractStatusCode, getAllModels, getAllProviders, getCheapestProviderFor, getCircuitBreaker, getModelsForNode, getProviderById, getProvidersBySource, getProvidersForLargeFiles, getProvidersForMimeType, isPDFDocument, isRetryableError, parseRetryAfter, queryModels, queryProviders, registerProviderMetadata, registerProviderWithModels, resolveDocument, resolveModelMetadata, validateFlowInputFormat, validateMimeType, validateMimeTypeAsync, withRetry };
|
package/dist/index.js
CHANGED
|
@@ -1558,6 +1558,8 @@ function defaultNormalizer(id, data, source) {
|
|
|
1558
1558
|
return normalizeReductoProvider(id, d);
|
|
1559
1559
|
} else if (source === "unsiloed") {
|
|
1560
1560
|
return normalizeUnsiloedProvider(id, d);
|
|
1561
|
+
} else if (source === "mistral") {
|
|
1562
|
+
return normalizeMistralProvider(id, d);
|
|
1561
1563
|
}
|
|
1562
1564
|
const defaultOutputFormats = { text: true, markdown: false, html: false, json: false };
|
|
1563
1565
|
const defaultFeatures = {
|
|
@@ -1581,6 +1583,7 @@ function defaultNormalizer(id, data, source) {
|
|
|
1581
1583
|
boundingBoxes: false,
|
|
1582
1584
|
schemaValidation: false,
|
|
1583
1585
|
handwrittenText: false,
|
|
1586
|
+
headerFooterExtraction: false,
|
|
1584
1587
|
outputFormats: defaultOutputFormats
|
|
1585
1588
|
};
|
|
1586
1589
|
return {
|
|
@@ -1670,6 +1673,8 @@ function normalizeLLMProvider(id, d) {
|
|
|
1670
1673
|
// Some LLMs support schema validation
|
|
1671
1674
|
handwrittenText: false,
|
|
1672
1675
|
// Not specific to LLMs
|
|
1676
|
+
headerFooterExtraction: false,
|
|
1677
|
+
// LLMs don't extract header/footer separately
|
|
1673
1678
|
outputFormats
|
|
1674
1679
|
};
|
|
1675
1680
|
const vendor = d.vendor ?? id;
|
|
@@ -1777,6 +1782,8 @@ function normalizeDatalabProvider(id, d) {
|
|
|
1777
1782
|
// VLM providers support schema validation
|
|
1778
1783
|
handwrittenText: true,
|
|
1779
1784
|
// Datalab handles handwritten text
|
|
1785
|
+
headerFooterExtraction: false,
|
|
1786
|
+
// Datalab has issues with header/footer extraction
|
|
1780
1787
|
outputFormats
|
|
1781
1788
|
};
|
|
1782
1789
|
return {
|
|
@@ -1886,6 +1893,8 @@ function normalizeReductoProvider(id, d) {
|
|
|
1886
1893
|
// Extract has schema validation
|
|
1887
1894
|
handwrittenText: false,
|
|
1888
1895
|
// Reducto doesn't specifically advertise handwriting
|
|
1896
|
+
headerFooterExtraction: true,
|
|
1897
|
+
// Reducto has Header/Footer block types
|
|
1889
1898
|
outputFormats
|
|
1890
1899
|
};
|
|
1891
1900
|
return {
|
|
@@ -1999,6 +2008,8 @@ function normalizeUnsiloedProvider(id, d) {
|
|
|
1999
2008
|
// Extract supports schema validation
|
|
2000
2009
|
handwrittenText: false,
|
|
2001
2010
|
// Unsiloed doesn't specifically advertise handwriting
|
|
2011
|
+
headerFooterExtraction: false,
|
|
2012
|
+
// Unsiloed doesn't extract header/footer separately
|
|
2002
2013
|
outputFormats
|
|
2003
2014
|
};
|
|
2004
2015
|
return {
|
|
@@ -2288,7 +2299,304 @@ function getAllModels() {
|
|
|
2288
2299
|
function clearModelRegistry() {
|
|
2289
2300
|
modelRegistry.clear();
|
|
2290
2301
|
}
|
|
2302
|
+
/**
 * Normalizes raw Mistral provider metadata into the shared provider shape.
 *
 * Fields missing from `d` are filled with Mistral-specific defaults. The
 * provider type defaults to "OCR", and the OCR/VLM-dependent defaults are
 * derived from that resolved type so the reported `type` and the derived
 * flags always agree.
 *
 * @param id - Registry id; used as fallback for `id`, `name` and the model.
 * @param d - Raw provider metadata (kept verbatim under `raw`).
 * @returns Normalized provider metadata with `source: "mistral"`.
 */
function normalizeMistralProvider(id, d) {
  const opts = d.supportedOptions ?? {};
  // Resolve the type first: the OCR/VLM flags must reflect the defaulted
  // value, not the possibly-missing raw field.
  const type = d.type ?? "OCR";
  const isVLM = type === "VLM";
  const isOCR = type === "OCR";
  const model = d.model ?? id;
  const declared = d.outputFormat?.features;
  const mimeTypes = d.inputFormats?.mimeTypes ?? [];
  const isImageMime = (m) => m.startsWith("image/");

  const outputFormats = {
    text: true,
    markdown: declared?.markdown ?? isOCR,
    // OCR 3 can output HTML tables
    html: declared?.htmlTables ?? isOCR,
    json: declared?.structuredJSON ?? isVLM
  };

  const features = {
    // True only when the metadata declares a page limit.
    maxPages: d.inputFormats?.maxPages !== void 0,
    // Mistral supports pages param: "0-5" or [0,2,5]
    pageRange: true,
    // Mistral doesn't support language hints
    languageHints: false,
    // Mistral doesn't have processing modes
    processingModes: false,
    // Mistral doesn't have agentic mode
    agenticMode: false,
    // Mistral OCR 3 doesn't support custom prompts
    customPrompts: false,
    // Can include embedded images
    imageExtraction: opts.includeImageBase64 ?? false,
    // Mistral doesn't add page markers
    pageMarkers: false,
    // Mistral doesn't provide citations
    citations: false,
    // Mistral doesn't do chunking
    chunking: false,
    // Mistral doesn't do segmentation
    segmentation: false,
    stripExistingOCR: false,
    formatLines: false,
    // OCR 3 always does OCR
    forceOCR: true,
    // html or markdown table format
    // NOTE(review): opts.tableFormat is passed through unchanged, so this
    // may be a non-boolean value — confirm against NormalizedFeatures.
    tableOutputFormats: opts.tableFormat ?? isOCR,
    tableMerging: false,
    // Mistral doesn't provide confidence scores
    confidence: false,
    // NO text-level bboxes
    boundingBoxes: declared?.boundingBoxes ?? false,
    // VLM supports schema
    schemaValidation: declared?.schemaValidation ?? isVLM,
    // Excellent handwriting support
    handwrittenText: declared?.handwrittenText ?? true,
    // extract_header/extract_footer: either option enables the feature.
    // (`??` would drop the footer flag whenever the header flag is `false`.)
    headerFooterExtraction: Boolean(opts.extractHeader || opts.extractFooter),
    outputFormats
  };

  return {
    id: d.id ?? id,
    name: d.name ?? id,
    source: "mistral",
    type,
    // 3-layer identity
    identity: {
      provider: "mistral",
      model,
      method: "native"
    },
    capabilities: {
      supportsImages: d.capabilities?.supportsImages ?? true,
      supportsPDFs: d.capabilities?.supportsPDFs ?? true,
      // DOCX/PPTX has known issues
      supportsDocuments: d.capabilities?.supportsDocuments ?? false,
      // OCR 3 doesn't do reasoning
      supportsReasoning: false,
      supportsStructuredOutput: d.capabilities?.supportsStructuredOutput ?? isVLM,
      // Extended capabilities
      supportsPrompts: false,
      supportsCitations: false,
      supportsChunking: false,
      supportsImageExtraction: opts.includeImageBase64 ?? false,
      supportsPageMarkers: false,
      supportsLanguageHints: false,
      supportsProcessingModes: false,
      supportsSegmentation: false,
      outputFormats
    },
    features,
    // Mistral providers always need raw document input
    inputRequirements: {
      inputType: d.inputRequirements?.inputType ?? "raw-document",
      acceptedMethods: d.inputRequirements?.acceptedMethods ?? ["base64", "url"]
    },
    compatibleNodes: {
      parse: d.compatibleNodes?.parse ?? isOCR,
      extract: d.compatibleNodes?.extract ?? isVLM,
      categorize: d.compatibleNodes?.categorize ?? false,
      qualify: d.compatibleNodes?.qualify ?? false,
      split: d.compatibleNodes?.split ?? false
    },
    inputFormats: {
      // Declared MIME types are split into image and non-image lists.
      imageMimeTypes: mimeTypes.filter((m) => isImageMime(m)),
      documentMimeTypes: mimeTypes.filter((m) => !isImageMime(m)),
      inputMethods: d.inputFormats?.inputMethods ?? ["base64", "url"],
      // 50MB limit
      maxFileSize: d.inputFormats?.maxFileSize ?? 50,
      maxPages: d.inputFormats?.maxPages ?? 1e3
    },
    pricing: {
      model: "per-page",
      // $2/1000 pages
      perPage: d.pricing?.perPage ?? 2e-3,
      currency: "USD",
      notes: d.pricing?.notes ?? "$2 per 1000 pages"
    },
    rateLimits: {
      docsPerMinute: d.apiConfig?.rateLimit?.docsPerMinute
    },
    raw: d
  };
}
|
|
2419
|
+
|
|
2420
|
+
// src/retry.ts
|
|
2421
|
+
/**
 * Default retry configuration: 2 retries, 1000 ms base delay, exponential
 * backoff enabled, delays capped at 30000 ms.
 */
var DEFAULT_RETRY_CONFIG = {
  maxRetries: 2,
  retryDelay: 1e3,
  useExponentialBackoff: true,
  maxDelay: 3e4
};
|
|
2427
|
+
/**
 * Default circuit breaker configuration: open after 3 consecutive failures,
 * allow requests again 30000 ms after the last failure.
 */
var DEFAULT_CIRCUIT_BREAKER_CONFIG = {
  threshold: 3,
  resetTimeout: 3e4
};
|
|
2431
|
+
var RETRYABLE_STATUS_CODES = ["408", "429", "500", "502", "503", "504"];
|
|
2432
|
+
var RETRYABLE_ERROR_PATTERNS = [
|
|
2433
|
+
"timeout",
|
|
2434
|
+
"rate limit",
|
|
2435
|
+
"overloaded",
|
|
2436
|
+
"econnreset",
|
|
2437
|
+
"etimedout",
|
|
2438
|
+
"enotfound",
|
|
2439
|
+
"econnrefused",
|
|
2440
|
+
"socket hang up",
|
|
2441
|
+
"network error"
|
|
2442
|
+
];
|
|
2443
|
+
function isRetryableError(error) {
|
|
2444
|
+
const message = error.message.toLowerCase();
|
|
2445
|
+
for (const code of RETRYABLE_STATUS_CODES) {
|
|
2446
|
+
if (message.includes(code)) {
|
|
2447
|
+
return true;
|
|
2448
|
+
}
|
|
2449
|
+
}
|
|
2450
|
+
for (const pattern of RETRYABLE_ERROR_PATTERNS) {
|
|
2451
|
+
if (message.includes(pattern)) {
|
|
2452
|
+
return true;
|
|
2453
|
+
}
|
|
2454
|
+
}
|
|
2455
|
+
return false;
|
|
2456
|
+
}
|
|
2457
|
+
/**
 * Extracts an HTTP status code from an error message, if one is present.
 *
 * Explicitly labelled codes ("status 503", "http: 404", "failed: 500") are
 * preferred. Only when none is found does the function fall back to the
 * first standalone three-digit number in the 100-599 range, so unrelated
 * numbers earlier in the message do not shadow a labelled code.
 *
 * @param error - The error whose message is inspected.
 * @returns The status code, or undefined when none is found.
 */
function extractStatusCode(error) {
  const message = String(error?.message ?? "");
  const isHttpStatus = (code) => code >= 100 && code < 600;

  // Most specific first: a number that directly follows a label.
  const labelled = [
    /status[:\s]+(\d{3})(?!\d)/i,
    /http[:\s]+(\d{3})(?!\d)/i,
    /failed[:\s]+(\d{3})(?!\d)/i
  ];
  for (const pattern of labelled) {
    const match = message.match(pattern);
    if (match) {
      const code = parseInt(match[1], 10);
      if (isHttpStatus(code)) {
        return code;
      }
    }
  }

  // Fallback: any standalone three-digit number, skipping out-of-range ones.
  for (const match of message.matchAll(/\b(\d{3})\b/g)) {
    const code = parseInt(match[1], 10);
    if (isHttpStatus(code)) {
      return code;
    }
  }
  return void 0;
}
|
|
2476
|
+
/**
 * Parses a Retry-After value out of an error message.
 *
 * Two forms are recognised after a "retry-after" label:
 * - an integer number of seconds, e.g. "Retry-After: 12"
 * - an HTTP-date, e.g. "Retry-After: Wed, 21 Oct 2015 07:28:00 GMT"
 *
 * Only delays greater than zero and shorter than one hour are accepted;
 * anything else yields undefined.
 *
 * @param error - Error whose message may contain Retry-After information.
 * @returns Delay in milliseconds, or undefined if not found or out of range.
 */
function parseRetryAfter(error) {
  const message = String(error?.message ?? "");
  const ONE_HOUR_SECONDS = 3600;

  // Delta-seconds form.
  const secondsMatch = message.match(/retry-after[:\s]+(\d+)/i);
  if (secondsMatch) {
    const seconds = parseInt(secondsMatch[1], 10);
    if (seconds > 0 && seconds < ONE_HOUR_SECONDS) {
      return seconds * 1e3;
    }
    return void 0;
  }

  // HTTP-date form; the delay is measured from now.
  const dateMatch = message.match(
    /retry-after[:\s]+([a-z]{3},\s+\d{2}\s+[a-z]{3}\s+\d{4}\s+\d{2}:\d{2}:\d{2}\s+GMT)/i
  );
  if (dateMatch) {
    const delay = Date.parse(dateMatch[1]) - Date.now();
    // NaN (unparseable date) fails both comparisons and falls through.
    if (delay > 0 && delay < ONE_HOUR_SECONDS * 1e3) {
      return delay;
    }
  }
  return void 0;
}
|
|
2487
|
+
/**
 * Calculates the delay before the next retry attempt.
 *
 * With exponential backoff the base delay doubles per attempt
 * (attempt 1 -> 1x, attempt 2 -> 2x, attempt 3 -> 4x) and up to 1000 ms of
 * random jitter is added. Without backoff the base delay is used as-is.
 * In both modes the result never exceeds `maxDelay`.
 *
 * @param attempt - Current attempt number (1-indexed); values below 1 are
 *   treated as 1.
 * @param config - Retry configuration; missing fields use DEFAULT_RETRY_CONFIG.
 * @returns Delay in milliseconds.
 */
function calculateRetryDelay(attempt, config = {}) {
  const {
    retryDelay = DEFAULT_RETRY_CONFIG.retryDelay,
    useExponentialBackoff = DEFAULT_RETRY_CONFIG.useExponentialBackoff,
    maxDelay = DEFAULT_RETRY_CONFIG.maxDelay
  } = config;
  if (!useExponentialBackoff) {
    // The cap applies to fixed delays too.
    return Math.min(retryDelay, maxDelay);
  }
  // Guard against attempt < 1, which would shrink the base delay.
  const doublings = Math.max(0, attempt - 1);
  const jitter = Math.random() * 1e3;
  return Math.min(retryDelay * Math.pow(2, doublings) + jitter, maxDelay);
}
|
|
2500
|
+
/** Registry of circuit breakers, keyed by the caller-supplied identifier. */
var circuitBreakerRegistry = /* @__PURE__ */ new Map();

/**
 * Creates a circuit breaker for `key`, or returns the one already registered.
 *
 * When a breaker already exists for the key it is returned unchanged and
 * `config` is ignored. A new breaker opens after `threshold` consecutive
 * recorded failures (logging a warning) and closes again, with its counters
 * reset, on a recorded success or when `isOpen()` is called more than
 * `resetTimeout` ms after the last failure.
 *
 * @param key - Unique identifier for the breaker.
 * @param config - Breaker configuration; missing fields use
 *   DEFAULT_CIRCUIT_BREAKER_CONFIG.
 * @returns The circuit breaker registered under `key`.
 */
function createCircuitBreaker(key, config = {}) {
  const registered = circuitBreakerRegistry.get(key);
  if (registered) {
    return registered;
  }
  const {
    threshold = DEFAULT_CIRCUIT_BREAKER_CONFIG.threshold,
    resetTimeout = DEFAULT_CIRCUIT_BREAKER_CONFIG.resetTimeout
  } = config;

  // Breaker state lives in the closure; getState() exposes copies only.
  let failures = 0;
  let open = false;
  let lastFailureAt;
  const reset = () => {
    failures = 0;
    open = false;
    lastFailureAt = void 0;
  };

  const breaker = {
    isOpen() {
      if (!open) return false;
      // Lazily close the circuit once the cool-down has passed.
      if (lastFailureAt && Date.now() - lastFailureAt > resetTimeout) {
        reset();
        return false;
      }
      return true;
    },
    recordSuccess() {
      reset();
    },
    recordFailure() {
      failures++;
      lastFailureAt = Date.now();
      if (failures >= threshold) {
        open = true;
        console.warn(`Circuit breaker opened for ${key} after ${failures} consecutive failures`);
      }
    },
    getState() {
      const snapshot = { consecutiveFailures: failures, isOpen: open };
      // lastFailureTime is only present once a failure has been recorded.
      return lastFailureAt === void 0 ? snapshot : { ...snapshot, lastFailureTime: lastFailureAt };
    }
  };
  circuitBreakerRegistry.set(key, breaker);
  return breaker;
}
|
|
2547
|
+
function clearCircuitBreakers() {
|
|
2548
|
+
circuitBreakerRegistry.clear();
|
|
2549
|
+
}
|
|
2550
|
+
function getCircuitBreaker(key) {
|
|
2551
|
+
return circuitBreakerRegistry.get(key);
|
|
2552
|
+
}
|
|
2553
|
+
async function withRetry(fn, options = {}) {
|
|
2554
|
+
const {
|
|
2555
|
+
maxRetries = DEFAULT_RETRY_CONFIG.maxRetries,
|
|
2556
|
+
retryDelay = DEFAULT_RETRY_CONFIG.retryDelay,
|
|
2557
|
+
useExponentialBackoff = DEFAULT_RETRY_CONFIG.useExponentialBackoff,
|
|
2558
|
+
maxDelay = DEFAULT_RETRY_CONFIG.maxDelay,
|
|
2559
|
+
onRetry,
|
|
2560
|
+
getRetryAfter,
|
|
2561
|
+
circuitBreaker
|
|
2562
|
+
} = options;
|
|
2563
|
+
if (circuitBreaker?.isOpen()) {
|
|
2564
|
+
throw new Error("Circuit breaker is open");
|
|
2565
|
+
}
|
|
2566
|
+
let lastError = null;
|
|
2567
|
+
const totalAttempts = maxRetries + 1;
|
|
2568
|
+
for (let attempt = 1; attempt <= totalAttempts; attempt++) {
|
|
2569
|
+
try {
|
|
2570
|
+
const result = await fn();
|
|
2571
|
+
circuitBreaker?.recordSuccess();
|
|
2572
|
+
return result;
|
|
2573
|
+
} catch (error) {
|
|
2574
|
+
lastError = error;
|
|
2575
|
+
const isLastAttempt = attempt === totalAttempts;
|
|
2576
|
+
const canRetry = !isLastAttempt && isRetryableError(lastError);
|
|
2577
|
+
if (!canRetry) {
|
|
2578
|
+
break;
|
|
2579
|
+
}
|
|
2580
|
+
let delay = calculateRetryDelay(attempt, { retryDelay, useExponentialBackoff, maxDelay });
|
|
2581
|
+
const retryAfter = getRetryAfter?.(lastError) ?? parseRetryAfter(lastError);
|
|
2582
|
+
if (retryAfter !== void 0 && retryAfter > 0) {
|
|
2583
|
+
delay = Math.min(retryAfter, maxDelay);
|
|
2584
|
+
}
|
|
2585
|
+
if (onRetry) {
|
|
2586
|
+
await onRetry(attempt, lastError, delay);
|
|
2587
|
+
}
|
|
2588
|
+
await sleep(delay);
|
|
2589
|
+
}
|
|
2590
|
+
}
|
|
2591
|
+
circuitBreaker?.recordFailure();
|
|
2592
|
+
throw lastError;
|
|
2593
|
+
}
|
|
2594
|
+
function sleep(ms) {
|
|
2595
|
+
return new Promise((resolve) => setTimeout(resolve, ms));
|
|
2596
|
+
}
|
|
2291
2597
|
export {
|
|
2598
|
+
DEFAULT_CIRCUIT_BREAKER_CONFIG,
|
|
2599
|
+
DEFAULT_RETRY_CONFIG,
|
|
2292
2600
|
FlowExecutionError,
|
|
2293
2601
|
FlowInputValidationError,
|
|
2294
2602
|
FlowValidationError,
|
|
@@ -2299,9 +2607,12 @@ export {
|
|
|
2299
2607
|
bufferToDataUri,
|
|
2300
2608
|
buildProviderFromConfig,
|
|
2301
2609
|
buildProvidersFromConfigs,
|
|
2610
|
+
calculateRetryDelay,
|
|
2302
2611
|
canStartForEachItemFlow,
|
|
2612
|
+
clearCircuitBreakers,
|
|
2303
2613
|
clearModelRegistry,
|
|
2304
2614
|
clearProviderRegistry,
|
|
2615
|
+
createCircuitBreaker,
|
|
2305
2616
|
createIdentity,
|
|
2306
2617
|
defineMarkerProvider,
|
|
2307
2618
|
defineSuryaProvider,
|
|
@@ -2312,9 +2623,11 @@ export {
|
|
|
2312
2623
|
detectMimeTypeFromBytes,
|
|
2313
2624
|
extractBase64,
|
|
2314
2625
|
extractErrorMessage,
|
|
2626
|
+
extractStatusCode,
|
|
2315
2627
|
getAllModels,
|
|
2316
2628
|
getAllProviders,
|
|
2317
2629
|
getCheapestProviderFor,
|
|
2630
|
+
getCircuitBreaker,
|
|
2318
2631
|
getCompatibleTargets,
|
|
2319
2632
|
getDocumentPageCount,
|
|
2320
2633
|
getModelsForNode,
|
|
@@ -2331,8 +2644,10 @@ export {
|
|
|
2331
2644
|
getValidForEachStarters,
|
|
2332
2645
|
isLocalEndpoint,
|
|
2333
2646
|
isPDFDocument,
|
|
2647
|
+
isRetryableError,
|
|
2334
2648
|
node,
|
|
2335
2649
|
parseProviderString,
|
|
2650
|
+
parseRetryAfter,
|
|
2336
2651
|
protectReservedVariables,
|
|
2337
2652
|
queryModels,
|
|
2338
2653
|
queryProviders,
|
|
@@ -2347,6 +2662,7 @@ export {
|
|
|
2347
2662
|
validateJson,
|
|
2348
2663
|
validateMimeType,
|
|
2349
2664
|
validateMimeTypeAsync,
|
|
2350
|
-
validateNodeConnection
|
|
2665
|
+
validateNodeConnection,
|
|
2666
|
+
withRetry
|
|
2351
2667
|
};
|
|
2352
2668
|
//# sourceMappingURL=index.js.map
|