@doclo/core 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -1,5 +1,5 @@
1
- import { P as ProviderVendor, A as AccessMethod } from './validation-D_EcHqPl.js';
2
- export { G as AggregatedMetrics, B as BBox, r as CategorizeNodeConfig, t as ChunkMetadata, v as ChunkNodeConfig, u as ChunkOutput, n as CitationConfig, k as CitationSourceType, w as CombineNodeConfig, U as CompatibilityRule, C as ConsensusConfig, e as ConsensusMetadata, d as ConsensusRunResult, D as DocumentIR, b as DocumentIRExtras, y as EnhancedExtractionSchema, s as ExtractInputMode, E as ExtractNodeConfig, a0 as ExtractedImage, m as FieldCitation, F as FieldVotingDetails, H as FlowContext, a8 as FlowExecutionError, h as FlowInput, i as FlowInputValidation, j as FlowResult, a4 as FlowStepLocation, a9 as FlowValidationError, I as IRLine, a as IRPage, X as JSONSchemaNode, c as LLMJsonProvider, L as LLMProvider, _ as LanguageOptions, l as LineCitation, g as MaybeWithConsensusMetadata, M as MultimodalInput, aa as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, K as NodeTypeInfo, T as NodeTypeName, N as NormalizedBBox, O as OCRProvider, a1 as OCRProviderOptions, x as OutputNodeConfig, o as OutputWithCitations, f as OutputWithConsensus, Z as PageRangeOptions, p as ParseNodeConfig, Y as ProcessingMode, a3 as ProviderCitation, am as ProviderIdentity, aj as RESERVED_VARIABLES, R as ReasoningConfig, $ as SegmentationResult, S as SplitDocument, q as SplitNodeConfig, z as StepMetric, V as VLMProvider, a2 as VLMProviderOptions, W as ValidationResult, a5 as aggregateMetrics, ah as canStartForEachItemFlow, aq as createIdentity, al as extractErrorMessage, ad as getCompatibleTargets, ac as getNodeTypeInfo, ab as getNodeTypeName, ae as getSuggestedConnections, ag as getValidForEachStarters, ap as isLocalEndpoint, a6 as node, ao as parseProviderString, ak as protectReservedVariables, a7 as runPipeline, an as toProviderString, ai as validateJson, af as validateNodeConnection } from './validation-D_EcHqPl.js';
1
+ import { P as ProviderVendor, A as AccessMethod } from './validation-wlK06puw.js';
2
+ export { G as AggregatedMetrics, B as BBox, r as CategorizeNodeConfig, t as ChunkMetadata, v as ChunkNodeConfig, u as ChunkOutput, n as CitationConfig, k as CitationSourceType, w as CombineNodeConfig, U as CompatibilityRule, C as ConsensusConfig, e as ConsensusMetadata, d as ConsensusRunResult, D as DocumentIR, b as DocumentIRExtras, y as EnhancedExtractionSchema, s as ExtractInputMode, E as ExtractNodeConfig, a0 as ExtractedImage, m as FieldCitation, F as FieldVotingDetails, H as FlowContext, a8 as FlowExecutionError, h as FlowInput, i as FlowInputValidation, j as FlowResult, a4 as FlowStepLocation, a9 as FlowValidationError, I as IRLine, a as IRPage, X as JSONSchemaNode, c as LLMJsonProvider, L as LLMProvider, _ as LanguageOptions, l as LineCitation, g as MaybeWithConsensusMetadata, M as MultimodalInput, aa as NODE_COMPATIBILITY_MATRIX, J as NodeCtx, Q as NodeDef, K as NodeTypeInfo, T as NodeTypeName, N as NormalizedBBox, O as OCRProvider, a1 as OCRProviderOptions, x as OutputNodeConfig, o as OutputWithCitations, f as OutputWithConsensus, Z as PageRangeOptions, p as ParseNodeConfig, Y as ProcessingMode, a3 as ProviderCitation, am as ProviderIdentity, aj as RESERVED_VARIABLES, R as ReasoningConfig, $ as SegmentationResult, S as SplitDocument, q as SplitNodeConfig, z as StepMetric, V as VLMProvider, a2 as VLMProviderOptions, W as ValidationResult, a5 as aggregateMetrics, ah as canStartForEachItemFlow, aq as createIdentity, al as extractErrorMessage, ad as getCompatibleTargets, ac as getNodeTypeInfo, ab as getNodeTypeName, ae as getSuggestedConnections, ag as getValidForEachStarters, ap as isLocalEndpoint, a6 as node, ao as parseProviderString, ak as protectReservedVariables, a7 as runPipeline, an as toProviderString, ai as validateJson, af as validateNodeConnection } from './validation-wlK06puw.js';
3
3
  export { getDocumentPageCount, getPDFPageCount, getPageCountMetadata, getTotalPageCount, splitPDFIntoChunks } from './pdf-utils.js';
4
4
 
5
5
  /**
@@ -559,6 +559,8 @@ type NormalizedFeatures = {
559
559
  schemaValidation: boolean;
560
560
  /** Handwritten text recognition support */
561
561
  handwrittenText: boolean;
562
+ /** Separate header/footer extraction from main content */
563
+ headerFooterExtraction: boolean;
562
564
  /** Supported output formats */
563
565
  outputFormats: OutputFormatSupport;
564
566
  };
@@ -928,4 +930,164 @@ declare function getAllModels(): ResolvedModelMetadata[];
928
930
  */
929
931
  declare function clearModelRegistry(): void;
930
932
 
931
- export { type AcceptedMimeType, AccessMethod, type AllAutoVariables, type AutoVariablesForNode, type BaseProviderConfig, type CategorizeAutoVariables, type DocumentMimeType, type ExtractAutoVariables, type FeatureName, FlowInputValidationError, type InputRequirements, type ModelMetadata, type ModelQueryFilter, type NormalizedCapabilities, type NormalizedFeatures, type NormalizedProviderMetadata, type OCRProviderConfig, type OutputFormatSupport, type ParseAutoVariables, type PromptVariables, type ProviderConfig, type ProviderInputType, type ProviderInstance, type ProviderMetadataWithModels, type ProviderQueryFilter, type ProviderRegistry, type ProviderSecrets, ProviderVendor, type ResolvedModelMetadata, type VLMProviderConfig, bufferToBase64, bufferToDataUri, buildProviderFromConfig, buildProvidersFromConfigs, clearModelRegistry, clearProviderRegistry, defineMarkerProvider, defineSuryaProvider, defineVLMProvider, detectDocumentType, detectMimeTypeFromBase64, detectMimeTypeFromBase64Async, detectMimeTypeFromBytes, extractBase64, getAllModels, getAllProviders, getCheapestProviderFor, getModelsForNode, getProviderById, getProvidersBySource, getProvidersForLargeFiles, getProvidersForMimeType, isPDFDocument, queryModels, queryProviders, registerProviderMetadata, registerProviderWithModels, resolveDocument, resolveModelMetadata, validateFlowInputFormat, validateMimeType, validateMimeTypeAsync };
933
+ /**
934
+ * @doclo/core - Retry Utilities
935
+ *
936
+ * Shared retry infrastructure for LLM and OCR providers.
937
+ * Includes exponential backoff, circuit breaker pattern, and error classification.
938
+ */
939
/**
 * Tuning knobs for retrying a failed operation.
 * Every field is optional; the documented default is used when a field is omitted.
 */
interface RetryConfig {
    /** Number of retries allowed after the first attempt (default: 2) */
    maxRetries?: number;
    /** Base wait between attempts, in milliseconds (default: 1000) */
    retryDelay?: number;
    /** Grow the wait exponentially from one attempt to the next (default: true) */
    useExponentialBackoff?: boolean;
    /** Ceiling for the delay, in milliseconds (default: 30000) */
    maxDelay?: number;
}
952
/**
 * Settings that control when a circuit breaker opens and when it may close again.
 */
interface CircuitBreakerConfig {
    /** Consecutive failures required to open the circuit (default: 3) */
    threshold?: number;
    /** Milliseconds to wait after the circuit opens before requests are allowed again (default: 30000) */
    resetTimeout?: number;
}
961
/**
 * Snapshot of a circuit breaker's internal bookkeeping.
 */
interface CircuitBreakerState {
    /** Failures recorded since the last success or reset */
    consecutiveFailures: number;
    /** Timestamp (as returned by `Date.now()`) of the most recent recorded failure, if any */
    lastFailureTime?: number;
    /** Whether the circuit is currently flagged as open */
    isOpen: boolean;
}
969
/**
 * Handle used to consult and update the failure tracking of one circuit.
 */
interface CircuitBreaker {
    /** Reports whether the circuit is open right now, i.e. requests should be skipped */
    isOpen(): boolean;
    /** Notes a successful request, which clears the failure count */
    recordSuccess(): void;
    /** Notes a failed request, which may cause the circuit to open */
    recordFailure(): void;
    /** Returns the current state for inspection */
    getState(): CircuitBreakerState;
}
982
/**
 * Options accepted by the `withRetry` wrapper: the retry settings plus optional hooks.
 *
 * The type parameter `T` is not referenced by any member of this interface.
 */
interface WithRetryOptions<T> extends RetryConfig {
    /**
     * Hook invoked before each retry (for logging/observability).
     * Receives the 1-based number of the attempt that just failed, the error, and the
     * delay in milliseconds that will be waited before the next attempt.
     */
    onRetry?: (attempt: number, error: Error, delay: number) => void | Promise<void>;
    /**
     * Custom extraction of a Retry-After hint from an error.
     * The returned value is used as a delay in milliseconds; return `undefined`
     * to fall back to the built-in parsing.
     */
    getRetryAfter?: (error: Error) => number | undefined;
    /** Optional circuit breaker consulted before running and updated with the outcome */
    circuitBreaker?: CircuitBreaker;
}
993
/** Retry settings applied when a caller leaves a `RetryConfig` field unset */
declare const DEFAULT_RETRY_CONFIG: Required<RetryConfig>;
/** Circuit breaker settings applied when a caller leaves a `CircuitBreakerConfig` field unset */
declare const DEFAULT_CIRCUIT_BREAKER_CONFIG: Required<CircuitBreakerConfig>;
997
/**
 * Classifies an error as transient (worth retrying) or permanent by inspecting its message.
 *
 * Treated as retryable:
 * - HTTP 408, 429, 500, 502, 503 and 504
 * - timeouts
 * - rate limiting
 * - network-level failures (ECONNRESET, ETIMEDOUT, etc.)
 *
 * Treated as non-retryable:
 * - HTTP 400, 401, 403, 404 (client errors)
 * - business logic failures
 *
 * @param error - Error whose message is examined
 * @returns `true` when the error looks transient, `false` otherwise
 */
declare function isRetryableError(error: Error): boolean;
1014
/**
 * Looks for an HTTP status code inside an error's message.
 *
 * @param error - Error whose message is searched
 * @returns The status code that was found, or `undefined` when the message contains none
 */
declare function extractStatusCode(error: Error): number | undefined;
1021
/**
 * Reads a Retry-After hint out of an error message or response.
 * Both the seconds (integer) form and the HTTP-date form are part of the contract.
 *
 * @param error - Error that may carry Retry-After information
 * @returns The wait expressed in milliseconds, or `undefined` when no hint is found
 */
declare function parseRetryAfter(error: Error): number | undefined;
1029
/**
 * Works out how long to wait before the next retry.
 *
 * When exponential backoff is on (the default), the base delay doubles per attempt:
 * - attempt 1 waits 1x the base delay (2^0)
 * - attempt 2 waits 2x the base delay (2^1)
 * - attempt 3 waits 4x the base delay (2^2)
 *
 * A random jitter of 0-1000ms is added to avoid a thundering herd, and the
 * sum is limited to `maxDelay`. When backoff is off, the base delay is returned as is.
 *
 * @param attempt - Attempt number, counted from 1
 * @param config - Retry settings; omitted fields use the defaults
 * @returns The wait in milliseconds
 */
declare function calculateRetryDelay(attempt: number, config?: RetryConfig): number;
1043
/**
 * Creates or retrieves a circuit breaker for a given key.
 *
 * Circuit breakers prevent cascading failures by:
 * 1. Tracking consecutive failures per provider/endpoint
 * 2. "Opening" the circuit once `threshold` consecutive failures are recorded,
 *    so callers can skip requests
 * 3. Reporting the circuit as closed again once more than `resetTimeout`
 *    milliseconds have passed since the last failure; at that point the
 *    failure count starts over from zero
 * 4. Closing the circuit and clearing the failure count on success
 *
 * Breakers are kept in a registry shared across callers. If a breaker already
 * exists for `key`, that instance is returned unchanged and `config` is ignored.
 *
 * @param key - Unique identifier (e.g., "datalab:surya" or "openai:gpt-4")
 * @param config - Circuit breaker configuration, used only when a new breaker is created
 * @returns CircuitBreaker instance registered under `key`
 */
declare function createCircuitBreaker(key: string, config?: CircuitBreakerConfig): CircuitBreaker;
1057
/**
 * Removes every registered circuit breaker. Handy for resetting state in tests.
 */
declare function clearCircuitBreakers(): void;
1061
/**
 * Looks up the circuit breaker registered under a key; never creates one.
 *
 * @param key - Unique identifier the breaker was registered with
 * @returns The registered CircuitBreaker, or `undefined` when none exists for `key`
 */
declare function getCircuitBreaker(key: string): CircuitBreaker | undefined;
1068
/**
 * Wraps an async function with retry logic.
 *
 * The function is attempted once and then retried up to `maxRetries` times.
 * Retrying stops early as soon as a failure is classified as non-retryable.
 * A Retry-After hint (from `getRetryAfter` or parsed from the error message)
 * replaces the computed delay, limited to `maxDelay`.
 *
 * When a circuit breaker is supplied it is checked once before the first
 * attempt, receives `recordSuccess()` when the function succeeds, and receives
 * a single `recordFailure()` when the call ultimately fails.
 *
 * @example
 * ```typescript
 * const result = await withRetry(
 *   () => fetchWithTimeout(url, options),
 *   {
 *     maxRetries: 3,
 *     retryDelay: 1000,
 *     useExponentialBackoff: true,
 *     onRetry: (attempt, error, delay) => {
 *       console.log(`Retry ${attempt} after ${delay}ms: ${error.message}`);
 *     }
 *   }
 * );
 * ```
 *
 * @param fn - Async function to retry
 * @param options - Retry options
 * @returns Result of the function
 * @throws Error with message "Circuit breaker is open" if the supplied breaker is open when called
 * @throws Last error if the failure is non-retryable or all retries fail
 */
declare function withRetry<T>(fn: () => Promise<T>, options?: WithRetryOptions<T>): Promise<T>;
1092
+
1093
+ export { type AcceptedMimeType, AccessMethod, type AllAutoVariables, type AutoVariablesForNode, type BaseProviderConfig, type CategorizeAutoVariables, type CircuitBreaker, type CircuitBreakerConfig, type CircuitBreakerState, DEFAULT_CIRCUIT_BREAKER_CONFIG, DEFAULT_RETRY_CONFIG, type DocumentMimeType, type ExtractAutoVariables, type FeatureName, FlowInputValidationError, type InputRequirements, type ModelMetadata, type ModelQueryFilter, type NormalizedCapabilities, type NormalizedFeatures, type NormalizedProviderMetadata, type OCRProviderConfig, type OutputFormatSupport, type ParseAutoVariables, type PromptVariables, type ProviderConfig, type ProviderInputType, type ProviderInstance, type ProviderMetadataWithModels, type ProviderQueryFilter, type ProviderRegistry, type ProviderSecrets, ProviderVendor, type ResolvedModelMetadata, type RetryConfig, type VLMProviderConfig, type WithRetryOptions, bufferToBase64, bufferToDataUri, buildProviderFromConfig, buildProvidersFromConfigs, calculateRetryDelay, clearCircuitBreakers, clearModelRegistry, clearProviderRegistry, createCircuitBreaker, defineMarkerProvider, defineSuryaProvider, defineVLMProvider, detectDocumentType, detectMimeTypeFromBase64, detectMimeTypeFromBase64Async, detectMimeTypeFromBytes, extractBase64, extractStatusCode, getAllModels, getAllProviders, getCheapestProviderFor, getCircuitBreaker, getModelsForNode, getProviderById, getProvidersBySource, getProvidersForLargeFiles, getProvidersForMimeType, isPDFDocument, isRetryableError, parseRetryAfter, queryModels, queryProviders, registerProviderMetadata, registerProviderWithModels, resolveDocument, resolveModelMetadata, validateFlowInputFormat, validateMimeType, validateMimeTypeAsync, withRetry };
package/dist/index.js CHANGED
@@ -1558,6 +1558,8 @@ function defaultNormalizer(id, data, source) {
1558
1558
  return normalizeReductoProvider(id, d);
1559
1559
  } else if (source === "unsiloed") {
1560
1560
  return normalizeUnsiloedProvider(id, d);
1561
+ } else if (source === "mistral") {
1562
+ return normalizeMistralProvider(id, d);
1561
1563
  }
1562
1564
  const defaultOutputFormats = { text: true, markdown: false, html: false, json: false };
1563
1565
  const defaultFeatures = {
@@ -1581,6 +1583,7 @@ function defaultNormalizer(id, data, source) {
1581
1583
  boundingBoxes: false,
1582
1584
  schemaValidation: false,
1583
1585
  handwrittenText: false,
1586
+ headerFooterExtraction: false,
1584
1587
  outputFormats: defaultOutputFormats
1585
1588
  };
1586
1589
  return {
@@ -1670,6 +1673,8 @@ function normalizeLLMProvider(id, d) {
1670
1673
  // Some LLMs support schema validation
1671
1674
  handwrittenText: false,
1672
1675
  // Not specific to LLMs
1676
+ headerFooterExtraction: false,
1677
+ // LLMs don't extract header/footer separately
1673
1678
  outputFormats
1674
1679
  };
1675
1680
  const vendor = d.vendor ?? id;
@@ -1777,6 +1782,8 @@ function normalizeDatalabProvider(id, d) {
1777
1782
  // VLM providers support schema validation
1778
1783
  handwrittenText: true,
1779
1784
  // Datalab handles handwritten text
1785
+ headerFooterExtraction: false,
1786
+ // Datalab has issues with header/footer extraction
1780
1787
  outputFormats
1781
1788
  };
1782
1789
  return {
@@ -1886,6 +1893,8 @@ function normalizeReductoProvider(id, d) {
1886
1893
  // Extract has schema validation
1887
1894
  handwrittenText: false,
1888
1895
  // Reducto doesn't specifically advertise handwriting
1896
+ headerFooterExtraction: true,
1897
+ // Reducto has Header/Footer block types
1889
1898
  outputFormats
1890
1899
  };
1891
1900
  return {
@@ -1999,6 +2008,8 @@ function normalizeUnsiloedProvider(id, d) {
1999
2008
  // Extract supports schema validation
2000
2009
  handwrittenText: false,
2001
2010
  // Unsiloed doesn't specifically advertise handwriting
2011
+ headerFooterExtraction: false,
2012
+ // Unsiloed doesn't extract header/footer separately
2002
2013
  outputFormats
2003
2014
  };
2004
2015
  return {
@@ -2288,7 +2299,304 @@ function getAllModels() {
2288
2299
  function clearModelRegistry() {
2289
2300
  modelRegistry.clear();
2290
2301
  }
2302
/**
 * Normalizes raw Mistral provider metadata into the common provider shape.
 *
 * Values present on `d` win; anything missing falls back to a default that
 * depends on whether the provider is an OCR or a VLM model.
 *
 * @param id - Registry id, used as fallback for `id`, `name` and `model`
 * @param d - Raw provider metadata
 * @returns Normalized provider metadata with `source: "mistral"`; the untouched input is kept under `raw`
 */
function normalizeMistralProvider(id, d) {
  const opts = d.supportedOptions ?? {};
  // Resolve the type once so the OCR/VLM-dependent defaults below agree with
  // the `type` that is reported (metadata without a type is treated as OCR).
  const type = d.type ?? "OCR";
  const isVLM = type === "VLM";
  const isOCR = type === "OCR";
  const model = d.model ?? id;
  const outputFormats = {
    text: true,
    markdown: d.outputFormat?.features?.markdown ?? isOCR,
    // OCR 3 can output HTML tables
    html: d.outputFormat?.features?.htmlTables ?? isOCR,
    json: d.outputFormat?.features?.structuredJSON ?? isVLM
  };
  const features = {
    maxPages: d.inputFormats?.maxPages !== void 0,
    // Mistral supports pages param: "0-5" or [0,2,5]
    pageRange: true,
    // Mistral doesn't support language hints
    languageHints: false,
    // Mistral doesn't have processing modes
    processingModes: false,
    // Mistral doesn't have agentic mode
    agenticMode: false,
    // Mistral OCR 3 doesn't support custom prompts
    customPrompts: false,
    // Can include embedded images
    imageExtraction: opts.includeImageBase64 ?? false,
    // Mistral doesn't add page markers
    pageMarkers: false,
    // Mistral doesn't provide citations
    citations: false,
    // Mistral doesn't do chunking
    chunking: false,
    // Mistral doesn't do segmentation
    segmentation: false,
    stripExistingOCR: false,
    formatLines: false,
    // OCR 3 always does OCR
    forceOCR: true,
    // html or markdown table format
    tableOutputFormats: opts.tableFormat ?? isOCR,
    tableMerging: false,
    // Mistral doesn't provide confidence scores
    confidence: false,
    // NO text-level bboxes
    boundingBoxes: d.outputFormat?.features?.boundingBoxes ?? false,
    // VLM supports schema
    schemaValidation: d.outputFormat?.features?.schemaValidation ?? isVLM,
    // Excellent handwriting support
    handwrittenText: d.outputFormat?.features?.handwrittenText ?? true,
    // extract_header/extract_footer: supporting either option is enough.
    // (`a ?? b` would ignore `extractFooter: true` whenever `extractHeader` is `false`.)
    headerFooterExtraction: Boolean(opts.extractHeader || opts.extractFooter),
    outputFormats
  };
  return {
    id: d.id ?? id,
    name: d.name ?? id,
    source: "mistral",
    type,
    // 3-layer identity
    identity: {
      provider: "mistral",
      model,
      method: "native"
    },
    capabilities: {
      supportsImages: d.capabilities?.supportsImages ?? true,
      supportsPDFs: d.capabilities?.supportsPDFs ?? true,
      // DOCX/PPTX has known issues
      supportsDocuments: d.capabilities?.supportsDocuments ?? false,
      // OCR 3 doesn't do reasoning
      supportsReasoning: false,
      supportsStructuredOutput: d.capabilities?.supportsStructuredOutput ?? isVLM,
      // Extended capabilities
      supportsPrompts: false,
      supportsCitations: false,
      supportsChunking: false,
      supportsImageExtraction: opts.includeImageBase64 ?? false,
      supportsPageMarkers: false,
      supportsLanguageHints: false,
      supportsProcessingModes: false,
      supportsSegmentation: false,
      outputFormats
    },
    features,
    // Mistral providers always need raw document input
    inputRequirements: {
      inputType: d.inputRequirements?.inputType ?? "raw-document",
      acceptedMethods: d.inputRequirements?.acceptedMethods ?? ["base64", "url"]
    },
    compatibleNodes: {
      parse: d.compatibleNodes?.parse ?? isOCR,
      extract: d.compatibleNodes?.extract ?? isVLM,
      categorize: d.compatibleNodes?.categorize ?? false,
      qualify: d.compatibleNodes?.qualify ?? false,
      split: d.compatibleNodes?.split ?? false
    },
    inputFormats: {
      imageMimeTypes: (d.inputFormats?.mimeTypes ?? []).filter((m) => m.startsWith("image/")),
      documentMimeTypes: (d.inputFormats?.mimeTypes ?? []).filter((m) => !m.startsWith("image/")),
      inputMethods: d.inputFormats?.inputMethods ?? ["base64", "url"],
      // 50MB limit
      maxFileSize: d.inputFormats?.maxFileSize ?? 50,
      maxPages: d.inputFormats?.maxPages ?? 1e3
    },
    pricing: {
      model: "per-page",
      // $2/1000 pages
      perPage: d.pricing?.perPage ?? 2e-3,
      currency: "USD",
      notes: d.pricing?.notes ?? "$2 per 1000 pages"
    },
    rateLimits: {
      docsPerMinute: d.apiConfig?.rateLimit?.docsPerMinute
    },
    raw: d
  };
}
2419
+
2420
+ // src/retry.ts
2421
/** Retry settings used for any field the caller leaves unset. Delays are in milliseconds. */
var DEFAULT_RETRY_CONFIG = {
  maxRetries: 2,
  retryDelay: 1000,
  useExponentialBackoff: true,
  maxDelay: 30000
};
/** Circuit breaker settings used for any field the caller leaves unset. Timeout is in milliseconds. */
var DEFAULT_CIRCUIT_BREAKER_CONFIG = {
  threshold: 3,
  resetTimeout: 30000
};
2431
/** HTTP status codes that indicate a transient failure. */
var RETRYABLE_STATUS_CODES = ["408", "429", "500", "502", "503", "504"];
/** Lower-case message fragments that indicate a transient failure. */
var RETRYABLE_ERROR_PATTERNS = [
  "timeout",
  "rate limit",
  "overloaded",
  "econnreset",
  "etimedout",
  "enotfound",
  "econnrefused",
  "socket hang up",
  "network error"
];
// Matches a retryable status code only when it is not part of a longer number,
// so "15000 characters" is not mistaken for an HTTP 500.
var RETRYABLE_STATUS_CODE_REGEX = new RegExp(`(?<!\\d)(?:${RETRYABLE_STATUS_CODES.join("|")})(?!\\d)`);
/**
 * Decides from an error's message whether the failure is worth retrying.
 *
 * The message is lower-cased and checked for a retryable HTTP status code
 * (as a standalone number) or one of the known transient-failure fragments.
 *
 * @param error - The error to classify; values without a string `message` are stringified
 * @returns true if the error is retryable
 */
function isRetryableError(error) {
  const rawMessage = error?.message;
  // Thrown values are not guaranteed to be Error instances; never crash while classifying.
  const message = (typeof rawMessage === "string" ? rawMessage : String(error ?? "")).toLowerCase();
  if (RETRYABLE_STATUS_CODE_REGEX.test(message)) {
    return true;
  }
  return RETRYABLE_ERROR_PATTERNS.some((pattern) => message.includes(pattern));
}
2457
/**
 * Extracts an HTTP status code (100-599) from an error message if present.
 *
 * Codes introduced by a label ("status", "status code", "http", "failed") are
 * preferred. Only when no labelled code is found does the function fall back
 * to the first standalone three-digit number that is a plausible status code.
 *
 * @param error - The error to extract status from
 * @returns The HTTP status code or undefined
 */
function extractStatusCode(error) {
  const message = typeof error?.message === "string" ? error.message : "";
  const isHttpStatus = (code) => code >= 100 && code < 600;
  // Specific patterns first: an unrelated number earlier in the message
  // ("processed 120 pages, status: 503") must not shadow the real code.
  const labelledPatterns = [
    /status(?:\s*code)?[:\s]+(\d{3})\b/i,
    /http[:\s]+(\d{3})\b/i,
    /failed[:\s]+(\d{3})\b/i
  ];
  for (const pattern of labelledPatterns) {
    const match = message.match(pattern);
    if (match) {
      const code = parseInt(match[1], 10);
      if (isHttpStatus(code)) {
        return code;
      }
    }
  }
  // Fallback: just the status code, skipping numbers outside the valid range.
  for (const match of message.matchAll(/\b(\d{3})\b/g)) {
    const code = parseInt(match[1], 10);
    if (isHttpStatus(code)) {
      return code;
    }
  }
  return void 0;
}
2476
/**
 * Parses a Retry-After value from an error message.
 *
 * Two forms are recognized after a "retry-after" label (case-insensitive):
 * - an integer number of seconds, e.g. "Retry-After: 120"
 * - an HTTP-date, e.g. "Retry-After: Wed, 21 Oct 2015 07:28:00 GMT",
 *   converted to the time remaining from now
 *
 * Only waits greater than zero and shorter than one hour are accepted.
 *
 * @param error - Error that may contain Retry-After information
 * @returns Delay in milliseconds, or undefined if not found or out of range
 */
function parseRetryAfter(error) {
  const message = typeof error?.message === "string" ? error.message : "";
  const MAX_DELAY_MS = 3600 * 1e3;

  const secondsMatch = message.match(/retry-after[:\s]+(\d+)/i);
  if (secondsMatch) {
    const seconds = parseInt(secondsMatch[1], 10);
    if (!isNaN(seconds) && seconds > 0 && seconds < 3600) {
      return seconds * 1e3;
    }
    return void 0;
  }

  // HTTP-date form (IMF-fixdate), which is also the format produced by Date#toUTCString.
  const dateMatch = message.match(
    /retry-after[:\s]+((?:mon|tue|wed|thu|fri|sat|sun),\s+\d{1,2}\s+[a-z]{3}\s+\d{4}\s+\d{2}:\d{2}:\d{2}\s+gmt)/i
  );
  if (dateMatch) {
    const remainingMs = Date.parse(dateMatch[1]) - Date.now();
    if (Number.isFinite(remainingMs) && remainingMs > 0 && remainingMs < MAX_DELAY_MS) {
      return Math.ceil(remainingMs);
    }
  }
  return void 0;
}
2487
/**
 * Computes the wait before the next retry.
 *
 * Without exponential backoff the base delay is returned unchanged. With it,
 * the base delay is doubled for every attempt after the first, a random
 * jitter below 1000ms is added, and the total is limited to `maxDelay`.
 *
 * @param attempt - Attempt number, counted from 1
 * @param config - Retry settings; fields left undefined use DEFAULT_RETRY_CONFIG
 * @returns Delay in milliseconds
 */
function calculateRetryDelay(attempt, config = {}) {
  const baseDelay = config.retryDelay !== void 0 ? config.retryDelay : DEFAULT_RETRY_CONFIG.retryDelay;
  const backoffEnabled = config.useExponentialBackoff !== void 0 ? config.useExponentialBackoff : DEFAULT_RETRY_CONFIG.useExponentialBackoff;
  const delayCap = config.maxDelay !== void 0 ? config.maxDelay : DEFAULT_RETRY_CONFIG.maxDelay;
  if (backoffEnabled) {
    const scaledDelay = baseDelay * Math.pow(2, attempt - 1);
    const randomOffset = Math.random() * 1e3;
    return Math.min(scaledDelay + randomOffset, delayCap);
  }
  return baseDelay;
}
2500
/** Circuit breakers created so far, indexed by the key they were created with. */
var circuitBreakerRegistry = /* @__PURE__ */ new Map();
/**
 * Returns the circuit breaker registered under `key`, creating it on first use.
 *
 * An already registered breaker is returned as is; `config` only matters when
 * a new breaker is created. Fields left undefined use DEFAULT_CIRCUIT_BREAKER_CONFIG.
 *
 * @param key - Unique identifier for the breaker
 * @param config - Threshold and reset timeout for a newly created breaker
 * @returns The breaker registered under `key`
 */
function createCircuitBreaker(key, config = {}) {
  const registered = circuitBreakerRegistry.get(key);
  if (registered) {
    return registered;
  }
  const failureLimit = config.threshold !== void 0 ? config.threshold : DEFAULT_CIRCUIT_BREAKER_CONFIG.threshold;
  const cooldownMs = config.resetTimeout !== void 0 ? config.resetTimeout : DEFAULT_CIRCUIT_BREAKER_CONFIG.resetTimeout;
  // A pristine, closed state; note that it carries no lastFailureTime.
  const closedState = () => ({
    consecutiveFailures: 0,
    isOpen: false
  });
  let current = closedState();
  const breaker = {
    isOpen() {
      if (!current.isOpen) {
        return false;
      }
      // Once the cooldown since the last failure has elapsed, start over closed.
      const cooledDown = Boolean(current.lastFailureTime) && Date.now() - current.lastFailureTime > cooldownMs;
      if (cooledDown) {
        current = closedState();
        return false;
      }
      return true;
    },
    recordSuccess() {
      current = closedState();
    },
    recordFailure() {
      current.consecutiveFailures += 1;
      current.lastFailureTime = Date.now();
      if (current.consecutiveFailures >= failureLimit) {
        current.isOpen = true;
        console.warn(`Circuit breaker opened for ${key} after ${current.consecutiveFailures} consecutive failures`);
      }
    },
    getState() {
      // Hand out a copy so callers cannot mutate the internal state.
      return Object.assign({}, current);
    }
  };
  circuitBreakerRegistry.set(key, breaker);
  return breaker;
}
2547
/**
 * Empties the circuit breaker registry, so later lookups by key find nothing
 * until a breaker is created again.
 */
function clearCircuitBreakers() {
  circuitBreakerRegistry.clear();
}
2550
/**
 * Looks up a circuit breaker in the registry without creating one.
 *
 * @param key - Identifier the breaker was registered under
 * @returns The registered breaker, or undefined when the key is unknown
 */
function getCircuitBreaker(key) {
  const registered = circuitBreakerRegistry.get(key);
  return registered;
}
2553
/**
 * Runs an async function, retrying it when it fails with a retryable error.
 *
 * - `fn` is always attempted at least once, then retried up to `maxRetries`
 *   times (negative, NaN or fractional values are normalized).
 * - Retrying stops as soon as a failure is classified as non-retryable.
 * - A Retry-After hint (from `getRetryAfter`, else parsed from the message)
 *   replaces the computed delay, limited to `maxDelay`.
 * - A supplied circuit breaker is checked once up front, gets `recordSuccess()`
 *   on success and a single `recordFailure()` when the call ultimately fails.
 *
 * @param fn - Async function to retry
 * @param options - Retry options; fields left undefined use DEFAULT_RETRY_CONFIG
 * @returns Result of the function
 * @throws Error("Circuit breaker is open") if the breaker is open when called
 * @throws The value `fn` last failed with, unchanged, if no attempt succeeds
 */
async function withRetry(fn, options = {}) {
  const {
    maxRetries = DEFAULT_RETRY_CONFIG.maxRetries,
    retryDelay = DEFAULT_RETRY_CONFIG.retryDelay,
    useExponentialBackoff = DEFAULT_RETRY_CONFIG.useExponentialBackoff,
    maxDelay = DEFAULT_RETRY_CONFIG.maxDelay,
    onRetry,
    getRetryAfter,
    circuitBreaker
  } = options;
  if (circuitBreaker?.isOpen()) {
    throw new Error("Circuit breaker is open");
  }
  // Guarantee at least one attempt: a negative or NaN maxRetries used to skip
  // the loop entirely and reject with `null`. Flooring avoids a pointless
  // final sleep for fractional values.
  const retries = maxRetries > 0 ? Math.floor(maxRetries) : 0;
  const totalAttempts = retries + 1;
  let lastThrown;
  for (let attempt = 1; attempt <= totalAttempts; attempt++) {
    try {
      const result = await fn();
      circuitBreaker?.recordSuccess();
      return result;
    } catch (thrown) {
      lastThrown = thrown;
      if (attempt === totalAttempts) {
        break;
      }
      // Rejections are not guaranteed to be Error objects; wrap anything
      // without a string message so classification cannot crash and mask
      // the real failure. The original value is still what gets rethrown.
      const error = typeof thrown?.message === "string" ? thrown : new Error(String(thrown));
      if (!isRetryableError(error)) {
        break;
      }
      let delay = calculateRetryDelay(attempt, { retryDelay, useExponentialBackoff, maxDelay });
      const retryAfter = getRetryAfter?.(error) ?? parseRetryAfter(error);
      if (retryAfter !== void 0 && retryAfter > 0) {
        delay = Math.min(retryAfter, maxDelay);
      }
      if (onRetry) {
        await onRetry(attempt, error, delay);
      }
      await sleep(delay);
    }
  }
  circuitBreaker?.recordFailure();
  throw lastThrown;
}
2594
/**
 * Returns a promise that resolves once the given time has passed.
 *
 * @param ms - Time to wait, in milliseconds
 */
function sleep(ms) {
  return new Promise((done) => {
    setTimeout(done, ms);
  });
}
2291
2597
  export {
2598
+ DEFAULT_CIRCUIT_BREAKER_CONFIG,
2599
+ DEFAULT_RETRY_CONFIG,
2292
2600
  FlowExecutionError,
2293
2601
  FlowInputValidationError,
2294
2602
  FlowValidationError,
@@ -2299,9 +2607,12 @@ export {
2299
2607
  bufferToDataUri,
2300
2608
  buildProviderFromConfig,
2301
2609
  buildProvidersFromConfigs,
2610
+ calculateRetryDelay,
2302
2611
  canStartForEachItemFlow,
2612
+ clearCircuitBreakers,
2303
2613
  clearModelRegistry,
2304
2614
  clearProviderRegistry,
2615
+ createCircuitBreaker,
2305
2616
  createIdentity,
2306
2617
  defineMarkerProvider,
2307
2618
  defineSuryaProvider,
@@ -2312,9 +2623,11 @@ export {
2312
2623
  detectMimeTypeFromBytes,
2313
2624
  extractBase64,
2314
2625
  extractErrorMessage,
2626
+ extractStatusCode,
2315
2627
  getAllModels,
2316
2628
  getAllProviders,
2317
2629
  getCheapestProviderFor,
2630
+ getCircuitBreaker,
2318
2631
  getCompatibleTargets,
2319
2632
  getDocumentPageCount,
2320
2633
  getModelsForNode,
@@ -2331,8 +2644,10 @@ export {
2331
2644
  getValidForEachStarters,
2332
2645
  isLocalEndpoint,
2333
2646
  isPDFDocument,
2647
+ isRetryableError,
2334
2648
  node,
2335
2649
  parseProviderString,
2650
+ parseRetryAfter,
2336
2651
  protectReservedVariables,
2337
2652
  queryModels,
2338
2653
  queryProviders,
@@ -2347,6 +2662,7 @@ export {
2347
2662
  validateJson,
2348
2663
  validateMimeType,
2349
2664
  validateMimeTypeAsync,
2350
- validateNodeConnection
2665
+ validateNodeConnection,
2666
+ withRetry
2351
2667
  };
2352
2668
  //# sourceMappingURL=index.js.map