@doclo/providers-llm 0.1.10 → 0.1.11

This diff shows the content of publicly available package versions as released to their respective public registries. It is provided for informational purposes only.
package/dist/index.d.ts CHANGED
@@ -32,6 +32,26 @@ interface ResourceLimits {
      */
     maxJsonDepth?: number;
 }
+/** Caching configuration for prompt caching */
+interface CachingConfig {
+    /**
+     * Enable/disable prompt caching.
+     * Default varies by provider:
+     * - Anthropic: false (cache writes cost 1.25x-2x, opt-in)
+     * - OpenAI/Google/XAI/DeepSeek: true (automatic, free)
+     */
+    enabled?: boolean;
+    /**
+     * Cache TTL for providers that support it (Anthropic only).
+     * - '5m': 5-minute TTL, cache writes cost 1.25x (default)
+     * - '1h': 1-hour TTL, cache writes cost 2x
+     *
+     * Break-even: ~1.4 reads/write (5m) or ~2.2 reads/write (1h).
+     * For high-frequency flows (100+ docs/hr with same schema), caching
+     * is almost always cost-effective despite the write cost.
+     */
+    ttl?: '5m' | '1h';
+}
 /** Provider configuration */
 interface ProviderConfig {
     provider: ProviderType;
@@ -40,6 +60,8 @@ interface ProviderConfig {
     apiKey: string;
     baseUrl?: string;
     limits?: ResourceLimits;
+    /** Optional caching configuration for prompt caching */
+    caching?: CachingConfig;
 }
 /** Fallback configuration */
 interface FallbackConfig {
@@ -79,8 +101,12 @@ interface ResponseMetrics {
     attemptNumber: number;
     provider: string;
     model: string;
+    /** Tokens written to cache (Anthropic only - costs 1.25x-2x) */
     cacheCreationInputTokens?: number;
+    /** Tokens read from cache (all providers - significant cost savings) */
     cacheReadInputTokens?: number;
+    /** Calculated cache savings percentage (0-100) based on provider discount rates */
+    cacheSavingsPercent?: number;
     httpStatusCode?: number;
     httpMethod?: string;
     httpUrl?: string;
@@ -250,6 +276,19 @@ interface CircuitBreakerState {
     lastFailureTime?: number;
     isOpen: boolean;
 }
+/**
+ * Calculate the cache savings percentage based on provider discount rates.
+ *
+ * @param provider - The provider name (e.g., 'anthropic', 'openai', 'google')
+ * @param inputTokens - Total input tokens in the request
+ * @param cacheReadTokens - Tokens read from cache
+ * @returns Savings percentage (0-100) or undefined if not calculable
+ *
+ * @example
+ * // 1000 input tokens, 800 from cache, using Anthropic (90% discount)
+ * calculateCacheSavings('anthropic', 1000, 800) // => 72 (72% savings)
+ */
+declare function calculateCacheSavings(provider: string, inputTokens: number | undefined, cacheReadTokens: number | undefined): number | undefined;
 
 /**
  * Internal JSON Schema representation for schema translation.
@@ -1690,4 +1729,4 @@ declare function createVLMProvider(config: {
  */
 declare function buildLLMProvider(config: FallbackConfig): VLMProvider;
 
-export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
+export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CachingConfig, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, calculateCacheSavings, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
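
The typings above add an opt-in caching field to ProviderConfig. A minimal sketch of what opting in looks like for an Anthropic provider, with the doc comment's break-even figures spelled out; the model id and the apiKey handling are illustrative assumptions, not values taken from the package:

import type { ProviderConfig } from "@doclo/providers-llm";

// The documented break-even figures match write multiplier / per-read saving:
// a 5m cache write costs 1.25x and each read saves 0.9x, so 1.25 / 0.9 ≈ 1.4
// reads/write; for 1h, 2 / 0.9 ≈ 2.2 reads/write.
const config: ProviderConfig = {
  provider: "anthropic",               // assumes "anthropic" is a valid ProviderType
  model: "anthropic/claude-sonnet-4",  // illustrative model id
  apiKey: process.env.ANTHROPIC_API_KEY ?? "",
  caching: {
    enabled: true, // Anthropic defaults to false because cache writes cost extra
    ttl: "1h",     // worthwhile when re-reads are spread over a longer window
  },
};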
package/dist/index.js CHANGED
@@ -11,6 +11,31 @@ import {
   formatSchemaForPrompt
 } from "./chunk-7YPJIWRM.js";
 
+// src/types.ts
+var CACHE_DISCOUNT_RATES = {
+  anthropic: 0.9,
+  // 90% discount on cached reads (0.1x price)
+  openai: 0.5,
+  // 50% discount
+  google: 0.75,
+  // 75% discount (0.25x price)
+  "x-ai": 0.75,
+  // 75% discount (Grok)
+  xai: 0.75,
+  // alias
+  deepseek: 0.9
+  // 90% discount
+};
+function calculateCacheSavings(provider, inputTokens, cacheReadTokens) {
+  if (!inputTokens || !cacheReadTokens || inputTokens === 0) {
+    return void 0;
+  }
+  const normalizedProvider = provider.includes("/") ? provider.split("/")[0] : provider;
+  const discountRate = CACHE_DISCOUNT_RATES[normalizedProvider.toLowerCase()] ?? 0.5;
+  const savingsPercent = Math.round(cacheReadTokens / inputTokens * discountRate * 100);
+  return Math.min(savingsPercent, 100);
+}
+
 // src/schema-translator.ts
 import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
 var SchemaTranslator = class {
@@ -470,18 +495,23 @@ var OpenAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel(this.config.model, "openai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens: data.usage?.prompt_tokens,
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "openai" from "openai/gpt-4...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
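
With this change, successful responses carry the cache fields in their metrics. A sketch of consuming them; generateJSON is a hypothetical method name standing in for whatever call on a provider yields an LLMResponse:

// `metrics` follows the ResponseMetrics shape from index.d.ts above.
const { metrics } = await provider.generateJSON(input); // hypothetical call
if (metrics.cacheReadInputTokens && metrics.inputTokens) {
  console.log(
    `${metrics.cacheReadInputTokens}/${metrics.inputTokens} input tokens from cache, ` +
      `~${metrics.cacheSavingsPercent}% of input cost saved`
  );
}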
@@ -747,8 +777,8 @@ var AnthropicProvider = class {
       inputTokens = data.usage?.prompt_tokens;
       outputTokens = data.usage?.completion_tokens;
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
-      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens;
-      const cacheReadInputTokens = data.usage?.cache_read_input_tokens;
+      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens ?? data.usage?.prompt_tokens_details?.cache_write_tokens;
+      const cacheReadInputTokens = data.usage?.cache_read_input_tokens ?? data.usage?.prompt_tokens_details?.cached_tokens;
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter usage response:", JSON.stringify(data.usage, null, 2));
         console.log("[AnthropicProvider] Extracted costUSD:", costUSD);
@@ -757,6 +787,7 @@ var AnthropicProvider = class {
     }
     const latencyMs = Date.now() - startTime;
     const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
     return {
       json: cleanJson,
@@ -771,7 +802,8 @@ var AnthropicProvider = class {
         // Base provider (e.g., "anthropic" from "anthropic/claude-...")
         model: this.config.model,
         cacheCreationInputTokens,
-        cacheReadInputTokens
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -882,13 +914,22 @@ var AnthropicProvider = class {
   }
   translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning, systemPrompt) {
     const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
     const jsonInstructions = mode === "strict" ? "You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text.";
     const systemContent = systemPrompt ? `${systemPrompt}
 
 ${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}`;
     const systemMessage = {
       role: "system",
-      content: systemContent
+      content: cachingEnabled ? [{
+        type: "text",
+        text: systemContent,
+        cache_control: {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        }
+      }] : systemContent
     };
     const messageArray = [systemMessage, ...messages];
     const requestBody = {
@@ -1055,22 +1096,38 @@ ${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}
         });
       }
     }
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
     if (hasMedia) {
       const textContent = input.text || "Extract the requested information from the document.";
       if (process.env.DEBUG_PROVIDERS) {
-        console.log("[AnthropicProvider.buildMessages] Adding text block with cache_control");
+        console.log("[AnthropicProvider.buildMessages] Adding text block" + (cachingEnabled ? " with cache_control" : ""));
         console.log(" textContent:", textContent);
+        console.log(" cachingEnabled:", cachingEnabled);
       }
-      content.push({
+      const textBlock = {
         type: "text",
-        text: textContent,
-        cache_control: { type: "ephemeral" }
-      });
+        text: textContent
+      };
+      if (cachingEnabled) {
+        textBlock.cache_control = {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        };
+      }
+      content.push(textBlock);
     } else if (input.text) {
-      content.push({
+      const textBlock = {
         type: "text",
         text: input.text
-      });
+      };
+      if (cachingEnabled) {
+        textBlock.cache_control = {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        };
+      }
+      content.push(textBlock);
     }
   } else {
     if (input.text) {
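
Two things worth noting in the hunks above: 0.1.10 attached cache_control to the text block unconditionally whenever media was present, while 0.1.11 only does so when caching.enabled is true; and the block built here follows Anthropic's prompt-caching format, which OpenRouter passes through. A sketch of the resulting block with the 1-hour TTL configured (the text value is illustrative):

const textBlock = {
  type: "text",
  text: "Extract the requested information from the document.",
  cache_control: {
    type: "ephemeral", // the only cache_control type Anthropic currently accepts
    ttl: "1h",         // omitted entirely for the default 5-minute TTL
  },
};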
@@ -1364,10 +1421,12 @@ var GoogleProvider = class {
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
       const reasoning = message?.reasoning;
       const reasoning_details = message?.reasoning_details;
+      const cacheReadInputTokens = data.usage?.cached_tokens;
       content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1379,7 +1438,9 @@ var GoogleProvider = class {
           attemptNumber: 1,
           provider: baseProvider,
           // Base provider (e.g., "google" from "google/gemini-...")
-          model: this.config.model
+          model: this.config.model,
+          cacheReadInputTokens,
+          cacheSavingsPercent
         },
         reasoning,
         reasoning_details,
@@ -1391,11 +1452,13 @@ var GoogleProvider = class {
       inputTokens = data.usageMetadata?.promptTokenCount;
       outputTokens = data.usageMetadata?.candidatesTokenCount;
       costUSD = this.calculateCost(data.usageMetadata);
+      const cacheReadInputTokens = data.usageMetadata?.cachedContentTokenCount;
       const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
       const reasoning = thinkingPart?.text;
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1407,7 +1470,9 @@ var GoogleProvider = class {
           attemptNumber: 1,
           provider: baseProvider,
           // Base provider (e.g., "google" from "google/gemini-...")
-          model: this.config.model
+          model: this.config.model,
+          cacheReadInputTokens,
+          cacheSavingsPercent
         },
         reasoning,
         reasoning_details: reasoning ? [{
@@ -1750,18 +1815,23 @@ var XAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel4(this.config.model, "xai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens: data.usage?.prompt_tokens,
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "x-ai" from "x-ai/grok-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -2900,6 +2970,7 @@ export {
   buildOutputFormatPrompt,
   buildSchemaPromptSection,
   buildSourcesPrompt,
+  calculateCacheSavings,
   combineSchemaAndUserPrompt,
   combineSchemaUserAndDerivedPrompts,
   compareNativeVsOpenRouter,
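
Finally, a quick check of the newly exported helper against its own JSDoc example; the import path is the package itself, and the "vendor/model" id in the second call is an illustrative assumption:

import { calculateCacheSavings } from "@doclo/providers-llm";

// 800 of 1000 input tokens read from cache at Anthropic's 0.9 discount rate:
// round(800 / 1000 * 0.9 * 100) = 72, matching the JSDoc example.
calculateCacheSavings("anthropic", 1000, 800); // => 72

// "vendor/model" ids are normalized to the vendor part before the rate lookup:
calculateCacheSavings("anthropic/claude-sonnet-4", 1000, 800); // => 72 (illustrative id)

// Unrecognized providers fall back to the 0.5 rate: round(0.8 * 0.5 * 100) = 40.
calculateCacheSavings("mystery-provider", 1000, 800); // => 40

// Missing usage data returns undefined rather than a misleading 0.
calculateCacheSavings("openai", undefined, 800); // => undefined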