@doclo/providers-llm 0.1.9 → 0.1.11

This diff covers the changes published between 0.1.9 and 0.1.11 on the public registry. The release adds opt-in prompt caching (a CachingConfig option, cache-usage fields on ResponseMetrics, and a calculateCacheSavings helper) and a new systemPrompt input threaded through the OpenAI, Anthropic, Google, and XAI providers.
package/dist/index.d.ts CHANGED
@@ -32,6 +32,26 @@ interface ResourceLimits {
      */
     maxJsonDepth?: number;
 }
+/** Caching configuration for prompt caching */
+interface CachingConfig {
+    /**
+     * Enable/disable prompt caching.
+     * Default varies by provider:
+     * - Anthropic: false (cache writes cost 1.25x-2x, opt-in)
+     * - OpenAI/Google/XAI/DeepSeek: true (automatic, free)
+     */
+    enabled?: boolean;
+    /**
+     * Cache TTL for providers that support it (Anthropic only).
+     * - '5m': 5-minute TTL, cache writes cost 1.25x (default)
+     * - '1h': 1-hour TTL, cache writes cost 2x
+     *
+     * Break-even: ~1.4 reads/write (5m) or ~2.2 reads/write (1h).
+     * For high-frequency flows (100+ docs/hr with same schema), caching
+     * is almost always cost-effective despite the write cost.
+     */
+    ttl?: '5m' | '1h';
+}
 /** Provider configuration */
 interface ProviderConfig {
     provider: ProviderType;
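
The break-even ratios quoted in the TTL docblock are consistent with weighing the full cache-write multiplier against Anthropic's 90% read discount. A quick sanity check of that arithmetic (my reading of the comment, not code shipped in the package):

    // Cached reads cost 0.1x on Anthropic, so each read saves 0.9x of its token cost.
    // Treating the whole write multiplier as overhead reproduces the documented ratios:
    const breakEven5m = 1.25 / 0.9; // ~1.39 reads per write (the "~1.4" above)
    const breakEven1h = 2.0 / 0.9;  // ~2.22 reads per write (the "~2.2" above)
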
@@ -40,6 +60,8 @@ interface ProviderConfig {
     apiKey: string;
     baseUrl?: string;
     limits?: ResourceLimits;
+    /** Optional caching configuration for prompt caching */
+    caching?: CachingConfig;
 }
 /** Fallback configuration */
 interface FallbackConfig {
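
For illustration, opting in to Anthropic caching through the new field might look like the sketch below (the model id and prompt are placeholders; ProviderConfig's model field is declared elsewhere in these typings, not in this hunk):

    const config: ProviderConfig = {
      provider: 'anthropic',
      model: 'claude-example-model', // hypothetical id, not from the package
      apiKey: process.env.ANTHROPIC_API_KEY ?? '',
      // Opt-in on Anthropic: writes cost 1.25x (5m) or 2x (1h), reads cost 0.1x
      caching: { enabled: true, ttl: '1h' }
    };
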
@@ -67,6 +89,8 @@ interface MultimodalInput {
     text?: string;
     images?: ImageInput[];
     pdfs?: PDFInput[];
+    /** Optional system message (text-only, prepended to conversation) */
+    systemPrompt?: string;
 }
 /** Response metrics */
 interface ResponseMetrics {
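
A minimal sketch of an input that exercises the new field (prompt strings are illustrative):

    const input: MultimodalInput = {
      systemPrompt: 'You are an invoice-extraction assistant.',
      text: 'Extract the vendor name, total, and due date.',
      images: [] // ImageInput/PDFInput shapes are declared elsewhere in this file
    };
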
@@ -77,8 +101,12 @@ interface ResponseMetrics {
     attemptNumber: number;
     provider: string;
     model: string;
+    /** Tokens written to cache (Anthropic only - costs 1.25x-2x) */
     cacheCreationInputTokens?: number;
+    /** Tokens read from cache (all providers - significant cost savings) */
     cacheReadInputTokens?: number;
+    /** Calculated cache savings percentage (0-100) based on provider discount rates */
+    cacheSavingsPercent?: number;
     httpStatusCode?: number;
     httpMethod?: string;
     httpUrl?: string;
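
Callers can surface the new cache fields directly from the metrics object; a minimal sketch:

    function logCacheMetrics(metrics: ResponseMetrics): void {
      // All three cache fields are optional; they are only set when the
      // provider's usage payload reports cache activity.
      if (metrics.cacheReadInputTokens !== undefined) {
        console.log(`${metrics.cacheReadInputTokens} input tokens read from cache`);
      }
      if (metrics.cacheSavingsPercent !== undefined) {
        console.log(`~${metrics.cacheSavingsPercent}% input-cost savings`);
      }
    }
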
@@ -248,6 +276,19 @@ interface CircuitBreakerState {
     lastFailureTime?: number;
     isOpen: boolean;
 }
+/**
+ * Calculate the cache savings percentage based on provider discount rates.
+ *
+ * @param provider - The provider name (e.g., 'anthropic', 'openai', 'google')
+ * @param inputTokens - Total input tokens in the request
+ * @param cacheReadTokens - Tokens read from cache
+ * @returns Savings percentage (0-100) or undefined if not calculable
+ *
+ * @example
+ * // 1000 input tokens, 800 from cache, using Anthropic (90% discount)
+ * calculateCacheSavings('anthropic', 1000, 800) // => 72 (72% savings)
+ */
+declare function calculateCacheSavings(provider: string, inputTokens: number | undefined, cacheReadTokens: number | undefined): number | undefined;
 
 /**
  * Internal JSON Schema representation for schema translation.
@@ -1688,4 +1729,4 @@ declare function createVLMProvider(config: {
  */
 declare function buildLLMProvider(config: FallbackConfig): VLMProvider;
 
-export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
+export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CachingConfig, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, calculateCacheSavings, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
package/dist/index.js CHANGED
@@ -11,6 +11,31 @@ import {
   formatSchemaForPrompt
 } from "./chunk-7YPJIWRM.js";
 
+// src/types.ts
+var CACHE_DISCOUNT_RATES = {
+  anthropic: 0.9,
+  // 90% discount on cached reads (0.1x price)
+  openai: 0.5,
+  // 50% discount
+  google: 0.75,
+  // 75% discount (0.25x price)
+  "x-ai": 0.75,
+  // 75% discount (Grok)
+  xai: 0.75,
+  // alias
+  deepseek: 0.9
+  // 90% discount
+};
+function calculateCacheSavings(provider, inputTokens, cacheReadTokens) {
+  if (!inputTokens || !cacheReadTokens || inputTokens === 0) {
+    return void 0;
+  }
+  const normalizedProvider = provider.includes("/") ? provider.split("/")[0] : provider;
+  const discountRate = CACHE_DISCOUNT_RATES[normalizedProvider.toLowerCase()] ?? 0.5;
+  const savingsPercent = Math.round(cacheReadTokens / inputTokens * discountRate * 100);
+  return Math.min(savingsPercent, 100);
+}
+
 // src/schema-translator.ts
 import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
 var SchemaTranslator = class {
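
A few illustrative calls against the calculateCacheSavings implementation above (outputs computed from the discount table, not taken from package tests). The provider string is normalized by taking the segment before any "/" and lower-casing it, with unknown vendors falling back to the 0.5 rate:

    calculateCacheSavings('anthropic', 1000, 800);        // => 72 (800/1000 * 0.9 * 100)
    calculateCacheSavings('openai/gpt-4o', 2000, 1500);   // => 38 ("openai" rate 0.5; 37.5 rounds up)
    calculateCacheSavings('some-new-vendor', 1000, 500);  // => 25 (default 0.5 discount)
    calculateCacheSavings('google', 0, 500);              // => undefined (no input tokens)
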
@@ -470,18 +495,23 @@ var OpenAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel(this.config.model, "openai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens: data.usage?.prompt_tokens,
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "openai" from "openai/gpt-4...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -501,6 +531,10 @@ var OpenAIProvider = class {
     return Object.keys(config).length > 0 ? config : void 0;
   }
   buildMessages(input) {
+    const messages = [];
+    if (input.systemPrompt) {
+      messages.push({ role: "system", content: input.systemPrompt });
+    }
     const content = [];
     if (input.text) {
       content.push({ type: "text", text: input.text });
@@ -541,7 +575,8 @@ var OpenAIProvider = class {
         });
       }
     }
-    return [{ role: "user", content }];
+    messages.push({ role: "user", content });
+    return messages;
   }
   /**
    * Extract base64 data from a data URL or return as-is if already raw base64
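
With a systemPrompt present, buildMessages now returns an OpenAI-style two-message array instead of the previous single user turn; schematically (values illustrative):

    const messages = [
      { role: 'system', content: 'You are an invoice-extraction assistant.' },
      { role: 'user', content: [{ type: 'text', text: 'Extract the vendor name.' }] }
    ];
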
@@ -642,7 +677,9 @@ var AnthropicProvider = class {
     const requestBody = {
       model: this.config.model,
       max_tokens: params.max_tokens || 4096,
-      messages
+      messages,
+      // Native Anthropic API uses separate system parameter (text-only)
+      ...enhancedInput.systemPrompt && { system: enhancedInput.systemPrompt }
     };
     if (mode === "relaxed") {
       requestBody.messages.push({
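
The conditional spread attaches the field only when a prompt exists, matching the native Messages API, where the system prompt is a top-level parameter rather than a message. With a prompt set, the request body comes out roughly as (illustrative values):

    const requestBody = {
      model: 'claude-example-model', // hypothetical id
      max_tokens: 4096,
      messages: [/* user turns */],
      // Present only because enhancedInput.systemPrompt was truthy
      system: 'You are an invoice-extraction assistant.'
    };
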
@@ -687,7 +724,7 @@ var AnthropicProvider = class {
     let costUSD;
     if (this.config.via === "openrouter") {
       const useNewStructuredOutputs2 = this.supportsNewStructuredOutputs();
-      const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning);
+      const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter request body (messages):");
         console.log(JSON.stringify(openRouterRequest.messages, null, 2));
@@ -740,8 +777,8 @@ var AnthropicProvider = class {
       inputTokens = data.usage?.prompt_tokens;
       outputTokens = data.usage?.completion_tokens;
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
-      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens;
-      const cacheReadInputTokens = data.usage?.cache_read_input_tokens;
+      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens ?? data.usage?.prompt_tokens_details?.cache_write_tokens;
+      const cacheReadInputTokens = data.usage?.cache_read_input_tokens ?? data.usage?.prompt_tokens_details?.cached_tokens;
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter usage response:", JSON.stringify(data.usage, null, 2));
         console.log("[AnthropicProvider] Extracted costUSD:", costUSD);
@@ -750,6 +787,7 @@ var AnthropicProvider = class {
     }
     const latencyMs = Date.now() - startTime;
     const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
     return {
       json: cleanJson,
@@ -764,7 +802,8 @@ var AnthropicProvider = class {
         // Base provider (e.g., "anthropic" from "anthropic/claude-...")
         model: this.config.model,
         cacheCreationInputTokens,
-        cacheReadInputTokens
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -873,11 +912,24 @@ var AnthropicProvider = class {
       budget_tokens
     };
   }
-  translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning) {
+  translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning, systemPrompt) {
     const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
+    const jsonInstructions = mode === "strict" ? "You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text.";
+    const systemContent = systemPrompt ? `${systemPrompt}
+
+${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}`;
     const systemMessage = {
       role: "system",
-      content: mode === "strict" ? "You are a data extraction assistant. You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You are a data extraction assistant. You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text."
+      content: cachingEnabled ? [{
+        type: "text",
+        text: systemContent,
+        cache_control: {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        }
+      }] : systemContent
     };
     const messageArray = [systemMessage, ...messages];
     const requestBody = {
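
When caching is enabled, the system message's string content is promoted to a one-element content-block array so it can carry Anthropic's cache_control marker through OpenRouter. With ttl: '1h' the block comes out as (text shortened for illustration):

    const systemMessage = {
      role: 'system',
      content: [{
        type: 'text',
        text: 'You are a data extraction assistant. You must respond ONLY with valid JSON.',
        // The default '5m' TTL is expressed by omitting ttl entirely
        cache_control: { type: 'ephemeral', ttl: '1h' }
      }]
    };
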
@@ -1044,22 +1096,38 @@ var AnthropicProvider = class {
           });
         }
       }
+      const cachingEnabled = this.config.caching?.enabled === true;
+      const cacheTTL = this.config.caching?.ttl || "5m";
       if (hasMedia) {
         const textContent = input.text || "Extract the requested information from the document.";
         if (process.env.DEBUG_PROVIDERS) {
-          console.log("[AnthropicProvider.buildMessages] Adding text block with cache_control");
+          console.log("[AnthropicProvider.buildMessages] Adding text block" + (cachingEnabled ? " with cache_control" : ""));
           console.log("  textContent:", textContent);
+          console.log("  cachingEnabled:", cachingEnabled);
         }
-        content.push({
+        const textBlock = {
           type: "text",
-          text: textContent,
-          cache_control: { type: "ephemeral" }
-        });
+          text: textContent
+        };
+        if (cachingEnabled) {
+          textBlock.cache_control = {
+            type: "ephemeral",
+            ...cacheTTL === "1h" && { ttl: "1h" }
+          };
+        }
+        content.push(textBlock);
       } else if (input.text) {
-        content.push({
+        const textBlock = {
           type: "text",
           text: input.text
-        });
+        };
+        if (cachingEnabled) {
+          textBlock.cache_control = {
+            type: "ephemeral",
+            ...cacheTTL === "1h" && { ttl: "1h" }
+          };
+        }
+        content.push(textBlock);
       }
     } else {
       if (input.text) {
@@ -1291,6 +1359,10 @@ var GoogleProvider = class {
         // Use JSON mode without responseSchema - schema is already in the prompt via combineSchemaAndUserPrompt.
         // See: https://ubaidullahmomer.medium.com/why-google-geminis-response-schema-isn-t-ready-for-complex-json-46f35c3aaaea
         responseMimeType: "application/json"
+      },
+      // Native Gemini API uses systemInstruction with parts array (text-only)
+      ...enhancedInput.systemPrompt && {
+        systemInstruction: { parts: [{ text: enhancedInput.systemPrompt }] }
       }
     };
     if (process.env.DEBUG_PROVIDERS) {
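
On the native Gemini API the system prompt travels as a top-level systemInstruction with a parts array. With a prompt set, the relevant slice of the request body becomes (illustrative values):

    const body = {
      generationConfig: { responseMimeType: 'application/json' },
      // Merged in only when enhancedInput.systemPrompt is truthy
      systemInstruction: { parts: [{ text: 'You are an invoice-extraction assistant.' }] }
    };
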
@@ -1307,7 +1379,7 @@ var GoogleProvider = class {
       console.log("[GoogleProvider] Using via:", this.config.via, "Checking:", this.config.via === "openrouter");
     }
     if (this.config.via === "openrouter") {
-      const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning);
+      const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
       response = await fetchWithTimeout3("https://openrouter.ai/api/v1/chat/completions", {
         method: "POST",
         headers: {
@@ -1349,10 +1421,12 @@ var GoogleProvider = class {
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
       const reasoning = message?.reasoning;
       const reasoning_details = message?.reasoning_details;
+      const cacheReadInputTokens = data.usage?.cached_tokens;
       content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1364,7 +1438,9 @@ var GoogleProvider = class {
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "google" from "google/gemini-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -1376,11 +1452,13 @@ var GoogleProvider = class {
       inputTokens = data.usageMetadata?.promptTokenCount;
       outputTokens = data.usageMetadata?.candidatesTokenCount;
       costUSD = this.calculateCost(data.usageMetadata);
+      const cacheReadInputTokens = data.usageMetadata?.cachedContentTokenCount;
       const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
       const reasoning = thinkingPart?.text;
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1392,7 +1470,9 @@ var GoogleProvider = class {
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "google" from "google/gemini-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details: reasoning ? [{
@@ -1419,8 +1499,11 @@ var GoogleProvider = class {
       thinking_budget
     };
   }
-  translateToOpenRouterFormat(contents, mode, max_tokens, reasoning) {
+  translateToOpenRouterFormat(contents, mode, max_tokens, reasoning, systemPrompt) {
     const messages = [];
+    if (systemPrompt) {
+      messages.push({ role: "system", content: systemPrompt });
+    }
     for (const content of contents) {
       if (content.role === "user") {
         const messageContent = [];
@@ -1732,18 +1815,23 @@ var XAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel4(this.config.model, "xai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens: data.usage?.prompt_tokens,
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "x-ai" from "x-ai/grok-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -1763,6 +1851,10 @@ var XAIProvider = class {
     return Object.keys(config).length > 0 ? config : void 0;
   }
   async buildMessages(input) {
+    const messages = [];
+    if (input.systemPrompt) {
+      messages.push({ role: "system", content: input.systemPrompt });
+    }
     const content = [];
     if (input.text) {
       content.push({ type: "text", text: input.text });
@@ -1803,7 +1895,8 @@ var XAIProvider = class {
         });
       }
     }
-    return [{ role: "user", content }];
+    messages.push({ role: "user", content });
+    return messages;
   }
   /**
    * Extract base64 data from a data URL or return as-is if already raw base64
@@ -2877,6 +2970,7 @@ export {
   buildOutputFormatPrompt,
   buildSchemaPromptSection,
   buildSourcesPrompt,
+  calculateCacheSavings,
   combineSchemaAndUserPrompt,
   combineSchemaUserAndDerivedPrompts,
   compareNativeVsOpenRouter,