@doclo/providers-llm 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +42 -1
- package/dist/index.js +118 -24
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
@@ -32,6 +32,26 @@ interface ResourceLimits {
    */
  maxJsonDepth?: number;
 }
+/** Caching configuration for prompt caching */
+interface CachingConfig {
+  /**
+   * Enable/disable prompt caching.
+   * Default varies by provider:
+   * - Anthropic: false (cache writes cost 1.25x-2x, opt-in)
+   * - OpenAI/Google/XAI/DeepSeek: true (automatic, free)
+   */
+  enabled?: boolean;
+  /**
+   * Cache TTL for providers that support it (Anthropic only).
+   * - '5m': 5-minute TTL, cache writes cost 1.25x (default)
+   * - '1h': 1-hour TTL, cache writes cost 2x
+   *
+   * Break-even: ~1.4 reads/write (5m) or ~2.2 reads/write (1h).
+   * For high-frequency flows (100+ docs/hr with same schema), caching
+   * is almost always cost-effective despite the write cost.
+   */
+  ttl?: '5m' | '1h';
+}
 /** Provider configuration */
 interface ProviderConfig {
   provider: ProviderType;
@@ -40,6 +60,8 @@ interface ProviderConfig {
   apiKey: string;
   baseUrl?: string;
   limits?: ResourceLimits;
+  /** Optional caching configuration for prompt caching */
+  caching?: CachingConfig;
 }
 /** Fallback configuration */
 interface FallbackConfig {
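
Taken together, the new field slots into a provider config as sketched below. This is a minimal illustration, not the package's documented usage: it assumes a model field alongside the ones shown (the provider classes read this.config.model), and the provider value, model id, and key handling are placeholders.

import type { ProviderConfig } from '@doclo/providers-llm';

// Opting in to Anthropic prompt caching. Field semantics per the
// CachingConfig declaration above; concrete values are illustrative.
const config: ProviderConfig = {
  provider: 'anthropic',
  model: 'anthropic/claude-...',              // placeholder model id
  apiKey: process.env.ANTHROPIC_API_KEY ?? '',
  caching: {
    enabled: true, // Anthropic is opt-in: cache writes cost 1.25x-2x
    ttl: '5m'      // default TTL; '1h' doubles the write cost
  }
};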
@@ -67,6 +89,8 @@ interface MultimodalInput {
   text?: string;
   images?: ImageInput[];
   pdfs?: PDFInput[];
+  /** Optional system message (text-only, prepended to conversation) */
+  systemPrompt?: string;
 }
 /** Response metrics */
 interface ResponseMetrics {
@@ -77,8 +101,12 @@ interface ResponseMetrics {
   attemptNumber: number;
   provider: string;
   model: string;
+  /** Tokens written to cache (Anthropic only - costs 1.25x-2x) */
   cacheCreationInputTokens?: number;
+  /** Tokens read from cache (all providers - significant cost savings) */
   cacheReadInputTokens?: number;
+  /** Calculated cache savings percentage (0-100) based on provider discount rates */
+  cacheSavingsPercent?: number;
   httpStatusCode?: number;
   httpMethod?: string;
   httpUrl?: string;
@@ -248,6 +276,19 @@ interface CircuitBreakerState {
   lastFailureTime?: number;
   isOpen: boolean;
 }
+/**
+ * Calculate the cache savings percentage based on provider discount rates.
+ *
+ * @param provider - The provider name (e.g., 'anthropic', 'openai', 'google')
+ * @param inputTokens - Total input tokens in the request
+ * @param cacheReadTokens - Tokens read from cache
+ * @returns Savings percentage (0-100) or undefined if not calculable
+ *
+ * @example
+ * // 1000 input tokens, 800 from cache, using Anthropic (90% discount)
+ * calculateCacheSavings('anthropic', 1000, 800) // => 72 (72% savings)
+ */
+declare function calculateCacheSavings(provider: string, inputTokens: number | undefined, cacheReadTokens: number | undefined): number | undefined;
 
 /**
  * Internal JSON Schema representation for schema translation.
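
The @example checks out against the implementation shipped in index.js below, which computes round(cacheReadTokens / inputTokens × discountRate × 100). A few spot checks, assuming the function is imported from the package (it joins the public export list in this release):

import { calculateCacheSavings } from '@doclo/providers-llm';

// Anthropic rate 0.9: round(800 / 1000 * 0.9 * 100) = 72
calculateCacheSavings('anthropic', 1000, 800); // => 72
// OpenAI rate 0.5: round(0.8 * 0.5 * 100) = 40
calculateCacheSavings('openai', 1000, 800);    // => 40
// Zero or missing input tokens is not calculable
calculateCacheSavings('openai', 0, 800);       // => undefined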
@@ -1688,4 +1729,4 @@ declare function createVLMProvider(config: {
  */
 declare function buildLLMProvider(config: FallbackConfig): VLMProvider;
 
-export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
+export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CachingConfig, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, calculateCacheSavings, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
package/dist/index.js
CHANGED
@@ -11,6 +11,31 @@ import {
   formatSchemaForPrompt
 } from "./chunk-7YPJIWRM.js";
 
+// src/types.ts
+var CACHE_DISCOUNT_RATES = {
+  anthropic: 0.9,
+  // 90% discount on cached reads (0.1x price)
+  openai: 0.5,
+  // 50% discount
+  google: 0.75,
+  // 75% discount (0.25x price)
+  "x-ai": 0.75,
+  // 75% discount (Grok)
+  xai: 0.75,
+  // alias
+  deepseek: 0.9
+  // 90% discount
+};
+function calculateCacheSavings(provider, inputTokens, cacheReadTokens) {
+  if (!inputTokens || !cacheReadTokens || inputTokens === 0) {
+    return void 0;
+  }
+  const normalizedProvider = provider.includes("/") ? provider.split("/")[0] : provider;
+  const discountRate = CACHE_DISCOUNT_RATES[normalizedProvider.toLowerCase()] ?? 0.5;
+  const savingsPercent = Math.round(cacheReadTokens / inputTokens * discountRate * 100);
+  return Math.min(savingsPercent, 100);
+}
+
 // src/schema-translator.ts
 import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
 var SchemaTranslator = class {
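
Two details of this helper are easy to miss: a model-qualified provider string is reduced to its prefix before the table lookup, and providers absent from the table fall back to a conservative 0.5 rate. Illustrative calls (the model suffix is made up):

calculateCacheSavings('anthropic/claude-...', 1000, 800); // prefix 'anthropic' -> rate 0.9 -> 72
calculateCacheSavings('mistral', 1000, 800);              // unknown provider -> rate 0.5 -> 40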
@@ -470,18 +495,23 @@ var OpenAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel(this.config.model, "openai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "openai" from "openai/gpt-4...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -501,6 +531,10 @@ var OpenAIProvider = class {
     return Object.keys(config).length > 0 ? config : void 0;
   }
   buildMessages(input) {
+    const messages = [];
+    if (input.systemPrompt) {
+      messages.push({ role: "system", content: input.systemPrompt });
+    }
     const content = [];
     if (input.text) {
       content.push({ type: "text", text: input.text });
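
Together with the user-message push added at the end of buildMessages (next hunk), a call that carries a systemPrompt now produces a standard chat-completions array. A sketch with assumed inputs:

// buildMessages({ systemPrompt: 'You extract invoices.', text: 'Parse this.' })
// now evaluates to:
const expected = [
  { role: 'system', content: 'You extract invoices.' },
  { role: 'user', content: [{ type: 'text', text: 'Parse this.' }] }
];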
@@ -541,7 +575,8 @@ var OpenAIProvider = class {
       });
     }
   }
-
+    messages.push({ role: "user", content });
+    return messages;
   }
   /**
    * Extract base64 data from a data URL or return as-is if already raw base64
@@ -642,7 +677,9 @@ var AnthropicProvider = class {
     const requestBody = {
       model: this.config.model,
       max_tokens: params.max_tokens || 4096,
-      messages
+      messages,
+      // Native Anthropic API uses separate system parameter (text-only)
+      ...enhancedInput.systemPrompt && { system: enhancedInput.systemPrompt }
     };
     if (mode === "relaxed") {
       requestBody.messages.push({
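
On the native Anthropic path the system prompt never enters the messages array; it is spread into the request body as the API's top-level system parameter, and only when set. Sketch of the resulting body shape (values illustrative):

const anthropicBodyShape = {
  model: 'claude-...',             // placeholder
  max_tokens: 4096,
  messages: [],                    // user/assistant turns only
  system: 'You extract invoices.'  // omitted entirely when no systemPrompt
};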
@@ -687,7 +724,7 @@ var AnthropicProvider = class {
     let costUSD;
     if (this.config.via === "openrouter") {
       const useNewStructuredOutputs2 = this.supportsNewStructuredOutputs();
-      const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning);
+      const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter request body (messages):");
         console.log(JSON.stringify(openRouterRequest.messages, null, 2));
@@ -740,8 +777,8 @@ var AnthropicProvider = class {
       inputTokens = data.usage?.prompt_tokens;
       outputTokens = data.usage?.completion_tokens;
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
-      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens;
-      const cacheReadInputTokens = data.usage?.cache_read_input_tokens;
+      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens ?? data.usage?.prompt_tokens_details?.cache_write_tokens;
+      const cacheReadInputTokens = data.usage?.cache_read_input_tokens ?? data.usage?.prompt_tokens_details?.cached_tokens;
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter usage response:", JSON.stringify(data.usage, null, 2));
         console.log("[AnthropicProvider] Extracted costUSD:", costUSD);
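
The ?? chains make this path tolerant of both usage shapes OpenRouter may relay: Anthropic-native snake_case counters and OpenAI-style prompt_tokens_details. Both forms, with made-up token counts (field names as read by the code above):

// Anthropic-native usage payload
const usageA = { cache_creation_input_tokens: 1200, cache_read_input_tokens: 0 };
// OpenAI-style usage payload
const usageB = { prompt_tokens_details: { cache_write_tokens: 1200, cached_tokens: 0 } };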
@@ -750,6 +787,7 @@ var AnthropicProvider = class {
     }
     const latencyMs = Date.now() - startTime;
     const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
     return {
       json: cleanJson,
@@ -764,7 +802,8 @@ var AnthropicProvider = class {
         // Base provider (e.g., "anthropic" from "anthropic/claude-...")
         model: this.config.model,
         cacheCreationInputTokens,
-        cacheReadInputTokens
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -873,11 +912,24 @@ var AnthropicProvider = class {
       budget_tokens
     };
   }
-  translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning) {
+  translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning, systemPrompt) {
     const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
+    const jsonInstructions = mode === "strict" ? "You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text.";
+    const systemContent = systemPrompt ? `${systemPrompt}
+
+${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}`;
     const systemMessage = {
       role: "system",
-      content:
+      content: cachingEnabled ? [{
+        type: "text",
+        text: systemContent,
+        cache_control: {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        }
+      }] : systemContent
     };
     const messageArray = [systemMessage, ...messages];
     const requestBody = {
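
When caching is enabled on the OpenRouter path, the system message is promoted from a plain string to a single content block so it can carry cache_control. The resulting shape, with the prompt text shortened:

const cachedSystemMessage = {
  role: 'system',
  content: [{
    type: 'text',
    text: 'You are a data extraction assistant. ...', // shortened
    cache_control: { type: 'ephemeral', ttl: '1h' }   // ttl key present only for '1h'
  }]
};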
@@ -1044,22 +1096,38 @@ var AnthropicProvider = class {
         });
       }
     }
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
     if (hasMedia) {
       const textContent = input.text || "Extract the requested information from the document.";
       if (process.env.DEBUG_PROVIDERS) {
-        console.log("[AnthropicProvider.buildMessages] Adding text block with cache_control");
+        console.log("[AnthropicProvider.buildMessages] Adding text block" + (cachingEnabled ? " with cache_control" : ""));
         console.log("  textContent:", textContent);
+        console.log("  cachingEnabled:", cachingEnabled);
       }
-
+      const textBlock = {
         type: "text",
-        text: textContent
-
-
+        text: textContent
+      };
+      if (cachingEnabled) {
+        textBlock.cache_control = {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        };
+      }
+      content.push(textBlock);
     } else if (input.text) {
-
+      const textBlock = {
         type: "text",
         text: input.text
-      }
+      };
+      if (cachingEnabled) {
+        textBlock.cache_control = {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        };
+      }
+      content.push(textBlock);
     }
   } else {
     if (input.text) {
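
On the native path, cache_control now lands on the user text block only when caching is enabled (the old debug message suggests it was previously unconditional). The built block, sketched:

const textBlock = {
  type: 'text',
  text: 'Extract the requested information from the document.',
  cache_control: { type: 'ephemeral' } // gains ttl: '1h' when configured
};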
@@ -1291,6 +1359,10 @@ var GoogleProvider = class {
       // Use JSON mode without responseSchema - schema is already in the prompt via combineSchemaAndUserPrompt.
       // See: https://ubaidullahmomer.medium.com/why-google-geminis-response-schema-isn-t-ready-for-complex-json-46f35c3aaaea
       responseMimeType: "application/json"
+    },
+    // Native Gemini API uses systemInstruction with parts array (text-only)
+    ...enhancedInput.systemPrompt && {
+      systemInstruction: { parts: [{ text: enhancedInput.systemPrompt }] }
     }
   };
   if (process.env.DEBUG_PROVIDERS) {
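
On the native Gemini path the prompt is spread in as a top-level systemInstruction with a parts array, next to the JSON-mode generation settings. A fragment of the resulting request body; the name of the enclosing config block is not visible in this hunk and is assumed from the Gemini API, and the text is illustrative:

const geminiBodyFragment = {
  generationConfig: { responseMimeType: 'application/json' },       // block name assumed
  systemInstruction: { parts: [{ text: 'You extract invoices.' }] }
};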
@@ -1307,7 +1379,7 @@ var GoogleProvider = class {
       console.log("[GoogleProvider] Using via:", this.config.via, "Checking:", this.config.via === "openrouter");
     }
     if (this.config.via === "openrouter") {
-      const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning);
+      const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
       response = await fetchWithTimeout3("https://openrouter.ai/api/v1/chat/completions", {
         method: "POST",
         headers: {
@@ -1349,10 +1421,12 @@ var GoogleProvider = class {
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
       const reasoning = message?.reasoning;
       const reasoning_details = message?.reasoning_details;
+      const cacheReadInputTokens = data.usage?.cached_tokens;
       content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1364,7 +1438,9 @@ var GoogleProvider = class {
           attemptNumber: 1,
           provider: baseProvider,
           // Base provider (e.g., "google" from "google/gemini-...")
-          model: this.config.model
+          model: this.config.model,
+          cacheReadInputTokens,
+          cacheSavingsPercent
         },
         reasoning,
         reasoning_details,
@@ -1376,11 +1452,13 @@ var GoogleProvider = class {
       inputTokens = data.usageMetadata?.promptTokenCount;
       outputTokens = data.usageMetadata?.candidatesTokenCount;
       costUSD = this.calculateCost(data.usageMetadata);
+      const cacheReadInputTokens = data.usageMetadata?.cachedContentTokenCount;
       const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
       const reasoning = thinkingPart?.text;
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1392,7 +1470,9 @@ var GoogleProvider = class {
           attemptNumber: 1,
           provider: baseProvider,
           // Base provider (e.g., "google" from "google/gemini-...")
-          model: this.config.model
+          model: this.config.model,
+          cacheReadInputTokens,
+          cacheSavingsPercent
         },
         reasoning,
         reasoning_details: reasoning ? [{
@@ -1419,8 +1499,11 @@ var GoogleProvider = class {
       thinking_budget
     };
   }
-  translateToOpenRouterFormat(contents, mode, max_tokens, reasoning) {
+  translateToOpenRouterFormat(contents, mode, max_tokens, reasoning, systemPrompt) {
     const messages = [];
+    if (systemPrompt) {
+      messages.push({ role: "system", content: systemPrompt });
+    }
     for (const content of contents) {
       if (content.role === "user") {
         const messageContent = [];
@@ -1732,18 +1815,23 @@ var XAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel4(this.config.model, "xai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "x-ai" from "x-ai/grok-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -1763,6 +1851,10 @@ var XAIProvider = class {
     return Object.keys(config).length > 0 ? config : void 0;
   }
   async buildMessages(input) {
+    const messages = [];
+    if (input.systemPrompt) {
+      messages.push({ role: "system", content: input.systemPrompt });
+    }
     const content = [];
     if (input.text) {
       content.push({ type: "text", text: input.text });
@@ -1803,7 +1895,8 @@ var XAIProvider = class {
       });
     }
   }
-
+    messages.push({ role: "user", content });
+    return messages;
   }
   /**
    * Extract base64 data from a data URL or return as-is if already raw base64
@@ -2877,6 +2970,7 @@ export {
   buildOutputFormatPrompt,
   buildSchemaPromptSection,
   buildSourcesPrompt,
+  calculateCacheSavings,
   combineSchemaAndUserPrompt,
   combineSchemaUserAndDerivedPrompts,
   compareNativeVsOpenRouter,