@doclo/providers-llm 0.1.10 → 0.1.11
- package/dist/index.d.ts +40 -1
- package/dist/index.js +88 -17
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.ts  CHANGED

@@ -32,6 +32,26 @@ interface ResourceLimits {
    */
   maxJsonDepth?: number;
 }
+/** Caching configuration for prompt caching */
+interface CachingConfig {
+  /**
+   * Enable/disable prompt caching.
+   * Default varies by provider:
+   * - Anthropic: false (cache writes cost 1.25x-2x, opt-in)
+   * - OpenAI/Google/XAI/DeepSeek: true (automatic, free)
+   */
+  enabled?: boolean;
+  /**
+   * Cache TTL for providers that support it (Anthropic only).
+   * - '5m': 5-minute TTL, cache writes cost 1.25x (default)
+   * - '1h': 1-hour TTL, cache writes cost 2x
+   *
+   * Break-even: ~1.4 reads/write (5m) or ~2.2 reads/write (1h).
+   * For high-frequency flows (100+ docs/hr with same schema), caching
+   * is almost always cost-effective despite the write cost.
+   */
+  ttl?: '5m' | '1h';
+}
 /** Provider configuration */
 interface ProviderConfig {
   provider: ProviderType;
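To make the break-even guidance in CachingConfig concrete: per the comments in the bundled source below, Anthropic bills cached reads at roughly 0.1x the normal input price, while cache writes bill at 1.25x ('5m') or 2x ('1h'). A small illustrative calculation (not part of the package) under those assumptions:

// Illustrative only: relative input cost of n identical prompts when the
// first request writes the cache and the remaining n-1 read from it.
const relativeCost = (n: number, writeMult: 1.25 | 2, readMult = 0.1): number =>
  writeMult + readMult * (n - 1);

relativeCost(2, 1.25); // 1.35 vs. 2.0 uncached -> '5m' already pays off on the 2nd request
relativeCost(3, 2);    // 2.20 vs. 3.0 uncached -> '1h' pays off around the 3rd request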
@@ -40,6 +60,8 @@ interface ProviderConfig {
   apiKey: string;
   baseUrl?: string;
   limits?: ResourceLimits;
+  /** Optional caching configuration for prompt caching */
+  caching?: CachingConfig;
 }
 /** Fallback configuration */
 interface FallbackConfig {
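A minimal usage sketch for the new field (hypothetical values; the other required ProviderConfig fields, such as the model, are elided):

// Sketch: opting in to Anthropic prompt caching. Partial<> because the
// required non-caching fields are omitted here.
const config: Partial<ProviderConfig> = {
  provider: 'anthropic',                  // assumes ProviderType includes 'anthropic'
  apiKey: process.env.ANTHROPIC_API_KEY ?? '',
  caching: { enabled: true, ttl: '1h' }   // opt-in: writes bill 2x, cached reads ~0.1x
};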
@@ -79,8 +101,12 @@ interface ResponseMetrics {
   attemptNumber: number;
   provider: string;
   model: string;
+  /** Tokens written to cache (Anthropic only - costs 1.25x-2x) */
   cacheCreationInputTokens?: number;
+  /** Tokens read from cache (all providers - significant cost savings) */
   cacheReadInputTokens?: number;
+  /** Calculated cache savings percentage (0-100) based on provider discount rates */
+  cacheSavingsPercent?: number;
   httpStatusCode?: number;
   httpMethod?: string;
   httpUrl?: string;
@@ -250,6 +276,19 @@ interface CircuitBreakerState {
   lastFailureTime?: number;
   isOpen: boolean;
 }
+/**
+ * Calculate the cache savings percentage based on provider discount rates.
+ *
+ * @param provider - The provider name (e.g., 'anthropic', 'openai', 'google')
+ * @param inputTokens - Total input tokens in the request
+ * @param cacheReadTokens - Tokens read from cache
+ * @returns Savings percentage (0-100) or undefined if not calculable
+ *
+ * @example
+ * // 1000 input tokens, 800 from cache, using Anthropic (90% discount)
+ * calculateCacheSavings('anthropic', 1000, 800) // => 72 (72% savings)
+ */
+declare function calculateCacheSavings(provider: string, inputTokens: number | undefined, cacheReadTokens: number | undefined): number | undefined;
 
 /**
  * Internal JSON Schema representation for schema translation.
@@ -1690,4 +1729,4 @@ declare function createVLMProvider(config: {
  */
 declare function buildLLMProvider(config: FallbackConfig): VLMProvider;
 
-export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
+export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CachingConfig, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, calculateCacheSavings, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
package/dist/index.js  CHANGED

@@ -11,6 +11,31 @@ import {
   formatSchemaForPrompt
 } from "./chunk-7YPJIWRM.js";
 
+// src/types.ts
+var CACHE_DISCOUNT_RATES = {
+  anthropic: 0.9,
+  // 90% discount on cached reads (0.1x price)
+  openai: 0.5,
+  // 50% discount
+  google: 0.75,
+  // 75% discount (0.25x price)
+  "x-ai": 0.75,
+  // 75% discount (Grok)
+  xai: 0.75,
+  // alias
+  deepseek: 0.9
+  // 90% discount
+};
+function calculateCacheSavings(provider, inputTokens, cacheReadTokens) {
+  if (!inputTokens || !cacheReadTokens || inputTokens === 0) {
+    return void 0;
+  }
+  const normalizedProvider = provider.includes("/") ? provider.split("/")[0] : provider;
+  const discountRate = CACHE_DISCOUNT_RATES[normalizedProvider.toLowerCase()] ?? 0.5;
+  const savingsPercent = Math.round(cacheReadTokens / inputTokens * discountRate * 100);
+  return Math.min(savingsPercent, 100);
+}
+
 // src/schema-translator.ts
 import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
 var SchemaTranslator = class {
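A few worked calls, derived directly from the implementation above: the provider string is normalized past any "vendor/model" slash, unknown vendors fall back to a 0.5 rate, and the result is rounded and capped at 100. The 'somevendor' name is made up for illustration.

calculateCacheSavings('anthropic', 1000, 800);
// Math.round(800 / 1000 * 0.9 * 100) === 72

calculateCacheSavings('openai/gpt-4o', 1000, 800);
// normalizes to 'openai': Math.round(800 / 1000 * 0.5 * 100) === 40

calculateCacheSavings('somevendor', 1000, 1000);
// no CACHE_DISCOUNT_RATES entry -> 0.5 fallback: result is 50

calculateCacheSavings('anthropic', 0, 800); // undefined (guard clause)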
@@ -470,18 +495,23 @@ var OpenAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel(this.config.model, "openai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "openai" from "openai/gpt-4...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
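For reference, the cached_tokens counter read here is the one OpenAI-compatible chat completions expose under usage.prompt_tokens_details. An illustrative payload and the metrics it yields (values made up):

// Illustrative usage payload with a cache hit:
const usage = {
  prompt_tokens: 2000,
  completion_tokens: 150,
  prompt_tokens_details: { cached_tokens: 1792 }
};
// cacheReadInputTokens = 1792
// cacheSavingsPercent  = Math.round(1792 / 2000 * 0.5 * 100) = 45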
@@ -747,8 +777,8 @@ var AnthropicProvider = class {
       inputTokens = data.usage?.prompt_tokens;
       outputTokens = data.usage?.completion_tokens;
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
-      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens;
-      const cacheReadInputTokens = data.usage?.cache_read_input_tokens;
+      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens ?? data.usage?.prompt_tokens_details?.cache_write_tokens;
+      const cacheReadInputTokens = data.usage?.cache_read_input_tokens ?? data.usage?.prompt_tokens_details?.cached_tokens;
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter usage response:", JSON.stringify(data.usage, null, 2));
         console.log("[AnthropicProvider] Extracted costUSD:", costUSD);
@@ -757,6 +787,7 @@ var AnthropicProvider = class {
     }
     const latencyMs = Date.now() - startTime;
     const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
     return {
       json: cleanJson,
@@ -771,7 +802,8 @@ var AnthropicProvider = class {
         // Base provider (e.g., "anthropic" from "anthropic/claude-...")
         model: this.config.model,
         cacheCreationInputTokens,
-        cacheReadInputTokens
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -882,13 +914,22 @@ var AnthropicProvider = class {
   }
   translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning, systemPrompt) {
     const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
     const jsonInstructions = mode === "strict" ? "You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text.";
     const systemContent = systemPrompt ? `${systemPrompt}
 
 ${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}`;
     const systemMessage = {
       role: "system",
-      content: systemContent
+      content: cachingEnabled ? [{
+        type: "text",
+        text: systemContent,
+        cache_control: {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        }
+      }] : systemContent
     };
     const messageArray = [systemMessage, ...messages];
     const requestBody = {
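With caching enabled, the system prompt is wrapped in a content-block array so the cache_control marker has somewhere to attach; with caching off, content stays a plain string. Derived from the branch above, the shape produced for { enabled: true, ttl: '1h' } looks like this (text abbreviated):

const systemMessage = {
  role: "system",
  content: [{
    type: "text",
    text: "You are a data extraction assistant. ...",
    cache_control: { type: "ephemeral", ttl: "1h" }
  }]
};
// With the default '5m' TTL the conditional spread adds nothing, leaving
// cache_control as just { type: "ephemeral" }.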
@@ -1055,22 +1096,38 @@ ${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}
       });
     }
   }
+  const cachingEnabled = this.config.caching?.enabled === true;
+  const cacheTTL = this.config.caching?.ttl || "5m";
   if (hasMedia) {
     const textContent = input.text || "Extract the requested information from the document.";
     if (process.env.DEBUG_PROVIDERS) {
-      console.log("[AnthropicProvider.buildMessages] Adding text block with cache_control");
+      console.log("[AnthropicProvider.buildMessages] Adding text block" + (cachingEnabled ? " with cache_control" : ""));
       console.log("  textContent:", textContent);
+      console.log("  cachingEnabled:", cachingEnabled);
     }
-    content.push({
+    const textBlock = {
       type: "text",
-      text: textContent,
-      cache_control: { type: "ephemeral" }
-    });
+      text: textContent
+    };
+    if (cachingEnabled) {
+      textBlock.cache_control = {
+        type: "ephemeral",
+        ...cacheTTL === "1h" && { ttl: "1h" }
+      };
+    }
+    content.push(textBlock);
   } else if (input.text) {
-    content.push({
+    const textBlock = {
       type: "text",
       text: input.text
-    });
+    };
+    if (cachingEnabled) {
+      textBlock.cache_control = {
+        type: "ephemeral",
+        ...cacheTTL === "1h" && { ttl: "1h" }
+      };
+    }
+    content.push(textBlock);
   }
 } else {
   if (input.text) {
@@ -1364,10 +1421,12 @@ var GoogleProvider = class {
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
       const reasoning = message?.reasoning;
       const reasoning_details = message?.reasoning_details;
+      const cacheReadInputTokens = data.usage?.cached_tokens;
       content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1379,7 +1438,9 @@ var GoogleProvider = class {
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "google" from "google/gemini-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -1391,11 +1452,13 @@ var GoogleProvider = class {
       inputTokens = data.usageMetadata?.promptTokenCount;
       outputTokens = data.usageMetadata?.candidatesTokenCount;
       costUSD = this.calculateCost(data.usageMetadata);
+      const cacheReadInputTokens = data.usageMetadata?.cachedContentTokenCount;
       const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
       const reasoning = thinkingPart?.text;
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
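On the native Gemini path, cached tokens are reported via usageMetadata.cachedContentTokenCount. An illustrative payload (values made up) and the resulting savings at the 0.75 rate for google:

const usageMetadata = {
  promptTokenCount: 4000,
  candidatesTokenCount: 200,
  cachedContentTokenCount: 3000
};
// cacheSavingsPercent = Math.round(3000 / 4000 * 0.75 * 100) = 56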
@@ -1407,7 +1470,9 @@ var GoogleProvider = class {
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "google" from "google/gemini-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details: reasoning ? [{
@@ -1750,18 +1815,23 @@ var XAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel4(this.config.model, "xai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "x-ai" from "x-ai/grok-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -2900,6 +2970,7 @@ export {
   buildOutputFormatPrompt,
   buildSchemaPromptSection,
   buildSourcesPrompt,
+  calculateCacheSavings,
   combineSchemaAndUserPrompt,
   combineSchemaUserAndDerivedPrompts,
   compareNativeVsOpenRouter,
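Taken together, callers can now read cache effectiveness off every response's metrics, or recompute it with the newly exported helper. A sketch, assuming an LLMResponse obtained from any provider in this package (how the response is produced is elided here):

import { calculateCacheSavings } from '@doclo/providers-llm';

// Only the metrics fields relevant to caching are declared in this sketch.
declare const response: {
  metrics: {
    provider: string;
    inputTokens?: number;
    cacheReadInputTokens?: number;
    cacheSavingsPercent?: number;
  };
};

const { provider, inputTokens, cacheReadInputTokens, cacheSavingsPercent } = response.metrics;
console.log(`cache savings: ${cacheSavingsPercent ?? 0}%`);
// Recomputing manually gives the same number:
console.log(calculateCacheSavings(provider, inputTokens, cacheReadInputTokens));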