@doclo/providers-llm 0.1.9 → 0.1.11
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.d.ts +42 -1
- package/dist/index.js +118 -24
- package/dist/index.js.map +1 -1
- package/package.json +2 -2
package/dist/index.d.ts
CHANGED
@@ -32,6 +32,26 @@ interface ResourceLimits {
    */
  maxJsonDepth?: number;
 }
+/** Caching configuration for prompt caching */
+interface CachingConfig {
+  /**
+   * Enable/disable prompt caching.
+   * Default varies by provider:
+   * - Anthropic: false (cache writes cost 1.25x-2x, opt-in)
+   * - OpenAI/Google/XAI/DeepSeek: true (automatic, free)
+   */
+  enabled?: boolean;
+  /**
+   * Cache TTL for providers that support it (Anthropic only).
+   * - '5m': 5-minute TTL, cache writes cost 1.25x (default)
+   * - '1h': 1-hour TTL, cache writes cost 2x
+   *
+   * Break-even: ~1.4 reads/write (5m) or ~2.2 reads/write (1h).
+   * For high-frequency flows (100+ docs/hr with same schema), caching
+   * is almost always cost-effective despite the write cost.
+   */
+  ttl?: '5m' | '1h';
+}
 /** Provider configuration */
 interface ProviderConfig {
   provider: ProviderType;
@@ -40,6 +60,8 @@ interface ProviderConfig {
   apiKey: string;
   baseUrl?: string;
   limits?: ResourceLimits;
+  /** Optional caching configuration for prompt caching */
+  caching?: CachingConfig;
 }
 /** Fallback configuration */
 interface FallbackConfig {
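
Taken together, the new field slots into a provider config as sketched below. This is a minimal illustration, not the package's documented usage: it assumes a model field alongside the ones shown (the provider classes read this.config.model), and the provider value, model id, and key handling are placeholders.

import type { ProviderConfig } from '@doclo/providers-llm';

// Opting in to Anthropic prompt caching. Field semantics per the
// CachingConfig declaration above; concrete values are illustrative.
const config: ProviderConfig = {
  provider: 'anthropic',
  model: 'anthropic/claude-...',              // placeholder model id
  apiKey: process.env.ANTHROPIC_API_KEY ?? '',
  caching: {
    enabled: true, // Anthropic is opt-in: cache writes cost 1.25x-2x
    ttl: '5m'      // default TTL; '1h' doubles the write cost
  }
};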
@@ -67,6 +89,8 @@ interface MultimodalInput {
   text?: string;
   images?: ImageInput[];
   pdfs?: PDFInput[];
+  /** Optional system message (text-only, prepended to conversation) */
+  systemPrompt?: string;
 }
 /** Response metrics */
 interface ResponseMetrics {
@@ -77,8 +101,12 @@ interface ResponseMetrics {
   attemptNumber: number;
   provider: string;
   model: string;
+  /** Tokens written to cache (Anthropic only - costs 1.25x-2x) */
   cacheCreationInputTokens?: number;
+  /** Tokens read from cache (all providers - significant cost savings) */
   cacheReadInputTokens?: number;
+  /** Calculated cache savings percentage (0-100) based on provider discount rates */
+  cacheSavingsPercent?: number;
   httpStatusCode?: number;
   httpMethod?: string;
   httpUrl?: string;
@@ -248,6 +276,19 @@ interface CircuitBreakerState {
   lastFailureTime?: number;
   isOpen: boolean;
 }
+/**
+ * Calculate the cache savings percentage based on provider discount rates.
+ *
+ * @param provider - The provider name (e.g., 'anthropic', 'openai', 'google')
+ * @param inputTokens - Total input tokens in the request
+ * @param cacheReadTokens - Tokens read from cache
+ * @returns Savings percentage (0-100) or undefined if not calculable
+ *
+ * @example
+ * // 1000 input tokens, 800 from cache, using Anthropic (90% discount)
+ * calculateCacheSavings('anthropic', 1000, 800) // => 72 (72% savings)
+ */
+declare function calculateCacheSavings(provider: string, inputTokens: number | undefined, cacheReadTokens: number | undefined): number | undefined;
 
 /**
  * Internal JSON Schema representation for schema translation.
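
The @example checks out against the implementation shipped in index.js below, which computes round(cacheReadTokens / inputTokens × discountRate × 100). A few spot checks, assuming the function is imported from the package (it joins the public export list in this release):

import { calculateCacheSavings } from '@doclo/providers-llm';

// Anthropic rate 0.9: round(800 / 1000 * 0.9 * 100) = 72
calculateCacheSavings('anthropic', 1000, 800); // => 72
// OpenAI rate 0.5: round(0.8 * 0.5 * 100) = 40
calculateCacheSavings('openai', 1000, 800);    // => 40
// Zero or missing input tokens is not calculable
calculateCacheSavings('openai', 0, 800);       // => undefined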
@@ -1688,4 +1729,4 @@ declare function createVLMProvider(config: {
  */
 declare function buildLLMProvider(config: FallbackConfig): VLMProvider;
 
-export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
+export { type AccessMethod, AnthropicProvider, BLOCK_TYPES, type BlockType, type CachingConfig, type CircuitBreakerState, type DocumentBlock, type FallbackConfig, FallbackManager, GEMINI_BBOX_EXTRACTION_PROMPT, type GeminiBoundingBoxBlock, GoogleProvider, type ImageInput, type JsonMode, type LLMDerivedOptions, type LLMExtractedMetadata, type LLMModelMetadata, type LLMProvider, type LLMProviderMetadata, type LLMProviderType, type LLMResponse, type MultimodalInput, type NodeType, type NormalizedBBox, OpenAIProvider, type PDFInput, PROVIDER_METADATA, type ProviderCapabilities, type ProviderConfig, type ProviderFactory, type ProviderInputType, type ProviderType, type ReasoningConfig, type ReasoningDetail, type ResourceLimits, type ResponseMetrics, SUPPORTED_IMAGE_TYPES, SchemaTranslator, type SupportedImageMimeType, type UnifiedSchema, XAIProvider, adaptToCoreLLMProvider, buildBlockClassificationPrompt, buildConfidencePrompt, buildLLMDerivedFeaturesPrompt, buildLLMProvider, buildLanguageHintsPrompt, buildOutputFormatPrompt, buildSchemaPromptSection, buildSourcesPrompt, calculateCacheSavings, combineSchemaAndUserPrompt, combineSchemaUserAndDerivedPrompts, compareNativeVsOpenRouter, convertGeminiBlocksToDocumentBlocks, createProviderFromRegistry, createVLMProvider, estimateCost, extractMetadataFromResponse, formatSchemaForPrompt, geminiBoundingBoxSchema, getCheapestProvider, getProvidersForNode, isImageTypeSupported, isProviderCompatibleWithNode, normalizeGeminiBBox, providerRegistry, registerProvider, shouldExtractMetadata, supportsPDFsInline, toGeminiBBox };
package/dist/index.js
CHANGED
@@ -11,6 +11,31 @@ import {
   formatSchemaForPrompt
 } from "./chunk-7YPJIWRM.js";
 
+// src/types.ts
+var CACHE_DISCOUNT_RATES = {
+  anthropic: 0.9,
+  // 90% discount on cached reads (0.1x price)
+  openai: 0.5,
+  // 50% discount
+  google: 0.75,
+  // 75% discount (0.25x price)
+  "x-ai": 0.75,
+  // 75% discount (Grok)
+  xai: 0.75,
+  // alias
+  deepseek: 0.9
+  // 90% discount
+};
+function calculateCacheSavings(provider, inputTokens, cacheReadTokens) {
+  if (!inputTokens || !cacheReadTokens || inputTokens === 0) {
+    return void 0;
+  }
+  const normalizedProvider = provider.includes("/") ? provider.split("/")[0] : provider;
+  const discountRate = CACHE_DISCOUNT_RATES[normalizedProvider.toLowerCase()] ?? 0.5;
+  const savingsPercent = Math.round(cacheReadTokens / inputTokens * discountRate * 100);
+  return Math.min(savingsPercent, 100);
+}
+
 // src/schema-translator.ts
 import { zodToJsonSchema } from "@alcyone-labs/zod-to-json-schema";
 var SchemaTranslator = class {
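
Two details of this helper are easy to miss: a model-qualified provider string is reduced to its prefix before the table lookup, and providers absent from the table fall back to a conservative 0.5 rate. Illustrative calls (the model suffix is made up):

calculateCacheSavings('anthropic/claude-...', 1000, 800); // prefix 'anthropic' -> rate 0.9 -> 72
calculateCacheSavings('mistral', 1000, 800);              // unknown provider -> rate 0.5 -> 40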
@@ -470,18 +495,23 @@ var OpenAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel(this.config.model, "openai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "openai" from "openai/gpt-4...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -501,6 +531,10 @@ var OpenAIProvider = class {
     return Object.keys(config).length > 0 ? config : void 0;
   }
   buildMessages(input) {
+    const messages = [];
+    if (input.systemPrompt) {
+      messages.push({ role: "system", content: input.systemPrompt });
+    }
     const content = [];
     if (input.text) {
       content.push({ type: "text", text: input.text });
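
Together with the user-message push added at the end of buildMessages (next hunk), a call that carries a systemPrompt now produces a standard chat-completions array. A sketch with assumed inputs:

// buildMessages({ systemPrompt: 'You extract invoices.', text: 'Parse this.' })
// now evaluates to:
const expected = [
  { role: 'system', content: 'You extract invoices.' },
  { role: 'user', content: [{ type: 'text', text: 'Parse this.' }] }
];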
@@ -541,7 +575,8 @@ var OpenAIProvider = class {
       });
     }
   }
-
+    messages.push({ role: "user", content });
+    return messages;
   }
   /**
    * Extract base64 data from a data URL or return as-is if already raw base64
@@ -642,7 +677,9 @@ var AnthropicProvider = class {
     const requestBody = {
       model: this.config.model,
       max_tokens: params.max_tokens || 4096,
-      messages
+      messages,
+      // Native Anthropic API uses separate system parameter (text-only)
+      ...enhancedInput.systemPrompt && { system: enhancedInput.systemPrompt }
     };
     if (mode === "relaxed") {
       requestBody.messages.push({
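
On the native Anthropic path the system prompt never enters the messages array; it is spread into the request body as the API's top-level system parameter, and only when set. Sketch of the resulting body shape (values illustrative):

const anthropicBodyShape = {
  model: 'claude-...',             // placeholder
  max_tokens: 4096,
  messages: [],                    // user/assistant turns only
  system: 'You extract invoices.'  // omitted entirely when no systemPrompt
};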
@@ -687,7 +724,7 @@ var AnthropicProvider = class {
     let costUSD;
     if (this.config.via === "openrouter") {
       const useNewStructuredOutputs2 = this.supportsNewStructuredOutputs();
-      const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning);
+      const openRouterRequest = this.translateToOpenRouterFormat(messages, params.schema, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter request body (messages):");
         console.log(JSON.stringify(openRouterRequest.messages, null, 2));
@@ -740,8 +777,8 @@ var AnthropicProvider = class {
       inputTokens = data.usage?.prompt_tokens;
       outputTokens = data.usage?.completion_tokens;
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
-      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens;
-      const cacheReadInputTokens = data.usage?.cache_read_input_tokens;
+      const cacheCreationInputTokens = data.usage?.cache_creation_input_tokens ?? data.usage?.prompt_tokens_details?.cache_write_tokens;
+      const cacheReadInputTokens = data.usage?.cache_read_input_tokens ?? data.usage?.prompt_tokens_details?.cached_tokens;
       if (process.env.DEBUG_PROVIDERS) {
         console.log("[AnthropicProvider] OpenRouter usage response:", JSON.stringify(data.usage, null, 2));
         console.log("[AnthropicProvider] Extracted costUSD:", costUSD);
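
The ?? chains make this path tolerant of both usage shapes OpenRouter may relay: Anthropic-native snake_case counters and OpenAI-style prompt_tokens_details. Both forms, with made-up token counts (field names as read by the code above):

// Anthropic-native usage payload
const usageA = { cache_creation_input_tokens: 1200, cache_read_input_tokens: 0 };
// OpenAI-style usage payload
const usageB = { prompt_tokens_details: { cache_write_tokens: 1200, cached_tokens: 0 } };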
@@ -750,6 +787,7 @@ var AnthropicProvider = class {
     }
     const latencyMs = Date.now() - startTime;
     const baseProvider = extractProviderFromModel2(this.config.model, "anthropic");
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     const { json: cleanJson, metadata } = extractMetadata ? extractMetadataFromResponse(parsed) : { json: parsed, metadata: void 0 };
     return {
       json: cleanJson,
@@ -764,7 +802,8 @@ var AnthropicProvider = class {
         // Base provider (e.g., "anthropic" from "anthropic/claude-...")
         model: this.config.model,
         cacheCreationInputTokens,
-        cacheReadInputTokens
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -873,11 +912,24 @@ var AnthropicProvider = class {
       budget_tokens
     };
   }
-  translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning) {
+  translateToOpenRouterFormat(messages, schema, mode, max_tokens, reasoning, systemPrompt) {
     const useNewStructuredOutputs = this.supportsNewStructuredOutputs();
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
+    const jsonInstructions = mode === "strict" ? "You must respond ONLY with valid JSON that matches the provided schema. Do not include any markdown formatting, explanations, or additional text." : "You must respond ONLY with valid JSON. Do not include any markdown formatting, explanations, or additional text.";
+    const systemContent = systemPrompt ? `${systemPrompt}
+
+${jsonInstructions}` : `You are a data extraction assistant. ${jsonInstructions}`;
     const systemMessage = {
       role: "system",
-      content:
+      content: cachingEnabled ? [{
+        type: "text",
+        text: systemContent,
+        cache_control: {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        }
+      }] : systemContent
     };
     const messageArray = [systemMessage, ...messages];
     const requestBody = {
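
When caching is enabled on the OpenRouter path, the system message is promoted from a plain string to a single content block so it can carry cache_control. The resulting shape, with the prompt text shortened:

const cachedSystemMessage = {
  role: 'system',
  content: [{
    type: 'text',
    text: 'You are a data extraction assistant. ...', // shortened
    cache_control: { type: 'ephemeral', ttl: '1h' }   // ttl key present only for '1h'
  }]
};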
@@ -1044,22 +1096,38 @@ var AnthropicProvider = class {
         });
       }
     }
+    const cachingEnabled = this.config.caching?.enabled === true;
+    const cacheTTL = this.config.caching?.ttl || "5m";
     if (hasMedia) {
       const textContent = input.text || "Extract the requested information from the document.";
       if (process.env.DEBUG_PROVIDERS) {
-        console.log("[AnthropicProvider.buildMessages] Adding text block with cache_control");
+        console.log("[AnthropicProvider.buildMessages] Adding text block" + (cachingEnabled ? " with cache_control" : ""));
         console.log("  textContent:", textContent);
+        console.log("  cachingEnabled:", cachingEnabled);
       }
-
+      const textBlock = {
         type: "text",
-        text: textContent
-
-
+        text: textContent
+      };
+      if (cachingEnabled) {
+        textBlock.cache_control = {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        };
+      }
+      content.push(textBlock);
     } else if (input.text) {
-
+      const textBlock = {
         type: "text",
         text: input.text
-      }
+      };
+      if (cachingEnabled) {
+        textBlock.cache_control = {
+          type: "ephemeral",
+          ...cacheTTL === "1h" && { ttl: "1h" }
+        };
+      }
+      content.push(textBlock);
     }
   } else {
     if (input.text) {
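
On the native path, cache_control now lands on the user text block only when caching is enabled (the old debug message suggests it was previously unconditional). The built block, sketched:

const textBlock = {
  type: 'text',
  text: 'Extract the requested information from the document.',
  cache_control: { type: 'ephemeral' } // gains ttl: '1h' when configured
};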
@@ -1291,6 +1359,10 @@ var GoogleProvider = class {
       // Use JSON mode without responseSchema - schema is already in the prompt via combineSchemaAndUserPrompt.
       // See: https://ubaidullahmomer.medium.com/why-google-geminis-response-schema-isn-t-ready-for-complex-json-46f35c3aaaea
       responseMimeType: "application/json"
+    },
+    // Native Gemini API uses systemInstruction with parts array (text-only)
+    ...enhancedInput.systemPrompt && {
+      systemInstruction: { parts: [{ text: enhancedInput.systemPrompt }] }
     }
   };
   if (process.env.DEBUG_PROVIDERS) {
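
On the native Gemini path the prompt is spread in as a top-level systemInstruction with a parts array, next to the JSON-mode generation settings. A fragment of the resulting request body; the name of the enclosing config block is not visible in this hunk and is assumed from the Gemini API, and the text is illustrative:

const geminiBodyFragment = {
  generationConfig: { responseMimeType: 'application/json' },       // block name assumed
  systemInstruction: { parts: [{ text: 'You extract invoices.' }] }
};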
@@ -1307,7 +1379,7 @@ var GoogleProvider = class {
       console.log("[GoogleProvider] Using via:", this.config.via, "Checking:", this.config.via === "openrouter");
     }
     if (this.config.via === "openrouter") {
-      const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning);
+      const openRouterRequest = this.translateToOpenRouterFormat(contents, mode, params.max_tokens, params.reasoning, enhancedInput.systemPrompt);
       response = await fetchWithTimeout3("https://openrouter.ai/api/v1/chat/completions", {
         method: "POST",
         headers: {
@@ -1349,10 +1421,12 @@ var GoogleProvider = class {
       costUSD = data.usage?.total_cost ?? data.usage?.cost;
       const reasoning = message?.reasoning;
       const reasoning_details = message?.reasoning_details;
+      const cacheReadInputTokens = data.usage?.cached_tokens;
       content = content.replace(/^```json\s*\n?/, "").replace(/\n?```\s*$/, "").trim();
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1364,7 +1438,9 @@ var GoogleProvider = class {
           attemptNumber: 1,
           provider: baseProvider,
           // Base provider (e.g., "google" from "google/gemini-...")
-          model: this.config.model
+          model: this.config.model,
+          cacheReadInputTokens,
+          cacheSavingsPercent
         },
         reasoning,
         reasoning_details,
@@ -1376,11 +1452,13 @@ var GoogleProvider = class {
       inputTokens = data.usageMetadata?.promptTokenCount;
       outputTokens = data.usageMetadata?.candidatesTokenCount;
       costUSD = this.calculateCost(data.usageMetadata);
+      const cacheReadInputTokens = data.usageMetadata?.cachedContentTokenCount;
       const thinkingPart = candidate?.content?.parts?.find((part) => part.thought === true);
       const reasoning = thinkingPart?.text;
       const rawParsed = safeJsonParse3(content);
       const { json: parsed, metadata } = extractMetadata ? extractMetadataFromResponse(rawParsed) : { json: rawParsed, metadata: void 0 };
       const baseProvider = extractProviderFromModel3(this.config.model, "google");
+      const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
       return {
         json: parsed,
         rawText: content,
@@ -1392,7 +1470,9 @@ var GoogleProvider = class {
           attemptNumber: 1,
           provider: baseProvider,
           // Base provider (e.g., "google" from "google/gemini-...")
-          model: this.config.model
+          model: this.config.model,
+          cacheReadInputTokens,
+          cacheSavingsPercent
         },
         reasoning,
         reasoning_details: reasoning ? [{
@@ -1419,8 +1499,11 @@ var GoogleProvider = class {
       thinking_budget
     };
   }
-  translateToOpenRouterFormat(contents, mode, max_tokens, reasoning) {
+  translateToOpenRouterFormat(contents, mode, max_tokens, reasoning, systemPrompt) {
     const messages = [];
+    if (systemPrompt) {
+      messages.push({ role: "system", content: systemPrompt });
+    }
     for (const content of contents) {
       if (content.role === "user") {
         const messageContent = [];
@@ -1732,18 +1815,23 @@ var XAIProvider = class {
       costUSD = this.calculateCost(data.usage);
     }
     const baseProvider = extractProviderFromModel4(this.config.model, "xai");
+    const cacheReadInputTokens = data.usage?.prompt_tokens_details?.cached_tokens;
+    const inputTokens = data.usage?.prompt_tokens;
+    const cacheSavingsPercent = calculateCacheSavings(baseProvider, inputTokens, cacheReadInputTokens);
     return {
       json: parsed,
       rawText: content,
       metrics: {
         costUSD,
-        inputTokens
+        inputTokens,
         outputTokens: data.usage?.completion_tokens,
         latencyMs,
         attemptNumber: 1,
         provider: baseProvider,
         // Base provider (e.g., "x-ai" from "x-ai/grok-...")
-        model: this.config.model
+        model: this.config.model,
+        cacheReadInputTokens,
+        cacheSavingsPercent
       },
       reasoning,
       reasoning_details,
@@ -1763,6 +1851,10 @@ var XAIProvider = class {
     return Object.keys(config).length > 0 ? config : void 0;
   }
   async buildMessages(input) {
+    const messages = [];
+    if (input.systemPrompt) {
+      messages.push({ role: "system", content: input.systemPrompt });
+    }
     const content = [];
     if (input.text) {
       content.push({ type: "text", text: input.text });
@@ -1803,7 +1895,8 @@ var XAIProvider = class {
       });
     }
   }
-
+    messages.push({ role: "user", content });
+    return messages;
   }
   /**
    * Extract base64 data from a data URL or return as-is if already raw base64
@@ -2877,6 +2970,7 @@ export {
   buildOutputFormatPrompt,
   buildSchemaPromptSection,
   buildSourcesPrompt,
+  calculateCacheSavings,
   combineSchemaAndUserPrompt,
   combineSchemaUserAndDerivedPrompts,
   compareNativeVsOpenRouter,