llmist 17.1.0 → 17.2.1

This diff shows the content changes between publicly released versions of the package, as published to the supported registries; it is provided for informational purposes only.
package/dist/index.d.cts CHANGED
@@ -9779,10 +9779,28 @@ declare class OpenRouterProvider extends OpenAICompatibleProvider<OpenRouterConf
     constructor(client: OpenAI, config?: OpenRouterConfig);
     getModelSpecs(): ModelSpec[];
     /**
-     * Override buildApiRequest to inject reasoning parameters.
-     * OpenRouter normalizes reasoning into the standard OpenAI format.
+     * Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
+     * OpenRouter normalizes reasoning into the standard OpenAI format,
+     * and supports cache_control on message content blocks for both
+     * Anthropic Claude and Google Gemini models.
      */
     protected buildApiRequest(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec: ModelSpec | undefined, messages: LLMMessage[]): Parameters<OpenAI["chat"]["completions"]["create"]>[0];
+    /** Minimal shape for messages in the already-built OpenAI-compatible request. */
+    private static readonly CACHE_CONTROL;
+    /**
+     * Add cache_control breakpoints to the last system message and last user message.
+     * This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
+     *
+     * Operates on the already-built request object. We cast through `unknown` because
+     * OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
+     * with the non-standard `cache_control` property.
+     */
+    private injectCacheBreakpoints;
+    /**
+     * Return a new content array with cache_control on the last block.
+     * String content is promoted to a single-element text block array.
+     */
+    private withCacheControl;
     /**
      * Get custom headers for OpenRouter analytics.
      */
package/dist/index.d.ts CHANGED
@@ -9779,10 +9779,28 @@ declare class OpenRouterProvider extends OpenAICompatibleProvider<OpenRouterConf
     constructor(client: OpenAI, config?: OpenRouterConfig);
     getModelSpecs(): ModelSpec[];
     /**
-     * Override buildApiRequest to inject reasoning parameters.
-     * OpenRouter normalizes reasoning into the standard OpenAI format.
+     * Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
+     * OpenRouter normalizes reasoning into the standard OpenAI format,
+     * and supports cache_control on message content blocks for both
+     * Anthropic Claude and Google Gemini models.
      */
     protected buildApiRequest(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec: ModelSpec | undefined, messages: LLMMessage[]): Parameters<OpenAI["chat"]["completions"]["create"]>[0];
+    /** Minimal shape for messages in the already-built OpenAI-compatible request. */
+    private static readonly CACHE_CONTROL;
+    /**
+     * Add cache_control breakpoints to the last system message and last user message.
+     * This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
+     *
+     * Operates on the already-built request object. We cast through `unknown` because
+     * OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
+     * with the non-standard `cache_control` property.
+     */
+    private injectCacheBreakpoints;
+    /**
+     * Return a new content array with cache_control on the last block.
+     * String content is promoted to a single-element text block array.
+     */
+    private withCacheControl;
     /**
      * Get custom headers for OpenRouter analytics.
      */
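
Both declaration files (the CJS `.d.cts` and ESM `.d.ts` builds) gain the same surface. For orientation before the implementation diff below: a minimal sketch of a message after breakpoint injection, assuming OpenRouter's documented `cache_control` block format. The message text is illustrative; only the `{ type: "ephemeral" }` marker and the text-block shape come from the implementation.

    // Sketch: a system message after cache breakpoint injection (illustrative text).
    const cachedSystemMessage = {
      role: "system",
      content: [
        {
          type: "text",
          text: "You are a helpful assistant.",
          // Non-standard property that OpenRouter forwards for Anthropic/Gemini prompt caching.
          cache_control: { type: "ephemeral" },
        },
      ],
    };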
package/dist/index.js CHANGED
@@ -9942,7 +9942,7 @@ var init_openai_compatible_provider = __esm({
           inputTokens: chunk.usage.prompt_tokens,
           outputTokens: chunk.usage.completion_tokens,
           totalTokens: chunk.usage.total_tokens,
-          cachedInputTokens: 0,
+          cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
           reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
         } : void 0;
         if (finishReason || usage) {
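
This one-line fix stops hard-coding `cachedInputTokens` to 0: the provider now surfaces the cached-prefix count that OpenAI-compatible usage payloads report under `prompt_tokens_details.cached_tokens`. A sketch with illustrative numbers:

    // Illustrative usage details from a final streaming chunk.
    const usageDetails = {
      prompt_tokens_details: { cached_tokens: 1024 },
      completion_tokens_details: { reasoning_tokens: 40 },
    };
    // Normalized usage now reports cache hits instead of a constant 0.
    const cachedInputTokens = usageDetails?.prompt_tokens_details?.cached_tokens ?? 0; // => 1024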
@@ -11923,7 +11923,7 @@ var init_openrouter = __esm({
       high: "high",
       maximum: "xhigh"
     };
-    OpenRouterProvider = class extends OpenAICompatibleProvider {
+    OpenRouterProvider = class _OpenRouterProvider extends OpenAICompatibleProvider {
       providerId = "openrouter";
       providerAlias = "or";
       constructor(client, config = {}) {
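
The only change here is the inner class name. A plausible reading, not stated in the diff: the bundler emits a named class expression because the class body now references itself (the new static `CACHE_CONTROL` is read as `_OpenRouterProvider.CACHE_CONTROL` in `withCacheControl` below), and the inner binding stays valid even if the outer `OpenRouterProvider` variable is reassigned. Sketch of the pattern:

    // Named class expression: the inner binding is read-only inside the body.
    const Example = class _Example {
      static CACHE_CONTROL = { type: "ephemeral" };
      tag() {
        return _Example.CACHE_CONTROL; // resolves even if `Example` is reassigned
      }
    };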
@@ -11933,8 +11933,10 @@ var init_openrouter = __esm({
         return OPENROUTER_MODELS;
       }
       /**
-       * Override buildApiRequest to inject reasoning parameters.
-       * OpenRouter normalizes reasoning into the standard OpenAI format.
+       * Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
+       * OpenRouter normalizes reasoning into the standard OpenAI format,
+       * and supports cache_control on message content blocks for both
+       * Anthropic Claude and Google Gemini models.
        */
       buildApiRequest(options, descriptor, spec, messages) {
         const request = super.buildApiRequest(options, descriptor, spec, messages);
@@ -11944,8 +11946,49 @@ var init_openrouter = __esm({
             effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
           };
         }
+        const cachingEnabled = options.caching?.enabled !== false;
+        if (cachingEnabled) {
+          this.injectCacheBreakpoints(request);
+        }
         return request;
       }
+      /** Minimal shape for messages in the already-built OpenAI-compatible request. */
+      static CACHE_CONTROL = { type: "ephemeral" };
+      /**
+       * Add cache_control breakpoints to the last system message and last user message.
+       * This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
+       *
+       * Operates on the already-built request object. We cast through `unknown` because
+       * OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
+       * with the non-standard `cache_control` property.
+       */
+      injectCacheBreakpoints(request) {
+        const msgs = request.messages;
+        let lastSystemIdx = -1;
+        let lastUserIdx = -1;
+        for (let i = 0; i < msgs.length; i++) {
+          if (msgs[i].role === "system") lastSystemIdx = i;
+          if (msgs[i].role === "user") lastUserIdx = i;
+        }
+        if (lastSystemIdx >= 0) {
+          msgs[lastSystemIdx].content = this.withCacheControl(msgs[lastSystemIdx].content);
+        }
+        if (lastUserIdx >= 0) {
+          msgs[lastUserIdx].content = this.withCacheControl(msgs[lastUserIdx].content);
+        }
+      }
+      /**
+       * Return a new content array with cache_control on the last block.
+       * String content is promoted to a single-element text block array.
+       */
+      withCacheControl(content) {
+        if (typeof content === "string") {
+          return [{ type: "text", text: content, cache_control: _OpenRouterProvider.CACHE_CONTROL }];
+        }
+        return content.map(
+          (block, i) => i === content.length - 1 ? { ...block, cache_control: _OpenRouterProvider.CACHE_CONTROL } : block
+        );
+      }
       /**
        * Get custom headers for OpenRouter analytics.
        */
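
Two behaviors in this last hunk are easy to miss. Caching is opt-out: `options.caching?.enabled !== false` runs injection by default, so a caller disables it by passing `caching: { enabled: false }` in the generation options (only that option path appears in the diff; any surrounding call site is hypothetical). And `withCacheControl` tags only the final content block, promoting plain strings to a single text block first. A standalone sketch mirroring the transform above:

    // Standalone TypeScript sketch of the withCacheControl transform shown in the diff.
    type Block = { type: string; text?: string; cache_control?: { type: string } };
    const CACHE_CONTROL = { type: "ephemeral" };

    function withCacheControl(content: string | Block[]): Block[] {
      if (typeof content === "string") {
        // Plain strings are promoted to a single tagged text block.
        return [{ type: "text", text: content, cache_control: CACHE_CONTROL }];
      }
      // Only the last block receives the cache breakpoint.
      return content.map((block, i) =>
        i === content.length - 1 ? { ...block, cache_control: CACHE_CONTROL } : block
      );
    }

    withCacheControl("Summarize the report.");
    // => [{ type: "text", text: "Summarize the report.", cache_control: { type: "ephemeral" } }]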