llmist 17.1.0 → 17.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.cjs CHANGED
@@ -9950,7 +9950,7 @@ var init_openai_compatible_provider = __esm({
9950
9950
  inputTokens: chunk.usage.prompt_tokens,
9951
9951
  outputTokens: chunk.usage.completion_tokens,
9952
9952
  totalTokens: chunk.usage.total_tokens,
9953
- cachedInputTokens: 0,
9953
+ cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
9954
9954
  reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
9955
9955
  } : void 0;
9956
9956
  if (finishReason || usage) {
@@ -11931,7 +11931,7 @@ var init_openrouter = __esm({
11931
11931
  high: "high",
11932
11932
  maximum: "xhigh"
11933
11933
  };
11934
- OpenRouterProvider = class extends OpenAICompatibleProvider {
11934
+ OpenRouterProvider = class _OpenRouterProvider extends OpenAICompatibleProvider {
11935
11935
  providerId = "openrouter";
11936
11936
  providerAlias = "or";
11937
11937
  constructor(client, config = {}) {
@@ -11941,8 +11941,10 @@ var init_openrouter = __esm({
11941
11941
  return OPENROUTER_MODELS;
11942
11942
  }
11943
11943
  /**
11944
- * Override buildApiRequest to inject reasoning parameters.
11945
- * OpenRouter normalizes reasoning into the standard OpenAI format.
11944
+ * Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
11945
+ * OpenRouter normalizes reasoning into the standard OpenAI format,
11946
+ * and supports cache_control on message content blocks for both
11947
+ * Anthropic Claude and Google Gemini models.
11946
11948
  */
11947
11949
  buildApiRequest(options, descriptor, spec, messages) {
11948
11950
  const request = super.buildApiRequest(options, descriptor, spec, messages);
@@ -11952,8 +11954,49 @@ var init_openrouter = __esm({
11952
11954
  effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
11953
11955
  };
11954
11956
  }
11957
+ const cachingEnabled = options.caching?.enabled !== false;
11958
+ if (cachingEnabled) {
11959
+ this.injectCacheBreakpoints(request);
11960
+ }
11955
11961
  return request;
11956
11962
  }
11963
+ /** Shared `cache_control` marker ({ type: "ephemeral" }) attached to the last content block of a message. */
11964
+ static CACHE_CONTROL = { type: "ephemeral" };
11965
+ /**
11966
+ * Add cache_control breakpoints to the last system message and last user message.
11967
+ * This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
11968
+ *
11969
+ * Operates on the already-built request object. We cast through `unknown` because
11970
+ * OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
11971
+ * with the non-standard `cache_control` property.
11972
+ */
11973
+ injectCacheBreakpoints(request) {
11974
+ const msgs = request.messages;
11975
+ let lastSystemIdx = -1;
11976
+ let lastUserIdx = -1;
11977
+ for (let i = 0; i < msgs.length; i++) {
11978
+ if (msgs[i].role === "system") lastSystemIdx = i;
11979
+ if (msgs[i].role === "user") lastUserIdx = i;
11980
+ }
11981
+ if (lastSystemIdx >= 0) {
11982
+ msgs[lastSystemIdx].content = this.withCacheControl(msgs[lastSystemIdx].content);
11983
+ }
11984
+ if (lastUserIdx >= 0) {
11985
+ msgs[lastUserIdx].content = this.withCacheControl(msgs[lastUserIdx].content);
11986
+ }
11987
+ }
11988
+ /**
11989
+ * Return a new content array with cache_control on the last block.
11990
+ * String content is promoted to a single-element text block array.
11991
+ */
11992
+ withCacheControl(content) {
11993
+ if (typeof content === "string") {
11994
+ return [{ type: "text", text: content, cache_control: _OpenRouterProvider.CACHE_CONTROL }];
11995
+ }
11996
+ return content.map(
11997
+ (block, i) => i === content.length - 1 ? { ...block, cache_control: _OpenRouterProvider.CACHE_CONTROL } : block
11998
+ );
11999
+ }
11957
12000
  /**
11958
12001
  * Get custom headers for OpenRouter analytics.
11959
12002
  */