llmist 17.1.0 → 17.2.1

This diff shows the content changes between publicly released versions of the package, as published to the supported registries; it is provided for informational purposes only.
package/dist/index.d.cts CHANGED
@@ -9779,10 +9779,28 @@ declare class OpenRouterProvider extends OpenAICompatibleProvider<OpenRouterConf
     constructor(client: OpenAI, config?: OpenRouterConfig);
     getModelSpecs(): ModelSpec[];
     /**
-     * Override buildApiRequest to inject reasoning parameters.
-     * OpenRouter normalizes reasoning into the standard OpenAI format.
+     * Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
+     * OpenRouter normalizes reasoning into the standard OpenAI format,
+     * and supports cache_control on message content blocks for both
+     * Anthropic Claude and Google Gemini models.
      */
     protected buildApiRequest(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec: ModelSpec | undefined, messages: LLMMessage[]): Parameters<OpenAI["chat"]["completions"]["create"]>[0];
+    /** Minimal shape for messages in the already-built OpenAI-compatible request. */
+    private static readonly CACHE_CONTROL;
+    /**
+     * Add cache_control breakpoints to the last system message and last user message.
+     * This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
+     *
+     * Operates on the already-built request object. We cast through `unknown` because
+     * OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
+     * with the non-standard `cache_control` property.
+     */
+    private injectCacheBreakpoints;
+    /**
+     * Return a new content array with cache_control on the last block.
+     * String content is promoted to a single-element text block array.
+     */
+    private withCacheControl;
     /**
      * Get custom headers for OpenRouter analytics.
      */
package/dist/index.d.ts CHANGED
@@ -9779,10 +9779,28 @@ declare class OpenRouterProvider extends OpenAICompatibleProvider<OpenRouterConf
     constructor(client: OpenAI, config?: OpenRouterConfig);
     getModelSpecs(): ModelSpec[];
     /**
-     * Override buildApiRequest to inject reasoning parameters.
-     * OpenRouter normalizes reasoning into the standard OpenAI format.
+     * Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
+     * OpenRouter normalizes reasoning into the standard OpenAI format,
+     * and supports cache_control on message content blocks for both
+     * Anthropic Claude and Google Gemini models.
      */
     protected buildApiRequest(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec: ModelSpec | undefined, messages: LLMMessage[]): Parameters<OpenAI["chat"]["completions"]["create"]>[0];
+    /** Minimal shape for messages in the already-built OpenAI-compatible request. */
+    private static readonly CACHE_CONTROL;
+    /**
+     * Add cache_control breakpoints to the last system message and last user message.
+     * This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
+     *
+     * Operates on the already-built request object. We cast through `unknown` because
+     * OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
+     * with the non-standard `cache_control` property.
+     */
+    private injectCacheBreakpoints;
+    /**
+     * Return a new content array with cache_control on the last block.
+     * String content is promoted to a single-element text block array.
+     */
+    private withCacheControl;
     /**
      * Get custom headers for OpenRouter analytics.
      */
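
Both declaration files (the CJS `.d.cts` and ESM `.d.ts` builds) gain the same surface. For orientation before the implementation diff below: a minimal sketch of a message after breakpoint injection, assuming OpenRouter's documented `cache_control` block format. The message text is illustrative; only the `{ type: "ephemeral" }` marker and the text-block shape come from the implementation.

    // Sketch: a system message after cache breakpoint injection (illustrative text).
    const cachedSystemMessage = {
      role: "system",
      content: [
        {
          type: "text",
          text: "You are a helpful assistant.",
          // Non-standard property that OpenRouter forwards for Anthropic/Gemini prompt caching.
          cache_control: { type: "ephemeral" },
        },
      ],
    };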
package/dist/index.js CHANGED
@@ -9942,7 +9942,7 @@ var init_openai_compatible_provider = __esm({
           inputTokens: chunk.usage.prompt_tokens,
           outputTokens: chunk.usage.completion_tokens,
           totalTokens: chunk.usage.total_tokens,
-          cachedInputTokens: 0,
+          cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
           reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
         } : void 0;
         if (finishReason || usage) {
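
This one-line fix stops hard-coding `cachedInputTokens` to 0: the provider now surfaces the cached-prefix count that OpenAI-compatible usage payloads report under `prompt_tokens_details.cached_tokens`. A sketch with illustrative numbers:

    // Illustrative usage details from a final streaming chunk.
    const usageDetails = {
      prompt_tokens_details: { cached_tokens: 1024 },
      completion_tokens_details: { reasoning_tokens: 40 },
    };
    // Normalized usage now reports cache hits instead of a constant 0.
    const cachedInputTokens = usageDetails?.prompt_tokens_details?.cached_tokens ?? 0; // => 1024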
@@ -11923,7 +11923,7 @@ var init_openrouter = __esm({
       high: "high",
       maximum: "xhigh"
     };
-    OpenRouterProvider = class extends OpenAICompatibleProvider {
+    OpenRouterProvider = class _OpenRouterProvider extends OpenAICompatibleProvider {
       providerId = "openrouter";
       providerAlias = "or";
       constructor(client, config = {}) {
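
The only change here is the inner class name. A plausible reading, not stated in the diff: the bundler emits a named class expression because the class body now references itself (the new static `CACHE_CONTROL` is read as `_OpenRouterProvider.CACHE_CONTROL` in `withCacheControl` below), and the inner binding stays valid even if the outer `OpenRouterProvider` variable is reassigned. Sketch of the pattern:

    // Named class expression: the inner binding is read-only inside the body.
    const Example = class _Example {
      static CACHE_CONTROL = { type: "ephemeral" };
      tag() {
        return _Example.CACHE_CONTROL; // resolves even if `Example` is reassigned
      }
    };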
@@ -11933,8 +11933,10 @@ var init_openrouter = __esm({
         return OPENROUTER_MODELS;
       }
       /**
-       * Override buildApiRequest to inject reasoning parameters.
-       * OpenRouter normalizes reasoning into the standard OpenAI format.
+       * Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
+       * OpenRouter normalizes reasoning into the standard OpenAI format,
+       * and supports cache_control on message content blocks for both
+       * Anthropic Claude and Google Gemini models.
        */
       buildApiRequest(options, descriptor, spec, messages) {
         const request = super.buildApiRequest(options, descriptor, spec, messages);
@@ -11944,8 +11946,49 @@ var init_openrouter = __esm({
             effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
           };
         }
+        const cachingEnabled = options.caching?.enabled !== false;
+        if (cachingEnabled) {
+          this.injectCacheBreakpoints(request);
+        }
         return request;
       }
+      /** Minimal shape for messages in the already-built OpenAI-compatible request. */
+      static CACHE_CONTROL = { type: "ephemeral" };
+      /**
+       * Add cache_control breakpoints to the last system message and last user message.
+       * This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
+       *
+       * Operates on the already-built request object. We cast through `unknown` because
+       * OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
+       * with the non-standard `cache_control` property.
+       */
+      injectCacheBreakpoints(request) {
+        const msgs = request.messages;
+        let lastSystemIdx = -1;
+        let lastUserIdx = -1;
+        for (let i = 0; i < msgs.length; i++) {
+          if (msgs[i].role === "system") lastSystemIdx = i;
+          if (msgs[i].role === "user") lastUserIdx = i;
+        }
+        if (lastSystemIdx >= 0) {
+          msgs[lastSystemIdx].content = this.withCacheControl(msgs[lastSystemIdx].content);
+        }
+        if (lastUserIdx >= 0) {
+          msgs[lastUserIdx].content = this.withCacheControl(msgs[lastUserIdx].content);
+        }
+      }
+      /**
+       * Return a new content array with cache_control on the last block.
+       * String content is promoted to a single-element text block array.
+       */
+      withCacheControl(content) {
+        if (typeof content === "string") {
+          return [{ type: "text", text: content, cache_control: _OpenRouterProvider.CACHE_CONTROL }];
+        }
+        return content.map(
+          (block, i) => i === content.length - 1 ? { ...block, cache_control: _OpenRouterProvider.CACHE_CONTROL } : block
+        );
+      }
       /**
        * Get custom headers for OpenRouter analytics.
        */
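
Two behaviors in this last hunk are easy to miss. Caching is opt-out: `options.caching?.enabled !== false` runs injection by default, so a caller disables it by passing `caching: { enabled: false }` in the generation options (only that option path appears in the diff; any surrounding call site is hypothetical). And `withCacheControl` tags only the final content block, promoting plain strings to a single text block first. A standalone sketch mirroring the transform above:

    // Standalone TypeScript sketch of the withCacheControl transform shown in the diff.
    type Block = { type: string; text?: string; cache_control?: { type: string } };
    const CACHE_CONTROL = { type: "ephemeral" };

    function withCacheControl(content: string | Block[]): Block[] {
      if (typeof content === "string") {
        // Plain strings are promoted to a single tagged text block.
        return [{ type: "text", text: content, cache_control: CACHE_CONTROL }];
      }
      // Only the last block receives the cache breakpoint.
      return content.map((block, i) =>
        i === content.length - 1 ? { ...block, cache_control: CACHE_CONTROL } : block
      );
    }

    withCacheControl("Summarize the report.");
    // => [{ type: "text", text: "Summarize the report.", cache_control: { type: "ephemeral" } }]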