llmist 16.2.5 → 17.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +144 -4
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +20 -2
- package/dist/index.d.ts +20 -2
- package/dist/index.js +144 -4
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.d.cts
CHANGED
|
@@ -9779,10 +9779,28 @@ declare class OpenRouterProvider extends OpenAICompatibleProvider<OpenRouterConf
|
|
|
9779
9779
|
constructor(client: OpenAI, config?: OpenRouterConfig);
|
|
9780
9780
|
getModelSpecs(): ModelSpec[];
|
|
9781
9781
|
/**
|
|
9782
|
-
* Override buildApiRequest to inject reasoning parameters.
|
|
9783
|
-
* OpenRouter normalizes reasoning into the standard OpenAI format
|
|
9782
|
+
* Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
|
|
9783
|
+
* OpenRouter normalizes reasoning into the standard OpenAI format,
|
|
9784
|
+
* and supports cache_control on message content blocks for both
|
|
9785
|
+
* Anthropic Claude and Google Gemini models.
|
|
9784
9786
|
*/
|
|
9785
9787
|
protected buildApiRequest(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec: ModelSpec | undefined, messages: LLMMessage[]): Parameters<OpenAI["chat"]["completions"]["create"]>[0];
|
|
9788
|
+
/** Shared `cache_control` marker (`{ type: "ephemeral" }`) that withCacheControl attaches to content blocks. */
|
|
9789
|
+
private static readonly CACHE_CONTROL;
|
|
9790
|
+
/**
|
|
9791
|
+
* Add cache_control breakpoints to the last system message and last user message.
|
|
9792
|
+
* This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
|
|
9793
|
+
*
|
|
9794
|
+
* Operates on the already-built request object. We cast through `unknown` because
|
|
9795
|
+
* OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
|
|
9796
|
+
* with the non-standard `cache_control` property.
|
|
9797
|
+
*/
|
|
9798
|
+
private injectCacheBreakpoints;
|
|
9799
|
+
/**
|
|
9800
|
+
* Return a new content array with cache_control on the last block.
|
|
9801
|
+
* String content is promoted to a single-element text block array.
|
|
9802
|
+
*/
|
|
9803
|
+
private withCacheControl;
|
|
9786
9804
|
/**
|
|
9787
9805
|
* Get custom headers for OpenRouter analytics.
|
|
9788
9806
|
*/
|
package/dist/index.d.ts
CHANGED
|
@@ -9779,10 +9779,28 @@ declare class OpenRouterProvider extends OpenAICompatibleProvider<OpenRouterConf
|
|
|
9779
9779
|
constructor(client: OpenAI, config?: OpenRouterConfig);
|
|
9780
9780
|
getModelSpecs(): ModelSpec[];
|
|
9781
9781
|
/**
|
|
9782
|
-
* Override buildApiRequest to inject reasoning parameters.
|
|
9783
|
-
* OpenRouter normalizes reasoning into the standard OpenAI format
|
|
9782
|
+
* Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
|
|
9783
|
+
* OpenRouter normalizes reasoning into the standard OpenAI format,
|
|
9784
|
+
* and supports cache_control on message content blocks for both
|
|
9785
|
+
* Anthropic Claude and Google Gemini models.
|
|
9784
9786
|
*/
|
|
9785
9787
|
protected buildApiRequest(options: LLMGenerationOptions, descriptor: ModelDescriptor, spec: ModelSpec | undefined, messages: LLMMessage[]): Parameters<OpenAI["chat"]["completions"]["create"]>[0];
|
|
9788
|
+
/** Shared `cache_control` marker (`{ type: "ephemeral" }`) that withCacheControl attaches to content blocks. */
|
|
9789
|
+
private static readonly CACHE_CONTROL;
|
|
9790
|
+
/**
|
|
9791
|
+
* Add cache_control breakpoints to the last system message and last user message.
|
|
9792
|
+
* This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
|
|
9793
|
+
*
|
|
9794
|
+
* Operates on the already-built request object. We cast through `unknown` because
|
|
9795
|
+
* OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
|
|
9796
|
+
* with the non-standard `cache_control` property.
|
|
9797
|
+
*/
|
|
9798
|
+
private injectCacheBreakpoints;
|
|
9799
|
+
/**
|
|
9800
|
+
* Return a new content array with cache_control on the last block.
|
|
9801
|
+
* String content is promoted to a single-element text block array.
|
|
9802
|
+
*/
|
|
9803
|
+
private withCacheControl;
|
|
9786
9804
|
/**
|
|
9787
9805
|
* Get custom headers for OpenRouter analytics.
|
|
9788
9806
|
*/
|
package/dist/index.js
CHANGED
|
@@ -9942,7 +9942,7 @@ var init_openai_compatible_provider = __esm({
|
|
|
9942
9942
|
inputTokens: chunk.usage.prompt_tokens,
|
|
9943
9943
|
outputTokens: chunk.usage.completion_tokens,
|
|
9944
9944
|
totalTokens: chunk.usage.total_tokens,
|
|
9945
|
-
cachedInputTokens: 0,
|
|
9945
|
+
cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
|
|
9946
9946
|
reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
|
|
9947
9947
|
} : void 0;
|
|
9948
9948
|
if (finishReason || usage) {
|
|
@@ -11501,6 +11501,103 @@ var init_openrouter_models = __esm({
|
|
|
11501
11501
|
}
|
|
11502
11502
|
},
|
|
11503
11503
|
// ============================================================
|
|
11504
|
+
// Google Gemini 3.1 Models (via OpenRouter)
|
|
11505
|
+
// ============================================================
|
|
11506
|
+
{
|
|
11507
|
+
provider: "openrouter",
|
|
11508
|
+
modelId: "google/gemini-3.1-pro-preview",
|
|
11509
|
+
displayName: "Gemini 3.1 Pro Preview (OpenRouter)",
|
|
11510
|
+
contextWindow: 1048576,
|
|
11511
|
+
maxOutputTokens: 65536,
|
|
11512
|
+
pricing: {
|
|
11513
|
+
input: 2,
|
|
11514
|
+
output: 12
|
|
11515
|
+
},
|
|
11516
|
+
knowledgeCutoff: "2025-01",
|
|
11517
|
+
features: {
|
|
11518
|
+
streaming: true,
|
|
11519
|
+
functionCalling: true,
|
|
11520
|
+
vision: true,
|
|
11521
|
+
reasoning: true,
|
|
11522
|
+
structuredOutputs: true
|
|
11523
|
+
},
|
|
11524
|
+
metadata: {
|
|
11525
|
+
family: "Gemini 3.1",
|
|
11526
|
+
releaseDate: "2026-03",
|
|
11527
|
+
notes: "Gemini 3.1 Pro Preview via OpenRouter. Frontier reasoning with enhanced software engineering performance."
|
|
11528
|
+
}
|
|
11529
|
+
},
|
|
11530
|
+
{
|
|
11531
|
+
provider: "openrouter",
|
|
11532
|
+
modelId: "google/gemini-3.1-pro-preview-customtools",
|
|
11533
|
+
displayName: "Gemini 3.1 Pro Preview Custom Tools (OpenRouter)",
|
|
11534
|
+
contextWindow: 1048576,
|
|
11535
|
+
maxOutputTokens: 65536,
|
|
11536
|
+
pricing: {
|
|
11537
|
+
input: 2,
|
|
11538
|
+
output: 12
|
|
11539
|
+
},
|
|
11540
|
+
knowledgeCutoff: "2025-01",
|
|
11541
|
+
features: {
|
|
11542
|
+
streaming: true,
|
|
11543
|
+
functionCalling: true,
|
|
11544
|
+
vision: true,
|
|
11545
|
+
reasoning: true,
|
|
11546
|
+
structuredOutputs: true
|
|
11547
|
+
},
|
|
11548
|
+
metadata: {
|
|
11549
|
+
family: "Gemini 3.1",
|
|
11550
|
+
releaseDate: "2026-03",
|
|
11551
|
+
notes: "Gemini 3.1 Pro Preview Custom Tools via OpenRouter. Improved tool selection to prevent overuse of general tools in agent workflows."
|
|
11552
|
+
}
|
|
11553
|
+
},
|
|
11554
|
+
{
|
|
11555
|
+
provider: "openrouter",
|
|
11556
|
+
modelId: "google/gemini-3.1-flash-lite-preview",
|
|
11557
|
+
displayName: "Gemini 3.1 Flash Lite Preview (OpenRouter)",
|
|
11558
|
+
contextWindow: 1048576,
|
|
11559
|
+
maxOutputTokens: 65536,
|
|
11560
|
+
pricing: {
|
|
11561
|
+
input: 0.25,
|
|
11562
|
+
output: 1.5
|
|
11563
|
+
},
|
|
11564
|
+
knowledgeCutoff: "2025-01",
|
|
11565
|
+
features: {
|
|
11566
|
+
streaming: true,
|
|
11567
|
+
functionCalling: true,
|
|
11568
|
+
vision: true,
|
|
11569
|
+
reasoning: true,
|
|
11570
|
+
structuredOutputs: true
|
|
11571
|
+
},
|
|
11572
|
+
metadata: {
|
|
11573
|
+
family: "Gemini 3.1",
|
|
11574
|
+
releaseDate: "2026-03",
|
|
11575
|
+
notes: "Gemini 3.1 Flash Lite Preview via OpenRouter. High-efficiency model with full thinking levels for cost/performance trade-offs."
|
|
11576
|
+
}
|
|
11577
|
+
},
|
|
11578
|
+
{
|
|
11579
|
+
provider: "openrouter",
|
|
11580
|
+
modelId: "google/gemini-3.1-flash-image-preview",
|
|
11581
|
+
displayName: "Gemini 3.1 Flash Image Preview (OpenRouter)",
|
|
11582
|
+
contextWindow: 65536,
|
|
11583
|
+
maxOutputTokens: 65536,
|
|
11584
|
+
pricing: {
|
|
11585
|
+
input: 0.5,
|
|
11586
|
+
output: 3
|
|
11587
|
+
},
|
|
11588
|
+
knowledgeCutoff: "2025-01",
|
|
11589
|
+
features: {
|
|
11590
|
+
streaming: true,
|
|
11591
|
+
functionCalling: false,
|
|
11592
|
+
vision: true
|
|
11593
|
+
},
|
|
11594
|
+
metadata: {
|
|
11595
|
+
family: "Gemini 3.1",
|
|
11596
|
+
releaseDate: "2026-03",
|
|
11597
|
+
notes: "Gemini 3.1 Flash Image Preview via OpenRouter. Pro-level image generation and editing at Flash speed."
|
|
11598
|
+
}
|
|
11599
|
+
},
|
|
11600
|
+
// ============================================================
|
|
11504
11601
|
// Meta Llama Models (via OpenRouter)
|
|
11505
11602
|
// ============================================================
|
|
11506
11603
|
{
|
|
@@ -11826,7 +11923,7 @@ var init_openrouter = __esm({
|
|
|
11826
11923
|
high: "high",
|
|
11827
11924
|
maximum: "xhigh"
|
|
11828
11925
|
};
|
|
11829
|
-
OpenRouterProvider = class extends OpenAICompatibleProvider {
|
|
11926
|
+
OpenRouterProvider = class _OpenRouterProvider extends OpenAICompatibleProvider {
|
|
11830
11927
|
providerId = "openrouter";
|
|
11831
11928
|
providerAlias = "or";
|
|
11832
11929
|
constructor(client, config = {}) {
|
|
@@ -11836,8 +11933,10 @@ var init_openrouter = __esm({
|
|
|
11836
11933
|
return OPENROUTER_MODELS;
|
|
11837
11934
|
}
|
|
11838
11935
|
/**
|
|
11839
|
-
* Override buildApiRequest to inject reasoning parameters.
|
|
11840
|
-
* OpenRouter normalizes reasoning into the standard OpenAI format
|
|
11936
|
+
* Override buildApiRequest to inject reasoning parameters and cache_control breakpoints.
|
|
11937
|
+
* OpenRouter normalizes reasoning into the standard OpenAI format,
|
|
11938
|
+
* and supports cache_control on message content blocks for both
|
|
11939
|
+
* Anthropic Claude and Google Gemini models.
|
|
11841
11940
|
*/
|
|
11842
11941
|
buildApiRequest(options, descriptor, spec, messages) {
|
|
11843
11942
|
const request = super.buildApiRequest(options, descriptor, spec, messages);
|
|
@@ -11847,8 +11946,49 @@ var init_openrouter = __esm({
|
|
|
11847
11946
|
effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
|
|
11848
11947
|
};
|
|
11849
11948
|
}
|
|
11949
|
+
const cachingEnabled = options.caching?.enabled !== false;
|
|
11950
|
+
if (cachingEnabled) {
|
|
11951
|
+
this.injectCacheBreakpoints(request);
|
|
11952
|
+
}
|
|
11850
11953
|
return request;
|
|
11851
11954
|
}
|
|
11955
|
+
/** Shared `cache_control` marker (`{ type: "ephemeral" }`) that withCacheControl attaches to content blocks. */
|
|
11956
|
+
static CACHE_CONTROL = { type: "ephemeral" };
|
|
11957
|
+
/**
|
|
11958
|
+
* Add cache_control breakpoints to the last system message and last user message.
|
|
11959
|
+
* This enables OpenRouter's prompt caching for supported providers (Anthropic, Gemini).
|
|
11960
|
+
*
|
|
11961
|
+
* Operates on the already-built request object. We cast through `unknown` because
|
|
11962
|
+
* OpenAI's `ChatCompletionMessageParam` union is too narrow to assign content arrays
|
|
11963
|
+
* with the non-standard `cache_control` property.
|
|
11964
|
+
*/
|
|
11965
|
+
injectCacheBreakpoints(request) {
|
|
11966
|
+
const msgs = request.messages;
|
|
11967
|
+
let lastSystemIdx = -1;
|
|
11968
|
+
let lastUserIdx = -1;
|
|
11969
|
+
for (let i = 0; i < msgs.length; i++) {
|
|
11970
|
+
if (msgs[i].role === "system") lastSystemIdx = i;
|
|
11971
|
+
if (msgs[i].role === "user") lastUserIdx = i;
|
|
11972
|
+
}
|
|
11973
|
+
if (lastSystemIdx >= 0) {
|
|
11974
|
+
msgs[lastSystemIdx].content = this.withCacheControl(msgs[lastSystemIdx].content);
|
|
11975
|
+
}
|
|
11976
|
+
if (lastUserIdx >= 0) {
|
|
11977
|
+
msgs[lastUserIdx].content = this.withCacheControl(msgs[lastUserIdx].content);
|
|
11978
|
+
}
|
|
11979
|
+
}
|
|
11980
|
+
/**
|
|
11981
|
+
* Return a new content array with cache_control on the last block.
|
|
11982
|
+
* String content is promoted to a single-element text block array.
|
|
11983
|
+
*/
|
|
11984
|
+
withCacheControl(content) {
|
|
11985
|
+
if (typeof content === "string") {
|
|
11986
|
+
return [{ type: "text", text: content, cache_control: _OpenRouterProvider.CACHE_CONTROL }];
|
|
11987
|
+
}
|
|
11988
|
+
return content.map(
|
|
11989
|
+
(block, i) => i === content.length - 1 ? { ...block, cache_control: _OpenRouterProvider.CACHE_CONTROL } : block
|
|
11990
|
+
);
|
|
11991
|
+
}
|
|
11852
11992
|
/**
|
|
11853
11993
|
* Get custom headers for OpenRouter analytics.
|
|
11854
11994
|
*/
|