llmist 15.12.0 → 15.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +629 -43
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1627 -1359
- package/dist/index.d.ts +1627 -1359
- package/dist/index.js +629 -43
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -229,7 +229,8 @@ var init_execution_tree = __esm({
|
|
|
229
229
|
response: llmNode.response,
|
|
230
230
|
usage: llmNode.usage,
|
|
231
231
|
finishReason: llmNode.finishReason,
|
|
232
|
-
cost: llmNode.cost
|
|
232
|
+
cost: llmNode.cost,
|
|
233
|
+
thinkingContent: params.thinkingContent
|
|
233
234
|
});
|
|
234
235
|
}
|
|
235
236
|
/**
|
|
@@ -4529,7 +4530,10 @@ var init_hook_presets = __esm({
|
|
|
4529
4530
|
const costEstimate = modelRegistry.estimateCost(
|
|
4530
4531
|
modelName,
|
|
4531
4532
|
ctx.usage.inputTokens,
|
|
4532
|
-
ctx.usage.outputTokens
|
|
4533
|
+
ctx.usage.outputTokens,
|
|
4534
|
+
ctx.usage.cachedInputTokens ?? 0,
|
|
4535
|
+
ctx.usage.cacheCreationInputTokens ?? 0,
|
|
4536
|
+
ctx.usage.reasoningTokens ?? 0
|
|
4533
4537
|
);
|
|
4534
4538
|
if (costEstimate) {
|
|
4535
4539
|
totalCost += costEstimate.totalCost;
|
|
@@ -5026,10 +5030,10 @@ var init_anthropic_models = __esm({
|
|
|
5026
5030
|
contextWindow: 2e5,
|
|
5027
5031
|
maxOutputTokens: 64e3,
|
|
5028
5032
|
pricing: {
|
|
5029
|
-
input:
|
|
5030
|
-
output:
|
|
5031
|
-
cachedInput: 0.
|
|
5032
|
-
cacheWriteInput: 1
|
|
5033
|
+
input: 1,
|
|
5034
|
+
output: 5,
|
|
5035
|
+
cachedInput: 0.1,
|
|
5036
|
+
cacheWriteInput: 1.25
|
|
5033
5037
|
},
|
|
5034
5038
|
knowledgeCutoff: "2025-02",
|
|
5035
5039
|
features: {
|
|
@@ -5225,10 +5229,10 @@ var init_anthropic_models = __esm({
|
|
|
5225
5229
|
contextWindow: 2e5,
|
|
5226
5230
|
maxOutputTokens: 64e3,
|
|
5227
5231
|
pricing: {
|
|
5228
|
-
input:
|
|
5229
|
-
output:
|
|
5230
|
-
cachedInput: 0.
|
|
5231
|
-
cacheWriteInput: 1
|
|
5232
|
+
input: 1,
|
|
5233
|
+
output: 5,
|
|
5234
|
+
cachedInput: 0.1,
|
|
5235
|
+
cacheWriteInput: 1.25
|
|
5232
5236
|
},
|
|
5233
5237
|
knowledgeCutoff: "2025-02",
|
|
5234
5238
|
features: {
|
|
@@ -5371,10 +5375,15 @@ var init_utils = __esm({
|
|
|
5371
5375
|
});
|
|
5372
5376
|
|
|
5373
5377
|
// src/providers/anthropic.ts
|
|
5378
|
+
/**
 * Translate the provider-agnostic reasoning options into the `thinking`
 * parameter of an Anthropic Messages request.
 *
 * Resolution order:
 *   1. reasoning disabled or absent -> `undefined` (no `thinking` key is sent)
 *   2. a finite numeric `budgetTokens` -> that value, truncated to an integer
 *      and raised to 1024 (the ANTHROPIC_EFFORT_BUDGET table marks 1024 as the
 *      minimum Anthropic allows)
 *   3. otherwise the budget mapped from `effort` (default "medium")
 *
 * @param {{enabled?: boolean, budgetTokens?: number, effort?: string}|undefined} reasoning
 *   Caller-supplied reasoning configuration.
 * @returns {{type: "enabled", budget_tokens: number}|undefined}
 *   Thinking configuration, or `undefined` when reasoning is not enabled.
 */
function resolveAnthropicThinking(reasoning) {
  if (!reasoning?.enabled) return void 0;
  const { budgetTokens, effort } = reasoning;
  // An explicit numeric budget always wins. Checking the type instead of
  // truthiness keeps an explicit 0 from silently falling back to the
  // effort-based default; it is clamped to the minimum instead.
  if (typeof budgetTokens === "number" && Number.isFinite(budgetTokens)) {
    return { type: "enabled", budget_tokens: Math.max(1024, Math.trunc(budgetTokens)) };
  }
  // Unknown effort names would otherwise produce `budget_tokens: undefined`;
  // fall back to the default tier so the request stays well-formed.
  const mapped = ANTHROPIC_EFFORT_BUDGET[effort ?? "medium"];
  const budget = typeof mapped === "number" ? mapped : ANTHROPIC_EFFORT_BUDGET.medium;
  return { type: "enabled", budget_tokens: budget };
}
|
|
5374
5383
|
function createAnthropicProviderFromEnv() {
|
|
5375
5384
|
return createProviderFromEnv("ANTHROPIC_API_KEY", import_sdk.default, AnthropicMessagesProvider);
|
|
5376
5385
|
}
|
|
5377
|
-
var import_sdk, AnthropicMessagesProvider;
|
|
5386
|
+
var import_sdk, ANTHROPIC_EFFORT_BUDGET, AnthropicMessagesProvider;
|
|
5378
5387
|
var init_anthropic = __esm({
|
|
5379
5388
|
"src/providers/anthropic.ts"() {
|
|
5380
5389
|
"use strict";
|
|
@@ -5384,6 +5393,14 @@ var init_anthropic = __esm({
|
|
|
5384
5393
|
init_base_provider();
|
|
5385
5394
|
init_constants2();
|
|
5386
5395
|
init_utils();
|
|
5396
|
+
ANTHROPIC_EFFORT_BUDGET = {
|
|
5397
|
+
none: 1024,
|
|
5398
|
+
// Minimum allowed by Anthropic
|
|
5399
|
+
low: 2048,
|
|
5400
|
+
medium: 8192,
|
|
5401
|
+
high: 16384,
|
|
5402
|
+
maximum: 32768
|
|
5403
|
+
};
|
|
5387
5404
|
AnthropicMessagesProvider = class extends BaseProviderAdapter {
|
|
5388
5405
|
providerId = "anthropic";
|
|
5389
5406
|
supports(descriptor) {
|
|
@@ -5415,12 +5432,13 @@ var init_anthropic = __esm({
|
|
|
5415
5432
|
);
|
|
5416
5433
|
}
|
|
5417
5434
|
buildApiRequest(options, descriptor, spec, messages) {
|
|
5435
|
+
const cachingEnabled = options.caching?.enabled !== false;
|
|
5418
5436
|
const systemMessages = messages.filter((message) => message.role === "system");
|
|
5419
5437
|
const system = systemMessages.length > 0 ? systemMessages.map((m, index) => ({
|
|
5420
5438
|
type: "text",
|
|
5421
5439
|
text: extractMessageText(m.content),
|
|
5422
|
-
// Add cache_control to the LAST system message block
|
|
5423
|
-
...index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
|
|
5440
|
+
// Add cache_control to the LAST system message block (only when caching is enabled)
|
|
5441
|
+
...cachingEnabled && index === systemMessages.length - 1 ? { cache_control: { type: "ephemeral" } } : {}
|
|
5424
5442
|
})) : void 0;
|
|
5425
5443
|
const nonSystemMessages = messages.filter(
|
|
5426
5444
|
(message) => message.role !== "system"
|
|
@@ -5433,19 +5451,22 @@ var init_anthropic = __esm({
|
|
|
5433
5451
|
role: message.role,
|
|
5434
5452
|
content: this.convertToAnthropicContent(
|
|
5435
5453
|
message.content,
|
|
5436
|
-
message.role === "user" && index === lastUserIndex
|
|
5454
|
+
cachingEnabled && message.role === "user" && index === lastUserIndex
|
|
5437
5455
|
)
|
|
5438
5456
|
}));
|
|
5439
5457
|
const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
|
|
5458
|
+
const thinking = resolveAnthropicThinking(options.reasoning);
|
|
5459
|
+
const temperature = thinking ? void 0 : options.temperature;
|
|
5440
5460
|
const payload = {
|
|
5441
5461
|
model: descriptor.name,
|
|
5442
5462
|
system,
|
|
5443
5463
|
messages: conversation,
|
|
5444
5464
|
max_tokens: options.maxTokens ?? defaultMaxTokens,
|
|
5445
|
-
temperature
|
|
5465
|
+
temperature,
|
|
5446
5466
|
top_p: options.topP,
|
|
5447
5467
|
stop_sequences: options.stopSequences,
|
|
5448
5468
|
stream: true,
|
|
5469
|
+
...thinking ? { thinking } : {},
|
|
5449
5470
|
...options.extra
|
|
5450
5471
|
};
|
|
5451
5472
|
return payload;
|
|
@@ -5525,8 +5546,39 @@ var init_anthropic = __esm({
|
|
|
5525
5546
|
};
|
|
5526
5547
|
continue;
|
|
5527
5548
|
}
|
|
5528
|
-
if (event.type === "
|
|
5529
|
-
|
|
5549
|
+
if (event.type === "content_block_start") {
|
|
5550
|
+
const block = event.content_block;
|
|
5551
|
+
if (block.type === "thinking") {
|
|
5552
|
+
yield { text: "", thinking: { content: "", type: "thinking" }, rawEvent: event };
|
|
5553
|
+
continue;
|
|
5554
|
+
}
|
|
5555
|
+
if (block.type === "redacted_thinking") {
|
|
5556
|
+
yield { text: "", thinking: { content: "", type: "redacted" }, rawEvent: event };
|
|
5557
|
+
continue;
|
|
5558
|
+
}
|
|
5559
|
+
}
|
|
5560
|
+
if (event.type === "content_block_delta") {
|
|
5561
|
+
const delta = event.delta;
|
|
5562
|
+
if (delta.type === "thinking_delta" && delta.thinking) {
|
|
5563
|
+
yield {
|
|
5564
|
+
text: "",
|
|
5565
|
+
thinking: { content: delta.thinking, type: "thinking" },
|
|
5566
|
+
rawEvent: event
|
|
5567
|
+
};
|
|
5568
|
+
continue;
|
|
5569
|
+
}
|
|
5570
|
+
if (delta.type === "signature_delta" && delta.signature) {
|
|
5571
|
+
yield {
|
|
5572
|
+
text: "",
|
|
5573
|
+
thinking: { content: "", type: "thinking", signature: delta.signature },
|
|
5574
|
+
rawEvent: event
|
|
5575
|
+
};
|
|
5576
|
+
continue;
|
|
5577
|
+
}
|
|
5578
|
+
if (delta.type === "text_delta") {
|
|
5579
|
+
yield { text: delta.text ?? "", rawEvent: event };
|
|
5580
|
+
continue;
|
|
5581
|
+
}
|
|
5530
5582
|
continue;
|
|
5531
5583
|
}
|
|
5532
5584
|
if (event.type === "message_delta") {
|
|
@@ -5616,6 +5668,177 @@ var init_anthropic = __esm({
|
|
|
5616
5668
|
}
|
|
5617
5669
|
});
|
|
5618
5670
|
|
|
5671
|
+
// src/providers/gemini-cache-manager.ts
|
|
5672
|
+
var import_node_crypto3, GeminiCacheManager;
|
|
5673
|
+
var init_gemini_cache_manager = __esm({
|
|
5674
|
+
"src/providers/gemini-cache-manager.ts"() {
|
|
5675
|
+
"use strict";
|
|
5676
|
+
import_node_crypto3 = require("crypto");
|
|
5677
|
+
GeminiCacheManager = class {
  /**
   * @param client - SDK client; this class only calls `client.caches.create`
   *   and `client.caches.delete` on it.
   */
  constructor(client) {
    this.client = client;
  }
  /**
   * Metadata of the single cache this manager currently tracks
   * (`{ name, model, contentHash, expireTime }`), or `null` when none.
   */
  activeCache = null;
  /**
   * Get or create a cache for the cacheable prefix of the given contents.
   *
   * Returns `null` when caching is skipped: disabled in `config`, nothing to
   * cache, estimated size below the threshold, the API returned no cache
   * name, or the create call failed (failure is logged, never thrown).
   *
   * @param model - Gemini model name (e.g., "gemini-2.5-flash")
   * @param allContents - All Gemini-formatted contents (system + conversation)
   * @param config - Caching configuration from the user
   *   (`enabled`, optional `scope`, `ttl`, `minTokenThreshold`)
   * @param lastUserMessageIndex - Index of the last user message (content from this index on is not cached)
   * @returns `{ cacheName, cachedContentCount }` where `cachedContentCount`
   *   is the number of leading entries of `allContents` covered by the
   *   cache, or `null`
   */
  async getOrCreateCache(model, allContents, config, lastUserMessageIndex) {
    if (!config.enabled) return null;
    const scope = config.scope ?? "conversation";
    const ttl = config.ttl ?? "3600s";
    const minTokenThreshold = config.minTokenThreshold ?? 32768;
    const cacheableContents = this.selectCacheableContents(
      allContents,
      scope,
      lastUserMessageIndex
    );
    if (cacheableContents.length === 0) return null;
    if (this.estimateTokenCount(cacheableContents) < minTokenThreshold) return null;
    const contentHash = this.computeContentHash(cacheableContents, model);
    if (this.activeCache && this.canReuseCache(this.activeCache, model, contentHash)) {
      return {
        cacheName: this.activeCache.name,
        cachedContentCount: cacheableContents.length
      };
    }
    try {
      // Only one cache is tracked at a time: drop the stale one first.
      await this.cleanupActiveCache();
      const response = await this.client.caches.create({
        model,
        config: {
          contents: cacheableContents,
          ttl,
          displayName: `llmist-${scope}-${Date.now()}`
        }
      });
      if (!response.name) {
        return null;
      }
      this.activeCache = {
        name: response.name,
        model,
        contentHash,
        // If the API does not report an expiry, derive one from the TTL we
        // requested so the entry is not treated as valid forever.
        expireTime: response.expireTime || this.estimateExpireTime(ttl)
      };
      return {
        cacheName: response.name,
        cachedContentCount: cacheableContents.length
      };
    } catch (error) {
      console.warn("Gemini cache creation failed, continuing without cache:", error);
      return null;
    }
  }
  /**
   * Clean up the active cache (best-effort).
   * Deletion errors are ignored by `cleanupActiveCache`.
   */
  async dispose() {
    await this.cleanupActiveCache();
  }
  /**
   * Select which contents to cache based on scope.
   *
   * - "system": the leading run of user + model("Understood.") exchanges
   * - any other scope ("conversation"): everything before the last user message
   *
   * @param allContents - Full list of Gemini contents
   * @param scope - "system" or "conversation"
   * @param lastUserMessageIndex - Index of the last user message
   * @returns The prefix of `allContents` that should be cached (possibly empty)
   */
  selectCacheableContents(allContents, scope, lastUserMessageIndex) {
    if (scope === "system") {
      // A system-derived exchange ends with a model turn whose only part is
      // the literal acknowledgement text.
      const isAcknowledgement = (content) => content?.role === "model" && content.parts?.length === 1 && content.parts[0]?.text === "Understood.";
      let systemEndIndex = 0;
      for (let i = 0; i < allContents.length; i++) {
        const content = allContents[i];
        if (isAcknowledgement(content)) {
          systemEndIndex = i + 1;
          continue;
        }
        // A user turn belongs to the system prefix only when it is
        // immediately acknowledged; anything else ends the prefix.
        if (content.role === "user" && isAcknowledgement(allContents[i + 1])) {
          continue;
        }
        break;
      }
      return allContents.slice(0, systemEndIndex);
    }
    if (lastUserMessageIndex <= 0) return [];
    return allContents.slice(0, lastUserMessageIndex);
  }
  /**
   * Estimate token count from contents using character-based heuristic.
   * Uses ~4 characters per token; each inline-data part is counted as a flat
   * 258 tokens (presumably Gemini's per-image cost; confirm against the
   * provider documentation). Parts of any other kind contribute nothing.
   *
   * @param contents - Gemini contents to measure
   * @returns Estimated token count (rounded up)
   */
  estimateTokenCount(contents) {
    let totalChars = 0;
    for (const content of contents) {
      for (const part of content.parts) {
        if (typeof part.text === "string") {
          totalChars += part.text.length;
        } else if ("inlineData" in part) {
          totalChars += 258 * 4;
        }
      }
    }
    return Math.ceil(totalChars / 4);
  }
  /**
   * Compute a stable hash of the cacheable contents for change detection.
   *
   * Every field is fed with a tag and a length prefix so that moving a
   * boundary between adjacent fields changes the digest, and parts that are
   * neither text nor inline data are hashed via their JSON form instead of
   * being skipped.
   *
   * @param contents - Gemini contents that would be cached
   * @param model - Model name, mixed into the hash
   * @returns Hex-encoded SHA-256 digest
   */
  computeContentHash(contents, model) {
    const hash = (0, import_node_crypto3.createHash)("sha256");
    const feed = (tag, value) => {
      const text = String(value);
      hash.update(`${tag}:${text.length}:`);
      hash.update(text);
    };
    feed("model", model);
    for (const content of contents) {
      feed("role", content.role);
      for (const part of content.parts) {
        if ("text" in part) {
          feed("text", part.text);
        } else if ("inlineData" in part) {
          feed("mime", part.inlineData.mimeType);
          feed("data", part.inlineData.data);
        } else {
          feed("part", JSON.stringify(part));
        }
      }
    }
    return hash.digest("hex");
  }
  /**
   * Check if an existing cache can be reused.
   *
   * @param cache - Tracked cache metadata (`model`, `contentHash`, `expireTime`)
   * @param model - Model of the current request
   * @param contentHash - Hash of the current cacheable contents
   * @returns `true` when model and hash match and, if an expiry is recorded,
   *   it parses and lies at least 60 seconds in the future
   */
  canReuseCache(cache, model, contentHash) {
    if (cache.model !== model) return false;
    if (cache.contentHash !== contentHash) return false;
    if (cache.expireTime) {
      const expiresAt = new Date(cache.expireTime).getTime();
      // An unparseable expiry cannot be trusted; recreate rather than risk
      // referencing a cache that no longer exists.
      if (Number.isNaN(expiresAt)) return false;
      // Keep a one-minute safety margin before expiry.
      if (expiresAt - Date.now() < 6e4) return false;
    }
    return true;
  }
  /**
   * Derive an expiry timestamp from a TTL string of the form "<seconds>s".
   *
   * @param ttl - TTL as passed to the create call (e.g., "3600s")
   * @returns ISO-8601 timestamp, or "" when the TTL has another format
   */
  estimateExpireTime(ttl) {
    const match = /^(\d+(?:\.\d+)?)s$/.exec(String(ttl));
    if (!match) return "";
    return new Date(Date.now() + Number(match[1]) * 1e3).toISOString();
  }
  /**
   * Delete the active cache (best-effort) and forget it.
   */
  async cleanupActiveCache() {
    if (!this.activeCache) return;
    try {
      await this.client.caches.delete({ name: this.activeCache.name });
    } catch {
      // Deliberately ignored: deletion is best-effort and must never fail
      // the request that triggered it.
    }
    this.activeCache = null;
  }
};
|
|
5839
|
+
}
|
|
5840
|
+
});
|
|
5841
|
+
|
|
5619
5842
|
// src/providers/gemini-image-models.ts
|
|
5620
5843
|
function getGeminiImageModelSpec(modelId) {
|
|
5621
5844
|
return geminiImageModels.find((m) => m.modelId === modelId);
|
|
@@ -5835,10 +6058,10 @@ var init_gemini_models = __esm({
|
|
|
5835
6058
|
contextWindow: 1048576,
|
|
5836
6059
|
maxOutputTokens: 65536,
|
|
5837
6060
|
pricing: {
|
|
5838
|
-
input: 0.
|
|
5839
|
-
// $0.
|
|
6061
|
+
input: 0.5,
|
|
6062
|
+
// $0.50 for text/image/video
|
|
5840
6063
|
output: 3,
|
|
5841
|
-
cachedInput: 0.
|
|
6064
|
+
cachedInput: 0.05
|
|
5842
6065
|
},
|
|
5843
6066
|
knowledgeCutoff: "2025-01",
|
|
5844
6067
|
features: {
|
|
@@ -6132,6 +6355,23 @@ var init_gemini_speech_models = __esm({
|
|
|
6132
6355
|
});
|
|
6133
6356
|
|
|
6134
6357
|
// src/providers/gemini.ts
|
|
6358
|
+
/**
 * Translate the provider-agnostic reasoning options into a Gemini
 * `thinkingConfig` fragment that is spread into the request config.
 *
 * Models whose name contains "gemini-3" take a named `thinkingLevel`
 * (looked up in GEMINI3_THINKING_LEVEL); every other model takes a numeric
 * `thinkingBudget`, where an explicit `budgetTokens` wins over the value
 * mapped from `effort` via GEMINI25_THINKING_BUDGET.
 *
 * @param {{enabled?: boolean, budgetTokens?: number, effort?: string}|undefined} reasoning
 *   Caller-supplied reasoning configuration.
 * @param {string} modelName - Model name used to choose the parameter style.
 * @returns {{thinkingConfig: object}|undefined}
 *   Config fragment, or `undefined` when reasoning is not enabled.
 */
function resolveGeminiThinkingConfig(reasoning, modelName) {
  if (!reasoning?.enabled) return void 0;
  const effort = reasoning.effort ?? "medium";
  if (modelName.includes("gemini-3")) {
    // Unknown effort names fall back to the default tier instead of sending
    // `thinkingLevel: undefined`.
    const thinkingLevel = GEMINI3_THINKING_LEVEL[effort] ?? GEMINI3_THINKING_LEVEL.medium;
    return { thinkingConfig: { thinkingLevel } };
  }
  // `??` (not `||`) so an explicit budget of 0 is passed through unchanged.
  const thinkingBudget = reasoning.budgetTokens ?? GEMINI25_THINKING_BUDGET[effort] ?? GEMINI25_THINKING_BUDGET.medium;
  return { thinkingConfig: { thinkingBudget } };
}
|
|
6135
6375
|
function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
|
|
6136
6376
|
const byteRate = sampleRate * numChannels * bitsPerSample / 8;
|
|
6137
6377
|
const blockAlign = numChannels * bitsPerSample / 8;
|
|
@@ -6160,7 +6400,7 @@ function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
|
|
|
6160
6400
|
function createGeminiProviderFromEnv() {
|
|
6161
6401
|
return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
|
|
6162
6402
|
}
|
|
6163
|
-
var import_genai, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
|
|
6403
|
+
var import_genai, GEMINI3_THINKING_LEVEL, GEMINI25_THINKING_BUDGET, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
|
|
6164
6404
|
var init_gemini = __esm({
|
|
6165
6405
|
"src/providers/gemini.ts"() {
|
|
6166
6406
|
"use strict";
|
|
@@ -6168,10 +6408,25 @@ var init_gemini = __esm({
|
|
|
6168
6408
|
init_messages();
|
|
6169
6409
|
init_base_provider();
|
|
6170
6410
|
init_constants2();
|
|
6411
|
+
init_gemini_cache_manager();
|
|
6171
6412
|
init_gemini_image_models();
|
|
6172
6413
|
init_gemini_models();
|
|
6173
6414
|
init_gemini_speech_models();
|
|
6174
6415
|
init_utils();
|
|
6416
|
+
GEMINI3_THINKING_LEVEL = {
|
|
6417
|
+
none: "minimal",
|
|
6418
|
+
low: "low",
|
|
6419
|
+
medium: "medium",
|
|
6420
|
+
high: "high",
|
|
6421
|
+
maximum: "high"
|
|
6422
|
+
};
|
|
6423
|
+
GEMINI25_THINKING_BUDGET = {
|
|
6424
|
+
none: 0,
|
|
6425
|
+
low: 2048,
|
|
6426
|
+
medium: 8192,
|
|
6427
|
+
high: 16384,
|
|
6428
|
+
maximum: 24576
|
|
6429
|
+
};
|
|
6175
6430
|
GEMINI_ROLE_MAP = {
|
|
6176
6431
|
system: "user",
|
|
6177
6432
|
user: "user",
|
|
@@ -6179,12 +6434,62 @@ var init_gemini = __esm({
|
|
|
6179
6434
|
};
|
|
6180
6435
|
GeminiGenerativeProvider = class extends BaseProviderAdapter {
|
|
6181
6436
|
providerId = "gemini";
|
|
6437
|
+
cacheManager;
|
|
6438
|
+
constructor(client) {
|
|
6439
|
+
super(client);
|
|
6440
|
+
this.cacheManager = new GeminiCacheManager(client);
|
|
6441
|
+
}
|
|
6182
6442
|
supports(descriptor) {
|
|
6183
6443
|
return descriptor.provider === this.providerId;
|
|
6184
6444
|
}
|
|
6185
6445
|
getModelSpecs() {
|
|
6186
6446
|
return GEMINI_MODELS;
|
|
6187
6447
|
}
|
|
6448
|
+
/**
|
|
6449
|
+
* Override the base stream method to inject cache logic.
|
|
6450
|
+
*
|
|
6451
|
+
* When caching is enabled, we:
|
|
6452
|
+
* 1. Prepare messages as usual
|
|
6453
|
+
* 2. Attempt to get/create a cache for the cacheable prefix
|
|
6454
|
+
* 3. If a cache is available, strip cached contents from the request and add cachedContent ref
|
|
6455
|
+
* 4. Otherwise, proceed normally (graceful degradation)
|
|
6456
|
+
*/
|
|
6457
|
+
async *stream(options, descriptor, spec) {
|
|
6458
|
+
const preparedMessages = this.prepareMessages(options.messages);
|
|
6459
|
+
const contents = this.convertMessagesToContents(preparedMessages);
|
|
6460
|
+
const cachingConfig = options.caching;
|
|
6461
|
+
let cacheName = null;
|
|
6462
|
+
let cachedContentCount = 0;
|
|
6463
|
+
if (cachingConfig?.enabled) {
|
|
6464
|
+
let lastUserIndex = -1;
|
|
6465
|
+
for (let i = contents.length - 1; i >= 0; i--) {
|
|
6466
|
+
if (contents[i].role === "user") {
|
|
6467
|
+
lastUserIndex = i;
|
|
6468
|
+
break;
|
|
6469
|
+
}
|
|
6470
|
+
}
|
|
6471
|
+
const cacheResult = await this.cacheManager.getOrCreateCache(
|
|
6472
|
+
descriptor.name,
|
|
6473
|
+
contents,
|
|
6474
|
+
cachingConfig,
|
|
6475
|
+
lastUserIndex
|
|
6476
|
+
);
|
|
6477
|
+
if (cacheResult) {
|
|
6478
|
+
cacheName = cacheResult.cacheName;
|
|
6479
|
+
cachedContentCount = cacheResult.cachedContentCount;
|
|
6480
|
+
}
|
|
6481
|
+
}
|
|
6482
|
+
const payload = this.buildApiRequestFromContents(
|
|
6483
|
+
options,
|
|
6484
|
+
descriptor,
|
|
6485
|
+
spec,
|
|
6486
|
+
contents,
|
|
6487
|
+
cacheName,
|
|
6488
|
+
cachedContentCount
|
|
6489
|
+
);
|
|
6490
|
+
const rawStream = await this.executeStreamRequest(payload, options.signal);
|
|
6491
|
+
yield* this.normalizeProviderStream(rawStream);
|
|
6492
|
+
}
|
|
6188
6493
|
// =========================================================================
|
|
6189
6494
|
// Image Generation
|
|
6190
6495
|
// =========================================================================
|
|
@@ -6320,7 +6625,19 @@ var init_gemini = __esm({
|
|
|
6320
6625
|
}
|
|
6321
6626
|
buildApiRequest(options, descriptor, _spec, messages) {
|
|
6322
6627
|
const contents = this.convertMessagesToContents(messages);
|
|
6628
|
+
return this.buildApiRequestFromContents(options, descriptor, _spec, contents, null, 0);
|
|
6629
|
+
}
|
|
6630
|
+
/**
|
|
6631
|
+
* Build API request from pre-converted Gemini contents.
|
|
6632
|
+
*
|
|
6633
|
+
* When a cache name is provided, the cached prefix is stripped from contents
|
|
6634
|
+
* and the cache reference is added to the config. This tells Gemini to use
|
|
6635
|
+
* the pre-computed KV pairs instead of reprocessing the cached content.
|
|
6636
|
+
*/
|
|
6637
|
+
buildApiRequestFromContents(options, descriptor, _spec, contents, cacheName, cachedContentCount) {
|
|
6638
|
+
const effectiveContents = cacheName ? contents.slice(cachedContentCount) : contents;
|
|
6323
6639
|
const generationConfig = this.buildGenerationConfig(options);
|
|
6640
|
+
const thinkingConfig = resolveGeminiThinkingConfig(options.reasoning, descriptor.name);
|
|
6324
6641
|
const config = {
|
|
6325
6642
|
// Note: systemInstruction removed - it doesn't work with countTokens()
|
|
6326
6643
|
// System messages are now included in contents as user+model exchanges
|
|
@@ -6331,11 +6648,14 @@ var init_gemini = __esm({
|
|
|
6331
6648
|
mode: import_genai.FunctionCallingConfigMode.NONE
|
|
6332
6649
|
}
|
|
6333
6650
|
},
|
|
6651
|
+
...thinkingConfig ?? {},
|
|
6652
|
+
// Add cache reference if available
|
|
6653
|
+
...cacheName ? { cachedContent: cacheName } : {},
|
|
6334
6654
|
...options.extra
|
|
6335
6655
|
};
|
|
6336
6656
|
return {
|
|
6337
6657
|
model: descriptor.name,
|
|
6338
|
-
contents,
|
|
6658
|
+
contents: effectiveContents,
|
|
6339
6659
|
config
|
|
6340
6660
|
};
|
|
6341
6661
|
}
|
|
@@ -6468,7 +6788,18 @@ var init_gemini = __esm({
|
|
|
6468
6788
|
async *normalizeProviderStream(iterable) {
|
|
6469
6789
|
const stream2 = iterable;
|
|
6470
6790
|
for await (const chunk of stream2) {
|
|
6471
|
-
const text3 = this.
|
|
6791
|
+
const { text: text3, thinkingText, thinkingSignature } = this.extractTextAndThinking(chunk);
|
|
6792
|
+
if (thinkingText) {
|
|
6793
|
+
yield {
|
|
6794
|
+
text: "",
|
|
6795
|
+
thinking: {
|
|
6796
|
+
content: thinkingText,
|
|
6797
|
+
type: "thinking",
|
|
6798
|
+
signature: thinkingSignature
|
|
6799
|
+
},
|
|
6800
|
+
rawEvent: chunk
|
|
6801
|
+
};
|
|
6802
|
+
}
|
|
6472
6803
|
if (text3) {
|
|
6473
6804
|
yield { text: text3, rawEvent: chunk };
|
|
6474
6805
|
}
|
|
@@ -6479,11 +6810,30 @@ var init_gemini = __esm({
|
|
|
6479
6810
|
}
|
|
6480
6811
|
}
|
|
6481
6812
|
}
|
|
6482
|
-
|
|
6813
|
+
/**
|
|
6814
|
+
* Extract both regular text and thinking text from a chunk.
|
|
6815
|
+
* Gemini marks thinking parts with `thought: true`.
|
|
6816
|
+
*/
|
|
6817
|
+
extractTextAndThinking(chunk) {
|
|
6483
6818
|
if (!chunk?.candidates) {
|
|
6484
|
-
return "";
|
|
6819
|
+
return { text: "", thinkingText: "" };
|
|
6820
|
+
}
|
|
6821
|
+
let text3 = "";
|
|
6822
|
+
let thinkingText = "";
|
|
6823
|
+
let thinkingSignature;
|
|
6824
|
+
for (const candidate of chunk.candidates) {
|
|
6825
|
+
for (const part of candidate.content?.parts ?? []) {
|
|
6826
|
+
if (part.thought) {
|
|
6827
|
+
thinkingText += part.text ?? "";
|
|
6828
|
+
if (part.thoughtSignature) {
|
|
6829
|
+
thinkingSignature = part.thoughtSignature;
|
|
6830
|
+
}
|
|
6831
|
+
} else {
|
|
6832
|
+
text3 += part.text ?? "";
|
|
6833
|
+
}
|
|
6834
|
+
}
|
|
6485
6835
|
}
|
|
6486
|
-
return
|
|
6836
|
+
return { text: text3, thinkingText, thinkingSignature };
|
|
6487
6837
|
}
|
|
6488
6838
|
extractFinishReason(chunk) {
|
|
6489
6839
|
const candidate = chunk?.candidates?.find((item) => item.finishReason);
|
|
@@ -6499,7 +6849,9 @@ var init_gemini = __esm({
|
|
|
6499
6849
|
outputTokens: usageMetadata.candidatesTokenCount ?? 0,
|
|
6500
6850
|
totalTokens: usageMetadata.totalTokenCount ?? 0,
|
|
6501
6851
|
// Gemini returns cached token count in cachedContentTokenCount
|
|
6502
|
-
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
|
|
6852
|
+
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0,
|
|
6853
|
+
// Gemini returns thinking tokens in thoughtsTokenCount
|
|
6854
|
+
reasoningTokens: usageMetadata.thoughtsTokenCount
|
|
6503
6855
|
};
|
|
6504
6856
|
}
|
|
6505
6857
|
/**
|
|
@@ -7520,11 +7872,13 @@ var init_openai_compatible_provider = __esm({
|
|
|
7520
7872
|
yield { text: text3, rawEvent: chunk };
|
|
7521
7873
|
}
|
|
7522
7874
|
const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
|
|
7875
|
+
const usageDetails = chunk.usage;
|
|
7523
7876
|
const usage = chunk.usage ? {
|
|
7524
7877
|
inputTokens: chunk.usage.prompt_tokens,
|
|
7525
7878
|
outputTokens: chunk.usage.completion_tokens,
|
|
7526
7879
|
totalTokens: chunk.usage.total_tokens,
|
|
7527
|
-
cachedInputTokens: 0
|
|
7880
|
+
cachedInputTokens: 0,
|
|
7881
|
+
reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
|
|
7528
7882
|
} : void 0;
|
|
7529
7883
|
if (finishReason || usage) {
|
|
7530
7884
|
yield { text: "", finishReason, usage, rawEvent: chunk };
|
|
@@ -7600,6 +7954,21 @@ var init_huggingface = __esm({
|
|
|
7600
7954
|
getModelSpecs() {
|
|
7601
7955
|
return HUGGINGFACE_MODELS;
|
|
7602
7956
|
}
|
|
7957
|
+
/**
|
|
7958
|
+
* Override buildApiRequest to inject DeepSeek-specific thinking parameters.
|
|
7959
|
+
* DeepSeek models use `extra_body: { thinking: { type: "enabled" } }` for reasoning.
|
|
7960
|
+
*/
|
|
7961
|
+
buildApiRequest(options, descriptor, spec, messages) {
|
|
7962
|
+
const request = super.buildApiRequest(options, descriptor, spec, messages);
|
|
7963
|
+
if (options.reasoning?.enabled && descriptor.name.toLowerCase().includes("deepseek")) {
|
|
7964
|
+
const requestObj = request;
|
|
7965
|
+
requestObj.extra_body = {
|
|
7966
|
+
...requestObj.extra_body,
|
|
7967
|
+
thinking: { type: "enabled" }
|
|
7968
|
+
};
|
|
7969
|
+
}
|
|
7970
|
+
return request;
|
|
7971
|
+
}
|
|
7603
7972
|
/**
|
|
7604
7973
|
* Enhance error messages with HuggingFace-specific guidance.
|
|
7605
7974
|
*/
|
|
@@ -8485,7 +8854,7 @@ function sanitizeExtra(extra, allowTemperature) {
|
|
|
8485
8854
|
function createOpenAIProviderFromEnv() {
|
|
8486
8855
|
return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
|
|
8487
8856
|
}
|
|
8488
|
-
var import_openai3, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
|
|
8857
|
+
var import_openai3, import_tiktoken, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
|
|
8489
8858
|
var init_openai = __esm({
|
|
8490
8859
|
"src/providers/openai.ts"() {
|
|
8491
8860
|
"use strict";
|
|
@@ -8503,6 +8872,13 @@ var init_openai = __esm({
|
|
|
8503
8872
|
user: "user",
|
|
8504
8873
|
assistant: "assistant"
|
|
8505
8874
|
};
|
|
8875
|
+
OPENAI_EFFORT_MAP = {
|
|
8876
|
+
none: "none",
|
|
8877
|
+
low: "low",
|
|
8878
|
+
medium: "medium",
|
|
8879
|
+
high: "high",
|
|
8880
|
+
maximum: "xhigh"
|
|
8881
|
+
};
|
|
8506
8882
|
OpenAIChatProvider = class extends BaseProviderAdapter {
|
|
8507
8883
|
providerId = "openai";
|
|
8508
8884
|
supports(descriptor) {
|
|
@@ -8593,10 +8969,15 @@ var init_openai = __esm({
|
|
|
8593
8969
|
};
|
|
8594
8970
|
}
|
|
8595
8971
|
buildApiRequest(options, descriptor, spec, messages) {
|
|
8596
|
-
const { maxTokens, temperature, topP, stopSequences, extra } = options;
|
|
8972
|
+
const { maxTokens, temperature, topP, stopSequences, extra, reasoning } = options;
|
|
8597
8973
|
const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
|
|
8598
8974
|
const shouldIncludeTemperature = typeof temperature === "number" && supportsTemperature;
|
|
8599
8975
|
const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
|
|
8976
|
+
const reasoningParam = reasoning?.enabled !== void 0 ? {
|
|
8977
|
+
reasoning: {
|
|
8978
|
+
effort: OPENAI_EFFORT_MAP[reasoning.effort ?? "medium"]
|
|
8979
|
+
}
|
|
8980
|
+
} : {};
|
|
8600
8981
|
return {
|
|
8601
8982
|
model: descriptor.name,
|
|
8602
8983
|
messages: messages.map((message) => this.convertToOpenAIMessage(message)),
|
|
@@ -8607,6 +8988,7 @@ var init_openai = __esm({
|
|
|
8607
8988
|
stop: stopSequences,
|
|
8608
8989
|
stream: true,
|
|
8609
8990
|
stream_options: { include_usage: true },
|
|
8991
|
+
...reasoningParam,
|
|
8610
8992
|
...sanitizedExtra ?? {},
|
|
8611
8993
|
...shouldIncludeTemperature ? { temperature } : {}
|
|
8612
8994
|
};
|
|
@@ -8695,11 +9077,13 @@ var init_openai = __esm({
|
|
|
8695
9077
|
yield { text: text3, rawEvent: chunk };
|
|
8696
9078
|
}
|
|
8697
9079
|
const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
|
|
9080
|
+
const usageDetails = chunk.usage;
|
|
8698
9081
|
const usage = chunk.usage ? {
|
|
8699
9082
|
inputTokens: chunk.usage.prompt_tokens,
|
|
8700
9083
|
outputTokens: chunk.usage.completion_tokens,
|
|
8701
9084
|
totalTokens: chunk.usage.total_tokens,
|
|
8702
|
-
cachedInputTokens:
|
|
9085
|
+
cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
|
|
9086
|
+
reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
|
|
8703
9087
|
} : void 0;
|
|
8704
9088
|
if (finishReason || usage) {
|
|
8705
9089
|
yield { text: "", finishReason, usage, rawEvent: chunk };
|
|
@@ -9234,7 +9618,7 @@ function createOpenRouterProviderFromEnv() {
|
|
|
9234
9618
|
});
|
|
9235
9619
|
return new OpenRouterProvider(client, config);
|
|
9236
9620
|
}
|
|
9237
|
-
var import_openai4, OpenRouterProvider;
|
|
9621
|
+
var import_openai4, OPENROUTER_EFFORT_MAP, OpenRouterProvider;
|
|
9238
9622
|
var init_openrouter = __esm({
|
|
9239
9623
|
"src/providers/openrouter.ts"() {
|
|
9240
9624
|
"use strict";
|
|
@@ -9242,6 +9626,13 @@ var init_openrouter = __esm({
|
|
|
9242
9626
|
init_openai_compatible_provider();
|
|
9243
9627
|
init_openrouter_models();
|
|
9244
9628
|
init_utils();
|
|
9629
|
+
OPENROUTER_EFFORT_MAP = {
|
|
9630
|
+
none: "none",
|
|
9631
|
+
low: "low",
|
|
9632
|
+
medium: "medium",
|
|
9633
|
+
high: "high",
|
|
9634
|
+
maximum: "xhigh"
|
|
9635
|
+
};
|
|
9245
9636
|
OpenRouterProvider = class extends OpenAICompatibleProvider {
|
|
9246
9637
|
providerId = "openrouter";
|
|
9247
9638
|
providerAlias = "or";
|
|
@@ -9251,6 +9642,20 @@ var init_openrouter = __esm({
|
|
|
9251
9642
|
getModelSpecs() {
|
|
9252
9643
|
return OPENROUTER_MODELS;
|
|
9253
9644
|
}
|
|
9645
|
+
/**
|
|
9646
|
+
* Override buildApiRequest to inject reasoning parameters.
|
|
9647
|
+
* OpenRouter normalizes reasoning into the standard OpenAI format.
|
|
9648
|
+
*/
|
|
9649
|
+
buildApiRequest(options, descriptor, spec, messages) {
|
|
9650
|
+
const request = super.buildApiRequest(options, descriptor, spec, messages);
|
|
9651
|
+
if (options.reasoning?.enabled !== void 0) {
|
|
9652
|
+
const requestObj = request;
|
|
9653
|
+
requestObj.reasoning = {
|
|
9654
|
+
effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
|
|
9655
|
+
};
|
|
9656
|
+
}
|
|
9657
|
+
return request;
|
|
9658
|
+
}
|
|
9254
9659
|
/**
|
|
9255
9660
|
* Get custom headers for OpenRouter analytics.
|
|
9256
9661
|
*/
|
|
@@ -9488,9 +9893,10 @@ var init_model_registry = __esm({
|
|
|
9488
9893
|
* @param outputTokens - Number of output tokens
|
|
9489
9894
|
* @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
|
|
9490
9895
|
* @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
|
|
9896
|
+
* @param reasoningTokens - Number of reasoning/thinking tokens (subset of outputTokens)
|
|
9491
9897
|
* @returns CostEstimate if model found, undefined otherwise
|
|
9492
9898
|
*/
|
|
9493
|
-
estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
|
|
9899
|
+
estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
|
|
9494
9900
|
const spec = this.getModelSpec(modelId);
|
|
9495
9901
|
if (!spec) return void 0;
|
|
9496
9902
|
const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
|
|
@@ -9500,13 +9906,18 @@ var init_model_registry = __esm({
|
|
|
9500
9906
|
const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
|
|
9501
9907
|
const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
|
|
9502
9908
|
const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
|
|
9503
|
-
const
|
|
9909
|
+
const reasoningRate = spec.pricing.reasoningOutput ?? spec.pricing.output;
|
|
9910
|
+
const nonReasoningOutputTokens = outputTokens - reasoningTokens;
|
|
9911
|
+
const reasoningCost = reasoningTokens / 1e6 * reasoningRate;
|
|
9912
|
+
const nonReasoningOutputCost = nonReasoningOutputTokens / 1e6 * spec.pricing.output;
|
|
9913
|
+
const outputCost = nonReasoningOutputCost + reasoningCost;
|
|
9504
9914
|
const totalCost = inputCost + outputCost;
|
|
9505
9915
|
return {
|
|
9506
9916
|
inputCost,
|
|
9507
9917
|
cachedInputCost,
|
|
9508
9918
|
cacheCreationCost,
|
|
9509
9919
|
outputCost,
|
|
9920
|
+
reasoningCost,
|
|
9510
9921
|
totalCost,
|
|
9511
9922
|
currency: "USD"
|
|
9512
9923
|
};
|
|
@@ -10221,6 +10632,8 @@ var init_builder = __esm({
|
|
|
10221
10632
|
// Shared retry config from parent for consistent backoff behavior
|
|
10222
10633
|
// When a gadget calls withParentContext(ctx), this config is shared
|
|
10223
10634
|
sharedRetryConfig;
|
|
10635
|
+
reasoningConfig;
|
|
10636
|
+
cachingConfig;
|
|
10224
10637
|
constructor(client) {
|
|
10225
10638
|
this.client = client;
|
|
10226
10639
|
}
|
|
@@ -10806,6 +11219,116 @@ var init_builder = __esm({
|
|
|
10806
11219
|
this.signal = signal;
|
|
10807
11220
|
return this;
|
|
10808
11221
|
}
|
|
11222
|
+
/**
|
|
11223
|
+
* Enable reasoning/thinking mode for reasoning-capable models.
|
|
11224
|
+
*
|
|
11225
|
+
* Can be called with:
|
|
11226
|
+
* - No args: enables reasoning at "medium" effort
|
|
11227
|
+
* - A string effort level: `withReasoning("high")`
|
|
11228
|
+
* - A full config object: `withReasoning({ enabled: true, budgetTokens: 10000 })`
|
|
11229
|
+
*
|
|
11230
|
+
* @param config - Optional effort level or full reasoning config
|
|
11231
|
+
* @returns This builder for chaining
|
|
11232
|
+
*
|
|
11233
|
+
* @example
|
|
11234
|
+
* ```typescript
|
|
11235
|
+
* // Simple — medium effort
|
|
11236
|
+
* LLMist.createAgent()
|
|
11237
|
+
* .withModel("o3")
|
|
11238
|
+
* .withReasoning()
|
|
11239
|
+
* .ask("Solve this logic puzzle...");
|
|
11240
|
+
*
|
|
11241
|
+
* // Explicit effort level
|
|
11242
|
+
* LLMist.createAgent()
|
|
11243
|
+
* .withModel("anthropic:claude-4-opus")
|
|
11244
|
+
* .withReasoning("high")
|
|
11245
|
+
* .ask("Analyze this complex problem");
|
|
11246
|
+
*
|
|
11247
|
+
* // Full config with explicit token budget
|
|
11248
|
+
* LLMist.createAgent()
|
|
11249
|
+
* .withModel("anthropic:claude-4-opus")
|
|
11250
|
+
* .withReasoning({ enabled: true, budgetTokens: 16000 })
|
|
11251
|
+
* .ask("Step through this proof");
|
|
11252
|
+
* ```
|
|
11253
|
+
*/
|
|
11254
|
+
withReasoning(config) {
|
|
11255
|
+
if (typeof config === "string") {
|
|
11256
|
+
this.reasoningConfig = { enabled: true, effort: config };
|
|
11257
|
+
} else if (config === void 0) {
|
|
11258
|
+
this.reasoningConfig = { enabled: true, effort: "medium" };
|
|
11259
|
+
} else {
|
|
11260
|
+
this.reasoningConfig = config;
|
|
11261
|
+
}
|
|
11262
|
+
return this;
|
|
11263
|
+
}
|
|
11264
|
+
/**
|
|
11265
|
+
* Explicitly disable reasoning for this agent, even if the model supports it.
|
|
11266
|
+
*
|
|
11267
|
+
* By default, reasoning is auto-enabled at "medium" effort for models with
|
|
11268
|
+
* `features.reasoning: true`. Use this to opt out.
|
|
11269
|
+
*
|
|
11270
|
+
* @returns This builder for chaining
|
|
11271
|
+
*/
|
|
11272
|
+
withoutReasoning() {
|
|
11273
|
+
this.reasoningConfig = { enabled: false };
|
|
11274
|
+
return this;
|
|
11275
|
+
}
|
|
11276
|
+
/**
|
|
11277
|
+
* Enable context caching for supported providers.
|
|
11278
|
+
*
|
|
11279
|
+
* Can be called with:
|
|
11280
|
+
* - No args: enables caching with defaults (`{ enabled: true }`)
|
|
11281
|
+
* - A full config object: `withCaching({ enabled: true, scope: "system", ttl: "7200s" })`
|
|
11282
|
+
*
|
|
11283
|
+
* Provider behavior:
|
|
11284
|
+
* - **Anthropic**: Caching is always-on by default via `cache_control` markers.
|
|
11285
|
+
* Calling `withCaching()` explicitly is a no-op (it's already enabled).
|
|
11286
|
+
* - **Gemini**: Creates an explicit cache via `caches.create()` for the configured scope.
|
|
11287
|
+
* - **OpenAI**: Server-side automatic caching (no-op).
|
|
11288
|
+
*
|
|
11289
|
+
* @param config - Optional caching configuration
|
|
11290
|
+
* @returns This builder for chaining
|
|
11291
|
+
*
|
|
11292
|
+
* @example
|
|
11293
|
+
* ```typescript
|
|
11294
|
+
* // Simple — enable with defaults
|
|
11295
|
+
* LLMist.createAgent()
|
|
11296
|
+
* .withModel("gemini:gemini-2.5-flash")
|
|
11297
|
+
* .withCaching()
|
|
11298
|
+
* .ask("Analyze this large codebase...");
|
|
11299
|
+
*
|
|
11300
|
+
* // Cache only system prompt with longer TTL
|
|
11301
|
+
* LLMist.createAgent()
|
|
11302
|
+
* .withModel("gemini:gemini-2.5-pro")
|
|
11303
|
+
* .withCaching({ enabled: true, scope: "system", ttl: "7200s" })
|
|
11304
|
+
* .ask("...");
|
|
11305
|
+
* ```
|
|
11306
|
+
*/
|
|
11307
|
+
withCaching(config) {
|
|
11308
|
+
this.cachingConfig = config ?? { enabled: true };
|
|
11309
|
+
return this;
|
|
11310
|
+
}
|
|
11311
|
+
/**
|
|
11312
|
+
* Explicitly disable context caching.
|
|
11313
|
+
*
|
|
11314
|
+
* For Anthropic, this removes `cache_control` markers from requests,
|
|
11315
|
+
* opting out of prompt caching entirely.
|
|
11316
|
+
*
|
|
11317
|
+
* @returns This builder for chaining
|
|
11318
|
+
*
|
|
11319
|
+
* @example
|
|
11320
|
+
* ```typescript
|
|
11321
|
+
* // Disable Anthropic's automatic caching
|
|
11322
|
+
* LLMist.createAgent()
|
|
11323
|
+
* .withModel("sonnet")
|
|
11324
|
+
* .withoutCaching()
|
|
11325
|
+
* .ask("...");
|
|
11326
|
+
* ```
|
|
11327
|
+
*/
|
|
11328
|
+
withoutCaching() {
|
|
11329
|
+
this.cachingConfig = { enabled: false };
|
|
11330
|
+
return this;
|
|
11331
|
+
}
|
|
10809
11332
|
/**
|
|
10810
11333
|
* Set subagent configuration overrides.
|
|
10811
11334
|
*
|
|
@@ -11091,6 +11614,8 @@ ${endPrefix}`
|
|
|
11091
11614
|
retryConfig: this.retryConfig,
|
|
11092
11615
|
rateLimitConfig: this.rateLimitConfig,
|
|
11093
11616
|
signal: this.signal,
|
|
11617
|
+
reasoning: this.reasoningConfig,
|
|
11618
|
+
caching: this.cachingConfig,
|
|
11094
11619
|
subagentConfig: this.subagentConfig,
|
|
11095
11620
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
11096
11621
|
parentTree: this.parentContext?.tree,
|
|
@@ -11278,6 +11803,8 @@ ${endPrefix}`
|
|
|
11278
11803
|
retryConfig: this.retryConfig,
|
|
11279
11804
|
rateLimitConfig: this.rateLimitConfig,
|
|
11280
11805
|
signal: this.signal,
|
|
11806
|
+
reasoning: this.reasoningConfig,
|
|
11807
|
+
caching: this.cachingConfig,
|
|
11281
11808
|
subagentConfig: this.subagentConfig,
|
|
11282
11809
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
11283
11810
|
parentTree: this.parentContext?.tree,
|
|
@@ -11732,6 +12259,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11732
12259
|
let outputTokens = 0;
|
|
11733
12260
|
let cachedInputTokens = 0;
|
|
11734
12261
|
let cacheCreationInputTokens = 0;
|
|
12262
|
+
let reasoningTokens = 0;
|
|
11735
12263
|
const messages = [
|
|
11736
12264
|
...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
|
|
11737
12265
|
{ role: "user", content: prompt }
|
|
@@ -11748,6 +12276,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11748
12276
|
outputTokens = chunk.usage.outputTokens;
|
|
11749
12277
|
cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
|
|
11750
12278
|
cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
|
|
12279
|
+
reasoningTokens = chunk.usage.reasoningTokens ?? 0;
|
|
11751
12280
|
}
|
|
11752
12281
|
}
|
|
11753
12282
|
this.reportCostFromUsage(
|
|
@@ -11755,7 +12284,8 @@ var init_cost_reporting_client = __esm({
|
|
|
11755
12284
|
inputTokens,
|
|
11756
12285
|
outputTokens,
|
|
11757
12286
|
cachedInputTokens,
|
|
11758
|
-
cacheCreationInputTokens
|
|
12287
|
+
cacheCreationInputTokens,
|
|
12288
|
+
reasoningTokens
|
|
11759
12289
|
);
|
|
11760
12290
|
return result;
|
|
11761
12291
|
}
|
|
@@ -11774,6 +12304,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11774
12304
|
let outputTokens = 0;
|
|
11775
12305
|
let cachedInputTokens = 0;
|
|
11776
12306
|
let cacheCreationInputTokens = 0;
|
|
12307
|
+
let reasoningTokens = 0;
|
|
11777
12308
|
const messages = [
|
|
11778
12309
|
...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
|
|
11779
12310
|
{ role: "user", content: prompt }
|
|
@@ -11793,6 +12324,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11793
12324
|
outputTokens = chunk.usage.outputTokens;
|
|
11794
12325
|
cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
|
|
11795
12326
|
cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
|
|
12327
|
+
reasoningTokens = chunk.usage.reasoningTokens ?? 0;
|
|
11796
12328
|
}
|
|
11797
12329
|
}
|
|
11798
12330
|
} finally {
|
|
@@ -11801,7 +12333,8 @@ var init_cost_reporting_client = __esm({
|
|
|
11801
12333
|
inputTokens,
|
|
11802
12334
|
outputTokens,
|
|
11803
12335
|
cachedInputTokens,
|
|
11804
|
-
cacheCreationInputTokens
|
|
12336
|
+
cacheCreationInputTokens,
|
|
12337
|
+
reasoningTokens
|
|
11805
12338
|
);
|
|
11806
12339
|
}
|
|
11807
12340
|
}
|
|
@@ -11828,6 +12361,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11828
12361
|
let outputTokens = 0;
|
|
11829
12362
|
let cachedInputTokens = 0;
|
|
11830
12363
|
let cacheCreationInputTokens = 0;
|
|
12364
|
+
let reasoningTokens = 0;
|
|
11831
12365
|
try {
|
|
11832
12366
|
for await (const chunk of innerStream) {
|
|
11833
12367
|
if (chunk.usage) {
|
|
@@ -11835,6 +12369,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11835
12369
|
outputTokens = chunk.usage.outputTokens;
|
|
11836
12370
|
cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
|
|
11837
12371
|
cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
|
|
12372
|
+
reasoningTokens = chunk.usage.reasoningTokens ?? 0;
|
|
11838
12373
|
}
|
|
11839
12374
|
yield chunk;
|
|
11840
12375
|
}
|
|
@@ -11845,7 +12380,8 @@ var init_cost_reporting_client = __esm({
|
|
|
11845
12380
|
inputTokens,
|
|
11846
12381
|
outputTokens,
|
|
11847
12382
|
cachedInputTokens,
|
|
11848
|
-
cacheCreationInputTokens
|
|
12383
|
+
cacheCreationInputTokens,
|
|
12384
|
+
reasoningTokens
|
|
11849
12385
|
);
|
|
11850
12386
|
}
|
|
11851
12387
|
}
|
|
@@ -11855,14 +12391,15 @@ var init_cost_reporting_client = __esm({
|
|
|
11855
12391
|
/**
|
|
11856
12392
|
* Calculates and reports cost from token usage.
|
|
11857
12393
|
*/
|
|
11858
|
-
reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
|
|
12394
|
+
reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
|
|
11859
12395
|
if (inputTokens === 0 && outputTokens === 0) return;
|
|
11860
12396
|
const estimate = this.client.modelRegistry.estimateCost(
|
|
11861
12397
|
model,
|
|
11862
12398
|
inputTokens,
|
|
11863
12399
|
outputTokens,
|
|
11864
12400
|
cachedInputTokens,
|
|
11865
|
-
cacheCreationInputTokens
|
|
12401
|
+
cacheCreationInputTokens,
|
|
12402
|
+
reasoningTokens
|
|
11866
12403
|
);
|
|
11867
12404
|
if (estimate && estimate.totalCost > 0) {
|
|
11868
12405
|
this.reportCost(estimate.totalCost);
|
|
@@ -12954,9 +13491,18 @@ var init_stream_processor = __esm({
|
|
|
12954
13491
|
let usage;
|
|
12955
13492
|
let didExecuteGadgets = false;
|
|
12956
13493
|
let shouldBreakLoop = false;
|
|
13494
|
+
let thinkingContent = "";
|
|
12957
13495
|
for await (const chunk of stream2) {
|
|
12958
13496
|
if (chunk.finishReason) finishReason = chunk.finishReason;
|
|
12959
13497
|
if (chunk.usage) usage = chunk.usage;
|
|
13498
|
+
if (chunk.thinking?.content) {
|
|
13499
|
+
thinkingContent += chunk.thinking.content;
|
|
13500
|
+
yield {
|
|
13501
|
+
type: "thinking",
|
|
13502
|
+
content: chunk.thinking.content,
|
|
13503
|
+
thinkingType: chunk.thinking.type
|
|
13504
|
+
};
|
|
13505
|
+
}
|
|
12960
13506
|
let processedChunk = "";
|
|
12961
13507
|
if (chunk.text) {
|
|
12962
13508
|
processedChunk = chunk.text;
|
|
@@ -13070,7 +13616,8 @@ var init_stream_processor = __esm({
|
|
|
13070
13616
|
finishReason,
|
|
13071
13617
|
usage,
|
|
13072
13618
|
rawResponse: this.responseText,
|
|
13073
|
-
finalMessage
|
|
13619
|
+
finalMessage,
|
|
13620
|
+
thinkingContent: thinkingContent || void 0
|
|
13074
13621
|
};
|
|
13075
13622
|
yield completionEvent;
|
|
13076
13623
|
}
|
|
@@ -13872,6 +14419,8 @@ var init_agent = __esm({
|
|
|
13872
14419
|
mediaStore;
|
|
13873
14420
|
// Cancellation
|
|
13874
14421
|
signal;
|
|
14422
|
+
reasoning;
|
|
14423
|
+
caching;
|
|
13875
14424
|
// Retry configuration
|
|
13876
14425
|
retryConfig;
|
|
13877
14426
|
// Rate limit tracker for proactive throttling
|
|
@@ -13963,6 +14512,8 @@ var init_agent = __esm({
|
|
|
13963
14512
|
);
|
|
13964
14513
|
}
|
|
13965
14514
|
this.signal = options.signal;
|
|
14515
|
+
this.reasoning = options.reasoning;
|
|
14516
|
+
this.caching = options.caching;
|
|
13966
14517
|
this.retryConfig = options.sharedRetryConfig ?? resolveRetryConfig(options.retryConfig);
|
|
13967
14518
|
if (options.sharedRateLimitTracker) {
|
|
13968
14519
|
this.rateLimitTracker = options.sharedRateLimitTracker;
|
|
@@ -14365,6 +14916,7 @@ var init_agent = __esm({
|
|
|
14365
14916
|
usage: result.usage,
|
|
14366
14917
|
rawResponse: result.rawResponse,
|
|
14367
14918
|
finalMessage: result.finalMessage,
|
|
14919
|
+
thinkingContent: result.thinkingContent,
|
|
14368
14920
|
logger: this.logger,
|
|
14369
14921
|
subagentContext
|
|
14370
14922
|
};
|
|
@@ -14665,17 +15217,49 @@ var init_agent = __esm({
|
|
|
14665
15217
|
});
|
|
14666
15218
|
return { type: "compaction", event: compactionEvent };
|
|
14667
15219
|
}
|
|
15220
|
+
/**
|
|
15221
|
+
* Resolve reasoning configuration with auto-enable logic.
|
|
15222
|
+
*
|
|
15223
|
+
* Priority: explicit config > auto-enable for reasoning models > undefined
|
|
15224
|
+
* When a model has `features.reasoning: true` and no explicit config is set,
|
|
15225
|
+
* reasoning is automatically enabled at "medium" effort.
|
|
15226
|
+
*/
|
|
15227
|
+
resolveReasoningConfig(spec) {
|
|
15228
|
+
if (this.reasoning !== void 0) return this.reasoning;
|
|
15229
|
+
if (spec?.features?.reasoning) {
|
|
15230
|
+
return { enabled: true, effort: "medium" };
|
|
15231
|
+
}
|
|
15232
|
+
return void 0;
|
|
15233
|
+
}
|
|
15234
|
+
/**
|
|
15235
|
+
* Resolve caching configuration.
|
|
15236
|
+
*
|
|
15237
|
+
* Priority: explicit config > default enabled (preserves Anthropic's existing behavior)
|
|
15238
|
+
* Default is `{ enabled: true }` which means:
|
|
15239
|
+
* - Anthropic: `cache_control` markers are added (existing behavior preserved)
|
|
15240
|
+
* - Gemini: Cache manager is consulted but skips if no explicit config was set
|
|
15241
|
+
* - OpenAI: No-op (server-side automatic)
|
|
15242
|
+
*/
|
|
15243
|
+
resolveCachingConfig() {
|
|
15244
|
+
if (this.caching !== void 0) return this.caching;
|
|
15245
|
+
return { enabled: true };
|
|
15246
|
+
}
|
|
14668
15247
|
/**
|
|
14669
15248
|
* Prepare LLM call options, create tree node, and process beforeLLMCall controller.
|
|
14670
15249
|
* @returns options, node ID, and optional skipWithSynthetic response if controller wants to skip
|
|
14671
15250
|
*/
|
|
14672
15251
|
async prepareLLMCall(iteration) {
|
|
15252
|
+
const spec = this.client.modelRegistry?.getModelSpec?.(this.model);
|
|
15253
|
+
const reasoning = this.resolveReasoningConfig(spec);
|
|
15254
|
+
const caching = this.resolveCachingConfig();
|
|
14673
15255
|
let llmOptions = {
|
|
14674
15256
|
model: this.model,
|
|
14675
15257
|
messages: this.conversation.getMessages(),
|
|
14676
15258
|
temperature: this.temperature,
|
|
14677
15259
|
maxTokens: this.defaultMaxTokens,
|
|
14678
|
-
signal: this.signal
|
|
15260
|
+
signal: this.signal,
|
|
15261
|
+
reasoning,
|
|
15262
|
+
caching
|
|
14679
15263
|
};
|
|
14680
15264
|
const llmNode = this.tree.addLLMCall({
|
|
14681
15265
|
iteration,
|
|
@@ -14745,13 +15329,15 @@ var init_agent = __esm({
|
|
|
14745
15329
|
inputTokens,
|
|
14746
15330
|
outputTokens,
|
|
14747
15331
|
result.usage?.cachedInputTokens ?? 0,
|
|
14748
|
-
result.usage?.cacheCreationInputTokens ?? 0
|
|
15332
|
+
result.usage?.cacheCreationInputTokens ?? 0,
|
|
15333
|
+
result.usage?.reasoningTokens ?? 0
|
|
14749
15334
|
)?.totalCost;
|
|
14750
15335
|
this.tree.completeLLMCall(nodeId, {
|
|
14751
15336
|
response: result.rawResponse,
|
|
14752
15337
|
usage: result.usage,
|
|
14753
15338
|
finishReason: result.finishReason,
|
|
14754
|
-
cost: llmCost
|
|
15339
|
+
cost: llmCost,
|
|
15340
|
+
thinkingContent: result.thinkingContent
|
|
14755
15341
|
});
|
|
14756
15342
|
}
|
|
14757
15343
|
/**
|