llmist 15.12.0 → 15.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.cjs +313 -39
- package/dist/index.cjs.map +1 -1
- package/dist/index.d.cts +1272 -1123
- package/dist/index.d.ts +1272 -1123
- package/dist/index.js +313 -39
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/dist/index.cjs
CHANGED
|
@@ -229,7 +229,8 @@ var init_execution_tree = __esm({
|
|
|
229
229
|
response: llmNode.response,
|
|
230
230
|
usage: llmNode.usage,
|
|
231
231
|
finishReason: llmNode.finishReason,
|
|
232
|
-
cost: llmNode.cost
|
|
232
|
+
cost: llmNode.cost,
|
|
233
|
+
thinkingContent: params.thinkingContent
|
|
233
234
|
});
|
|
234
235
|
}
|
|
235
236
|
/**
|
|
@@ -4529,7 +4530,10 @@ var init_hook_presets = __esm({
|
|
|
4529
4530
|
const costEstimate = modelRegistry.estimateCost(
|
|
4530
4531
|
modelName,
|
|
4531
4532
|
ctx.usage.inputTokens,
|
|
4532
|
-
ctx.usage.outputTokens
|
|
4533
|
+
ctx.usage.outputTokens,
|
|
4534
|
+
ctx.usage.cachedInputTokens ?? 0,
|
|
4535
|
+
ctx.usage.cacheCreationInputTokens ?? 0,
|
|
4536
|
+
ctx.usage.reasoningTokens ?? 0
|
|
4533
4537
|
);
|
|
4534
4538
|
if (costEstimate) {
|
|
4535
4539
|
totalCost += costEstimate.totalCost;
|
|
@@ -5026,10 +5030,10 @@ var init_anthropic_models = __esm({
|
|
|
5026
5030
|
contextWindow: 2e5,
|
|
5027
5031
|
maxOutputTokens: 64e3,
|
|
5028
5032
|
pricing: {
|
|
5029
|
-
input:
|
|
5030
|
-
output:
|
|
5031
|
-
cachedInput: 0.
|
|
5032
|
-
cacheWriteInput: 1
|
|
5033
|
+
input: 1,
|
|
5034
|
+
output: 5,
|
|
5035
|
+
cachedInput: 0.1,
|
|
5036
|
+
cacheWriteInput: 1.25
|
|
5033
5037
|
},
|
|
5034
5038
|
knowledgeCutoff: "2025-02",
|
|
5035
5039
|
features: {
|
|
@@ -5225,10 +5229,10 @@ var init_anthropic_models = __esm({
|
|
|
5225
5229
|
contextWindow: 2e5,
|
|
5226
5230
|
maxOutputTokens: 64e3,
|
|
5227
5231
|
pricing: {
|
|
5228
|
-
input:
|
|
5229
|
-
output:
|
|
5230
|
-
cachedInput: 0.
|
|
5231
|
-
cacheWriteInput: 1
|
|
5232
|
+
input: 1,
|
|
5233
|
+
output: 5,
|
|
5234
|
+
cachedInput: 0.1,
|
|
5235
|
+
cacheWriteInput: 1.25
|
|
5232
5236
|
},
|
|
5233
5237
|
knowledgeCutoff: "2025-02",
|
|
5234
5238
|
features: {
|
|
@@ -5371,10 +5375,15 @@ var init_utils = __esm({
|
|
|
5371
5375
|
});
|
|
5372
5376
|
|
|
5373
5377
|
// src/providers/anthropic.ts
|
|
5378
|
+
/**
 * Translate the provider-agnostic reasoning config into the `thinking`
 * field of an Anthropic request payload.
 *
 * Resolution order:
 *  1. Reasoning absent or not enabled -> `undefined` (no `thinking` field).
 *  2. A finite numeric `budgetTokens` -> used as-is, clamped up to 1024.
 *  3. Otherwise the budget mapped from `effort` (default "medium").
 *
 * @param {{ enabled?: boolean, effort?: string, budgetTokens?: number } | undefined} reasoning
 *   Reasoning settings taken from the call options.
 * @returns {{ type: "enabled", budget_tokens: number } | undefined}
 *   The `thinking` payload, or `undefined` when thinking must not be requested.
 */
function resolveAnthropicThinking(reasoning) {
  if (!reasoning?.enabled) return void 0;
  const { budgetTokens, effort } = reasoning;
  // Check the type rather than truthiness: an explicit budget of 0 must be
  // clamped to the minimum, not silently replaced by the (much larger)
  // effort-based default.
  if (typeof budgetTokens === "number" && Number.isFinite(budgetTokens)) {
    return { type: "enabled", budget_tokens: Math.max(1024, budgetTokens) };
  }
  // An effort name missing from the table falls back to "medium" so the
  // payload never carries `budget_tokens: undefined`.
  const budget = ANTHROPIC_EFFORT_BUDGET[effort ?? "medium"] ?? ANTHROPIC_EFFORT_BUDGET.medium;
  return { type: "enabled", budget_tokens: budget };
}
|
|
5374
5383
|
function createAnthropicProviderFromEnv() {
|
|
5375
5384
|
return createProviderFromEnv("ANTHROPIC_API_KEY", import_sdk.default, AnthropicMessagesProvider);
|
|
5376
5385
|
}
|
|
5377
|
-
var import_sdk, AnthropicMessagesProvider;
|
|
5386
|
+
var import_sdk, ANTHROPIC_EFFORT_BUDGET, AnthropicMessagesProvider;
|
|
5378
5387
|
var init_anthropic = __esm({
|
|
5379
5388
|
"src/providers/anthropic.ts"() {
|
|
5380
5389
|
"use strict";
|
|
@@ -5384,6 +5393,14 @@ var init_anthropic = __esm({
|
|
|
5384
5393
|
init_base_provider();
|
|
5385
5394
|
init_constants2();
|
|
5386
5395
|
init_utils();
|
|
5396
|
+
ANTHROPIC_EFFORT_BUDGET = {
|
|
5397
|
+
none: 1024,
|
|
5398
|
+
// Minimum allowed by Anthropic
|
|
5399
|
+
low: 2048,
|
|
5400
|
+
medium: 8192,
|
|
5401
|
+
high: 16384,
|
|
5402
|
+
maximum: 32768
|
|
5403
|
+
};
|
|
5387
5404
|
AnthropicMessagesProvider = class extends BaseProviderAdapter {
|
|
5388
5405
|
providerId = "anthropic";
|
|
5389
5406
|
supports(descriptor) {
|
|
@@ -5437,15 +5454,18 @@ var init_anthropic = __esm({
|
|
|
5437
5454
|
)
|
|
5438
5455
|
}));
|
|
5439
5456
|
const defaultMaxTokens = spec?.maxOutputTokens ?? ANTHROPIC_DEFAULT_MAX_OUTPUT_TOKENS;
|
|
5457
|
+
const thinking = resolveAnthropicThinking(options.reasoning);
|
|
5458
|
+
const temperature = thinking ? void 0 : options.temperature;
|
|
5440
5459
|
const payload = {
|
|
5441
5460
|
model: descriptor.name,
|
|
5442
5461
|
system,
|
|
5443
5462
|
messages: conversation,
|
|
5444
5463
|
max_tokens: options.maxTokens ?? defaultMaxTokens,
|
|
5445
|
-
temperature
|
|
5464
|
+
temperature,
|
|
5446
5465
|
top_p: options.topP,
|
|
5447
5466
|
stop_sequences: options.stopSequences,
|
|
5448
5467
|
stream: true,
|
|
5468
|
+
...thinking ? { thinking } : {},
|
|
5449
5469
|
...options.extra
|
|
5450
5470
|
};
|
|
5451
5471
|
return payload;
|
|
@@ -5525,8 +5545,39 @@ var init_anthropic = __esm({
|
|
|
5525
5545
|
};
|
|
5526
5546
|
continue;
|
|
5527
5547
|
}
|
|
5528
|
-
if (event.type === "
|
|
5529
|
-
|
|
5548
|
+
if (event.type === "content_block_start") {
|
|
5549
|
+
const block = event.content_block;
|
|
5550
|
+
if (block.type === "thinking") {
|
|
5551
|
+
yield { text: "", thinking: { content: "", type: "thinking" }, rawEvent: event };
|
|
5552
|
+
continue;
|
|
5553
|
+
}
|
|
5554
|
+
if (block.type === "redacted_thinking") {
|
|
5555
|
+
yield { text: "", thinking: { content: "", type: "redacted" }, rawEvent: event };
|
|
5556
|
+
continue;
|
|
5557
|
+
}
|
|
5558
|
+
}
|
|
5559
|
+
if (event.type === "content_block_delta") {
|
|
5560
|
+
const delta = event.delta;
|
|
5561
|
+
if (delta.type === "thinking_delta" && delta.thinking) {
|
|
5562
|
+
yield {
|
|
5563
|
+
text: "",
|
|
5564
|
+
thinking: { content: delta.thinking, type: "thinking" },
|
|
5565
|
+
rawEvent: event
|
|
5566
|
+
};
|
|
5567
|
+
continue;
|
|
5568
|
+
}
|
|
5569
|
+
if (delta.type === "signature_delta" && delta.signature) {
|
|
5570
|
+
yield {
|
|
5571
|
+
text: "",
|
|
5572
|
+
thinking: { content: "", type: "thinking", signature: delta.signature },
|
|
5573
|
+
rawEvent: event
|
|
5574
|
+
};
|
|
5575
|
+
continue;
|
|
5576
|
+
}
|
|
5577
|
+
if (delta.type === "text_delta") {
|
|
5578
|
+
yield { text: delta.text ?? "", rawEvent: event };
|
|
5579
|
+
continue;
|
|
5580
|
+
}
|
|
5530
5581
|
continue;
|
|
5531
5582
|
}
|
|
5532
5583
|
if (event.type === "message_delta") {
|
|
@@ -5835,10 +5886,10 @@ var init_gemini_models = __esm({
|
|
|
5835
5886
|
contextWindow: 1048576,
|
|
5836
5887
|
maxOutputTokens: 65536,
|
|
5837
5888
|
pricing: {
|
|
5838
|
-
input: 0.
|
|
5839
|
-
// $0.
|
|
5889
|
+
input: 0.5,
|
|
5890
|
+
// $0.50 for text/image/video
|
|
5840
5891
|
output: 3,
|
|
5841
|
-
cachedInput: 0.
|
|
5892
|
+
cachedInput: 0.05
|
|
5842
5893
|
},
|
|
5843
5894
|
knowledgeCutoff: "2025-01",
|
|
5844
5895
|
features: {
|
|
@@ -6132,6 +6183,23 @@ var init_gemini_speech_models = __esm({
|
|
|
6132
6183
|
});
|
|
6133
6184
|
|
|
6134
6185
|
// src/providers/gemini.ts
|
|
6186
|
+
/**
 * Translate the provider-agnostic reasoning config into a Gemini
 * `thinkingConfig` fragment that is spread into the request config.
 *
 * Models whose name contains "gemini-3" receive a symbolic `thinkingLevel`;
 * every other model receives a numeric `thinkingBudget`, where an explicit
 * `budgetTokens` takes precedence over the effort mapping.
 *
 * @param {{ enabled?: boolean, effort?: string, budgetTokens?: number } | undefined} reasoning
 *   Reasoning settings taken from the call options.
 * @param {string} modelName - Model identifier from the descriptor.
 * @returns {{ thinkingConfig: { thinkingLevel: string } | { thinkingBudget: number } } | undefined}
 *   Config fragment, or `undefined` when reasoning is absent or not enabled.
 */
function resolveGeminiThinkingConfig(reasoning, modelName) {
  if (!reasoning?.enabled) return void 0;
  const effort = reasoning.effort ?? "medium";
  // Guard the type so a missing model name selects the budget path instead of throwing.
  const isGemini3 = typeof modelName === "string" && modelName.includes("gemini-3");
  if (isGemini3) {
    return {
      thinkingConfig: {
        // Unmapped effort names fall back to "medium" rather than sending `undefined`.
        thinkingLevel: GEMINI3_THINKING_LEVEL[effort] ?? GEMINI3_THINKING_LEVEL.medium
      }
    };
  }
  // `??` (not `||`) keeps a legitimate budget of 0, which is what effort "none" maps to.
  const budget = reasoning.budgetTokens ?? GEMINI25_THINKING_BUDGET[effort] ?? GEMINI25_THINKING_BUDGET.medium;
  return {
    thinkingConfig: {
      thinkingBudget: budget
    }
  };
}
|
|
6135
6203
|
function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
|
|
6136
6204
|
const byteRate = sampleRate * numChannels * bitsPerSample / 8;
|
|
6137
6205
|
const blockAlign = numChannels * bitsPerSample / 8;
|
|
@@ -6160,7 +6228,7 @@ function wrapPcmInWav(pcmData, sampleRate, bitsPerSample, numChannels) {
|
|
|
6160
6228
|
function createGeminiProviderFromEnv() {
|
|
6161
6229
|
return createProviderFromEnv("GEMINI_API_KEY", import_genai.GoogleGenAI, GeminiGenerativeProvider);
|
|
6162
6230
|
}
|
|
6163
|
-
var import_genai, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
|
|
6231
|
+
var import_genai, GEMINI3_THINKING_LEVEL, GEMINI25_THINKING_BUDGET, GEMINI_ROLE_MAP, GeminiGenerativeProvider;
|
|
6164
6232
|
var init_gemini = __esm({
|
|
6165
6233
|
"src/providers/gemini.ts"() {
|
|
6166
6234
|
"use strict";
|
|
@@ -6172,6 +6240,20 @@ var init_gemini = __esm({
|
|
|
6172
6240
|
init_gemini_models();
|
|
6173
6241
|
init_gemini_speech_models();
|
|
6174
6242
|
init_utils();
|
|
6243
|
+
GEMINI3_THINKING_LEVEL = {
|
|
6244
|
+
none: "minimal",
|
|
6245
|
+
low: "low",
|
|
6246
|
+
medium: "medium",
|
|
6247
|
+
high: "high",
|
|
6248
|
+
maximum: "high"
|
|
6249
|
+
};
|
|
6250
|
+
GEMINI25_THINKING_BUDGET = {
|
|
6251
|
+
none: 0,
|
|
6252
|
+
low: 2048,
|
|
6253
|
+
medium: 8192,
|
|
6254
|
+
high: 16384,
|
|
6255
|
+
maximum: 24576
|
|
6256
|
+
};
|
|
6175
6257
|
GEMINI_ROLE_MAP = {
|
|
6176
6258
|
system: "user",
|
|
6177
6259
|
user: "user",
|
|
@@ -6321,6 +6403,7 @@ var init_gemini = __esm({
|
|
|
6321
6403
|
buildApiRequest(options, descriptor, _spec, messages) {
|
|
6322
6404
|
const contents = this.convertMessagesToContents(messages);
|
|
6323
6405
|
const generationConfig = this.buildGenerationConfig(options);
|
|
6406
|
+
const thinkingConfig = resolveGeminiThinkingConfig(options.reasoning, descriptor.name);
|
|
6324
6407
|
const config = {
|
|
6325
6408
|
// Note: systemInstruction removed - it doesn't work with countTokens()
|
|
6326
6409
|
// System messages are now included in contents as user+model exchanges
|
|
@@ -6331,6 +6414,7 @@ var init_gemini = __esm({
|
|
|
6331
6414
|
mode: import_genai.FunctionCallingConfigMode.NONE
|
|
6332
6415
|
}
|
|
6333
6416
|
},
|
|
6417
|
+
...thinkingConfig ?? {},
|
|
6334
6418
|
...options.extra
|
|
6335
6419
|
};
|
|
6336
6420
|
return {
|
|
@@ -6468,7 +6552,18 @@ var init_gemini = __esm({
|
|
|
6468
6552
|
async *normalizeProviderStream(iterable) {
|
|
6469
6553
|
const stream2 = iterable;
|
|
6470
6554
|
for await (const chunk of stream2) {
|
|
6471
|
-
const text3 = this.
|
|
6555
|
+
const { text: text3, thinkingText, thinkingSignature } = this.extractTextAndThinking(chunk);
|
|
6556
|
+
if (thinkingText) {
|
|
6557
|
+
yield {
|
|
6558
|
+
text: "",
|
|
6559
|
+
thinking: {
|
|
6560
|
+
content: thinkingText,
|
|
6561
|
+
type: "thinking",
|
|
6562
|
+
signature: thinkingSignature
|
|
6563
|
+
},
|
|
6564
|
+
rawEvent: chunk
|
|
6565
|
+
};
|
|
6566
|
+
}
|
|
6472
6567
|
if (text3) {
|
|
6473
6568
|
yield { text: text3, rawEvent: chunk };
|
|
6474
6569
|
}
|
|
@@ -6479,11 +6574,30 @@ var init_gemini = __esm({
|
|
|
6479
6574
|
}
|
|
6480
6575
|
}
|
|
6481
6576
|
}
|
|
6482
|
-
|
|
6577
|
+
/**
|
|
6578
|
+
* Extract both regular text and thinking text from a chunk.
|
|
6579
|
+
* Gemini marks thinking parts with `thought: true`.
|
|
6580
|
+
*/
|
|
6581
|
+
extractTextAndThinking(chunk) {
|
|
6483
6582
|
if (!chunk?.candidates) {
|
|
6484
|
-
return "";
|
|
6583
|
+
return { text: "", thinkingText: "" };
|
|
6584
|
+
}
|
|
6585
|
+
let text3 = "";
|
|
6586
|
+
let thinkingText = "";
|
|
6587
|
+
let thinkingSignature;
|
|
6588
|
+
for (const candidate of chunk.candidates) {
|
|
6589
|
+
for (const part of candidate.content?.parts ?? []) {
|
|
6590
|
+
if (part.thought) {
|
|
6591
|
+
thinkingText += part.text ?? "";
|
|
6592
|
+
if (part.thoughtSignature) {
|
|
6593
|
+
thinkingSignature = part.thoughtSignature;
|
|
6594
|
+
}
|
|
6595
|
+
} else {
|
|
6596
|
+
text3 += part.text ?? "";
|
|
6597
|
+
}
|
|
6598
|
+
}
|
|
6485
6599
|
}
|
|
6486
|
-
return
|
|
6600
|
+
return { text: text3, thinkingText, thinkingSignature };
|
|
6487
6601
|
}
|
|
6488
6602
|
extractFinishReason(chunk) {
|
|
6489
6603
|
const candidate = chunk?.candidates?.find((item) => item.finishReason);
|
|
@@ -6499,7 +6613,9 @@ var init_gemini = __esm({
|
|
|
6499
6613
|
outputTokens: usageMetadata.candidatesTokenCount ?? 0,
|
|
6500
6614
|
totalTokens: usageMetadata.totalTokenCount ?? 0,
|
|
6501
6615
|
// Gemini returns cached token count in cachedContentTokenCount
|
|
6502
|
-
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0
|
|
6616
|
+
cachedInputTokens: usageMetadata.cachedContentTokenCount ?? 0,
|
|
6617
|
+
// Gemini returns thinking tokens in thoughtsTokenCount
|
|
6618
|
+
reasoningTokens: usageMetadata.thoughtsTokenCount
|
|
6503
6619
|
};
|
|
6504
6620
|
}
|
|
6505
6621
|
/**
|
|
@@ -7520,11 +7636,13 @@ var init_openai_compatible_provider = __esm({
|
|
|
7520
7636
|
yield { text: text3, rawEvent: chunk };
|
|
7521
7637
|
}
|
|
7522
7638
|
const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
|
|
7639
|
+
const usageDetails = chunk.usage;
|
|
7523
7640
|
const usage = chunk.usage ? {
|
|
7524
7641
|
inputTokens: chunk.usage.prompt_tokens,
|
|
7525
7642
|
outputTokens: chunk.usage.completion_tokens,
|
|
7526
7643
|
totalTokens: chunk.usage.total_tokens,
|
|
7527
|
-
cachedInputTokens: 0
|
|
7644
|
+
cachedInputTokens: 0,
|
|
7645
|
+
reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
|
|
7528
7646
|
} : void 0;
|
|
7529
7647
|
if (finishReason || usage) {
|
|
7530
7648
|
yield { text: "", finishReason, usage, rawEvent: chunk };
|
|
@@ -7600,6 +7718,21 @@ var init_huggingface = __esm({
|
|
|
7600
7718
|
getModelSpecs() {
|
|
7601
7719
|
return HUGGINGFACE_MODELS;
|
|
7602
7720
|
}
|
|
7721
|
+
/**
|
|
7722
|
+
* Override buildApiRequest to inject DeepSeek-specific thinking parameters.
|
|
7723
|
+
* DeepSeek models use `extra_body: { thinking: { type: "enabled" } }` for reasoning.
|
|
7724
|
+
*/
|
|
7725
|
+
buildApiRequest(options, descriptor, spec, messages) {
|
|
7726
|
+
const request = super.buildApiRequest(options, descriptor, spec, messages);
|
|
7727
|
+
if (options.reasoning?.enabled && descriptor.name.toLowerCase().includes("deepseek")) {
|
|
7728
|
+
const requestObj = request;
|
|
7729
|
+
requestObj.extra_body = {
|
|
7730
|
+
...requestObj.extra_body,
|
|
7731
|
+
thinking: { type: "enabled" }
|
|
7732
|
+
};
|
|
7733
|
+
}
|
|
7734
|
+
return request;
|
|
7735
|
+
}
|
|
7603
7736
|
/**
|
|
7604
7737
|
* Enhance error messages with HuggingFace-specific guidance.
|
|
7605
7738
|
*/
|
|
@@ -8485,7 +8618,7 @@ function sanitizeExtra(extra, allowTemperature) {
|
|
|
8485
8618
|
function createOpenAIProviderFromEnv() {
|
|
8486
8619
|
return createProviderFromEnv("OPENAI_API_KEY", import_openai3.default, OpenAIChatProvider);
|
|
8487
8620
|
}
|
|
8488
|
-
var import_openai3, import_tiktoken, ROLE_MAP2, OpenAIChatProvider;
|
|
8621
|
+
var import_openai3, import_tiktoken, ROLE_MAP2, OPENAI_EFFORT_MAP, OpenAIChatProvider;
|
|
8489
8622
|
var init_openai = __esm({
|
|
8490
8623
|
"src/providers/openai.ts"() {
|
|
8491
8624
|
"use strict";
|
|
@@ -8503,6 +8636,13 @@ var init_openai = __esm({
|
|
|
8503
8636
|
user: "user",
|
|
8504
8637
|
assistant: "assistant"
|
|
8505
8638
|
};
|
|
8639
|
+
OPENAI_EFFORT_MAP = {
|
|
8640
|
+
none: "none",
|
|
8641
|
+
low: "low",
|
|
8642
|
+
medium: "medium",
|
|
8643
|
+
high: "high",
|
|
8644
|
+
maximum: "xhigh"
|
|
8645
|
+
};
|
|
8506
8646
|
OpenAIChatProvider = class extends BaseProviderAdapter {
|
|
8507
8647
|
providerId = "openai";
|
|
8508
8648
|
supports(descriptor) {
|
|
@@ -8593,10 +8733,15 @@ var init_openai = __esm({
|
|
|
8593
8733
|
};
|
|
8594
8734
|
}
|
|
8595
8735
|
buildApiRequest(options, descriptor, spec, messages) {
|
|
8596
|
-
const { maxTokens, temperature, topP, stopSequences, extra } = options;
|
|
8736
|
+
const { maxTokens, temperature, topP, stopSequences, extra, reasoning } = options;
|
|
8597
8737
|
const supportsTemperature = spec?.metadata?.supportsTemperature !== false;
|
|
8598
8738
|
const shouldIncludeTemperature = typeof temperature === "number" && supportsTemperature;
|
|
8599
8739
|
const sanitizedExtra = sanitizeExtra(extra, shouldIncludeTemperature);
|
|
8740
|
+
const reasoningParam = reasoning?.enabled !== void 0 ? {
|
|
8741
|
+
reasoning: {
|
|
8742
|
+
effort: OPENAI_EFFORT_MAP[reasoning.effort ?? "medium"]
|
|
8743
|
+
}
|
|
8744
|
+
} : {};
|
|
8600
8745
|
return {
|
|
8601
8746
|
model: descriptor.name,
|
|
8602
8747
|
messages: messages.map((message) => this.convertToOpenAIMessage(message)),
|
|
@@ -8607,6 +8752,7 @@ var init_openai = __esm({
|
|
|
8607
8752
|
stop: stopSequences,
|
|
8608
8753
|
stream: true,
|
|
8609
8754
|
stream_options: { include_usage: true },
|
|
8755
|
+
...reasoningParam,
|
|
8610
8756
|
...sanitizedExtra ?? {},
|
|
8611
8757
|
...shouldIncludeTemperature ? { temperature } : {}
|
|
8612
8758
|
};
|
|
@@ -8695,11 +8841,13 @@ var init_openai = __esm({
|
|
|
8695
8841
|
yield { text: text3, rawEvent: chunk };
|
|
8696
8842
|
}
|
|
8697
8843
|
const finishReason = chunk.choices.find((choice) => choice.finish_reason)?.finish_reason;
|
|
8844
|
+
const usageDetails = chunk.usage;
|
|
8698
8845
|
const usage = chunk.usage ? {
|
|
8699
8846
|
inputTokens: chunk.usage.prompt_tokens,
|
|
8700
8847
|
outputTokens: chunk.usage.completion_tokens,
|
|
8701
8848
|
totalTokens: chunk.usage.total_tokens,
|
|
8702
|
-
cachedInputTokens:
|
|
8849
|
+
cachedInputTokens: usageDetails?.prompt_tokens_details?.cached_tokens ?? 0,
|
|
8850
|
+
reasoningTokens: usageDetails?.completion_tokens_details?.reasoning_tokens
|
|
8703
8851
|
} : void 0;
|
|
8704
8852
|
if (finishReason || usage) {
|
|
8705
8853
|
yield { text: "", finishReason, usage, rawEvent: chunk };
|
|
@@ -9234,7 +9382,7 @@ function createOpenRouterProviderFromEnv() {
|
|
|
9234
9382
|
});
|
|
9235
9383
|
return new OpenRouterProvider(client, config);
|
|
9236
9384
|
}
|
|
9237
|
-
var import_openai4, OpenRouterProvider;
|
|
9385
|
+
var import_openai4, OPENROUTER_EFFORT_MAP, OpenRouterProvider;
|
|
9238
9386
|
var init_openrouter = __esm({
|
|
9239
9387
|
"src/providers/openrouter.ts"() {
|
|
9240
9388
|
"use strict";
|
|
@@ -9242,6 +9390,13 @@ var init_openrouter = __esm({
|
|
|
9242
9390
|
init_openai_compatible_provider();
|
|
9243
9391
|
init_openrouter_models();
|
|
9244
9392
|
init_utils();
|
|
9393
|
+
OPENROUTER_EFFORT_MAP = {
|
|
9394
|
+
none: "none",
|
|
9395
|
+
low: "low",
|
|
9396
|
+
medium: "medium",
|
|
9397
|
+
high: "high",
|
|
9398
|
+
maximum: "xhigh"
|
|
9399
|
+
};
|
|
9245
9400
|
OpenRouterProvider = class extends OpenAICompatibleProvider {
|
|
9246
9401
|
providerId = "openrouter";
|
|
9247
9402
|
providerAlias = "or";
|
|
@@ -9251,6 +9406,20 @@ var init_openrouter = __esm({
|
|
|
9251
9406
|
getModelSpecs() {
|
|
9252
9407
|
return OPENROUTER_MODELS;
|
|
9253
9408
|
}
|
|
9409
|
+
/**
|
|
9410
|
+
* Override buildApiRequest to inject reasoning parameters.
|
|
9411
|
+
* OpenRouter normalizes reasoning into the standard OpenAI format.
|
|
9412
|
+
*/
|
|
9413
|
+
buildApiRequest(options, descriptor, spec, messages) {
|
|
9414
|
+
const request = super.buildApiRequest(options, descriptor, spec, messages);
|
|
9415
|
+
if (options.reasoning?.enabled !== void 0) {
|
|
9416
|
+
const requestObj = request;
|
|
9417
|
+
requestObj.reasoning = {
|
|
9418
|
+
effort: OPENROUTER_EFFORT_MAP[options.reasoning.effort ?? "medium"]
|
|
9419
|
+
};
|
|
9420
|
+
}
|
|
9421
|
+
return request;
|
|
9422
|
+
}
|
|
9254
9423
|
/**
|
|
9255
9424
|
* Get custom headers for OpenRouter analytics.
|
|
9256
9425
|
*/
|
|
@@ -9488,9 +9657,10 @@ var init_model_registry = __esm({
|
|
|
9488
9657
|
* @param outputTokens - Number of output tokens
|
|
9489
9658
|
* @param cachedInputTokens - Number of cached input tokens (subset of inputTokens)
|
|
9490
9659
|
* @param cacheCreationInputTokens - Number of cache creation tokens (subset of inputTokens, Anthropic only)
|
|
9660
|
+
* @param reasoningTokens - Number of reasoning/thinking tokens (subset of outputTokens)
|
|
9491
9661
|
* @returns CostEstimate if model found, undefined otherwise
|
|
9492
9662
|
*/
|
|
9493
|
-
estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
|
|
9663
|
+
estimateCost(modelId, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
|
|
9494
9664
|
const spec = this.getModelSpec(modelId);
|
|
9495
9665
|
if (!spec) return void 0;
|
|
9496
9666
|
const cachedRate = spec.pricing.cachedInput ?? spec.pricing.input;
|
|
@@ -9500,13 +9670,18 @@ var init_model_registry = __esm({
|
|
|
9500
9670
|
const cachedInputCost = cachedInputTokens / 1e6 * cachedRate;
|
|
9501
9671
|
const cacheCreationCost = cacheCreationInputTokens / 1e6 * cacheWriteRate;
|
|
9502
9672
|
const inputCost = uncachedInputCost + cachedInputCost + cacheCreationCost;
|
|
9503
|
-
const
|
|
9673
|
+
const reasoningRate = spec.pricing.reasoningOutput ?? spec.pricing.output;
|
|
9674
|
+
const nonReasoningOutputTokens = outputTokens - reasoningTokens;
|
|
9675
|
+
const reasoningCost = reasoningTokens / 1e6 * reasoningRate;
|
|
9676
|
+
const nonReasoningOutputCost = nonReasoningOutputTokens / 1e6 * spec.pricing.output;
|
|
9677
|
+
const outputCost = nonReasoningOutputCost + reasoningCost;
|
|
9504
9678
|
const totalCost = inputCost + outputCost;
|
|
9505
9679
|
return {
|
|
9506
9680
|
inputCost,
|
|
9507
9681
|
cachedInputCost,
|
|
9508
9682
|
cacheCreationCost,
|
|
9509
9683
|
outputCost,
|
|
9684
|
+
reasoningCost,
|
|
9510
9685
|
totalCost,
|
|
9511
9686
|
currency: "USD"
|
|
9512
9687
|
};
|
|
@@ -10221,6 +10396,7 @@ var init_builder = __esm({
|
|
|
10221
10396
|
// Shared retry config from parent for consistent backoff behavior
|
|
10222
10397
|
// When a gadget calls withParentContext(ctx), this config is shared
|
|
10223
10398
|
sharedRetryConfig;
|
|
10399
|
+
reasoningConfig;
|
|
10224
10400
|
constructor(client) {
|
|
10225
10401
|
this.client = client;
|
|
10226
10402
|
}
|
|
@@ -10806,6 +10982,60 @@ var init_builder = __esm({
|
|
|
10806
10982
|
this.signal = signal;
|
|
10807
10983
|
return this;
|
|
10808
10984
|
}
|
|
10985
|
+
/**
|
|
10986
|
+
* Enable reasoning/thinking mode for reasoning-capable models.
|
|
10987
|
+
*
|
|
10988
|
+
* Can be called with:
|
|
10989
|
+
* - No args: enables reasoning at "medium" effort
|
|
10990
|
+
* - A string effort level: `withReasoning("high")`
|
|
10991
|
+
* - A full config object: `withReasoning({ enabled: true, budgetTokens: 10000 })`
|
|
10992
|
+
*
|
|
10993
|
+
* @param config - Optional effort level or full reasoning config
|
|
10994
|
+
* @returns This builder for chaining
|
|
10995
|
+
*
|
|
10996
|
+
* @example
|
|
10997
|
+
* ```typescript
|
|
10998
|
+
* // Simple — medium effort
|
|
10999
|
+
* LLMist.createAgent()
|
|
11000
|
+
* .withModel("o3")
|
|
11001
|
+
* .withReasoning()
|
|
11002
|
+
* .ask("Solve this logic puzzle...");
|
|
11003
|
+
*
|
|
11004
|
+
* // Explicit effort level
|
|
11005
|
+
* LLMist.createAgent()
|
|
11006
|
+
* .withModel("anthropic:claude-4-opus")
|
|
11007
|
+
* .withReasoning("high")
|
|
11008
|
+
* .ask("Analyze this complex problem");
|
|
11009
|
+
*
|
|
11010
|
+
* // Full config with explicit token budget
|
|
11011
|
+
* LLMist.createAgent()
|
|
11012
|
+
* .withModel("anthropic:claude-4-opus")
|
|
11013
|
+
* .withReasoning({ enabled: true, budgetTokens: 16000 })
|
|
11014
|
+
* .ask("Step through this proof");
|
|
11015
|
+
* ```
|
|
11016
|
+
*/
|
|
11017
|
+
withReasoning(config) {
|
|
11018
|
+
if (typeof config === "string") {
|
|
11019
|
+
this.reasoningConfig = { enabled: true, effort: config };
|
|
11020
|
+
} else if (config === void 0) {
|
|
11021
|
+
this.reasoningConfig = { enabled: true, effort: "medium" };
|
|
11022
|
+
} else {
|
|
11023
|
+
this.reasoningConfig = config;
|
|
11024
|
+
}
|
|
11025
|
+
return this;
|
|
11026
|
+
}
|
|
11027
|
+
/**
|
|
11028
|
+
* Explicitly disable reasoning for this agent, even if the model supports it.
|
|
11029
|
+
*
|
|
11030
|
+
* By default, reasoning is auto-enabled at "medium" effort for models with
|
|
11031
|
+
* `features.reasoning: true`. Use this to opt out.
|
|
11032
|
+
*
|
|
11033
|
+
* @returns This builder for chaining
|
|
11034
|
+
*/
|
|
11035
|
+
withoutReasoning() {
|
|
11036
|
+
this.reasoningConfig = { enabled: false };
|
|
11037
|
+
return this;
|
|
11038
|
+
}
|
|
10809
11039
|
/**
|
|
10810
11040
|
* Set subagent configuration overrides.
|
|
10811
11041
|
*
|
|
@@ -11091,6 +11321,7 @@ ${endPrefix}`
|
|
|
11091
11321
|
retryConfig: this.retryConfig,
|
|
11092
11322
|
rateLimitConfig: this.rateLimitConfig,
|
|
11093
11323
|
signal: this.signal,
|
|
11324
|
+
reasoning: this.reasoningConfig,
|
|
11094
11325
|
subagentConfig: this.subagentConfig,
|
|
11095
11326
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
11096
11327
|
parentTree: this.parentContext?.tree,
|
|
@@ -11278,6 +11509,7 @@ ${endPrefix}`
|
|
|
11278
11509
|
retryConfig: this.retryConfig,
|
|
11279
11510
|
rateLimitConfig: this.rateLimitConfig,
|
|
11280
11511
|
signal: this.signal,
|
|
11512
|
+
reasoning: this.reasoningConfig,
|
|
11281
11513
|
subagentConfig: this.subagentConfig,
|
|
11282
11514
|
// Tree context for shared tree model (subagents share parent's tree)
|
|
11283
11515
|
parentTree: this.parentContext?.tree,
|
|
@@ -11732,6 +11964,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11732
11964
|
let outputTokens = 0;
|
|
11733
11965
|
let cachedInputTokens = 0;
|
|
11734
11966
|
let cacheCreationInputTokens = 0;
|
|
11967
|
+
let reasoningTokens = 0;
|
|
11735
11968
|
const messages = [
|
|
11736
11969
|
...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
|
|
11737
11970
|
{ role: "user", content: prompt }
|
|
@@ -11748,6 +11981,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11748
11981
|
outputTokens = chunk.usage.outputTokens;
|
|
11749
11982
|
cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
|
|
11750
11983
|
cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
|
|
11984
|
+
reasoningTokens = chunk.usage.reasoningTokens ?? 0;
|
|
11751
11985
|
}
|
|
11752
11986
|
}
|
|
11753
11987
|
this.reportCostFromUsage(
|
|
@@ -11755,7 +11989,8 @@ var init_cost_reporting_client = __esm({
|
|
|
11755
11989
|
inputTokens,
|
|
11756
11990
|
outputTokens,
|
|
11757
11991
|
cachedInputTokens,
|
|
11758
|
-
cacheCreationInputTokens
|
|
11992
|
+
cacheCreationInputTokens,
|
|
11993
|
+
reasoningTokens
|
|
11759
11994
|
);
|
|
11760
11995
|
return result;
|
|
11761
11996
|
}
|
|
@@ -11774,6 +12009,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11774
12009
|
let outputTokens = 0;
|
|
11775
12010
|
let cachedInputTokens = 0;
|
|
11776
12011
|
let cacheCreationInputTokens = 0;
|
|
12012
|
+
let reasoningTokens = 0;
|
|
11777
12013
|
const messages = [
|
|
11778
12014
|
...options?.systemPrompt ? [{ role: "system", content: options.systemPrompt }] : [],
|
|
11779
12015
|
{ role: "user", content: prompt }
|
|
@@ -11793,6 +12029,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11793
12029
|
outputTokens = chunk.usage.outputTokens;
|
|
11794
12030
|
cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
|
|
11795
12031
|
cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
|
|
12032
|
+
reasoningTokens = chunk.usage.reasoningTokens ?? 0;
|
|
11796
12033
|
}
|
|
11797
12034
|
}
|
|
11798
12035
|
} finally {
|
|
@@ -11801,7 +12038,8 @@ var init_cost_reporting_client = __esm({
|
|
|
11801
12038
|
inputTokens,
|
|
11802
12039
|
outputTokens,
|
|
11803
12040
|
cachedInputTokens,
|
|
11804
|
-
cacheCreationInputTokens
|
|
12041
|
+
cacheCreationInputTokens,
|
|
12042
|
+
reasoningTokens
|
|
11805
12043
|
);
|
|
11806
12044
|
}
|
|
11807
12045
|
}
|
|
@@ -11828,6 +12066,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11828
12066
|
let outputTokens = 0;
|
|
11829
12067
|
let cachedInputTokens = 0;
|
|
11830
12068
|
let cacheCreationInputTokens = 0;
|
|
12069
|
+
let reasoningTokens = 0;
|
|
11831
12070
|
try {
|
|
11832
12071
|
for await (const chunk of innerStream) {
|
|
11833
12072
|
if (chunk.usage) {
|
|
@@ -11835,6 +12074,7 @@ var init_cost_reporting_client = __esm({
|
|
|
11835
12074
|
outputTokens = chunk.usage.outputTokens;
|
|
11836
12075
|
cachedInputTokens = chunk.usage.cachedInputTokens ?? 0;
|
|
11837
12076
|
cacheCreationInputTokens = chunk.usage.cacheCreationInputTokens ?? 0;
|
|
12077
|
+
reasoningTokens = chunk.usage.reasoningTokens ?? 0;
|
|
11838
12078
|
}
|
|
11839
12079
|
yield chunk;
|
|
11840
12080
|
}
|
|
@@ -11845,7 +12085,8 @@ var init_cost_reporting_client = __esm({
|
|
|
11845
12085
|
inputTokens,
|
|
11846
12086
|
outputTokens,
|
|
11847
12087
|
cachedInputTokens,
|
|
11848
|
-
cacheCreationInputTokens
|
|
12088
|
+
cacheCreationInputTokens,
|
|
12089
|
+
reasoningTokens
|
|
11849
12090
|
);
|
|
11850
12091
|
}
|
|
11851
12092
|
}
|
|
@@ -11855,14 +12096,15 @@ var init_cost_reporting_client = __esm({
|
|
|
11855
12096
|
/**
|
|
11856
12097
|
* Calculates and reports cost from token usage.
|
|
11857
12098
|
*/
|
|
11858
|
-
reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0) {
|
|
12099
|
+
reportCostFromUsage(model, inputTokens, outputTokens, cachedInputTokens = 0, cacheCreationInputTokens = 0, reasoningTokens = 0) {
|
|
11859
12100
|
if (inputTokens === 0 && outputTokens === 0) return;
|
|
11860
12101
|
const estimate = this.client.modelRegistry.estimateCost(
|
|
11861
12102
|
model,
|
|
11862
12103
|
inputTokens,
|
|
11863
12104
|
outputTokens,
|
|
11864
12105
|
cachedInputTokens,
|
|
11865
|
-
cacheCreationInputTokens
|
|
12106
|
+
cacheCreationInputTokens,
|
|
12107
|
+
reasoningTokens
|
|
11866
12108
|
);
|
|
11867
12109
|
if (estimate && estimate.totalCost > 0) {
|
|
11868
12110
|
this.reportCost(estimate.totalCost);
|
|
@@ -12954,9 +13196,18 @@ var init_stream_processor = __esm({
|
|
|
12954
13196
|
let usage;
|
|
12955
13197
|
let didExecuteGadgets = false;
|
|
12956
13198
|
let shouldBreakLoop = false;
|
|
13199
|
+
let thinkingContent = "";
|
|
12957
13200
|
for await (const chunk of stream2) {
|
|
12958
13201
|
if (chunk.finishReason) finishReason = chunk.finishReason;
|
|
12959
13202
|
if (chunk.usage) usage = chunk.usage;
|
|
13203
|
+
if (chunk.thinking?.content) {
|
|
13204
|
+
thinkingContent += chunk.thinking.content;
|
|
13205
|
+
yield {
|
|
13206
|
+
type: "thinking",
|
|
13207
|
+
content: chunk.thinking.content,
|
|
13208
|
+
thinkingType: chunk.thinking.type
|
|
13209
|
+
};
|
|
13210
|
+
}
|
|
12960
13211
|
let processedChunk = "";
|
|
12961
13212
|
if (chunk.text) {
|
|
12962
13213
|
processedChunk = chunk.text;
|
|
@@ -13070,7 +13321,8 @@ var init_stream_processor = __esm({
|
|
|
13070
13321
|
finishReason,
|
|
13071
13322
|
usage,
|
|
13072
13323
|
rawResponse: this.responseText,
|
|
13073
|
-
finalMessage
|
|
13324
|
+
finalMessage,
|
|
13325
|
+
thinkingContent: thinkingContent || void 0
|
|
13074
13326
|
};
|
|
13075
13327
|
yield completionEvent;
|
|
13076
13328
|
}
|
|
@@ -13872,6 +14124,7 @@ var init_agent = __esm({
|
|
|
13872
14124
|
mediaStore;
|
|
13873
14125
|
// Cancellation
|
|
13874
14126
|
signal;
|
|
14127
|
+
reasoning;
|
|
13875
14128
|
// Retry configuration
|
|
13876
14129
|
retryConfig;
|
|
13877
14130
|
// Rate limit tracker for proactive throttling
|
|
@@ -13963,6 +14216,7 @@ var init_agent = __esm({
|
|
|
13963
14216
|
);
|
|
13964
14217
|
}
|
|
13965
14218
|
this.signal = options.signal;
|
|
14219
|
+
this.reasoning = options.reasoning;
|
|
13966
14220
|
this.retryConfig = options.sharedRetryConfig ?? resolveRetryConfig(options.retryConfig);
|
|
13967
14221
|
if (options.sharedRateLimitTracker) {
|
|
13968
14222
|
this.rateLimitTracker = options.sharedRateLimitTracker;
|
|
@@ -14365,6 +14619,7 @@ var init_agent = __esm({
|
|
|
14365
14619
|
usage: result.usage,
|
|
14366
14620
|
rawResponse: result.rawResponse,
|
|
14367
14621
|
finalMessage: result.finalMessage,
|
|
14622
|
+
thinkingContent: result.thinkingContent,
|
|
14368
14623
|
logger: this.logger,
|
|
14369
14624
|
subagentContext
|
|
14370
14625
|
};
|
|
@@ -14665,17 +14920,34 @@ var init_agent = __esm({
|
|
|
14665
14920
|
});
|
|
14666
14921
|
return { type: "compaction", event: compactionEvent };
|
|
14667
14922
|
}
|
|
14923
|
+
/**
|
|
14924
|
+
* Resolve reasoning configuration with auto-enable logic.
|
|
14925
|
+
*
|
|
14926
|
+
* Priority: explicit config > auto-enable for reasoning models > undefined
|
|
14927
|
+
* When a model has `features.reasoning: true` and no explicit config is set,
|
|
14928
|
+
* reasoning is automatically enabled at "medium" effort.
|
|
14929
|
+
*/
|
|
14930
|
+
resolveReasoningConfig(spec) {
|
|
14931
|
+
if (this.reasoning !== void 0) return this.reasoning;
|
|
14932
|
+
if (spec?.features?.reasoning) {
|
|
14933
|
+
return { enabled: true, effort: "medium" };
|
|
14934
|
+
}
|
|
14935
|
+
return void 0;
|
|
14936
|
+
}
|
|
14668
14937
|
/**
|
|
14669
14938
|
* Prepare LLM call options, create tree node, and process beforeLLMCall controller.
|
|
14670
14939
|
* @returns options, node ID, and optional skipWithSynthetic response if controller wants to skip
|
|
14671
14940
|
*/
|
|
14672
14941
|
async prepareLLMCall(iteration) {
|
|
14942
|
+
const spec = this.client.modelRegistry?.getModelSpec?.(this.model);
|
|
14943
|
+
const reasoning = this.resolveReasoningConfig(spec);
|
|
14673
14944
|
let llmOptions = {
|
|
14674
14945
|
model: this.model,
|
|
14675
14946
|
messages: this.conversation.getMessages(),
|
|
14676
14947
|
temperature: this.temperature,
|
|
14677
14948
|
maxTokens: this.defaultMaxTokens,
|
|
14678
|
-
signal: this.signal
|
|
14949
|
+
signal: this.signal,
|
|
14950
|
+
reasoning
|
|
14679
14951
|
};
|
|
14680
14952
|
const llmNode = this.tree.addLLMCall({
|
|
14681
14953
|
iteration,
|
|
@@ -14745,13 +15017,15 @@ var init_agent = __esm({
|
|
|
14745
15017
|
inputTokens,
|
|
14746
15018
|
outputTokens,
|
|
14747
15019
|
result.usage?.cachedInputTokens ?? 0,
|
|
14748
|
-
result.usage?.cacheCreationInputTokens ?? 0
|
|
15020
|
+
result.usage?.cacheCreationInputTokens ?? 0,
|
|
15021
|
+
result.usage?.reasoningTokens ?? 0
|
|
14749
15022
|
)?.totalCost;
|
|
14750
15023
|
this.tree.completeLLMCall(nodeId, {
|
|
14751
15024
|
response: result.rawResponse,
|
|
14752
15025
|
usage: result.usage,
|
|
14753
15026
|
finishReason: result.finishReason,
|
|
14754
|
-
cost: llmCost
|
|
15027
|
+
cost: llmCost,
|
|
15028
|
+
thinkingContent: result.thinkingContent
|
|
14755
15029
|
});
|
|
14756
15030
|
}
|
|
14757
15031
|
/**
|