claudish 3.0.5 → 3.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/index.js +927 -52
- package/package.json +1 -1
- package/recommended-models.json +133 -0
package/dist/index.js
CHANGED
|
@@ -31121,9 +31121,11 @@ Tokens: ${result.usage.input} input, ${result.usage.output} output`;
|
|
|
31121
31121
|
## Quick Picks
|
|
31122
31122
|
`;
|
|
31123
31123
|
output += "- **Fast & cheap**: `x-ai/grok-code-fast-1` ($0.85/1M)\n";
|
|
31124
|
-
output += "- **
|
|
31124
|
+
output += "- **Most advanced**: `openai/gpt-5.2` ($5.00/1M)\n";
|
|
31125
31125
|
output += "- **Large context**: `google/gemini-3-pro-preview` (1M tokens)\n";
|
|
31126
|
-
output += "- **
|
|
31126
|
+
output += "- **Reasoning**: `moonshotai/kimi-k2-thinking` ($2.50/1M)\n";
|
|
31127
|
+
output += "- **Budget**: `minimax/minimax-m2.1` ($0.60/1M)\n";
|
|
31128
|
+
output += "- **Code specialist**: `deepseek/deepseek-v3.2` ($1.00/1M)\n";
|
|
31127
31129
|
return { content: [{ type: "text", text: output }] };
|
|
31128
31130
|
});
|
|
31129
31131
|
server.tool("search_models", "Search all OpenRouter models by name, provider, or capability", {
|
|
@@ -34237,7 +34239,7 @@ __export(exports_config, {
|
|
|
34237
34239
|
DEFAULT_PORT_RANGE: () => DEFAULT_PORT_RANGE,
|
|
34238
34240
|
DEFAULT_MODEL: () => DEFAULT_MODEL
|
|
34239
34241
|
});
|
|
34240
|
-
var DEFAULT_MODEL = "
|
|
34242
|
+
var DEFAULT_MODEL = "openai/gpt-5.2", DEFAULT_PORT_RANGE, MODEL_INFO, ENV, OPENROUTER_API_URL = "https://openrouter.ai/api/v1/chat/completions", OPENROUTER_HEADERS;
|
|
34241
34243
|
var init_config = __esm(() => {
|
|
34242
34244
|
DEFAULT_PORT_RANGE = { start: 3000, end: 9000 };
|
|
34243
34245
|
MODEL_INFO = {
|
|
@@ -34247,41 +34249,47 @@ var init_config = __esm(() => {
|
|
|
34247
34249
|
priority: 1,
|
|
34248
34250
|
provider: "xAI"
|
|
34249
34251
|
},
|
|
34250
|
-
"minimax/minimax-m2": {
|
|
34251
|
-
name: "Compact high-efficiency",
|
|
34252
|
-
description: "Compact high-efficiency",
|
|
34252
|
+
"minimax/minimax-m2.1": {
|
|
34253
|
+
name: "Compact high-efficiency v2.1",
|
|
34254
|
+
description: "Compact high-efficiency v2.1",
|
|
34253
34255
|
priority: 2,
|
|
34254
34256
|
provider: "MiniMax"
|
|
34255
34257
|
},
|
|
34256
|
-
"
|
|
34257
|
-
name: "
|
|
34258
|
-
description: "
|
|
34259
|
-
priority:
|
|
34260
|
-
provider: "
|
|
34258
|
+
"z-ai/glm-4.7": {
|
|
34259
|
+
name: "GLM 4.7 balanced model",
|
|
34260
|
+
description: "GLM 4.7 balanced model",
|
|
34261
|
+
priority: 3,
|
|
34262
|
+
provider: "Z.AI"
|
|
34261
34263
|
},
|
|
34262
|
-
"
|
|
34263
|
-
name: "
|
|
34264
|
-
description: "
|
|
34264
|
+
"google/gemini-3-pro-preview": {
|
|
34265
|
+
name: "Gemini 3 Pro preview",
|
|
34266
|
+
description: "Gemini 3 Pro preview (1M context)",
|
|
34265
34267
|
priority: 4,
|
|
34266
|
-
provider: "
|
|
34268
|
+
provider: "Google"
|
|
34267
34269
|
},
|
|
34268
|
-
"openai/gpt-5.
|
|
34269
|
-
name: "
|
|
34270
|
-
description: "
|
|
34270
|
+
"openai/gpt-5.2": {
|
|
34271
|
+
name: "GPT-5.2 most advanced",
|
|
34272
|
+
description: "GPT-5.2 most advanced reasoning",
|
|
34271
34273
|
priority: 5,
|
|
34272
34274
|
provider: "OpenAI"
|
|
34273
34275
|
},
|
|
34274
|
-
"
|
|
34275
|
-
name: "
|
|
34276
|
-
description: "
|
|
34276
|
+
"moonshotai/kimi-k2-thinking": {
|
|
34277
|
+
name: "Kimi K2 with reasoning",
|
|
34278
|
+
description: "Kimi K2 with extended thinking",
|
|
34279
|
+
priority: 6,
|
|
34280
|
+
provider: "MoonShot"
|
|
34281
|
+
},
|
|
34282
|
+
"deepseek/deepseek-v3.2": {
|
|
34283
|
+
name: "DeepSeek V3.2 coding",
|
|
34284
|
+
description: "DeepSeek V3.2 coding specialist",
|
|
34277
34285
|
priority: 7,
|
|
34278
|
-
provider: "
|
|
34286
|
+
provider: "DeepSeek"
|
|
34279
34287
|
},
|
|
34280
|
-
"
|
|
34281
|
-
name: "
|
|
34282
|
-
description: "
|
|
34288
|
+
"qwen/qwen3-vl-235b-a22b-thinking": {
|
|
34289
|
+
name: "Qwen3 VL thinking",
|
|
34290
|
+
description: "Qwen3 VL 235B with reasoning",
|
|
34283
34291
|
priority: 8,
|
|
34284
|
-
provider: "
|
|
34292
|
+
provider: "Alibaba"
|
|
34285
34293
|
},
|
|
34286
34294
|
custom: {
|
|
34287
34295
|
name: "Custom Model",
|
|
@@ -34309,6 +34317,10 @@ var init_config = __esm(() => {
|
|
|
34309
34317
|
OLLAMA_HOST: "OLLAMA_HOST",
|
|
34310
34318
|
LMSTUDIO_BASE_URL: "LMSTUDIO_BASE_URL",
|
|
34311
34319
|
VLLM_BASE_URL: "VLLM_BASE_URL",
|
|
34320
|
+
GEMINI_API_KEY: "GEMINI_API_KEY",
|
|
34321
|
+
GEMINI_BASE_URL: "GEMINI_BASE_URL",
|
|
34322
|
+
OPENAI_API_KEY: "OPENAI_API_KEY",
|
|
34323
|
+
OPENAI_BASE_URL: "OPENAI_BASE_URL",
|
|
34312
34324
|
CLAUDISH_SUMMARIZE_TOOLS: "CLAUDISH_SUMMARIZE_TOOLS"
|
|
34313
34325
|
};
|
|
34314
34326
|
OPENROUTER_HEADERS = {
|
|
@@ -34402,7 +34414,7 @@ function createTempSettingsFile(modelDisplay, port) {
|
|
|
34402
34414
|
const DIM2 = "\\033[2m";
|
|
34403
34415
|
const RESET2 = "\\033[0m";
|
|
34404
34416
|
const BOLD2 = "\\033[1m";
|
|
34405
|
-
statusCommand = `JSON=$(cat) && DIR=$(basename "$(pwd)") && [ \${#DIR} -gt 15 ] && DIR="\${DIR:0:12}..." || true && CTX=100 && COST="0" && if [ -f "${tokenFilePath}" ]; then TOKENS=$(cat "${tokenFilePath}" 2>/dev/null) &&
|
|
34417
|
+
statusCommand = `JSON=$(cat) && DIR=$(basename "$(pwd)") && [ \${#DIR} -gt 15 ] && DIR="\${DIR:0:12}..." || true && CTX=100 && COST="0" && if [ -f "${tokenFilePath}" ]; then TOKENS=$(cat "${tokenFilePath}" 2>/dev/null) && REAL_CTX=$(echo "$TOKENS" | grep -o '"context_left_percent":[0-9]*' | grep -o '[0-9]*') && if [ ! -z "$REAL_CTX" ]; then CTX="$REAL_CTX"; fi; fi && COST=$(echo "$JSON" | grep -o '"total_cost_usd":[0-9.]*' | cut -d: -f2) && [ -z "$COST" ] && COST="0" || true && if [ "$CLAUDISH_IS_LOCAL" = "true" ]; then COST_DISPLAY="LOCAL"; else COST_DISPLAY=$(printf "\\$%.3f" "$COST"); fi && printf "${CYAN2}${BOLD2}%s${RESET2} ${DIM2}•${RESET2} ${YELLOW2}%s${RESET2} ${DIM2}•${RESET2} ${GREEN2}%s${RESET2} ${DIM2}•${RESET2} ${MAGENTA}%s%%${RESET2}\\n" "$DIR" "$CLAUDISH_ACTIVE_MODEL_NAME" "$COST_DISPLAY" "$CTX"`;
|
|
34406
34418
|
}
|
|
34407
34419
|
const settings = {
|
|
34408
34420
|
statusLine: {
|
|
@@ -34540,12 +34552,13 @@ var OPENROUTER_MODELS;
|
|
|
34540
34552
|
var init_types4 = __esm(() => {
|
|
34541
34553
|
OPENROUTER_MODELS = [
|
|
34542
34554
|
"x-ai/grok-code-fast-1",
|
|
34543
|
-
"minimax/minimax-m2",
|
|
34544
|
-
"
|
|
34545
|
-
"
|
|
34546
|
-
"openai/gpt-5.
|
|
34547
|
-
"
|
|
34548
|
-
"
|
|
34555
|
+
"minimax/minimax-m2.1",
|
|
34556
|
+
"z-ai/glm-4.7",
|
|
34557
|
+
"google/gemini-3-pro-preview",
|
|
34558
|
+
"openai/gpt-5.2",
|
|
34559
|
+
"moonshotai/kimi-k2-thinking",
|
|
34560
|
+
"deepseek/deepseek-v3.2",
|
|
34561
|
+
"qwen/qwen3-vl-235b-a22b-thinking",
|
|
34549
34562
|
"custom"
|
|
34550
34563
|
];
|
|
34551
34564
|
});
|
|
@@ -35236,18 +35249,17 @@ async function updateModelsFromOpenRouter() {
|
|
|
35236
35249
|
console.error("\uD83D\uDD04 Updating model recommendations from OpenRouter...");
|
|
35237
35250
|
try {
|
|
35238
35251
|
const topWeeklyProgrammingModels = [
|
|
35239
|
-
"google/gemini-3-pro-preview",
|
|
35240
|
-
"openai/gpt-5.1-codex",
|
|
35241
35252
|
"x-ai/grok-code-fast-1",
|
|
35253
|
+
"minimax/minimax-m2.1",
|
|
35254
|
+
"z-ai/glm-4.7",
|
|
35255
|
+
"google/gemini-3-pro-preview",
|
|
35256
|
+
"openai/gpt-5.2",
|
|
35257
|
+
"moonshotai/kimi-k2-thinking",
|
|
35258
|
+
"deepseek/deepseek-v3.2",
|
|
35259
|
+
"qwen/qwen3-vl-235b-a22b-thinking",
|
|
35242
35260
|
"anthropic/claude-sonnet-4.5",
|
|
35243
|
-
"google/gemini-2.5-flash",
|
|
35244
|
-
"minimax/minimax-m2",
|
|
35245
35261
|
"anthropic/claude-sonnet-4",
|
|
35246
|
-
"
|
|
35247
|
-
"anthropic/claude-haiku-4.5",
|
|
35248
|
-
"openai/gpt-5",
|
|
35249
|
-
"qwen/qwen3-vl-235b-a22b-instruct",
|
|
35250
|
-
"openrouter/polaris-alpha"
|
|
35262
|
+
"anthropic/claude-haiku-4.5"
|
|
35251
35263
|
];
|
|
35252
35264
|
const apiResponse = await fetch("https://openrouter.ai/api/v1/models");
|
|
35253
35265
|
if (!apiResponse.ok) {
|
|
@@ -61057,6 +61069,754 @@ var init_local_provider_handler = __esm(() => {
|
|
|
61057
61069
|
});
|
|
61058
61070
|
});
|
|
61059
61071
|
|
|
61072
|
+
// src/handlers/shared/remote-provider-types.ts
|
|
61073
|
+
/**
 * Resolve the per-1M-token pricing entry for a model of a direct provider.
 *
 * Lookup order inside the provider's pricing table:
 *   1. an exact (own-property) key match on `modelName`;
 *   2. the LONGEST table key that is a prefix of `modelName`, so a dated
 *      variant such as "gpt-4o-mini-2024-07-18" resolves to "gpt-4o-mini"
 *      and not to the shorter "gpt-4o" that merely appears earlier;
 *   3. the table's `default` entry.
 *
 * Providers other than gemini/google/openai/oai get a fixed fallback price.
 *
 * @param {string} provider - Provider id ("gemini", "google", "openai", "oai").
 * @param {string} modelName - Model name as used by the provider.
 * @returns {{ inputCostPer1M: number, outputCostPer1M: number }} Pricing entry.
 */
function getModelPricing(provider, modelName) {
  let pricingTable;
  switch (provider) {
    case "gemini":
    case "google":
      pricingTable = GEMINI_PRICING;
      break;
    case "openai":
    case "oai":
      pricingTable = OPENAI_PRICING;
      break;
    default:
      return { inputCostPer1M: 1, outputCostPer1M: 4 };
  }

  // A missing / non-string model name cannot match anything; fall through to
  // the default entry instead of throwing on `.startsWith`.
  const name = typeof modelName === "string" ? modelName : "";

  // Own-property check so names like "constructor" or "toString" never
  // resolve to members inherited from Object.prototype.
  if (Object.hasOwn(pricingTable, name)) {
    return pricingTable[name];
  }

  let bestKey = "";
  for (const key of Object.keys(pricingTable)) {
    if (key !== "default" && key.length > bestKey.length && name.startsWith(key)) {
      bestKey = key;
    }
  }
  return bestKey ? pricingTable[bestKey] : pricingTable["default"];
}
|
|
61097
|
+
// Pricing tables keyed by model name. Every entry carries the input and
// output cost per 1M tokens; `default` is the fallback entry of each table.
var GEMINI_PRICING, OPENAI_PRICING;
var init_remote_provider_types = __esm(() => {
  // Builds one pricing row; keeps the tables below compact.
  const row = (inputCostPer1M, outputCostPer1M) => ({ inputCostPer1M, outputCostPer1M });

  // Key order is kept exactly as before because lookups may iterate it.
  GEMINI_PRICING = {
    "gemini-2.5-flash": row(0.15, 0.6),
    "gemini-2.5-flash-preview-05-20": row(0.15, 0.6),
    "gemini-2.5-pro": row(1.25, 10),
    "gemini-2.5-pro-preview-05-06": row(1.25, 10),
    "gemini-3-pro-preview": row(2.5, 10),
    "gemini-3.0-flash": row(0.1, 0.4),
    "gemini-2.0-flash": row(0.1, 0.4),
    "gemini-2.0-flash-thinking": row(0.1, 0.4),
    default: row(0.5, 2)
  };

  OPENAI_PRICING = {
    "gpt-5": row(2, 8),
    "gpt-5.2": row(2.5, 10),
    "gpt-5-turbo": row(1.5, 6),
    "gpt-5.1-codex": row(3, 12),
    "gpt-4o": row(2.5, 10),
    "gpt-4o-mini": row(0.15, 0.6),
    "gpt-4o-audio": row(2.5, 10),
    o1: row(15, 60),
    "o1-mini": row(3, 12),
    "o1-preview": row(15, 60),
    o3: row(15, 60),
    "o3-mini": row(3, 12),
    "gpt-4-turbo": row(10, 30),
    "gpt-4-turbo-preview": row(10, 30),
    "gpt-4": row(30, 60),
    "gpt-3.5-turbo": row(0.5, 1.5),
    default: row(2, 8)
  };
});
|
|
61130
|
+
|
|
61131
|
+
// src/handlers/gemini-handler.ts
|
|
61132
|
+
import { writeFileSync as writeFileSync10, mkdirSync as mkdirSync7 } from "node:fs";
|
|
61133
|
+
import { homedir as homedir4 } from "node:os";
|
|
61134
|
+
import { join as join10 } from "node:path";
|
|
61135
|
+
|
|
61136
|
+
/**
 * Handler that translates Claude-format requests into Gemini API payloads,
 * calls the provider endpoint, and re-emits the streamed reply as
 * Claude-style server-sent events (message_start, content_block_*,
 * message_delta, message_stop).
 *
 * It also keeps running token / cost totals and writes them to
 * `~/.claudish/tokens-<port>.json`.
 */
class GeminiHandler {
  provider;
  modelName;
  apiKey;
  port;
  adapterManager;
  middlewareManager;
  sessionTotalCost = 0;
  sessionInputTokens = 0;
  sessionOutputTokens = 0;
  contextWindow = 1e6;
  // tool_use id -> function name. Filled while converting assistant history so
  // that later tool_result blocks can be returned under the matching name.
  toolCallMap = new Map;

  /**
   * @param {object} provider - Provider description; `baseUrl` and `apiPath` are read from it.
   * @param {string} modelName - Gemini model name (without the "gemini/" prefix).
   * @param {string} apiKey - Sent as the `x-goog-api-key` header.
   * @param {number} port - Used only to name the token file.
   */
  constructor(provider, modelName, apiKey, port) {
    this.provider = provider;
    this.modelName = modelName;
    this.apiKey = apiKey;
    this.port = port;
    this.adapterManager = new AdapterManager(`gemini/${modelName}`);
    this.middlewareManager = new MiddlewareManager;
    this.middlewareManager.register(new GeminiThoughtSignatureMiddleware);
    // Initialization is fire-and-forget; failures are only logged.
    this.middlewareManager.initialize().catch((err) => log(`[GeminiHandler:${modelName}] Middleware init error: ${err}`));
  }

  /** @returns {{ inputCostPer1M: number, outputCostPer1M: number }} Pricing for this model. */
  getPricing() {
    return getModelPricing("gemini", this.modelName);
  }

  /** @returns {string} `baseUrl` + `apiPath` with `{model}` replaced by the model name. */
  getApiEndpoint() {
    const baseUrl = this.provider.baseUrl;
    const apiPath = this.provider.apiPath.replace("{model}", this.modelName);
    return `${baseUrl}${apiPath}`;
  }

  /**
   * Write the current usage snapshot to `~/.claudish/tokens-<port>.json`.
   * Errors are logged and never thrown.
   *
   * @param {number} input - Input token count to report.
   * @param {number} output - Output token count to report.
   */
  writeTokenFile(input, output) {
    try {
      const total = input + output;
      // Remaining context as a whole percentage, clamped to [0, 100].
      const leftPct = this.contextWindow > 0 ? Math.max(0, Math.min(100, Math.round((this.contextWindow - total) / this.contextWindow * 100))) : 100;
      const data = {
        input_tokens: input,
        output_tokens: output,
        total_tokens: total,
        total_cost: this.sessionTotalCost,
        context_window: this.contextWindow,
        context_left_percent: leftPct,
        updated_at: Date.now()
      };
      const claudishDir = join10(homedir4(), ".claudish");
      mkdirSync7(claudishDir, { recursive: true });
      writeFileSync10(join10(claudishDir, `tokens-${this.port}.json`), JSON.stringify(data), "utf-8");
    } catch (e) {
      log(`[GeminiHandler] Error writing token file: ${e}`);
    }
  }

  /**
   * Fold one response's usage into the session totals and persist them.
   * Input tokens replace the stored value; output tokens accumulate.
   *
   * @param {number} inputTokens
   * @param {number} outputTokens
   */
  updateTokenTracking(inputTokens, outputTokens) {
    this.sessionInputTokens = inputTokens;
    this.sessionOutputTokens += outputTokens;
    const pricing = this.getPricing();
    const cost = inputTokens / 1e6 * pricing.inputCostPer1M + outputTokens / 1e6 * pricing.outputCostPer1M;
    this.sessionTotalCost += cost;
    this.writeTokenFile(inputTokens, this.sessionOutputTokens);
  }

  /**
   * Convert Claude messages into Gemini `contents`. Assistant messages become
   * role "model"; messages that convert to no parts are dropped.
   *
   * @param {object} claudeRequest
   * @returns {Array<{ role: string, parts: object[] }>}
   */
  convertToGeminiMessages(claudeRequest) {
    const messages = [];
    if (claudeRequest.messages) {
      for (const msg of claudeRequest.messages) {
        if (msg.role === "user") {
          const parts = this.convertUserMessageParts(msg);
          if (parts.length > 0) {
            messages.push({ role: "user", parts });
          }
        } else if (msg.role === "assistant") {
          const parts = this.convertAssistantMessageParts(msg);
          if (parts.length > 0) {
            messages.push({ role: "model", parts });
          }
        }
      }
    }
    return messages;
  }

  /**
   * Convert one user message into Gemini parts (text, inline image data,
   * function responses). A tool_result whose tool_use id is unknown is
   * skipped with a warning.
   *
   * @param {object} msg
   * @returns {object[]}
   */
  convertUserMessageParts(msg) {
    const parts = [];
    if (Array.isArray(msg.content)) {
      for (const block of msg.content) {
        if (block.type === "text") {
          parts.push({ text: block.text });
        } else if (block.type === "image") {
          parts.push({
            inlineData: {
              mimeType: block.source.media_type,
              data: block.source.data
            }
          });
        } else if (block.type === "tool_result") {
          const functionName = this.toolCallMap.get(block.tool_use_id);
          if (!functionName) {
            log(`[GeminiHandler:${this.modelName}] Warning: No function name found for tool_use_id ${block.tool_use_id}`);
            continue;
          }
          parts.push({
            functionResponse: {
              name: functionName,
              response: {
                content: typeof block.content === "string" ? block.content : JSON.stringify(block.content)
              }
            }
          });
        }
      }
    } else if (typeof msg.content === "string") {
      parts.push({ text: msg.content });
    }
    return parts;
  }

  /**
   * Convert one assistant message into Gemini parts and remember every
   * tool_use id -> name pair in `toolCallMap`.
   *
   * @param {object} msg
   * @returns {object[]}
   */
  convertAssistantMessageParts(msg) {
    const parts = [];
    if (Array.isArray(msg.content)) {
      for (const block of msg.content) {
        if (block.type === "text") {
          parts.push({ text: block.text });
        } else if (block.type === "tool_use") {
          this.toolCallMap.set(block.id, block.name);
          parts.push({
            functionCall: {
              name: block.name,
              args: block.input
            }
          });
        }
      }
    } else if (typeof msg.content === "string") {
      parts.push({ text: msg.content });
    }
    return parts;
  }

  /**
   * @param {object} claudeRequest
   * @returns {Array<{ functionDeclarations: object[] }> | undefined}
   *   Gemini tool declarations, or undefined when the request has no tools.
   */
  convertToGeminiTools(claudeRequest) {
    if (!claudeRequest.tools || claudeRequest.tools.length === 0) {
      return;
    }
    const functionDeclarations = claudeRequest.tools.map((tool) => ({
      name: tool.name,
      description: tool.description,
      parameters: this.convertJsonSchemaToGemini(tool.input_schema)
    }));
    return [{ functionDeclarations }];
  }

  /**
   * Copy the supported parts of a tool's JSON schema (type, properties,
   * required); anything else is dropped.
   *
   * @param {object} [schema]
   * @returns {object}
   */
  convertJsonSchemaToGemini(schema) {
    if (!schema)
      return {};
    const geminiSchema = {
      type: schema.type || "object"
    };
    if (schema.properties) {
      geminiSchema.properties = {};
      for (const [key, prop] of Object.entries(schema.properties)) {
        geminiSchema.properties[key] = this.convertPropertyToGemini(prop);
      }
    }
    if (schema.required) {
      geminiSchema.required = schema.required;
    }
    return geminiSchema;
  }

  /**
   * Recursively copy one schema property (type, description, enum, items,
   * nested properties). A missing type defaults to "string".
   *
   * @param {object} prop
   * @returns {object}
   */
  convertPropertyToGemini(prop) {
    const result = {
      type: prop.type || "string"
    };
    if (prop.description)
      result.description = prop.description;
    if (prop.enum)
      result.enum = prop.enum;
    if (prop.items)
      result.items = this.convertPropertyToGemini(prop.items);
    if (prop.properties) {
      result.properties = {};
      for (const [k, v] of Object.entries(prop.properties)) {
        result.properties[k] = this.convertPropertyToGemini(v);
      }
    }
    return result;
  }

  /**
   * Build the Gemini request body: contents, generation config, optional
   * system instruction, tools and thinking configuration.
   *
   * @param {object} claudeRequest
   * @returns {object}
   */
  buildGeminiPayload(claudeRequest) {
    const contents = this.convertToGeminiMessages(claudeRequest);
    const tools = this.convertToGeminiTools(claudeRequest);
    const payload = {
      contents,
      generationConfig: {
        temperature: claudeRequest.temperature ?? 1,
        maxOutputTokens: claudeRequest.max_tokens
      }
    };
    if (claudeRequest.system) {
      let systemContent = Array.isArray(claudeRequest.system) ? claudeRequest.system.map((i) => i.text || i).join("\n\n") : claudeRequest.system;
      systemContent = filterIdentity(systemContent);
      systemContent += `

CRITICAL INSTRUCTION FOR OUTPUT FORMAT:
1. Keep ALL internal reasoning INTERNAL. Never output your thought process as visible text.
2. Do NOT start responses with phrases like "Wait, I'm...", "Let me think...", "Okay, so..."
3. Only output: final responses, tool calls, and code. Nothing else.`;
      payload.systemInstruction = { parts: [{ text: systemContent }] };
    }
    if (tools) {
      payload.tools = tools;
    }
    // Only translate the thinking settings when a usable numeric budget is
    // present. A `thinking` object without one (e.g. a disabled setting) used
    // to produce `thinkingBudget: NaN` or force a "low" thinking level.
    const budgetTokens = claudeRequest.thinking?.budget_tokens;
    if (typeof budgetTokens === "number" && Number.isFinite(budgetTokens)) {
      if (this.modelName.includes("gemini-3")) {
        payload.generationConfig.thinkingConfig = {
          thinkingLevel: budgetTokens >= 16000 ? "high" : "low"
        };
      } else {
        const MAX_GEMINI_BUDGET = 24576;
        payload.generationConfig.thinkingConfig = {
          thinkingBudget: Math.min(budgetTokens, MAX_GEMINI_BUDGET)
        };
      }
    }
    return payload;
  }

  /**
   * Re-emit a streamed Gemini response as Claude-style SSE events.
   *
   * @param {object} c - Request context; only `c.body(stream, init)` is used.
   * @param {Response} response - Upstream response whose body is an SSE stream.
   * @param {object} _claudeRequest - Unused.
   * @returns {*} Whatever `c.body` returns for the produced stream.
   */
  handleStreamingResponse(c, response, _claudeRequest) {
    let isClosed = false;
    let ping2 = null;
    const encoder = new TextEncoder;
    const decoder = new TextDecoder;
    const streamMetadata = new Map;
    const adapter = this.adapterManager.getAdapter();
    if (typeof adapter.reset === "function")
      adapter.reset();
    return c.body(new ReadableStream({
      start: async (controller) => {
        const send = (e, d) => {
          if (!isClosed) {
            controller.enqueue(encoder.encode(`event: ${e}\ndata: ${JSON.stringify(d)}\n\n`));
          }
        };
        const msgId = `msg_${Date.now()}_${Math.random().toString(36).slice(2)}`;
        let usage = null;
        let finalized = false;
        let textStarted = false;
        let textIdx = -1;
        let thinkingStarted = false;
        let thinkingIdx = -1;
        let curIdx = 0;
        const tools = new Map;
        let lastActivity = Date.now();
        let accumulatedText = "";
        send("message_start", {
          type: "message_start",
          message: {
            id: msgId,
            type: "message",
            role: "assistant",
            content: [],
            model: `gemini/${this.modelName}`,
            stop_reason: null,
            stop_sequence: null,
            usage: { input_tokens: 100, output_tokens: 1 }
          }
        });
        send("ping", { type: "ping" });
        // Keep-alive: ping whenever no part has arrived for over a second.
        ping2 = setInterval(() => {
          if (!isClosed && Date.now() - lastActivity > 1000) {
            send("ping", { type: "ping" });
          }
        }, 1000);

        // Runs at most once: closes open blocks, records usage, emits the
        // terminal events and closes the stream.
        const finalize = async (reason, err) => {
          if (finalized)
            return;
          finalized = true;
          try {
            if (thinkingStarted) {
              send("content_block_stop", { type: "content_block_stop", index: thinkingIdx });
            }
            if (textStarted) {
              send("content_block_stop", { type: "content_block_stop", index: textIdx });
            }
            for (const t of Array.from(tools.values())) {
              if (t.started && !t.closed) {
                send("content_block_stop", { type: "content_block_stop", index: t.blockIndex });
                t.closed = true;
              }
            }
            // A failing middleware hook must not prevent the terminal events.
            try {
              await this.middlewareManager.afterStreamComplete(`gemini/${this.modelName}`, streamMetadata);
            } catch (hookErr) {
              log(`[GeminiHandler] afterStreamComplete error: ${hookErr}`);
            }
            if (usage) {
              log(`[GeminiHandler] Usage: prompt=${usage.promptTokenCount || 0}, completion=${usage.candidatesTokenCount || 0}`);
              this.updateTokenTracking(usage.promptTokenCount || 0, usage.candidatesTokenCount || 0);
            }
            if (reason === "error") {
              log(`[GeminiHandler] Stream error: ${err}`);
              send("error", { type: "error", error: { type: "api_error", message: err } });
            } else {
              const hasToolCalls = tools.size > 0;
              send("message_delta", {
                type: "message_delta",
                delta: { stop_reason: hasToolCalls ? "tool_use" : "end_turn", stop_sequence: null },
                usage: { output_tokens: usage?.candidatesTokenCount || 0 }
              });
              send("message_stop", { type: "message_stop" });
            }
          } catch (emitErr) {
            log(`[GeminiHandler] Finalize error: ${emitErr}`);
          } finally {
            // Because `finalized` is already set, a throw above could never be
            // retried; always stop the ping timer and close the stream here.
            if (!isClosed) {
              isClosed = true;
              if (ping2)
                clearInterval(ping2);
              try {
                controller.enqueue(encoder.encode("data: [DONE]\n\n\n"));
              } catch (e) {
                // Best effort: the consumer may already be gone.
              }
              try {
                controller.close();
              } catch (e) {
                log(`[GeminiHandler] Stream close error: ${e}`);
              }
            }
          }
        };

        try {
          const reader = response.body.getReader();
          let buffer = "";
          while (true) {
            const { done, value } = await reader.read();
            if (done)
              break;
            buffer += decoder.decode(value, { stream: true });
            const lines = buffer.split("\n");
            // The last element may be an incomplete line; keep it for the next read.
            buffer = lines.pop() || "";
            for (const line of lines) {
              if (!line.trim() || !line.startsWith("data: "))
                continue;
              const dataStr = line.slice(6);
              if (dataStr === "[DONE]") {
                await finalize("done");
                return;
              }
              try {
                const chunk = JSON.parse(dataStr);
                if (chunk.usageMetadata) {
                  usage = chunk.usageMetadata;
                }
                const candidate = chunk.candidates?.[0];
                if (candidate?.content?.parts) {
                  for (const part of candidate.content.parts) {
                    lastActivity = Date.now();
                    // A thought part is flagged with `thought: true` and
                    // carries its content in `text`; string-valued `thought`
                    // / `thoughtText` are still accepted as before.
                    const isThoughtPart = part.thought === true;
                    const thinkingContent = isThoughtPart ? part.text : part.thought || part.thoughtText;
                    if (thinkingContent) {
                      if (!thinkingStarted) {
                        thinkingIdx = curIdx++;
                        send("content_block_start", {
                          type: "content_block_start",
                          index: thinkingIdx,
                          content_block: { type: "thinking", thinking: "" }
                        });
                        thinkingStarted = true;
                      }
                      send("content_block_delta", {
                        type: "content_block_delta",
                        index: thinkingIdx,
                        delta: { type: "thinking_delta", thinking: thinkingContent }
                      });
                    }
                    // Thought text must not be repeated as visible output.
                    if (part.text && !isThoughtPart) {
                      if (thinkingStarted) {
                        send("content_block_stop", { type: "content_block_stop", index: thinkingIdx });
                        thinkingStarted = false;
                      }
                      const res = adapter.processTextContent(part.text, accumulatedText);
                      accumulatedText += res.cleanedText || "";
                      if (res.cleanedText) {
                        if (!textStarted) {
                          textIdx = curIdx++;
                          send("content_block_start", {
                            type: "content_block_start",
                            index: textIdx,
                            content_block: { type: "text", text: "" }
                          });
                          textStarted = true;
                        }
                        send("content_block_delta", {
                          type: "content_block_delta",
                          index: textIdx,
                          delta: { type: "text_delta", text: res.cleanedText }
                        });
                      }
                    }
                    if (part.functionCall) {
                      if (thinkingStarted) {
                        send("content_block_stop", { type: "content_block_stop", index: thinkingIdx });
                        thinkingStarted = false;
                      }
                      if (textStarted) {
                        send("content_block_stop", { type: "content_block_stop", index: textIdx });
                        textStarted = false;
                      }
                      const toolIdx = tools.size;
                      const toolId = `tool_${Date.now()}_${toolIdx}`;
                      const t = {
                        id: toolId,
                        name: part.functionCall.name,
                        blockIndex: curIdx++,
                        started: true,
                        closed: false,
                        arguments: JSON.stringify(part.functionCall.args || {})
                      };
                      tools.set(toolIdx, t);
                      // The call arrives complete, so the block is opened,
                      // filled with one JSON delta and closed right away.
                      send("content_block_start", {
                        type: "content_block_start",
                        index: t.blockIndex,
                        content_block: { type: "tool_use", id: t.id, name: t.name }
                      });
                      send("content_block_delta", {
                        type: "content_block_delta",
                        index: t.blockIndex,
                        delta: { type: "input_json_delta", partial_json: t.arguments }
                      });
                      send("content_block_stop", { type: "content_block_stop", index: t.blockIndex });
                      t.closed = true;
                    }
                  }
                }
                if (candidate?.finishReason) {
                  if (candidate.finishReason === "STOP" || candidate.finishReason === "MAX_TOKENS") {
                    await finalize("done");
                    return;
                  }
                }
              } catch (e) {
                log(`[GeminiHandler] Parse error: ${e}`);
              }
            }
          }
          await finalize("unexpected");
        } catch (e) {
          await finalize("error", String(e));
        }
      },
      cancel() {
        isClosed = true;
        if (ping2)
          clearInterval(ping2);
      }
    }), {
      headers: {
        "Content-Type": "text/event-stream",
        "Cache-Control": "no-cache",
        Connection: "keep-alive"
      }
    });
  }

  /**
   * Handle one request: transform the payload, call the Gemini endpoint and
   * stream the translated response. A non-OK upstream status is returned as
   * `{ error }` JSON with the same status code.
   *
   * @param {object} c - Request context (`json`, `header`, `body` are used).
   * @param {object} payload - Incoming request payload.
   */
  async handle(c, payload) {
    const { claudeRequest, droppedParams } = transformOpenAIToClaude(payload);
    const systemPromptLength = typeof claudeRequest.system === "string" ? claudeRequest.system.length : 0;
    logStructured("Gemini Request", {
      targetModel: `gemini/${this.modelName}`,
      originalModel: payload.model,
      messageCount: claudeRequest.messages?.length || 0,
      toolCount: claudeRequest.tools?.length || 0,
      systemPromptLength,
      maxTokens: claudeRequest.max_tokens
    });
    const geminiPayload = this.buildGeminiPayload(claudeRequest);
    await this.middlewareManager.beforeRequest({
      modelId: `gemini/${this.modelName}`,
      messages: geminiPayload.contents,
      tools: claudeRequest.tools || [],
      stream: true
    });
    const endpoint = this.getApiEndpoint();
    log(`[GeminiHandler] Calling API: ${endpoint}`);
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        "x-goog-api-key": this.apiKey
      },
      body: JSON.stringify(geminiPayload)
    });
    log(`[GeminiHandler] Response status: ${response.status}`);
    if (!response.ok) {
      const errorText = await response.text();
      log(`[GeminiHandler] Error: ${errorText}`);
      return c.json({ error: errorText }, response.status);
    }
    if (droppedParams.length > 0) {
      c.header("X-Dropped-Params", droppedParams.join(", "));
    }
    return this.handleStreamingResponse(c, response, claudeRequest);
  }

  /** No resources to release. */
  async shutdown() {}
}
|
|
61625
|
+
// Bundler-generated initialiser for src/handlers/gemini-handler.ts: runs the
// initialisers of the modules this handler depends on, in the listed order.
var init_gemini_handler = __esm(() => {
  init_adapter_manager();
  init_middleware();
  init_transform();
  init_logger();
  init_openai_compat();
  init_remote_provider_types();
});
|
|
61633
|
+
|
|
61634
|
+
// src/handlers/openai-handler.ts
|
|
61635
|
+
import { writeFileSync as writeFileSync11, mkdirSync as mkdirSync8 } from "node:fs";
|
|
61636
|
+
import { homedir as homedir5 } from "node:os";
|
|
61637
|
+
import { join as join11 } from "node:path";
|
|
61638
|
+
|
|
61639
|
+
/**
 * Request handler that sends Claude-style requests to an OpenAI-compatible
 * chat-completions endpoint and streams the answer back.
 *
 * Besides proxying, it keeps per-instance token/cost counters and mirrors
 * them to `~/.claudish/tokens-<port>.json` after every tracked response.
 */
class OpenAIHandler {
  provider;
  modelName;
  apiKey;
  port;
  adapterManager;
  middlewareManager;
  // Running cost across all requests handled by this instance.
  sessionTotalCost = 0;
  // Input tokens of the most recent request (overwritten, not accumulated).
  sessionInputTokens = 0;
  // Output tokens accumulated across requests.
  sessionOutputTokens = 0;
  contextWindow = 128000;

  /**
   * @param provider - provider descriptor; baseUrl and apiPath are read here.
   * @param modelName - model id without the provider prefix.
   * @param apiKey - sent as a Bearer token on every request.
   * @param port - proxy port; only used to name the token file.
   */
  constructor(provider, modelName, apiKey, port) {
    this.provider = provider;
    this.modelName = modelName;
    this.apiKey = apiKey;
    this.port = port;
    this.adapterManager = new AdapterManager(`openai/${modelName}`);
    this.middlewareManager = new MiddlewareManager;
    // NOTE(review): a Gemini-named middleware is registered on the OpenAI
    // handler — confirm this is intended and not a copy/paste leftover.
    this.middlewareManager.register(new GeminiThoughtSignatureMiddleware);
    // Initialisation is fire-and-forget; failures are logged, not thrown.
    this.middlewareManager.initialize().catch((err) => log(`[OpenAIHandler:${modelName}] Middleware init error: ${err}`));
    this.setContextWindow();
  }

  /** Pick a context-window size from substrings of the lower-cased model name. */
  setContextWindow() {
    const model = this.modelName.toLowerCase();
    if (model.includes("gpt-4o") || model.includes("gpt-4-turbo")) {
      this.contextWindow = 128000;
    } else if (model.includes("gpt-5")) {
      this.contextWindow = 256000;
    } else if (model.includes("o1") || model.includes("o3")) {
      this.contextWindow = 200000;
    } else if (model.includes("gpt-3.5")) {
      this.contextWindow = 16385;
    } else {
      this.contextWindow = 128000;
    }
  }

  /** @returns the pricing record looked up for ("openai", modelName). */
  getPricing() {
    return getModelPricing("openai", this.modelName);
  }

  /** @returns provider.baseUrl concatenated with provider.apiPath. */
  getApiEndpoint() {
    return `${this.provider.baseUrl}${this.provider.apiPath}`;
  }

  /**
   * Write the current token/cost snapshot to `~/.claudish/tokens-<port>.json`.
   * Best-effort: any failure is logged and swallowed.
   *
   * @param input - input token count to report.
   * @param output - output token count to report.
   */
  writeTokenFile(input, output) {
    try {
      const total = input + output;
      // Remaining context as a whole percentage, clamped to [0, 100].
      const leftPct = this.contextWindow > 0 ? Math.max(0, Math.min(100, Math.round((this.contextWindow - total) / this.contextWindow * 100))) : 100;
      const data = {
        input_tokens: input,
        output_tokens: output,
        total_tokens: total,
        total_cost: this.sessionTotalCost,
        context_window: this.contextWindow,
        context_left_percent: leftPct,
        updated_at: Date.now()
      };
      const claudishDir = join11(homedir5(), ".claudish");
      mkdirSync8(claudishDir, { recursive: true });
      writeFileSync11(join11(claudishDir, `tokens-${this.port}.json`), JSON.stringify(data), "utf-8");
    } catch (e) {
      log(`[OpenAIHandler] Error writing token file: ${e}`);
    }
  }

  /**
   * Fold one response's usage into the session counters and persist them.
   * Input tokens replace the previous value; output tokens and cost add up.
   */
  updateTokenTracking(inputTokens, outputTokens) {
    this.sessionInputTokens = inputTokens;
    this.sessionOutputTokens += outputTokens;
    const pricing = this.getPricing();
    const cost = inputTokens / 1e6 * pricing.inputCostPer1M + outputTokens / 1e6 * pricing.outputCostPer1M;
    this.sessionTotalCost += cost;
    this.writeTokenFile(inputTokens, this.sessionOutputTokens);
  }

  /** Convert the Claude request's messages via convertMessagesToOpenAI. */
  convertMessages(claudeRequest) {
    return convertMessagesToOpenAI(claudeRequest, `openai/${this.modelName}`, filterIdentity);
  }

  /** Convert the Claude request's tools via convertToolsToOpenAI. */
  convertTools(claudeRequest) {
    return convertToolsToOpenAI(claudeRequest);
  }

  /** @returns true when the lower-cased model name contains "o1" or "o3". */
  supportsReasoning() {
    const model = this.modelName.toLowerCase();
    return model.includes("o1") || model.includes("o3");
  }

  /**
   * Assemble the chat-completions request body.
   *
   * @param claudeRequest - normalised request (temperature, max_tokens,
   *        tool_choice and thinking are read).
   * @param messages - already-converted message list.
   * @param tools - already-converted tool list; omitted from the payload when empty.
   * @returns the payload object (always streaming, with usage reporting on).
   */
  buildOpenAIPayload(claudeRequest, messages, tools) {
    const payload = {
      model: this.modelName,
      messages,
      temperature: claudeRequest.temperature ?? 1,
      stream: true,
      max_tokens: claudeRequest.max_tokens,
      stream_options: { include_usage: true }
    };
    if (tools.length > 0) {
      payload.tools = tools;
    }
    if (claudeRequest.tool_choice) {
      const { type, name } = claudeRequest.tool_choice;
      if (type === "tool" && name) {
        payload.tool_choice = { type: "function", function: { name } };
      } else if (type === "auto" || type === "none") {
        payload.tool_choice = type;
      }
    }
    if (claudeRequest.thinking && this.supportsReasoning()) {
      const { budget_tokens } = claudeRequest.thinking;
      // Only o1/o3-style names reach this branch. Those models accept
      // "low" | "medium" | "high"; "minimal" is a GPT-5-family value and is
      // rejected by the o-series, so small budgets are clamped to "low".
      let effort = "medium";
      if (budget_tokens < 16000) {
        effort = "low";
      } else if (budget_tokens >= 32000) {
        effort = "high";
      }
      payload.reasoning_effort = effort;
      log(`[OpenAIHandler] Mapped thinking.budget_tokens ${budget_tokens} -> reasoning_effort: ${effort}`);
    }
    return payload;
  }

  /**
   * Handle one proxied request end to end.
   *
   * @param c - request context; used here for c.json() and c.header().
   * @param payload - incoming request body (payload.model is only logged).
   * @returns the upstream error as JSON (mirroring the upstream status code)
   *          when the call fails, otherwise the result of
   *          createStreamingResponseHandler.
   */
  async handle(c, payload) {
    const { claudeRequest, droppedParams } = transformOpenAIToClaude(payload);
    const messages = this.convertMessages(claudeRequest);
    const tools = this.convertTools(claudeRequest);
    const systemPromptLength = typeof claudeRequest.system === "string" ? claudeRequest.system.length : 0;
    logStructured("OpenAI Request", {
      targetModel: `openai/${this.modelName}`,
      originalModel: payload.model,
      messageCount: messages.length,
      toolCount: tools.length,
      systemPromptLength,
      maxTokens: claudeRequest.max_tokens
    });
    // Extra detail (last user message, tool names) only at debug level.
    if (getLogLevel() === "debug") {
      const lastUserMsg = messages.filter((m) => m.role === "user").pop();
      if (lastUserMsg) {
        const content = typeof lastUserMsg.content === "string" ? lastUserMsg.content : JSON.stringify(lastUserMsg.content);
        log(`[OpenAI] Last user message: ${truncateContent(content, 500)}`);
      }
      if (tools.length > 0) {
        const toolNames = tools.map((t) => t.function?.name || t.name).join(", ");
        log(`[OpenAI] Tools: ${toolNames}`);
      }
    }
    const openAIPayload = this.buildOpenAIPayload(claudeRequest, messages, tools);
    const adapter = this.adapterManager.getAdapter();
    // reset() is optional on adapters; call it only when present.
    if (typeof adapter.reset === "function") {
      adapter.reset();
    }
    adapter.prepareRequest(openAIPayload, claudeRequest);
    await this.middlewareManager.beforeRequest({
      modelId: `openai/${this.modelName}`,
      messages,
      tools,
      stream: true
    });
    const endpoint = this.getApiEndpoint();
    log(`[OpenAIHandler] Calling API: ${endpoint}`);
    const response = await fetch(endpoint, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
        Authorization: `Bearer ${this.apiKey}`
      },
      body: JSON.stringify(openAIPayload)
    });
    log(`[OpenAIHandler] Response status: ${response.status}`);
    if (!response.ok) {
      const errorText = await response.text();
      log(`[OpenAIHandler] Error: ${errorText}`);
      return c.json({ error: errorText }, response.status);
    }
    if (droppedParams.length > 0) {
      c.header("X-Dropped-Params", droppedParams.join(", "));
    }
    return createStreamingResponseHandler(c, response, adapter, `openai/${this.modelName}`, this.middlewareManager, (input, output) => this.updateTokenTracking(input, output), claudeRequest.tools);
  }

  // Intentionally empty: the visible handler code has nothing to tear down.
  async shutdown() {}
}
|
|
61811
|
+
// Bundler-generated initialiser for src/handlers/openai-handler.ts: runs the
// initialisers of the modules this handler depends on, in the listed order.
var init_openai_handler = __esm(() => {
  init_adapter_manager();
  init_middleware();
  init_transform();
  init_logger();
  init_openai_compat();
  init_remote_provider_types();
});
|
|
61819
|
+
|
|
61060
61820
|
// src/providers/provider-registry.ts
|
|
61061
61821
|
function resolveProvider(modelId) {
|
|
61062
61822
|
const providers = getProviders();
|
|
@@ -61168,6 +61928,86 @@ var getProviders = () => [
|
|
|
61168
61928
|
}
|
|
61169
61929
|
];
|
|
61170
61930
|
|
|
61931
|
+
// src/providers/remote-provider-registry.ts
|
|
61932
|
+
/**
 * Find the remote provider whose prefix starts the given model id.
 *
 * Providers are scanned in the order getRemoteProviders() returns them, and
 * each provider's prefixes in their listed order; the first hit wins.
 *
 * @param modelId - model id including its routing prefix (e.g. "oai/...").
 * @returns `{ provider, modelName }` with the prefix stripped from
 *          modelName, or null when no prefix matches.
 */
function resolveRemoteProvider(modelId) {
  for (const candidate of getRemoteProviders()) {
    const matchedPrefix = candidate.prefixes.find((p) => modelId.startsWith(p));
    if (matchedPrefix === undefined) {
      continue;
    }
    return {
      provider: candidate,
      modelName: modelId.slice(matchedPrefix.length)
    };
  }
  return null;
}
|
|
61946
|
+
/**
 * Check that the provider's API-key environment variable is set.
 *
 * @param provider - provider descriptor; only apiKeyEnvVar is read.
 * @returns null when the variable holds a non-empty value, otherwise a
 *          human-readable message explaining how to set it.
 */
function validateRemoteProviderApiKey(provider) {
  const envVar = provider.apiKeyEnvVar;
  if (process.env[envVar]) {
    return null;
  }
  // Tailored hints for the known providers; anything else gets a generic one.
  const knownHints = {
    GEMINI_API_KEY: "export GEMINI_API_KEY='your-key' (get from https://aistudio.google.com/app/apikey)",
    OPENAI_API_KEY: "export OPENAI_API_KEY='sk-...' (get from https://platform.openai.com/api-keys)",
    OPENROUTER_API_KEY: "export OPENROUTER_API_KEY='sk-or-...' (get from https://openrouter.ai/keys)"
  };
  const example = knownHints[envVar] || `export ${envVar}='your-key'`;
  return `Missing ${provider.apiKeyEnvVar} environment variable.

Set it with:
${example}`;
}
|
|
61962
|
+
/**
 * Build the list of remote providers, in lookup order: gemini, openai,
 * openrouter. A fresh array is produced on every call, so the
 * GEMINI_BASE_URL / OPENAI_BASE_URL overrides are read at call time.
 */
var getRemoteProviders = () => {
  // Every provider shares the same capability flags except JSON mode.
  const capabilitiesWith = (supportsJsonMode) => ({
    supportsTools: true,
    supportsVision: true,
    supportsStreaming: true,
    supportsJsonMode,
    supportsReasoning: true
  });

  const gemini = {
    name: "gemini",
    baseUrl: process.env.GEMINI_BASE_URL || "https://generativelanguage.googleapis.com",
    apiPath: "/v1beta/models/{model}:streamGenerateContent?alt=sse",
    apiKeyEnvVar: "GEMINI_API_KEY",
    prefixes: ["g/", "gemini/", "google/"],
    capabilities: capabilitiesWith(false)
  };

  const openai = {
    name: "openai",
    baseUrl: process.env.OPENAI_BASE_URL || "https://api.openai.com",
    apiPath: "/v1/chat/completions",
    apiKeyEnvVar: "OPENAI_API_KEY",
    prefixes: ["oai/", "openai/"],
    capabilities: capabilitiesWith(true)
  };

  const openrouter = {
    name: "openrouter",
    baseUrl: "https://openrouter.ai",
    apiPath: "/api/v1/chat/completions",
    apiKeyEnvVar: "OPENROUTER_API_KEY",
    prefixes: ["or/"],
    headers: {
      "HTTP-Referer": "https://claudish.com",
      "X-Title": "Claudish - OpenRouter Proxy"
    },
    capabilities: capabilitiesWith(true)
  };

  return [gemini, openai, openrouter];
};
|
|
62010
|
+
|
|
61171
62011
|
// src/proxy-server.ts
|
|
61172
62012
|
var exports_proxy_server = {};
|
|
61173
62013
|
__export(exports_proxy_server, {
|
|
@@ -61177,6 +62017,7 @@ async function createProxyServer(port, openrouterApiKey, model, monitorMode = fa
|
|
|
61177
62017
|
const nativeHandler = new NativeHandler(anthropicApiKey);
|
|
61178
62018
|
const openRouterHandlers = new Map;
|
|
61179
62019
|
const localProviderHandlers = new Map;
|
|
62020
|
+
const remoteProviderHandlers = new Map;
|
|
61180
62021
|
const getOpenRouterHandler = (targetModel) => {
|
|
61181
62022
|
if (!openRouterHandlers.has(targetModel)) {
|
|
61182
62023
|
openRouterHandlers.set(targetModel, new OpenRouterHandler(targetModel, openrouterApiKey, port));
|
|
@@ -61207,6 +62048,35 @@ async function createProxyServer(port, openrouterApiKey, model, monitorMode = fa
|
|
|
61207
62048
|
}
|
|
61208
62049
|
return null;
|
|
61209
62050
|
};
|
|
62051
|
+
// Return the cached handler for a directly-addressed remote model, creating
// it on first use. Yields null when the model id matches no remote prefix,
// when it resolves to OpenRouter, or when the provider has no handler class
// here. Throws when the provider's API-key environment variable is missing.
const getRemoteProviderHandler = (targetModel) => {
  if (remoteProviderHandlers.has(targetModel)) {
    return remoteProviderHandlers.get(targetModel);
  }

  const match = resolveRemoteProvider(targetModel);
  if (!match || match.provider.name === "openrouter") {
    return null;
  }
  const { provider, modelName } = match;

  // The key is validated before the provider name is dispatched on.
  const keyProblem = validateRemoteProviderApiKey(provider);
  if (keyProblem) {
    throw new Error(keyProblem);
  }
  const apiKey = process.env[provider.apiKeyEnvVar];

  let created;
  switch (provider.name) {
    case "gemini":
      created = new GeminiHandler(provider, modelName, apiKey, port);
      log(`[Proxy] Created Gemini handler: ${modelName}`);
      break;
    case "openai":
      created = new OpenAIHandler(provider, modelName, apiKey, port);
      log(`[Proxy] Created OpenAI handler: ${modelName}`);
      break;
    default:
      return null;
  }

  remoteProviderHandlers.set(targetModel, created);
  return created;
};
|
|
61210
62080
|
const getHandlerForRequest = (requestedModel) => {
|
|
61211
62081
|
if (monitorMode)
|
|
61212
62082
|
return nativeHandler;
|
|
@@ -61220,6 +62090,9 @@ async function createProxyServer(port, openrouterApiKey, model, monitorMode = fa
|
|
|
61220
62090
|
else if (req.includes("haiku") && modelMap.haiku)
|
|
61221
62091
|
target = modelMap.haiku;
|
|
61222
62092
|
}
|
|
62093
|
+
const remoteHandler = getRemoteProviderHandler(target);
|
|
62094
|
+
if (remoteHandler)
|
|
62095
|
+
return remoteHandler;
|
|
61223
62096
|
const localHandler = getLocalProviderHandler(target);
|
|
61224
62097
|
if (localHandler)
|
|
61225
62098
|
return localHandler;
|
|
@@ -61284,6 +62157,8 @@ var init_proxy_server = __esm(() => {
|
|
|
61284
62157
|
init_native_handler();
|
|
61285
62158
|
init_openrouter_handler();
|
|
61286
62159
|
init_local_provider_handler();
|
|
62160
|
+
init_gemini_handler();
|
|
62161
|
+
init_openai_handler();
|
|
61287
62162
|
});
|
|
61288
62163
|
|
|
61289
62164
|
// src/update-checker.ts
|
|
@@ -61293,24 +62168,24 @@ __export(exports_update_checker, {
|
|
|
61293
62168
|
});
|
|
61294
62169
|
import { execSync } from "node:child_process";
|
|
61295
62170
|
import { createInterface as createInterface2 } from "node:readline";
|
|
61296
|
-
import { existsSync as existsSync7, readFileSync as readFileSync6, writeFileSync as
|
|
61297
|
-
import { join as
|
|
61298
|
-
import { tmpdir as tmpdir2, homedir as
|
|
62171
|
+
import { existsSync as existsSync7, readFileSync as readFileSync6, writeFileSync as writeFileSync12, mkdirSync as mkdirSync9, unlinkSync as unlinkSync2 } from "node:fs";
|
|
62172
|
+
import { join as join12 } from "node:path";
|
|
62173
|
+
import { tmpdir as tmpdir2, homedir as homedir6, platform } from "node:os";
|
|
61299
62174
|
function getCacheFilePath() {
|
|
61300
62175
|
let cacheDir;
|
|
61301
62176
|
if (isWindows2) {
|
|
61302
|
-
const localAppData = process.env.LOCALAPPDATA ||
|
|
61303
|
-
cacheDir =
|
|
62177
|
+
const localAppData = process.env.LOCALAPPDATA || join12(homedir6(), "AppData", "Local");
|
|
62178
|
+
cacheDir = join12(localAppData, "claudish");
|
|
61304
62179
|
} else {
|
|
61305
|
-
cacheDir =
|
|
62180
|
+
cacheDir = join12(homedir6(), ".cache", "claudish");
|
|
61306
62181
|
}
|
|
61307
62182
|
try {
|
|
61308
62183
|
if (!existsSync7(cacheDir)) {
|
|
61309
|
-
|
|
62184
|
+
mkdirSync9(cacheDir, { recursive: true });
|
|
61310
62185
|
}
|
|
61311
|
-
return
|
|
62186
|
+
return join12(cacheDir, "update-check.json");
|
|
61312
62187
|
} catch {
|
|
61313
|
-
return
|
|
62188
|
+
return join12(tmpdir2(), "claudish-update-check.json");
|
|
61314
62189
|
}
|
|
61315
62190
|
}
|
|
61316
62191
|
function readCache() {
|
|
@@ -61332,7 +62207,7 @@ function writeCache(latestVersion) {
|
|
|
61332
62207
|
lastCheck: Date.now(),
|
|
61333
62208
|
latestVersion
|
|
61334
62209
|
};
|
|
61335
|
-
|
|
62210
|
+
writeFileSync12(cachePath, JSON.stringify(data), "utf-8");
|
|
61336
62211
|
} catch {}
|
|
61337
62212
|
}
|
|
61338
62213
|
function isCacheValid(cache) {
|
package/package.json
CHANGED
|
@@ -0,0 +1,133 @@
|
|
|
1
|
+
{
|
|
2
|
+
"version": "1.1.5",
|
|
3
|
+
"lastUpdated": "2026-01-04",
|
|
4
|
+
"source": "https://openrouter.ai/models?categories=programming&fmt=cards&order=top-weekly",
|
|
5
|
+
"models": [
|
|
6
|
+
{
|
|
7
|
+
"id": "google/gemini-3-pro-preview",
|
|
8
|
+
"name": "Google: Gemini 3 Pro Preview",
|
|
9
|
+
"description": "Gemini 3 Pro is Google’s flagship frontier model for high-precision multimodal reasoning, combining strong performance across text, image, video, audio, and code with a 1M-token context window. Reasoning Details must be preserved when using multi-turn tool calling, see our docs here: https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks. It delivers state-of-the-art benchmark results in general reasoning, STEM problem solving, factual QA, and multimodal understanding, including leading scores on LMArena, GPQA Diamond, MathArena Apex, MMMU-Pro, and Video-MMMU. Interactions emphasize depth and interpretability: the model is designed to infer intent with minimal prompting and produce direct, insight-focused responses.\n\nBuilt for advanced development and agentic workflows, Gemini 3 Pro provides robust tool-calling, long-horizon planning stability, and strong zero-shot generation for complex UI, visualization, and coding tasks. It excels at agentic coding (SWE-Bench Verified, Terminal-Bench 2.0), multimodal analysis, and structured long-form tasks such as research synthesis, planning, and interactive learning experiences. Suitable applications include autonomous agents, coding assistants, multimodal analytics, scientific reasoning, and high-context information processing.",
|
|
10
|
+
"provider": "Google",
|
|
11
|
+
"category": "vision",
|
|
12
|
+
"priority": 1,
|
|
13
|
+
"pricing": {
|
|
14
|
+
"input": "$2.00/1M",
|
|
15
|
+
"output": "$12.00/1M",
|
|
16
|
+
"average": "$7.00/1M"
|
|
17
|
+
},
|
|
18
|
+
"context": "1048K",
|
|
19
|
+
"maxOutputTokens": 65536,
|
|
20
|
+
"modality": "text+image->text",
|
|
21
|
+
"supportsTools": true,
|
|
22
|
+
"supportsReasoning": true,
|
|
23
|
+
"supportsVision": true,
|
|
24
|
+
"isModerated": false,
|
|
25
|
+
"recommended": true
|
|
26
|
+
},
|
|
27
|
+
{
|
|
28
|
+
"id": "openai/gpt-5.1-codex",
|
|
29
|
+
"name": "OpenAI: GPT-5.1-Codex",
|
|
30
|
+
"description": "GPT-5.1-Codex is a specialized version of GPT-5.1 optimized for software engineering and coding workflows. It is designed for both interactive development sessions and long, independent execution of complex engineering tasks. The model supports building projects from scratch, feature development, debugging, large-scale refactoring, and code review. Compared to GPT-5.1, Codex is more steerable, adheres closely to developer instructions, and produces cleaner, higher-quality code outputs. Reasoning effort can be adjusted with the `reasoning.effort` parameter. Read the [docs here](https://openrouter.ai/docs/use-cases/reasoning-tokens#reasoning-effort-level)\n\nCodex integrates into developer environments including the CLI, IDE extensions, GitHub, and cloud tasks. It adapts reasoning effort dynamically—providing fast responses for small tasks while sustaining extended multi-hour runs for large projects. The model is trained to perform structured code reviews, catching critical flaws by reasoning over dependencies and validating behavior against tests. It also supports multimodal inputs such as images or screenshots for UI development and integrates tool use for search, dependency installation, and environment setup. Codex is intended specifically for agentic coding applications.",
|
|
31
|
+
"provider": "Openai",
|
|
32
|
+
"category": "vision",
|
|
33
|
+
"priority": 2,
|
|
34
|
+
"pricing": {
|
|
35
|
+
"input": "$1.25/1M",
|
|
36
|
+
"output": "$10.00/1M",
|
|
37
|
+
"average": "$5.63/1M"
|
|
38
|
+
},
|
|
39
|
+
"context": "400K",
|
|
40
|
+
"maxOutputTokens": 128000,
|
|
41
|
+
"modality": "text+image->text",
|
|
42
|
+
"supportsTools": true,
|
|
43
|
+
"supportsReasoning": true,
|
|
44
|
+
"supportsVision": true,
|
|
45
|
+
"isModerated": true,
|
|
46
|
+
"recommended": true
|
|
47
|
+
},
|
|
48
|
+
{
|
|
49
|
+
"id": "x-ai/grok-code-fast-1",
|
|
50
|
+
"name": "xAI: Grok Code Fast 1",
|
|
51
|
+
"description": "Grok Code Fast 1 is a speedy and economical reasoning model that excels at agentic coding. With reasoning traces visible in the response, developers can steer Grok Code for high-quality work flows.",
|
|
52
|
+
"provider": "X-ai",
|
|
53
|
+
"category": "reasoning",
|
|
54
|
+
"priority": 3,
|
|
55
|
+
"pricing": {
|
|
56
|
+
"input": "$0.20/1M",
|
|
57
|
+
"output": "$1.50/1M",
|
|
58
|
+
"average": "$0.85/1M"
|
|
59
|
+
},
|
|
60
|
+
"context": "256K",
|
|
61
|
+
"maxOutputTokens": 10000,
|
|
62
|
+
"modality": "text->text",
|
|
63
|
+
"supportsTools": true,
|
|
64
|
+
"supportsReasoning": true,
|
|
65
|
+
"supportsVision": false,
|
|
66
|
+
"isModerated": false,
|
|
67
|
+
"recommended": true
|
|
68
|
+
},
|
|
69
|
+
{
|
|
70
|
+
"id": "minimax/minimax-m2",
|
|
71
|
+
"name": "MiniMax: MiniMax M2",
|
|
72
|
+
"description": "MiniMax-M2 is a compact, high-efficiency large language model optimized for end-to-end coding and agentic workflows. With 10 billion activated parameters (230 billion total), it delivers near-frontier intelligence across general reasoning, tool use, and multi-step task execution while maintaining low latency and deployment efficiency.\n\nThe model excels in code generation, multi-file editing, compile-run-fix loops, and test-validated repair, showing strong results on SWE-Bench Verified, Multi-SWE-Bench, and Terminal-Bench. It also performs competitively in agentic evaluations such as BrowseComp and GAIA, effectively handling long-horizon planning, retrieval, and recovery from execution errors.\n\nBenchmarked by [Artificial Analysis](https://artificialanalysis.ai/models/minimax-m2), MiniMax-M2 ranks among the top open-source models for composite intelligence, spanning mathematics, science, and instruction-following. Its small activation footprint enables fast inference, high concurrency, and improved unit economics, making it well-suited for large-scale agents, developer assistants, and reasoning-driven applications that require responsiveness and cost efficiency.\n\nTo avoid degrading this model's performance, MiniMax highly recommends preserving reasoning between turns. Learn more about using reasoning_details to pass back reasoning in our [docs](https://openrouter.ai/docs/use-cases/reasoning-tokens#preserving-reasoning-blocks).",
|
|
73
|
+
"provider": "Minimax",
|
|
74
|
+
"category": "reasoning",
|
|
75
|
+
"priority": 4,
|
|
76
|
+
"pricing": {
|
|
77
|
+
"input": "$0.20/1M",
|
|
78
|
+
"output": "$1.00/1M",
|
|
79
|
+
"average": "$0.60/1M"
|
|
80
|
+
},
|
|
81
|
+
"context": "196K",
|
|
82
|
+
"maxOutputTokens": 65536,
|
|
83
|
+
"modality": "text->text",
|
|
84
|
+
"supportsTools": true,
|
|
85
|
+
"supportsReasoning": true,
|
|
86
|
+
"supportsVision": false,
|
|
87
|
+
"isModerated": false,
|
|
88
|
+
"recommended": true
|
|
89
|
+
},
|
|
90
|
+
{
|
|
91
|
+
"id": "z-ai/glm-4.6",
|
|
92
|
+
"name": "Z.AI: GLM 4.6",
|
|
93
|
+
"description": "Compared with GLM-4.5, this generation brings several key improvements:\n\nLonger context window: The context window has been expanded from 128K to 200K tokens, enabling the model to handle more complex agentic tasks.\nSuperior coding performance: The model achieves higher scores on code benchmarks and demonstrates better real-world performance in applications such as Claude Code、Cline、Roo Code and Kilo Code, including improvements in generating visually polished front-end pages.\nAdvanced reasoning: GLM-4.6 shows a clear improvement in reasoning performance and supports tool use during inference, leading to stronger overall capability.\nMore capable agents: GLM-4.6 exhibits stronger performance in tool using and search-based agents, and integrates more effectively within agent frameworks.\nRefined writing: Better aligns with human preferences in style and readability, and performs more naturally in role-playing scenarios.",
|
|
94
|
+
"provider": "Z-ai",
|
|
95
|
+
"category": "reasoning",
|
|
96
|
+
"priority": 5,
|
|
97
|
+
"pricing": {
|
|
98
|
+
"input": "$0.35/1M",
|
|
99
|
+
"output": "$1.50/1M",
|
|
100
|
+
"average": "$0.93/1M"
|
|
101
|
+
},
|
|
102
|
+
"context": "202K",
|
|
103
|
+
"maxOutputTokens": 65536,
|
|
104
|
+
"modality": "text->text",
|
|
105
|
+
"supportsTools": true,
|
|
106
|
+
"supportsReasoning": true,
|
|
107
|
+
"supportsVision": false,
|
|
108
|
+
"isModerated": false,
|
|
109
|
+
"recommended": true
|
|
110
|
+
},
|
|
111
|
+
{
|
|
112
|
+
"id": "qwen/qwen3-vl-235b-a22b-instruct",
|
|
113
|
+
"name": "Qwen: Qwen3 VL 235B A22B Instruct",
|
|
114
|
+
"description": "Qwen3-VL-235B-A22B Instruct is an open-weight multimodal model that unifies strong text generation with visual understanding across images and video. The Instruct model targets general vision-language use (VQA, document parsing, chart/table extraction, multilingual OCR). The series emphasizes robust perception (recognition of diverse real-world and synthetic categories), spatial understanding (2D/3D grounding), and long-form visual comprehension, with competitive results on public multimodal benchmarks for both perception and reasoning.\n\nBeyond analysis, Qwen3-VL supports agentic interaction and tool use: it can follow complex instructions over multi-image, multi-turn dialogues; align text to video timelines for precise temporal queries; and operate GUI elements for automation tasks. The models also enable visual coding workflows—turning sketches or mockups into code and assisting with UI debugging—while maintaining strong text-only performance comparable to the flagship Qwen3 language models. This makes Qwen3-VL suitable for production scenarios spanning document AI, multilingual OCR, software/UI assistance, spatial/embodied tasks, and research on vision-language agents.",
|
|
115
|
+
"provider": "Qwen",
|
|
116
|
+
"category": "vision",
|
|
117
|
+
"priority": 6,
|
|
118
|
+
"pricing": {
|
|
119
|
+
"input": "$0.20/1M",
|
|
120
|
+
"output": "$1.20/1M",
|
|
121
|
+
"average": "$0.70/1M"
|
|
122
|
+
},
|
|
123
|
+
"context": "262K",
|
|
124
|
+
"maxOutputTokens": null,
|
|
125
|
+
"modality": "text+image->text",
|
|
126
|
+
"supportsTools": true,
|
|
127
|
+
"supportsReasoning": false,
|
|
128
|
+
"supportsVision": true,
|
|
129
|
+
"isModerated": false,
|
|
130
|
+
"recommended": true
|
|
131
|
+
}
|
|
132
|
+
]
|
|
133
|
+
}
|