@blockrun/clawrouter 0.9.36 → 0.9.38
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/cli.js +36 -29
- package/dist/cli.js.map +1 -1
- package/dist/index.js +36 -38
- package/dist/index.js.map +1 -1
- package/package.json +7 -6
package/dist/cli.js
CHANGED
|
@@ -1201,8 +1201,6 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1201
1201
|
primary: "moonshot/kimi-k2.5",
|
|
1202
1202
|
// $0.50/$2.40 - best quality/price for simple tasks
|
|
1203
1203
|
fallback: [
|
|
1204
|
-
"minimax/minimax-m2.5",
|
|
1205
|
-
// $0.30/$1.20 - cheap with reasoning
|
|
1206
1204
|
"google/gemini-2.5-flash",
|
|
1207
1205
|
// 1M context, cost-effective
|
|
1208
1206
|
"nvidia/gpt-oss-120b",
|
|
@@ -1214,8 +1212,6 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1214
1212
|
primary: "xai/grok-code-fast-1",
|
|
1215
1213
|
// Code specialist, $0.20/$1.50
|
|
1216
1214
|
fallback: [
|
|
1217
|
-
"minimax/minimax-m2.5",
|
|
1218
|
-
// $0.30/$1.20 - cheap with reasoning
|
|
1219
1215
|
"google/gemini-2.5-flash",
|
|
1220
1216
|
// 1M context, cost-effective
|
|
1221
1217
|
"deepseek/deepseek-chat",
|
|
@@ -1230,10 +1226,7 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1230
1226
|
"google/gemini-2.5-flash",
|
|
1231
1227
|
// CRITICAL: 1M context, cheap failsafe before expensive models
|
|
1232
1228
|
"google/gemini-2.5-pro",
|
|
1233
|
-
"minimax/minimax-m2.5",
|
|
1234
|
-
// $0.30/$1.20 - cheap with reasoning
|
|
1235
1229
|
"deepseek/deepseek-chat",
|
|
1236
|
-
// Another cheap option
|
|
1237
1230
|
"xai/grok-4-0709",
|
|
1238
1231
|
"openai/gpt-5.2",
|
|
1239
1232
|
// Newer and cheaper input than gpt-4o
|
|
@@ -1245,8 +1238,6 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1245
1238
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
1246
1239
|
// Upgraded Grok 4.1 reasoning $0.20/$0.50
|
|
1247
1240
|
fallback: [
|
|
1248
|
-
"minimax/minimax-m2.5",
|
|
1249
|
-
// $0.30/$1.20 - reasoning capable
|
|
1250
1241
|
"deepseek/deepseek-reasoner",
|
|
1251
1242
|
// Cheap reasoning model
|
|
1252
1243
|
"openai/o4-mini",
|
|
@@ -1260,22 +1251,22 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1260
1251
|
SIMPLE: {
|
|
1261
1252
|
primary: "nvidia/gpt-oss-120b",
|
|
1262
1253
|
// FREE! $0.00/$0.00
|
|
1263
|
-
fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat"
|
|
1254
|
+
fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat"]
|
|
1264
1255
|
},
|
|
1265
1256
|
MEDIUM: {
|
|
1266
1257
|
primary: "google/gemini-2.5-flash",
|
|
1267
1258
|
// $0.15/$0.60 - cheapest capable
|
|
1268
|
-
fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b"
|
|
1259
|
+
fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
|
|
1269
1260
|
},
|
|
1270
1261
|
COMPLEX: {
|
|
1271
1262
|
primary: "google/gemini-2.5-flash",
|
|
1272
1263
|
// $0.15/$0.60 - 1M context handles complexity
|
|
1273
|
-
fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709"
|
|
1264
|
+
fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709"]
|
|
1274
1265
|
},
|
|
1275
1266
|
REASONING: {
|
|
1276
1267
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
1277
|
-
// $0.20/$0.50
|
|
1278
|
-
fallback: ["deepseek/deepseek-reasoner"
|
|
1268
|
+
// $0.20/$0.50
|
|
1269
|
+
fallback: ["deepseek/deepseek-reasoner"]
|
|
1279
1270
|
}
|
|
1280
1271
|
},
|
|
1281
1272
|
// Premium tier configs - best quality (blockrun/premium)
|
|
@@ -1326,8 +1317,6 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1326
1317
|
primary: "moonshot/kimi-k2.5",
|
|
1327
1318
|
// Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
|
|
1328
1319
|
fallback: [
|
|
1329
|
-
"minimax/minimax-m2.5",
|
|
1330
|
-
// $0.30/$1.20 - agentic capable, cheaper than kimi
|
|
1331
1320
|
"claude-haiku-4.5",
|
|
1332
1321
|
"xai/grok-4-1-fast-non-reasoning",
|
|
1333
1322
|
"openai/gpt-4o-mini"
|
|
@@ -1337,8 +1326,6 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1337
1326
|
primary: "xai/grok-code-fast-1",
|
|
1338
1327
|
// Code specialist for agentic coding
|
|
1339
1328
|
fallback: [
|
|
1340
|
-
"minimax/minimax-m2.5",
|
|
1341
|
-
// $0.30/$1.20 - agentic capable
|
|
1342
1329
|
"moonshot/kimi-k2.5",
|
|
1343
1330
|
"claude-haiku-4.5",
|
|
1344
1331
|
"claude-sonnet-4"
|
|
@@ -1349,8 +1336,6 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1349
1336
|
fallback: [
|
|
1350
1337
|
"claude-opus-4",
|
|
1351
1338
|
// Latest Opus - best agentic
|
|
1352
|
-
"minimax/minimax-m2.5",
|
|
1353
|
-
// $0.30/$1.20 - cheap agentic fallback
|
|
1354
1339
|
"openai/gpt-5.2",
|
|
1355
1340
|
"google/gemini-3-pro-preview",
|
|
1356
1341
|
"xai/grok-4-0709"
|
|
@@ -1361,8 +1346,6 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1361
1346
|
// Strong tool use + reasoning for agentic tasks
|
|
1362
1347
|
fallback: [
|
|
1363
1348
|
"claude-opus-4",
|
|
1364
|
-
"minimax/minimax-m2.5",
|
|
1365
|
-
// $0.30/$1.20 - reasoning + agentic
|
|
1366
1349
|
"xai/grok-4-1-fast-reasoning",
|
|
1367
1350
|
"deepseek/deepseek-reasoner"
|
|
1368
1351
|
]
|
|
@@ -3621,6 +3604,7 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
|
|
|
3621
3604
|
]);
|
|
3622
3605
|
var FREE_MODEL = "nvidia/gpt-oss-120b";
|
|
3623
3606
|
var MAX_MESSAGES = 200;
|
|
3607
|
+
var CONTEXT_LIMIT_KB = 5120;
|
|
3624
3608
|
var HEARTBEAT_INTERVAL_MS = 2e3;
|
|
3625
3609
|
var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
|
|
3626
3610
|
var MAX_FALLBACK_ATTEMPTS = 5;
|
|
@@ -4007,15 +3991,28 @@ function normalizeMessagesForThinking(messages) {
|
|
|
4007
3991
|
return hasChanges ? normalized : messages;
|
|
4008
3992
|
}
|
|
4009
3993
|
function truncateMessages(messages) {
|
|
4010
|
-
if (!messages || messages.length <= MAX_MESSAGES)
|
|
3994
|
+
if (!messages || messages.length <= MAX_MESSAGES) {
|
|
3995
|
+
return {
|
|
3996
|
+
messages,
|
|
3997
|
+
wasTruncated: false,
|
|
3998
|
+
originalCount: messages?.length ?? 0,
|
|
3999
|
+
truncatedCount: messages?.length ?? 0
|
|
4000
|
+
};
|
|
4001
|
+
}
|
|
4011
4002
|
const systemMsgs = messages.filter((m) => m.role === "system");
|
|
4012
4003
|
const conversationMsgs = messages.filter((m) => m.role !== "system");
|
|
4013
4004
|
const maxConversation = MAX_MESSAGES - systemMsgs.length;
|
|
4014
4005
|
const truncatedConversation = conversationMsgs.slice(-maxConversation);
|
|
4006
|
+
const result = [...systemMsgs, ...truncatedConversation];
|
|
4015
4007
|
console.log(
|
|
4016
|
-
`[ClawRouter] Truncated messages: ${messages.length} \u2192 ${
|
|
4008
|
+
`[ClawRouter] Truncated messages: ${messages.length} \u2192 ${result.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
|
|
4017
4009
|
);
|
|
4018
|
-
return
|
|
4010
|
+
return {
|
|
4011
|
+
messages: result,
|
|
4012
|
+
wasTruncated: true,
|
|
4013
|
+
originalCount: messages.length,
|
|
4014
|
+
truncatedCount: result.length
|
|
4015
|
+
};
|
|
4019
4016
|
}
|
|
4020
4017
|
var KIMI_BLOCK_RE = /<[｜|][^<>]*begin[^<>]*[｜|]>[\s\S]*?<[｜|][^<>]*end[^<>]*[｜|]>/gi;
|
|
4021
4018
|
var KIMI_TOKEN_RE = /<[｜|][^<>]*[｜|]>/g;
|
|
@@ -4348,7 +4345,8 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
|
|
|
4348
4345
|
parsed.messages = normalizeMessageRoles(parsed.messages);
|
|
4349
4346
|
}
|
|
4350
4347
|
if (Array.isArray(parsed.messages)) {
|
|
4351
|
-
|
|
4348
|
+
const truncationResult = truncateMessages(parsed.messages);
|
|
4349
|
+
parsed.messages = truncationResult.messages;
|
|
4352
4350
|
}
|
|
4353
4351
|
if (Array.isArray(parsed.messages)) {
|
|
4354
4352
|
parsed.messages = sanitizeToolIds(parsed.messages);
|
|
@@ -4422,6 +4420,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
4422
4420
|
bodyChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
|
|
4423
4421
|
}
|
|
4424
4422
|
let body = Buffer.concat(bodyChunks);
|
|
4423
|
+
const originalContextSizeKB = Math.ceil(body.length / 1024);
|
|
4425
4424
|
let routingDecision;
|
|
4426
4425
|
let isStreaming = false;
|
|
4427
4426
|
let modelId = "";
|
|
@@ -4529,7 +4528,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
4529
4528
|
const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
|
|
4530
4529
|
const tools = parsed.tools;
|
|
4531
4530
|
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
4532
|
-
if (hasTools) {
|
|
4531
|
+
if (hasTools && tools) {
|
|
4533
4532
|
console.log(
|
|
4534
4533
|
`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
|
|
4535
4534
|
);
|
|
@@ -4674,7 +4673,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
4674
4673
|
res.writeHead(200, {
|
|
4675
4674
|
"content-type": "text/event-stream",
|
|
4676
4675
|
"cache-control": "no-cache",
|
|
4677
|
-
connection: "keep-alive"
|
|
4676
|
+
connection: "keep-alive",
|
|
4677
|
+
"x-context-used-kb": String(originalContextSizeKB),
|
|
4678
|
+
"x-context-limit-kb": String(CONTEXT_LIMIT_KB)
|
|
4678
4679
|
});
|
|
4679
4680
|
headersSentEarly = true;
|
|
4680
4681
|
safeWrite(res, ": heartbeat\n\n");
|
|
@@ -4837,7 +4838,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
4837
4838
|
completedAt: Date.now()
|
|
4838
4839
|
});
|
|
4839
4840
|
} else {
|
|
4840
|
-
res.writeHead(errStatus, {
|
|
4841
|
+
res.writeHead(errStatus, {
|
|
4842
|
+
"Content-Type": "application/json",
|
|
4843
|
+
"x-context-used-kb": String(originalContextSizeKB),
|
|
4844
|
+
"x-context-limit-kb": String(CONTEXT_LIMIT_KB)
|
|
4845
|
+
});
|
|
4841
4846
|
res.end(transformedErr);
|
|
4842
4847
|
deduplicator.complete(dedupKey, {
|
|
4843
4848
|
status: errStatus,
|
|
@@ -4963,6 +4968,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
4963
4968
|
return;
|
|
4964
4969
|
responseHeaders[key] = value;
|
|
4965
4970
|
});
|
|
4971
|
+
responseHeaders["x-context-used-kb"] = String(originalContextSizeKB);
|
|
4972
|
+
responseHeaders["x-context-limit-kb"] = String(CONTEXT_LIMIT_KB);
|
|
4966
4973
|
res.writeHead(upstream.status, responseHeaders);
|
|
4967
4974
|
if (upstream.body) {
|
|
4968
4975
|
const reader = upstream.body.getReader();
|