@blockrun/clawrouter 0.9.36 → 0.9.38

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1201,8 +1201,6 @@ var DEFAULT_ROUTING_CONFIG = {
1201
1201
  primary: "moonshot/kimi-k2.5",
1202
1202
  // $0.50/$2.40 - best quality/price for simple tasks
1203
1203
  fallback: [
1204
- "minimax/minimax-m2.5",
1205
- // $0.30/$1.20 - cheap with reasoning
1206
1204
  "google/gemini-2.5-flash",
1207
1205
  // 1M context, cost-effective
1208
1206
  "nvidia/gpt-oss-120b",
@@ -1214,8 +1212,6 @@ var DEFAULT_ROUTING_CONFIG = {
1214
1212
  primary: "xai/grok-code-fast-1",
1215
1213
  // Code specialist, $0.20/$1.50
1216
1214
  fallback: [
1217
- "minimax/minimax-m2.5",
1218
- // $0.30/$1.20 - cheap with reasoning
1219
1215
  "google/gemini-2.5-flash",
1220
1216
  // 1M context, cost-effective
1221
1217
  "deepseek/deepseek-chat",
@@ -1230,10 +1226,7 @@ var DEFAULT_ROUTING_CONFIG = {
1230
1226
  "google/gemini-2.5-flash",
1231
1227
  // CRITICAL: 1M context, cheap failsafe before expensive models
1232
1228
  "google/gemini-2.5-pro",
1233
- "minimax/minimax-m2.5",
1234
- // $0.30/$1.20 - cheap with reasoning
1235
1229
  "deepseek/deepseek-chat",
1236
- // Another cheap option
1237
1230
  "xai/grok-4-0709",
1238
1231
  "openai/gpt-5.2",
1239
1232
  // Newer and cheaper input than gpt-4o
@@ -1245,8 +1238,6 @@ var DEFAULT_ROUTING_CONFIG = {
1245
1238
  primary: "xai/grok-4-1-fast-reasoning",
1246
1239
  // Upgraded Grok 4.1 reasoning $0.20/$0.50
1247
1240
  fallback: [
1248
- "minimax/minimax-m2.5",
1249
- // $0.30/$1.20 - reasoning capable
1250
1241
  "deepseek/deepseek-reasoner",
1251
1242
  // Cheap reasoning model
1252
1243
  "openai/o4-mini",
@@ -1260,22 +1251,22 @@ var DEFAULT_ROUTING_CONFIG = {
1260
1251
  SIMPLE: {
1261
1252
  primary: "nvidia/gpt-oss-120b",
1262
1253
  // FREE! $0.00/$0.00
1263
- fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "minimax/minimax-m2.5"]
1254
+ fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat"]
1264
1255
  },
1265
1256
  MEDIUM: {
1266
1257
  primary: "google/gemini-2.5-flash",
1267
1258
  // $0.15/$0.60 - cheapest capable
1268
- fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "minimax/minimax-m2.5"]
1259
+ fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
1269
1260
  },
1270
1261
  COMPLEX: {
1271
1262
  primary: "google/gemini-2.5-flash",
1272
1263
  // $0.15/$0.60 - 1M context handles complexity
1273
- fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709", "minimax/minimax-m2.5"]
1264
+ fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709"]
1274
1265
  },
1275
1266
  REASONING: {
1276
1267
  primary: "xai/grok-4-1-fast-reasoning",
1277
- // $0.20/$0.50 - was MORE expensive than AUTO!
1278
- fallback: ["deepseek/deepseek-reasoner", "minimax/minimax-m2.5"]
1268
+ // $0.20/$0.50
1269
+ fallback: ["deepseek/deepseek-reasoner"]
1279
1270
  }
1280
1271
  },
1281
1272
  // Premium tier configs - best quality (blockrun/premium)
@@ -1326,8 +1317,6 @@ var DEFAULT_ROUTING_CONFIG = {
1326
1317
  primary: "moonshot/kimi-k2.5",
1327
1318
  // Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
1328
1319
  fallback: [
1329
- "minimax/minimax-m2.5",
1330
- // $0.30/$1.20 - agentic capable, cheaper than kimi
1331
1320
  "claude-haiku-4.5",
1332
1321
  "xai/grok-4-1-fast-non-reasoning",
1333
1322
  "openai/gpt-4o-mini"
@@ -1337,8 +1326,6 @@ var DEFAULT_ROUTING_CONFIG = {
1337
1326
  primary: "xai/grok-code-fast-1",
1338
1327
  // Code specialist for agentic coding
1339
1328
  fallback: [
1340
- "minimax/minimax-m2.5",
1341
- // $0.30/$1.20 - agentic capable
1342
1329
  "moonshot/kimi-k2.5",
1343
1330
  "claude-haiku-4.5",
1344
1331
  "claude-sonnet-4"
@@ -1349,8 +1336,6 @@ var DEFAULT_ROUTING_CONFIG = {
1349
1336
  fallback: [
1350
1337
  "claude-opus-4",
1351
1338
  // Latest Opus - best agentic
1352
- "minimax/minimax-m2.5",
1353
- // $0.30/$1.20 - cheap agentic fallback
1354
1339
  "openai/gpt-5.2",
1355
1340
  "google/gemini-3-pro-preview",
1356
1341
  "xai/grok-4-0709"
@@ -1361,8 +1346,6 @@ var DEFAULT_ROUTING_CONFIG = {
1361
1346
  // Strong tool use + reasoning for agentic tasks
1362
1347
  fallback: [
1363
1348
  "claude-opus-4",
1364
- "minimax/minimax-m2.5",
1365
- // $0.30/$1.20 - reasoning + agentic
1366
1349
  "xai/grok-4-1-fast-reasoning",
1367
1350
  "deepseek/deepseek-reasoner"
1368
1351
  ]
@@ -3621,6 +3604,7 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
3621
3604
  ]);
3622
3605
  var FREE_MODEL = "nvidia/gpt-oss-120b";
3623
3606
  var MAX_MESSAGES = 200;
3607
+ var CONTEXT_LIMIT_KB = 5120;
3624
3608
  var HEARTBEAT_INTERVAL_MS = 2e3;
3625
3609
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
3626
3610
  var MAX_FALLBACK_ATTEMPTS = 5;
@@ -4007,15 +3991,28 @@ function normalizeMessagesForThinking(messages) {
4007
3991
  return hasChanges ? normalized : messages;
4008
3992
  }
4009
3993
  function truncateMessages(messages) {
4010
- if (!messages || messages.length <= MAX_MESSAGES) return messages;
3994
+ if (!messages || messages.length <= MAX_MESSAGES) {
3995
+ return {
3996
+ messages,
3997
+ wasTruncated: false,
3998
+ originalCount: messages?.length ?? 0,
3999
+ truncatedCount: messages?.length ?? 0
4000
+ };
4001
+ }
4011
4002
  const systemMsgs = messages.filter((m) => m.role === "system");
4012
4003
  const conversationMsgs = messages.filter((m) => m.role !== "system");
4013
4004
  const maxConversation = MAX_MESSAGES - systemMsgs.length;
4014
4005
  const truncatedConversation = conversationMsgs.slice(-maxConversation);
4006
+ const result = [...systemMsgs, ...truncatedConversation];
4015
4007
  console.log(
4016
- `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${systemMsgs.length + truncatedConversation.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
4008
+ `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${result.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
4017
4009
  );
4018
- return [...systemMsgs, ...truncatedConversation];
4010
+ return {
4011
+ messages: result,
4012
+ wasTruncated: true,
4013
+ originalCount: messages.length,
4014
+ truncatedCount: result.length
4015
+ };
4019
4016
  }
4020
4017
  var KIMI_BLOCK_RE = /<[||][^<>]*begin[^<>]*[||]>[\s\S]*?<[||][^<>]*end[^<>]*[||]>/gi;
4021
4018
  var KIMI_TOKEN_RE = /<[||][^<>]*[||]>/g;
@@ -4348,7 +4345,8 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
4348
4345
  parsed.messages = normalizeMessageRoles(parsed.messages);
4349
4346
  }
4350
4347
  if (Array.isArray(parsed.messages)) {
4351
- parsed.messages = truncateMessages(parsed.messages);
4348
+ const truncationResult = truncateMessages(parsed.messages);
4349
+ parsed.messages = truncationResult.messages;
4352
4350
  }
4353
4351
  if (Array.isArray(parsed.messages)) {
4354
4352
  parsed.messages = sanitizeToolIds(parsed.messages);
@@ -4422,6 +4420,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4422
4420
  bodyChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
4423
4421
  }
4424
4422
  let body = Buffer.concat(bodyChunks);
4423
+ const originalContextSizeKB = Math.ceil(body.length / 1024);
4425
4424
  let routingDecision;
4426
4425
  let isStreaming = false;
4427
4426
  let modelId = "";
@@ -4529,7 +4528,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4529
4528
  const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
4530
4529
  const tools = parsed.tools;
4531
4530
  const hasTools = Array.isArray(tools) && tools.length > 0;
4532
- if (hasTools) {
4531
+ if (hasTools && tools) {
4533
4532
  console.log(
4534
4533
  `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
4535
4534
  );
@@ -4674,7 +4673,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4674
4673
  res.writeHead(200, {
4675
4674
  "content-type": "text/event-stream",
4676
4675
  "cache-control": "no-cache",
4677
- connection: "keep-alive"
4676
+ connection: "keep-alive",
4677
+ "x-context-used-kb": String(originalContextSizeKB),
4678
+ "x-context-limit-kb": String(CONTEXT_LIMIT_KB)
4678
4679
  });
4679
4680
  headersSentEarly = true;
4680
4681
  safeWrite(res, ": heartbeat\n\n");
@@ -4837,7 +4838,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4837
4838
  completedAt: Date.now()
4838
4839
  });
4839
4840
  } else {
4840
- res.writeHead(errStatus, { "Content-Type": "application/json" });
4841
+ res.writeHead(errStatus, {
4842
+ "Content-Type": "application/json",
4843
+ "x-context-used-kb": String(originalContextSizeKB),
4844
+ "x-context-limit-kb": String(CONTEXT_LIMIT_KB)
4845
+ });
4841
4846
  res.end(transformedErr);
4842
4847
  deduplicator.complete(dedupKey, {
4843
4848
  status: errStatus,
@@ -4963,6 +4968,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4963
4968
  return;
4964
4969
  responseHeaders[key] = value;
4965
4970
  });
4971
+ responseHeaders["x-context-used-kb"] = String(originalContextSizeKB);
4972
+ responseHeaders["x-context-limit-kb"] = String(CONTEXT_LIMIT_KB);
4966
4973
  res.writeHead(upstream.status, responseHeaders);
4967
4974
  if (upstream.body) {
4968
4975
  const reader = upstream.body.getReader();