@blockrun/clawrouter 0.9.5 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1125,8 +1125,8 @@ var DEFAULT_ROUTING_CONFIG = {
1125
1125
  // Auto (balanced) tier configs - current default smart routing
1126
1126
  tiers: {
1127
1127
  SIMPLE: {
1128
- primary: "nvidia/kimi-k2.5",
1129
- // $0.55/$2.5 - best quality/price for simple tasks
1128
+ primary: "moonshot/kimi-k2.5",
1129
+ // $0.50/$2.40 - best quality/price for simple tasks
1130
1130
  fallback: [
1131
1131
  "google/gemini-2.5-flash",
1132
1132
  // 1M context, cost-effective
@@ -1178,14 +1178,14 @@ var DEFAULT_ROUTING_CONFIG = {
1178
1178
  // Eco tier configs - ultra cost-optimized (blockrun/eco)
1179
1179
  ecoTiers: {
1180
1180
  SIMPLE: {
1181
- primary: "nvidia/kimi-k2.5",
1182
- // $0.55/$2.5
1181
+ primary: "moonshot/kimi-k2.5",
1182
+ // $0.50/$2.40
1183
1183
  fallback: ["nvidia/gpt-oss-120b", "deepseek/deepseek-chat", "google/gemini-2.5-flash"]
1184
1184
  },
1185
1185
  MEDIUM: {
1186
1186
  primary: "deepseek/deepseek-chat",
1187
1187
  // $0.14/$0.28
1188
- fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
1188
+ fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "moonshot/kimi-k2.5"]
1189
1189
  },
1190
1190
  COMPLEX: {
1191
1191
  primary: "xai/grok-4-0709",
@@ -3109,6 +3109,7 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
3109
3109
  "premium"
3110
3110
  ]);
3111
3111
  var FREE_MODEL = "nvidia/gpt-oss-120b";
3112
+ var MAX_MESSAGES = 200;
3112
3113
  var HEARTBEAT_INTERVAL_MS = 2e3;
3113
3114
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
3114
3115
  var MAX_FALLBACK_ATTEMPTS = 5;
@@ -3417,6 +3418,17 @@ function normalizeMessagesForThinking(messages) {
3417
3418
  });
3418
3419
  return hasChanges ? normalized : messages;
3419
3420
  }
3421
/**
 * Caps a chat message list at `maxMessages` entries.
 *
 * Every message with role "system" is kept and moved to the front of the
 * result; the remaining budget is filled with the most recent non-system
 * messages, in their original relative order.
 *
 * If the system messages alone meet or exceed the limit, no conversation
 * messages are kept. System messages are never dropped, so in that case the
 * result can still be longer than `maxMessages`.
 *
 * NOTE(review): the cut point ignores message roles, so the kept window may
 * start in the middle of a multi-message exchange — confirm whether upstream
 * providers tolerate that.
 *
 * @param {Array<{role: string}>|null|undefined} messages - Messages to cap.
 * @param {number} [maxMessages=MAX_MESSAGES] - Maximum number of messages to keep.
 * @returns {Array<{role: string}>|null|undefined} The input itself (same
 *   reference) when it is falsy or already within the limit; otherwise a new
 *   array of system messages followed by the most recent conversation messages.
 */
function truncateMessages(messages, maxMessages = MAX_MESSAGES) {
  if (!messages || messages.length <= maxMessages) return messages;

  const systemMsgs = [];
  const conversationMsgs = [];
  for (const message of messages) {
    (message.role === "system" ? systemMsgs : conversationMsgs).push(message);
  }

  // Clamp at zero: a negative budget would make slice() drop messages from the
  // front by an unrelated count instead of keeping the most recent ones.
  const maxConversation = Math.max(0, maxMessages - systemMsgs.length);
  // slice(-0) is slice(0) and would keep everything, so handle 0 explicitly.
  const truncatedConversation =
    maxConversation > 0 ? conversationMsgs.slice(-maxConversation) : [];

  console.log(
    `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${systemMsgs.length + truncatedConversation.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
  );
  return [...systemMsgs, ...truncatedConversation];
}
3420
3432
  var KIMI_BLOCK_RE = /<[||][^<>]*begin[^<>]*[||]>[\s\S]*?<[||][^<>]*end[^<>]*[||]>/gi;
3421
3433
  var KIMI_TOKEN_RE = /<[||][^<>]*[||]>/g;
3422
3434
  var THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;
@@ -3726,6 +3738,9 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
3726
3738
  if (Array.isArray(parsed.messages)) {
3727
3739
  parsed.messages = normalizeMessageRoles(parsed.messages);
3728
3740
  }
3741
+ if (Array.isArray(parsed.messages)) {
3742
+ parsed.messages = truncateMessages(parsed.messages);
3743
+ }
3729
3744
  if (Array.isArray(parsed.messages)) {
3730
3745
  parsed.messages = sanitizeToolIds(parsed.messages);
3731
3746
  }