@blockrun/clawrouter 0.9.5 → 0.9.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1571,8 +1571,8 @@ var DEFAULT_ROUTING_CONFIG = {
1571
1571
  // Auto (balanced) tier configs - current default smart routing
1572
1572
  tiers: {
1573
1573
  SIMPLE: {
1574
- primary: "nvidia/kimi-k2.5",
1575
- // $0.55/$2.5 - best quality/price for simple tasks
1574
+ primary: "moonshot/kimi-k2.5",
1575
+ // $0.50/$2.40 - best quality/price for simple tasks
1576
1576
  fallback: [
1577
1577
  "google/gemini-2.5-flash",
1578
1578
  // 1M context, cost-effective
@@ -1624,14 +1624,14 @@ var DEFAULT_ROUTING_CONFIG = {
1624
1624
  // Eco tier configs - ultra cost-optimized (blockrun/eco)
1625
1625
  ecoTiers: {
1626
1626
  SIMPLE: {
1627
- primary: "nvidia/kimi-k2.5",
1628
- // $0.55/$2.5
1627
+ primary: "moonshot/kimi-k2.5",
1628
+ // $0.50/$2.40
1629
1629
  fallback: ["nvidia/gpt-oss-120b", "deepseek/deepseek-chat", "google/gemini-2.5-flash"]
1630
1630
  },
1631
1631
  MEDIUM: {
1632
1632
  primary: "deepseek/deepseek-chat",
1633
1633
  // $0.14/$0.28
1634
- fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
1634
+ fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "moonshot/kimi-k2.5"]
1635
1635
  },
1636
1636
  COMPLEX: {
1637
1637
  primary: "xai/grok-4-0709",
@@ -3249,6 +3249,7 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
3249
3249
  "premium"
3250
3250
  ]);
3251
3251
  var FREE_MODEL = "nvidia/gpt-oss-120b";
3252
+ var MAX_MESSAGES = 200;
3252
3253
  var HEARTBEAT_INTERVAL_MS = 2e3;
3253
3254
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
3254
3255
  var MAX_FALLBACK_ATTEMPTS = 5;
@@ -3557,6 +3558,17 @@ function normalizeMessagesForThinking(messages) {
3557
3558
  });
3558
3559
  return hasChanges ? normalized : messages;
3559
3560
  }
3561
/**
 * Cap a chat history at `maxMessages` entries.
 *
 * Every message whose `role` is "system" is kept and moved to the front of
 * the result; the remaining budget is filled with the most recent non-system
 * messages, in their original order.
 *
 * @param {Array<{role: string}>} messages - Chat messages to cap. Anything
 *   that is not an array is returned unchanged.
 * @param {number} [maxMessages=MAX_MESSAGES] - Upper bound on the number of
 *   messages to forward.
 * @returns {Array<{role: string}>} The same array reference when it already
 *   fits within the cap, otherwise a new array holding all system messages
 *   followed by the retained recent messages. System messages are never
 *   dropped, so the result can still exceed the cap when they alone do.
 */
function truncateMessages(messages, maxMessages = MAX_MESSAGES) {
  if (!Array.isArray(messages) || messages.length <= maxMessages) return messages;
  const systemMsgs = messages.filter((m) => m.role === "system");
  const conversationMsgs = messages.filter((m) => m.role !== "system");
  // Clamp the budget: when system messages alone reach the cap the raw
  // difference is 0 or negative. slice(-0) is slice(0) and would keep the
  // whole conversation, and a negative budget would flip into a positive
  // start index and drop an arbitrary number of the oldest messages.
  const maxConversation = Math.max(0, maxMessages - systemMsgs.length);
  const truncatedConversation = maxConversation > 0 ? conversationMsgs.slice(-maxConversation) : [];
  console.log(
    `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${systemMsgs.length + truncatedConversation.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
  );
  return [...systemMsgs, ...truncatedConversation];
}
3560
3572
  var KIMI_BLOCK_RE = /<[||][^<>]*begin[^<>]*[||]>[\s\S]*?<[||][^<>]*end[^<>]*[||]>/gi;
3561
3573
  var KIMI_TOKEN_RE = /<[||][^<>]*[||]>/g;
3562
3574
  var THINKING_TAG_RE = /<\s*\/?\s*(?:think(?:ing)?|thought|antthinking)\b[^>]*>/gi;
@@ -3866,6 +3878,9 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
3866
3878
  if (Array.isArray(parsed.messages)) {
3867
3879
  parsed.messages = normalizeMessageRoles(parsed.messages);
3868
3880
  }
3881
+ if (Array.isArray(parsed.messages)) {
3882
+ parsed.messages = truncateMessages(parsed.messages);
3883
+ }
3869
3884
  if (Array.isArray(parsed.messages)) {
3870
3885
  parsed.messages = sanitizeToolIds(parsed.messages);
3871
3886
  }