npm - @blockrun/clawrouter - Versions diffs - 0.9.36 → 0.9.38 - Mend

@blockrun/clawrouter 0.9.36 → 0.9.38

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.

Files changed (5) hide show

package/dist/cli.js CHANGED Viewed

@@ -1201,8 +1201,6 @@ var DEFAULT_ROUTING_CONFIG = {
       primary: "moonshot/kimi-k2.5",
       // $0.50/$2.40 - best quality/price for simple tasks
       fallback: [
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - cheap with reasoning
         "google/gemini-2.5-flash",
         // 1M context, cost-effective
         "nvidia/gpt-oss-120b",
@@ -1214,8 +1212,6 @@ var DEFAULT_ROUTING_CONFIG = {
       primary: "xai/grok-code-fast-1",
       // Code specialist, $0.20/$1.50
       fallback: [
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - cheap with reasoning
         "google/gemini-2.5-flash",
         // 1M context, cost-effective
         "deepseek/deepseek-chat",
@@ -1230,10 +1226,7 @@ var DEFAULT_ROUTING_CONFIG = {
         "google/gemini-2.5-flash",
         // CRITICAL: 1M context, cheap failsafe before expensive models
         "google/gemini-2.5-pro",
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - cheap with reasoning
         "deepseek/deepseek-chat",
-        // Another cheap option
         "xai/grok-4-0709",
         "openai/gpt-5.2",
         // Newer and cheaper input than gpt-4o
@@ -1245,8 +1238,6 @@ var DEFAULT_ROUTING_CONFIG = {
       primary: "xai/grok-4-1-fast-reasoning",
       // Upgraded Grok 4.1 reasoning $0.20/$0.50
       fallback: [
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - reasoning capable
         "deepseek/deepseek-reasoner",
         // Cheap reasoning model
         "openai/o4-mini",
@@ -1260,22 +1251,22 @@ var DEFAULT_ROUTING_CONFIG = {
     SIMPLE: {
       primary: "nvidia/gpt-oss-120b",
       // FREE! $0.00/$0.00
-      fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "minimax/minimax-m2.5"]
+      fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat"]
     },
     MEDIUM: {
       primary: "google/gemini-2.5-flash",
       // $0.15/$0.60 - cheapest capable
-      fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "minimax/minimax-m2.5"]
+      fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
     },
     COMPLEX: {
       primary: "google/gemini-2.5-flash",
       // $0.15/$0.60 - 1M context handles complexity
-      fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709", "minimax/minimax-m2.5"]
+      fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709"]
     },
     REASONING: {
       primary: "xai/grok-4-1-fast-reasoning",
-      // $0.20/$0.50 - was MORE expensive than AUTO!
-      fallback: ["deepseek/deepseek-reasoner", "minimax/minimax-m2.5"]
+      // $0.20/$0.50
+      fallback: ["deepseek/deepseek-reasoner"]
     }
   },
   // Premium tier configs - best quality (blockrun/premium)
@@ -1326,8 +1317,6 @@ var DEFAULT_ROUTING_CONFIG = {
       primary: "moonshot/kimi-k2.5",
       // Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
       fallback: [
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - agentic capable, cheaper than kimi
         "claude-haiku-4.5",
         "xai/grok-4-1-fast-non-reasoning",
         "openai/gpt-4o-mini"
@@ -1337,8 +1326,6 @@ var DEFAULT_ROUTING_CONFIG = {
       primary: "xai/grok-code-fast-1",
       // Code specialist for agentic coding
       fallback: [
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - agentic capable
         "moonshot/kimi-k2.5",
         "claude-haiku-4.5",
         "claude-sonnet-4"
@@ -1349,8 +1336,6 @@ var DEFAULT_ROUTING_CONFIG = {
       fallback: [
         "claude-opus-4",
         // Latest Opus - best agentic
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - cheap agentic fallback
         "openai/gpt-5.2",
         "google/gemini-3-pro-preview",
         "xai/grok-4-0709"
@@ -1361,8 +1346,6 @@ var DEFAULT_ROUTING_CONFIG = {
       // Strong tool use + reasoning for agentic tasks
       fallback: [
         "claude-opus-4",
-        "minimax/minimax-m2.5",
-        // $0.30/$1.20 - reasoning + agentic
         "xai/grok-4-1-fast-reasoning",
         "deepseek/deepseek-reasoner"
       ]
@@ -3621,6 +3604,7 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
 ]);
 var FREE_MODEL = "nvidia/gpt-oss-120b";
 var MAX_MESSAGES = 200;
+var CONTEXT_LIMIT_KB = 5120;
 var HEARTBEAT_INTERVAL_MS = 2e3;
 var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
 var MAX_FALLBACK_ATTEMPTS = 5;
@@ -4007,15 +3991,28 @@ function normalizeMessagesForThinking(messages) {
   return hasChanges ? normalized : messages;
 }
 function truncateMessages(messages) {
-  if (!messages || messages.length <= MAX_MESSAGES) return messages;
+  if (!messages || messages.length <= MAX_MESSAGES) {
+    return {
+      messages,
+      wasTruncated: false,
+      originalCount: messages?.length ?? 0,
+      truncatedCount: messages?.length ?? 0
+    };
+  }
   const systemMsgs = messages.filter((m) => m.role === "system");
   const conversationMsgs = messages.filter((m) => m.role !== "system");
   const maxConversation = MAX_MESSAGES - systemMsgs.length;
   const truncatedConversation = conversationMsgs.slice(-maxConversation);
+  const result = [...systemMsgs, ...truncatedConversation];
   console.log(
-    `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${systemMsgs.length + truncatedConversation.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
+    `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${result.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
   );
-  return [...systemMsgs, ...truncatedConversation];
+  return {
+    messages: result,
+    wasTruncated: true,
+    originalCount: messages.length,
+    truncatedCount: result.length
+  };
 }
 var KIMI_BLOCK_RE = /<[｜|][^<>]*begin[^<>]*[｜|]>[\s\S]*?<[｜|][^<>]*end[^<>]*[｜|]>/gi;
 var KIMI_TOKEN_RE = /<[｜|][^<>]*[｜|]>/g;
@@ -4348,7 +4345,8 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
       parsed.messages = normalizeMessageRoles(parsed.messages);
     }
     if (Array.isArray(parsed.messages)) {
-      parsed.messages = truncateMessages(parsed.messages);
+      const truncationResult = truncateMessages(parsed.messages);
+      parsed.messages = truncationResult.messages;
     }
     if (Array.isArray(parsed.messages)) {
       parsed.messages = sanitizeToolIds(parsed.messages);
@@ -4422,6 +4420,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
     bodyChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
   }
   let body = Buffer.concat(bodyChunks);
+  const originalContextSizeKB = Math.ceil(body.length / 1024);
   let routingDecision;
   let isStreaming = false;
   let modelId = "";
@@ -4529,7 +4528,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
             const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
             const tools = parsed.tools;
             const hasTools = Array.isArray(tools) && tools.length > 0;
-            if (hasTools) {
+            if (hasTools && tools) {
               console.log(
                 `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
               );
@@ -4674,7 +4673,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
     res.writeHead(200, {
       "content-type": "text/event-stream",
       "cache-control": "no-cache",
-      connection: "keep-alive"
+      connection: "keep-alive",
+      "x-context-used-kb": String(originalContextSizeKB),
+      "x-context-limit-kb": String(CONTEXT_LIMIT_KB)
     });
     headersSentEarly = true;
     safeWrite(res, ": heartbeat\n\n");
@@ -4837,7 +4838,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
           completedAt: Date.now()
         });
       } else {
-        res.writeHead(errStatus, { "Content-Type": "application/json" });
+        res.writeHead(errStatus, {
+          "Content-Type": "application/json",
+          "x-context-used-kb": String(originalContextSizeKB),
+          "x-context-limit-kb": String(CONTEXT_LIMIT_KB)
+        });
         res.end(transformedErr);
         deduplicator.complete(dedupKey, {
           status: errStatus,
@@ -4963,6 +4968,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
           return;
         responseHeaders[key] = value;
       });
+      responseHeaders["x-context-used-kb"] = String(originalContextSizeKB);
+      responseHeaders["x-context-limit-kb"] = String(CONTEXT_LIMIT_KB);
       res.writeHead(upstream.status, responseHeaders);
       if (upstream.body) {
         const reader = upstream.body.getReader();