@blockrun/clawrouter 0.9.36 → 0.9.38

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -1683,8 +1683,6 @@ var DEFAULT_ROUTING_CONFIG = {
1683
1683
  primary: "moonshot/kimi-k2.5",
1684
1684
  // $0.50/$2.40 - best quality/price for simple tasks
1685
1685
  fallback: [
1686
- "minimax/minimax-m2.5",
1687
- // $0.30/$1.20 - cheap with reasoning
1688
1686
  "google/gemini-2.5-flash",
1689
1687
  // 1M context, cost-effective
1690
1688
  "nvidia/gpt-oss-120b",
@@ -1696,8 +1694,6 @@ var DEFAULT_ROUTING_CONFIG = {
1696
1694
  primary: "xai/grok-code-fast-1",
1697
1695
  // Code specialist, $0.20/$1.50
1698
1696
  fallback: [
1699
- "minimax/minimax-m2.5",
1700
- // $0.30/$1.20 - cheap with reasoning
1701
1697
  "google/gemini-2.5-flash",
1702
1698
  // 1M context, cost-effective
1703
1699
  "deepseek/deepseek-chat",
@@ -1712,10 +1708,7 @@ var DEFAULT_ROUTING_CONFIG = {
1712
1708
  "google/gemini-2.5-flash",
1713
1709
  // CRITICAL: 1M context, cheap failsafe before expensive models
1714
1710
  "google/gemini-2.5-pro",
1715
- "minimax/minimax-m2.5",
1716
- // $0.30/$1.20 - cheap with reasoning
1717
1711
  "deepseek/deepseek-chat",
1718
- // Another cheap option
1719
1712
  "xai/grok-4-0709",
1720
1713
  "openai/gpt-5.2",
1721
1714
  // Newer and cheaper input than gpt-4o
@@ -1727,8 +1720,6 @@ var DEFAULT_ROUTING_CONFIG = {
1727
1720
  primary: "xai/grok-4-1-fast-reasoning",
1728
1721
  // Upgraded Grok 4.1 reasoning $0.20/$0.50
1729
1722
  fallback: [
1730
- "minimax/minimax-m2.5",
1731
- // $0.30/$1.20 - reasoning capable
1732
1723
  "deepseek/deepseek-reasoner",
1733
1724
  // Cheap reasoning model
1734
1725
  "openai/o4-mini",
@@ -1742,22 +1733,22 @@ var DEFAULT_ROUTING_CONFIG = {
1742
1733
  SIMPLE: {
1743
1734
  primary: "nvidia/gpt-oss-120b",
1744
1735
  // FREE! $0.00/$0.00
1745
- fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "minimax/minimax-m2.5"]
1736
+ fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat"]
1746
1737
  },
1747
1738
  MEDIUM: {
1748
1739
  primary: "google/gemini-2.5-flash",
1749
1740
  // $0.15/$0.60 - cheapest capable
1750
- fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "minimax/minimax-m2.5"]
1741
+ fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
1751
1742
  },
1752
1743
  COMPLEX: {
1753
1744
  primary: "google/gemini-2.5-flash",
1754
1745
  // $0.15/$0.60 - 1M context handles complexity
1755
- fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709", "minimax/minimax-m2.5"]
1746
+ fallback: ["deepseek/deepseek-chat", "xai/grok-4-0709"]
1756
1747
  },
1757
1748
  REASONING: {
1758
1749
  primary: "xai/grok-4-1-fast-reasoning",
1759
- // $0.20/$0.50 - was MORE expensive than AUTO!
1760
- fallback: ["deepseek/deepseek-reasoner", "minimax/minimax-m2.5"]
1750
+ // $0.20/$0.50
1751
+ fallback: ["deepseek/deepseek-reasoner"]
1761
1752
  }
1762
1753
  },
1763
1754
  // Premium tier configs - best quality (blockrun/premium)
@@ -1808,8 +1799,6 @@ var DEFAULT_ROUTING_CONFIG = {
1808
1799
  primary: "moonshot/kimi-k2.5",
1809
1800
  // Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
1810
1801
  fallback: [
1811
- "minimax/minimax-m2.5",
1812
- // $0.30/$1.20 - agentic capable, cheaper than kimi
1813
1802
  "claude-haiku-4.5",
1814
1803
  "xai/grok-4-1-fast-non-reasoning",
1815
1804
  "openai/gpt-4o-mini"
@@ -1819,8 +1808,6 @@ var DEFAULT_ROUTING_CONFIG = {
1819
1808
  primary: "xai/grok-code-fast-1",
1820
1809
  // Code specialist for agentic coding
1821
1810
  fallback: [
1822
- "minimax/minimax-m2.5",
1823
- // $0.30/$1.20 - agentic capable
1824
1811
  "moonshot/kimi-k2.5",
1825
1812
  "claude-haiku-4.5",
1826
1813
  "claude-sonnet-4"
@@ -1831,8 +1818,6 @@ var DEFAULT_ROUTING_CONFIG = {
1831
1818
  fallback: [
1832
1819
  "claude-opus-4",
1833
1820
  // Latest Opus - best agentic
1834
- "minimax/minimax-m2.5",
1835
- // $0.30/$1.20 - cheap agentic fallback
1836
1821
  "openai/gpt-5.2",
1837
1822
  "google/gemini-3-pro-preview",
1838
1823
  "xai/grok-4-0709"
@@ -1843,8 +1828,6 @@ var DEFAULT_ROUTING_CONFIG = {
1843
1828
  // Strong tool use + reasoning for agentic tasks
1844
1829
  fallback: [
1845
1830
  "claude-opus-4",
1846
- "minimax/minimax-m2.5",
1847
- // $0.30/$1.20 - reasoning + agentic
1848
1831
  "xai/grok-4-1-fast-reasoning",
1849
1832
  "deepseek/deepseek-reasoner"
1850
1833
  ]
@@ -3761,6 +3744,7 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
3761
3744
  ]);
3762
3745
  var FREE_MODEL = "nvidia/gpt-oss-120b";
3763
3746
  var MAX_MESSAGES = 200;
3747
+ var CONTEXT_LIMIT_KB = 5120;
3764
3748
  var HEARTBEAT_INTERVAL_MS = 2e3;
3765
3749
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
3766
3750
  var MAX_FALLBACK_ATTEMPTS = 5;
@@ -4147,15 +4131,28 @@ function normalizeMessagesForThinking(messages) {
4147
4131
  return hasChanges ? normalized : messages;
4148
4132
  }
4149
4133
  function truncateMessages(messages) {
4150
- if (!messages || messages.length <= MAX_MESSAGES) return messages;
4134
+ if (!messages || messages.length <= MAX_MESSAGES) {
4135
+ return {
4136
+ messages,
4137
+ wasTruncated: false,
4138
+ originalCount: messages?.length ?? 0,
4139
+ truncatedCount: messages?.length ?? 0
4140
+ };
4141
+ }
4151
4142
  const systemMsgs = messages.filter((m) => m.role === "system");
4152
4143
  const conversationMsgs = messages.filter((m) => m.role !== "system");
4153
4144
  const maxConversation = MAX_MESSAGES - systemMsgs.length;
4154
4145
  const truncatedConversation = conversationMsgs.slice(-maxConversation);
4146
+ const result = [...systemMsgs, ...truncatedConversation];
4155
4147
  console.log(
4156
- `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${systemMsgs.length + truncatedConversation.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
4148
+ `[ClawRouter] Truncated messages: ${messages.length} \u2192 ${result.length} (kept ${systemMsgs.length} system + ${truncatedConversation.length} recent)`
4157
4149
  );
4158
- return [...systemMsgs, ...truncatedConversation];
4150
+ return {
4151
+ messages: result,
4152
+ wasTruncated: true,
4153
+ originalCount: messages.length,
4154
+ truncatedCount: result.length
4155
+ };
4159
4156
  }
4160
4157
  var KIMI_BLOCK_RE = /<[||][^<>]*begin[^<>]*[||]>[\s\S]*?<[||][^<>]*end[^<>]*[||]>/gi;
4161
4158
  var KIMI_TOKEN_RE = /<[||][^<>]*[||]>/g;
@@ -4488,7 +4485,8 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
4488
4485
  parsed.messages = normalizeMessageRoles(parsed.messages);
4489
4486
  }
4490
4487
  if (Array.isArray(parsed.messages)) {
4491
- parsed.messages = truncateMessages(parsed.messages);
4488
+ const truncationResult = truncateMessages(parsed.messages);
4489
+ parsed.messages = truncationResult.messages;
4492
4490
  }
4493
4491
  if (Array.isArray(parsed.messages)) {
4494
4492
  parsed.messages = sanitizeToolIds(parsed.messages);
@@ -4562,6 +4560,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4562
4560
  bodyChunks.push(Buffer.isBuffer(chunk) ? chunk : Buffer.from(chunk));
4563
4561
  }
4564
4562
  let body = Buffer.concat(bodyChunks);
4563
+ const originalContextSizeKB = Math.ceil(body.length / 1024);
4565
4564
  let routingDecision;
4566
4565
  let isStreaming = false;
4567
4566
  let modelId = "";
@@ -4669,7 +4668,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4669
4668
  const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
4670
4669
  const tools = parsed.tools;
4671
4670
  const hasTools = Array.isArray(tools) && tools.length > 0;
4672
- if (hasTools) {
4671
+ if (hasTools && tools) {
4673
4672
  console.log(
4674
4673
  `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
4675
4674
  );
@@ -4814,7 +4813,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4814
4813
  res.writeHead(200, {
4815
4814
  "content-type": "text/event-stream",
4816
4815
  "cache-control": "no-cache",
4817
- connection: "keep-alive"
4816
+ connection: "keep-alive",
4817
+ "x-context-used-kb": String(originalContextSizeKB),
4818
+ "x-context-limit-kb": String(CONTEXT_LIMIT_KB)
4818
4819
  });
4819
4820
  headersSentEarly = true;
4820
4821
  safeWrite(res, ": heartbeat\n\n");
@@ -4977,7 +4978,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4977
4978
  completedAt: Date.now()
4978
4979
  });
4979
4980
  } else {
4980
- res.writeHead(errStatus, { "Content-Type": "application/json" });
4981
+ res.writeHead(errStatus, {
4982
+ "Content-Type": "application/json",
4983
+ "x-context-used-kb": String(originalContextSizeKB),
4984
+ "x-context-limit-kb": String(CONTEXT_LIMIT_KB)
4985
+ });
4981
4986
  res.end(transformedErr);
4982
4987
  deduplicator.complete(dedupKey, {
4983
4988
  status: errStatus,
@@ -5103,6 +5108,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
5103
5108
  return;
5104
5109
  responseHeaders[key] = value;
5105
5110
  });
5111
+ responseHeaders["x-context-used-kb"] = String(originalContextSizeKB);
5112
+ responseHeaders["x-context-limit-kb"] = String(CONTEXT_LIMIT_KB);
5106
5113
  res.writeHead(upstream.status, responseHeaders);
5107
5114
  if (upstream.body) {
5108
5115
  const reader = upstream.body.getReader();
@@ -5768,15 +5775,6 @@ var plugin = {
5768
5775
  apiKey: "x402-proxy-handles-auth",
5769
5776
  models: OPENCLAW_MODELS
5770
5777
  };
5771
- if (!api.config.agents) api.config.agents = {};
5772
- const agents = api.config.agents;
5773
- if (!agents.defaults) agents.defaults = {};
5774
- const defaults = agents.defaults;
5775
- if (!defaults.model) defaults.model = {};
5776
- const model = defaults.model;
5777
- if (!model.primary) {
5778
- model.primary = "blockrun/auto";
5779
- }
5780
5778
  api.logger.info("BlockRun provider registered (30+ models via x402)");
5781
5779
  createWalletCommand().then((walletCommand) => {
5782
5780
  api.registerCommand(walletCommand);