@blockrun/clawrouter 0.12.44 → 0.12.46

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -334,7 +334,7 @@ npm test
334
334
  | --------------------- | ------------------------------------------------------------------ |
335
335
  | 📅 Schedule Demo | [calendly.com/vickyfu9/30min](https://calendly.com/vickyfu9/30min) |
336
336
  | 💬 Community Telegram | [t.me/blockrunAI](https://t.me/blockrunAI) |
337
- | 🐦 X / Twitter | [x.com/BlockRunAI](https://x.com/BlockRunAI) |
337
+ | 🐦 X / Twitter | [x.com/ClawRou](https://x.com/ClawRou) |
338
338
  | 📱 Founder Telegram | [@bc1max](https://t.me/bc1max) |
339
339
  | ✉️ Email | vicky@blockrun.ai |
340
340
 
package/dist/cli.js CHANGED
@@ -1645,63 +1645,74 @@ var DEFAULT_ROUTING_CONFIG = {
1645
1645
  confidenceThreshold: 0.7
1646
1646
  },
1647
1647
  // Auto (balanced) tier configs - current default smart routing
1648
+ // Benchmark-tuned 2026-03-16: latency-ranked via blockrun.ai x402 end-to-end
1648
1649
  tiers: {
1649
1650
  SIMPLE: {
1650
- primary: "moonshot/kimi-k2.5",
1651
- // $0.60/$3.00 - best quality/price for simple tasks
1651
+ primary: "xai/grok-4-fast-non-reasoning",
1652
+ // 1,143ms, $0.20/$0.50 fastest overall
1652
1653
  fallback: [
1654
+ "xai/grok-3-mini",
1655
+ // 1,202ms, $0.30/$0.50
1653
1656
  "google/gemini-2.5-flash",
1654
- // 60% retention (best), fast growth (+800%)
1657
+ // 1,238ms, 60% retention (best)
1655
1658
  "google/gemini-2.5-flash-lite",
1656
- // 1M context, ultra cheap ($0.10/$0.40)
1659
+ // 1,353ms, 1M context, ultra cheap ($0.10/$0.40)
1657
1660
  "deepseek/deepseek-chat",
1658
- // 41% retention
1661
+ // 1,431ms, 41% retention
1659
1662
  "nvidia/gpt-oss-120b"
1660
- // FREE fallback
1663
+ // 1,252ms, FREE fallback
1661
1664
  ]
1662
1665
  },
1663
1666
  MEDIUM: {
1664
- primary: "moonshot/kimi-k2.5",
1665
- // $0.50/$2.40 - strong tool use, proper function call format
1667
+ primary: "xai/grok-4-1-fast-non-reasoning",
1668
+ // 1,244ms, $0.20/$0.50 fast + tool calling
1666
1669
  fallback: [
1667
1670
  "deepseek/deepseek-chat",
1668
- // 41% retention
1671
+ // 1,431ms, 41% retention
1672
+ "moonshot/kimi-k2.5",
1673
+ // 1,646ms, strong tool use quality
1669
1674
  "google/gemini-2.5-flash",
1670
- // 60% retention, cheap fast model
1675
+ // 1,238ms, 60% retention
1671
1676
  "google/gemini-2.5-flash-lite",
1672
- // 1M context, ultra cheap ($0.10/$0.40)
1673
- "xai/grok-4-1-fast-non-reasoning"
1674
- // Upgraded Grok 4.1
1677
+ // 1,353ms, 1M context ($0.10/$0.40)
1678
+ "xai/grok-3-mini"
1679
+ // 1,202ms, $0.30/$0.50
1675
1680
  ]
1676
1681
  },
1677
1682
  COMPLEX: {
1678
1683
  primary: "google/gemini-3.1-pro",
1679
- // Newest Gemini 3.1 - upgraded from 3.0
1684
+ // 1,609ms fast flagship quality
1680
1685
  fallback: [
1681
1686
  "google/gemini-2.5-flash",
1682
- // 60% retention, cheap failsafe before expensive models
1687
+ // 1,238ms, cheap failsafe before expensive models
1683
1688
  "google/gemini-2.5-flash-lite",
1684
- // CRITICAL: 1M context, ultra-cheap failsafe ($0.10/$0.40)
1689
+ // 1,353ms, 1M context, ultra-cheap failsafe ($0.10/$0.40)
1685
1690
  "google/gemini-3-pro-preview",
1686
- // 3.0 fallback
1691
+ // 1,352ms
1687
1692
  "google/gemini-2.5-pro",
1688
- "deepseek/deepseek-chat",
1693
+ // 1,294ms
1689
1694
  "xai/grok-4-0709",
1690
- "openai/gpt-5.4",
1691
- // Newest flagship, same price as 4o
1692
- "openai/gpt-4o",
1693
- "anthropic/claude-sonnet-4.6"
1695
+ // 1,348ms
1696
+ "deepseek/deepseek-chat",
1697
+ // 1,431ms
1698
+ "anthropic/claude-sonnet-4.6",
1699
+ // 2,110ms — quality fallback
1700
+ "openai/gpt-5.4"
1701
+ // 6,213ms — slowest but highest quality
1694
1702
  ]
1695
1703
  },
1696
1704
  REASONING: {
1697
1705
  primary: "xai/grok-4-1-fast-reasoning",
1698
- // Upgraded Grok 4.1 reasoning $0.20/$0.50
1706
+ // 1,454ms, $0.20/$0.50
1699
1707
  fallback: [
1708
+ "xai/grok-4-fast-reasoning",
1709
+ // 1,298ms, $0.20/$0.50
1700
1710
  "deepseek/deepseek-reasoner",
1701
- // Cheap reasoning model
1711
+ // 1,454ms, cheap reasoning
1702
1712
  "openai/o4-mini",
1703
- // Newer and cheaper than o3 ($1.10 vs $2.00)
1713
+ // 2,328ms ($1.10/$4.40)
1704
1714
  "openai/o3"
1715
+ // 2,862ms
1705
1716
  ]
1706
1717
  }
1707
1718
  },
@@ -1709,27 +1720,30 @@ var DEFAULT_ROUTING_CONFIG = {
1709
1720
  ecoTiers: {
1710
1721
  SIMPLE: {
1711
1722
  primary: "nvidia/gpt-oss-120b",
1712
- // FREE! $0.00/$0.00
1723
+ // 1,252ms, FREE! $0.00/$0.00
1713
1724
  fallback: [
1714
1725
  "google/gemini-2.5-flash-lite",
1715
- "google/gemini-2.5-flash",
1716
- "deepseek/deepseek-chat"
1726
+ // 1,353ms, $0.10/$0.40
1727
+ "xai/grok-4-fast-non-reasoning",
1728
+ // 1,143ms, $0.20/$0.50
1729
+ "google/gemini-2.5-flash"
1730
+ // 1,238ms
1717
1731
  ]
1718
1732
  },
1719
1733
  MEDIUM: {
1720
1734
  primary: "google/gemini-2.5-flash-lite",
1721
- // $0.10/$0.40 - cheapest capable with 1M context
1722
- fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
1735
+ // 1,353ms, $0.10/$0.40 - cheapest capable with 1M context
1736
+ fallback: ["xai/grok-4-fast-non-reasoning", "google/gemini-2.5-flash", "deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
1723
1737
  },
1724
1738
  COMPLEX: {
1725
1739
  primary: "google/gemini-2.5-flash-lite",
1726
- // $0.10/$0.40 - 1M context handles complexity
1727
- fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "xai/grok-4-0709"]
1740
+ // 1,353ms, $0.10/$0.40 - 1M context handles complexity
1741
+ fallback: ["xai/grok-4-0709", "google/gemini-2.5-flash", "deepseek/deepseek-chat"]
1728
1742
  },
1729
1743
  REASONING: {
1730
1744
  primary: "xai/grok-4-1-fast-reasoning",
1731
- // $0.20/$0.50
1732
- fallback: ["deepseek/deepseek-reasoner"]
1745
+ // 1,454ms, $0.20/$0.50
1746
+ fallback: ["xai/grok-4-fast-reasoning", "deepseek/deepseek-reasoner"]
1733
1747
  }
1734
1748
  },
1735
1749
  // Premium tier configs - best quality (blockrun/premium)
@@ -1775,57 +1789,71 @@ var DEFAULT_ROUTING_CONFIG = {
1775
1789
  },
1776
1790
  REASONING: {
1777
1791
  primary: "anthropic/claude-sonnet-4.6",
1778
- // $3/$15 - best for reasoning/instructions
1792
+ // 2,110ms, $3/$15 - best for reasoning/instructions
1779
1793
  fallback: [
1780
1794
  "anthropic/claude-opus-4.6",
1781
- "anthropic/claude-opus-4.6",
1795
+ // 2,139ms
1796
+ "xai/grok-4-1-fast-reasoning",
1797
+ // 1,454ms, cheap fast reasoning
1782
1798
  "openai/o4-mini",
1783
- // Newer and cheaper than o3 ($1.10 vs $2.00)
1784
- "openai/o3",
1785
- "xai/grok-4-1-fast-reasoning"
1799
+ // 2,328ms ($1.10/$4.40)
1800
+ "openai/o3"
1801
+ // 2,862ms
1786
1802
  ]
1787
1803
  }
1788
1804
  },
1789
1805
  // Agentic tier configs - models that excel at multi-step autonomous tasks
1790
1806
  agenticTiers: {
1791
1807
  SIMPLE: {
1792
- primary: "moonshot/kimi-k2.5",
1793
- // Cheaper than Haiku ($0.5/$2.4 vs $1/$5), larger context
1808
+ primary: "xai/grok-4-1-fast-non-reasoning",
1809
+ // 1,244ms, $0.20/$0.50 fast tool calling
1794
1810
  fallback: [
1795
- "anthropic/claude-haiku-4.5",
1796
- "xai/grok-4-1-fast-non-reasoning",
1797
- "openai/gpt-4o-mini"
1811
+ "openai/gpt-4o-mini",
1812
+ // 2,764ms, $0.15/$0.60 - reliable tool compliance
1813
+ "moonshot/kimi-k2.5",
1814
+ // 1,646ms, strong tool use quality
1815
+ "anthropic/claude-haiku-4.5"
1816
+ // 2,305ms
1798
1817
  ]
1799
1818
  },
1800
1819
  MEDIUM: {
1801
1820
  primary: "moonshot/kimi-k2.5",
1802
- // $0.50/$2.40 - strong tool use, handles function calls correctly
1821
+ // 1,646ms, $0.60/$3.00 - strong tool use, proper function calls
1803
1822
  fallback: [
1823
+ "xai/grok-4-1-fast-non-reasoning",
1824
+ // 1,244ms, fast fallback
1825
+ "openai/gpt-4o-mini",
1826
+ // 2,764ms, reliable tool calling
1804
1827
  "anthropic/claude-haiku-4.5",
1805
- "deepseek/deepseek-chat",
1806
- "xai/grok-4-1-fast-non-reasoning"
1828
+ // 2,305ms
1829
+ "deepseek/deepseek-chat"
1830
+ // 1,431ms
1807
1831
  ]
1808
1832
  },
1809
1833
  COMPLEX: {
1810
1834
  primary: "anthropic/claude-sonnet-4.6",
1835
+ // 2,110ms — best agentic quality
1811
1836
  fallback: [
1812
1837
  "anthropic/claude-opus-4.6",
1813
- // Latest Opus - best agentic
1814
- "openai/gpt-5.4",
1815
- // Newest flagship
1838
+ // 2,139ms top quality
1816
1839
  "google/gemini-3.1-pro",
1817
- // Newest Gemini
1818
- "google/gemini-3-pro-preview",
1819
- "xai/grok-4-0709"
1840
+ // 1,609ms
1841
+ "xai/grok-4-0709",
1842
+ // 1,348ms
1843
+ "openai/gpt-5.4"
1844
+ // 6,213ms — slow but highest quality fallback
1820
1845
  ]
1821
1846
  },
1822
1847
  REASONING: {
1823
1848
  primary: "anthropic/claude-sonnet-4.6",
1824
- // Strong tool use + reasoning for agentic tasks
1849
+ // 2,110ms — strong tool use + reasoning
1825
1850
  fallback: [
1826
1851
  "anthropic/claude-opus-4.6",
1852
+ // 2,139ms
1827
1853
  "xai/grok-4-1-fast-reasoning",
1854
+ // 1,454ms
1828
1855
  "deepseek/deepseek-reasoner"
1856
+ // 1,454ms
1829
1857
  ]
1830
1858
  }
1831
1859
  },
@@ -6987,17 +7015,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
6987
7015
  hasTools
6988
7016
  });
6989
7017
  if (hasTools && routingDecision.tier === "SIMPLE") {
6990
- const simpleRoutingDecision = route(prompt, systemPrompt, maxTokens, {
6991
- ...routerOpts,
6992
- routingProfile: routingProfile ?? void 0,
6993
- hasTools: false
6994
- });
6995
- if (simpleRoutingDecision.tier === "SIMPLE") {
6996
- console.log(
6997
- `[ClawRouter] SIMPLE+tools: using non-agentic model ${simpleRoutingDecision.model} (tools present but query is trivial)`
6998
- );
6999
- routingDecision = simpleRoutingDecision;
7000
- }
7018
+ console.log(
7019
+ `[ClawRouter] SIMPLE+tools: keeping agentic model ${routingDecision.model} (tools need reliable function-call support)`
7020
+ );
7001
7021
  }
7002
7022
  if (existingSession) {
7003
7023
  const tierRank = {
@@ -7314,7 +7334,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
7314
7334
  } else {
7315
7335
  modelsToTry = modelId ? [modelId] : [];
7316
7336
  }
7317
- if (!modelsToTry.includes(FREE_MODEL)) {
7337
+ if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
7318
7338
  modelsToTry.push(FREE_MODEL);
7319
7339
  }
7320
7340
  let upstream;