@blockrun/clawrouter 0.12.45 → 0.12.47

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1645,63 +1645,74 @@ var DEFAULT_ROUTING_CONFIG = {
1645
1645
  confidenceThreshold: 0.7
1646
1646
  },
1647
1647
  // Auto (balanced) tier configs - current default smart routing
1648
+ // Benchmark-tuned 2026-03-16: balancing quality (retention) + latency
1648
1649
  tiers: {
1649
1650
  SIMPLE: {
1650
- primary: "moonshot/kimi-k2.5",
1651
- // $0.60/$3.00 - best quality/price for simple tasks
1651
+ primary: "google/gemini-2.5-flash",
1652
+ // 1,238ms, 60% retention (best) — fast AND quality
1652
1653
  fallback: [
1653
- "google/gemini-2.5-flash",
1654
- // 60% retention (best), fast growth (+800%)
1655
- "google/gemini-2.5-flash-lite",
1656
- // 1M context, ultra cheap ($0.10/$0.40)
1657
1654
  "deepseek/deepseek-chat",
1658
- // 41% retention
1655
+ // 1,431ms, 41% retention
1656
+ "moonshot/kimi-k2.5",
1657
+ // 1,646ms, strong quality
1658
+ "google/gemini-2.5-flash-lite",
1659
+ // 1,353ms, 1M context, ultra cheap ($0.10/$0.40)
1660
+ "xai/grok-4-fast-non-reasoning",
1661
+ // 1,143ms, $0.20/$0.50 — fast fallback
1659
1662
  "nvidia/gpt-oss-120b"
1660
- // FREE fallback
1663
+ // 1,252ms, FREE fallback
1661
1664
  ]
1662
1665
  },
1663
1666
  MEDIUM: {
1664
1667
  primary: "moonshot/kimi-k2.5",
1665
- // $0.50/$2.40 - strong tool use, proper function call format
1668
+ // 1,646ms, $0.60/$3.00 — strong tool use, quality output
1666
1669
  fallback: [
1667
1670
  "deepseek/deepseek-chat",
1668
- // 41% retention
1671
+ // 1,431ms, 41% retention
1669
1672
  "google/gemini-2.5-flash",
1670
- // 60% retention, cheap fast model
1673
+ // 1,238ms, 60% retention
1671
1674
  "google/gemini-2.5-flash-lite",
1672
- // 1M context, ultra cheap ($0.10/$0.40)
1673
- "xai/grok-4-1-fast-non-reasoning"
1674
- // Upgraded Grok 4.1
1675
+ // 1,353ms, 1M context ($0.10/$0.40)
1676
+ "xai/grok-4-1-fast-non-reasoning",
1677
+ // 1,244ms, fast fallback
1678
+ "xai/grok-3-mini"
1679
+ // 1,202ms, $0.30/$0.50
1675
1680
  ]
1676
1681
  },
1677
1682
  COMPLEX: {
1678
1683
  primary: "google/gemini-3.1-pro",
1679
- // Newest Gemini 3.1 - upgraded from 3.0
1684
+ // 1,609ms — fast flagship quality
1680
1685
  fallback: [
1681
1686
  "google/gemini-2.5-flash",
1682
- // 60% retention, cheap failsafe before expensive models
1687
+ // 1,238ms, cheap failsafe before expensive models
1683
1688
  "google/gemini-2.5-flash-lite",
1684
- // CRITICAL: 1M context, ultra-cheap failsafe ($0.10/$0.40)
1689
+ // 1,353ms, 1M context, ultra-cheap failsafe ($0.10/$0.40)
1685
1690
  "google/gemini-3-pro-preview",
1686
- // 3.0 fallback
1691
+ // 1,352ms
1687
1692
  "google/gemini-2.5-pro",
1688
- "deepseek/deepseek-chat",
1693
+ // 1,294ms
1689
1694
  "xai/grok-4-0709",
1690
- "openai/gpt-5.4",
1691
- // Newest flagship, same price as 4o
1692
- "openai/gpt-4o",
1693
- "anthropic/claude-sonnet-4.6"
1695
+ // 1,348ms
1696
+ "deepseek/deepseek-chat",
1697
+ // 1,431ms
1698
+ "anthropic/claude-sonnet-4.6",
1699
+ // 2,110ms — quality fallback
1700
+ "openai/gpt-5.4"
1701
+ // 6,213ms — slowest but highest quality
1694
1702
  ]
1695
1703
  },
1696
1704
  REASONING: {
1697
1705
  primary: "xai/grok-4-1-fast-reasoning",
1698
- // Upgraded Grok 4.1 reasoning $0.20/$0.50
1706
+ // 1,454ms, $0.20/$0.50
1699
1707
  fallback: [
1708
+ "xai/grok-4-fast-reasoning",
1709
+ // 1,298ms, $0.20/$0.50
1700
1710
  "deepseek/deepseek-reasoner",
1701
- // Cheap reasoning model
1711
+ // 1,454ms, cheap reasoning
1702
1712
  "openai/o4-mini",
1703
- // Newer and cheaper than o3 ($1.10 vs $2.00)
1713
+ // 2,328ms ($1.10/$4.40)
1704
1714
  "openai/o3"
1715
+ // 2,862ms
1705
1716
  ]
1706
1717
  }
1707
1718
  },
@@ -1709,27 +1720,30 @@ var DEFAULT_ROUTING_CONFIG = {
1709
1720
  ecoTiers: {
1710
1721
  SIMPLE: {
1711
1722
  primary: "nvidia/gpt-oss-120b",
1712
- // FREE! $0.00/$0.00
1723
+ // 1,252ms, FREE! $0.00/$0.00
1713
1724
  fallback: [
1714
1725
  "google/gemini-2.5-flash-lite",
1715
- "google/gemini-2.5-flash",
1716
- "deepseek/deepseek-chat"
1726
+ // 1,353ms, $0.10/$0.40
1727
+ "xai/grok-4-fast-non-reasoning",
1728
+ // 1,143ms, $0.20/$0.50
1729
+ "google/gemini-2.5-flash"
1730
+ // 1,238ms
1717
1731
  ]
1718
1732
  },
1719
1733
  MEDIUM: {
1720
1734
  primary: "google/gemini-2.5-flash-lite",
1721
- // $0.10/$0.40 - cheapest capable with 1M context
1722
- fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
1735
+ // 1,353ms, $0.10/$0.40 - cheapest capable with 1M context
1736
+ fallback: ["xai/grok-4-fast-non-reasoning", "google/gemini-2.5-flash", "deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
1723
1737
  },
1724
1738
  COMPLEX: {
1725
1739
  primary: "google/gemini-2.5-flash-lite",
1726
- // $0.10/$0.40 - 1M context handles complexity
1727
- fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "xai/grok-4-0709"]
1740
+ // 1,353ms, $0.10/$0.40 - 1M context handles complexity
1741
+ fallback: ["xai/grok-4-0709", "google/gemini-2.5-flash", "deepseek/deepseek-chat"]
1728
1742
  },
1729
1743
  REASONING: {
1730
1744
  primary: "xai/grok-4-1-fast-reasoning",
1731
- // $0.20/$0.50
1732
- fallback: ["deepseek/deepseek-reasoner"]
1745
+ // 1,454ms, $0.20/$0.50
1746
+ fallback: ["xai/grok-4-fast-reasoning", "deepseek/deepseek-reasoner"]
1733
1747
  }
1734
1748
  },
1735
1749
  // Premium tier configs - best quality (blockrun/premium)
@@ -1775,14 +1789,16 @@ var DEFAULT_ROUTING_CONFIG = {
1775
1789
  },
1776
1790
  REASONING: {
1777
1791
  primary: "anthropic/claude-sonnet-4.6",
1778
- // $3/$15 - best for reasoning/instructions
1792
+ // 2,110ms, $3/$15 - best for reasoning/instructions
1779
1793
  fallback: [
1780
1794
  "anthropic/claude-opus-4.6",
1781
- "anthropic/claude-opus-4.6",
1795
+ // 2,139ms
1796
+ "xai/grok-4-1-fast-reasoning",
1797
+ // 1,454ms, cheap fast reasoning
1782
1798
  "openai/o4-mini",
1783
- // Newer and cheaper than o3 ($1.10 vs $2.00)
1784
- "openai/o3",
1785
- "xai/grok-4-1-fast-reasoning"
1799
+ // 2,328ms ($1.10/$4.40)
1800
+ "openai/o3"
1801
+ // 2,862ms
1786
1802
  ]
1787
1803
  }
1788
1804
  },
@@ -1793,41 +1809,51 @@ var DEFAULT_ROUTING_CONFIG = {
1793
1809
  // $0.15/$0.60 - best tool compliance at lowest cost
1794
1810
  fallback: [
1795
1811
  "moonshot/kimi-k2.5",
1812
+ // 1,646ms, strong tool use quality
1796
1813
  "anthropic/claude-haiku-4.5",
1814
+ // 2,305ms
1797
1815
  "xai/grok-4-1-fast-non-reasoning"
1816
+ // 1,244ms, fast fallback
1798
1817
  ]
1799
1818
  },
1800
1819
  MEDIUM: {
1801
1820
  primary: "moonshot/kimi-k2.5",
1802
- // $0.50/$2.40 - strong tool use, handles function calls correctly
1821
+ // 1,646ms, $0.60/$3.00 - strong tool use, proper function calls
1803
1822
  fallback: [
1823
+ "xai/grok-4-1-fast-non-reasoning",
1824
+ // 1,244ms, fast fallback
1804
1825
  "openai/gpt-4o-mini",
1805
- // $0.15/$0.60 - reliable tool calling fallback
1826
+ // 2,764ms, reliable tool calling
1806
1827
  "anthropic/claude-haiku-4.5",
1807
- "deepseek/deepseek-chat",
1808
- "xai/grok-4-1-fast-non-reasoning"
1828
+ // 2,305ms
1829
+ "deepseek/deepseek-chat"
1830
+ // 1,431ms
1809
1831
  ]
1810
1832
  },
1811
1833
  COMPLEX: {
1812
1834
  primary: "anthropic/claude-sonnet-4.6",
1835
+ // 2,110ms — best agentic quality
1813
1836
  fallback: [
1814
1837
  "anthropic/claude-opus-4.6",
1815
- // Latest Opus - best agentic
1816
- "openai/gpt-5.4",
1817
- // Newest flagship
1838
+ // 2,139ms — top quality
1818
1839
  "google/gemini-3.1-pro",
1819
- // Newest Gemini
1820
- "google/gemini-3-pro-preview",
1821
- "xai/grok-4-0709"
1840
+ // 1,609ms
1841
+ "xai/grok-4-0709",
1842
+ // 1,348ms
1843
+ "openai/gpt-5.4"
1844
+ // 6,213ms — slow but highest quality fallback
1822
1845
  ]
1823
1846
  },
1824
1847
  REASONING: {
1825
1848
  primary: "anthropic/claude-sonnet-4.6",
1826
- // Strong tool use + reasoning for agentic tasks
1849
+ // 2,110ms — strong tool use + reasoning
1827
1850
  fallback: [
1828
1851
  "anthropic/claude-opus-4.6",
1852
+ // 2,139ms
1829
1853
  "xai/grok-4-1-fast-reasoning",
1854
+ // 1,454ms
1830
1855
  "deepseek/deepseek-reasoner"
1856
+ // 1,454ms
1831
1857
  ]
1832
1858
  }
1833
1859
  },