@blockrun/clawrouter 0.12.48 → 0.12.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/cli.js CHANGED
@@ -1649,12 +1649,14 @@ var DEFAULT_ROUTING_CONFIG = {
1649
1649
  tiers: {
1650
1650
  SIMPLE: {
1651
1651
  primary: "google/gemini-2.5-flash",
1652
- // 1,238ms, 60% retention (best) — fast AND quality
1652
+ // 1,238ms, IQ 20, 60% retention (best) — fast AND quality
1653
1653
  fallback: [
1654
+ "google/gemini-3-flash-preview",
1655
+ // 1,398ms, IQ 46 — smarter fallback
1654
1656
  "deepseek/deepseek-chat",
1655
- // 1,431ms, 41% retention
1657
+ // 1,431ms, IQ 32, 41% retention
1656
1658
  "moonshot/kimi-k2.5",
1657
- // 1,646ms, strong quality
1659
+ // 1,646ms, IQ 47, strong quality
1658
1660
  "google/gemini-2.5-flash-lite",
1659
1661
  // 1,353ms, 1M context, ultra cheap ($0.10/$0.40)
1660
1662
  "xai/grok-4-fast-non-reasoning",
@@ -1665,10 +1667,12 @@ var DEFAULT_ROUTING_CONFIG = {
1665
1667
  },
1666
1668
  MEDIUM: {
1667
1669
  primary: "moonshot/kimi-k2.5",
1668
- // 1,646ms, $0.60/$3.00 — strong tool use, quality output
1670
+ // 1,646ms, IQ 47, $0.60/$3.00 — strong tool use, quality output
1669
1671
  fallback: [
1672
+ "google/gemini-3-flash-preview",
1673
+ // 1,398ms, IQ 46 — nearly same IQ, faster + cheaper
1670
1674
  "deepseek/deepseek-chat",
1671
- // 1,431ms, 41% retention
1675
+ // 1,431ms, IQ 32, 41% retention
1672
1676
  "google/gemini-2.5-flash",
1673
1677
  // 1,238ms, 60% retention
1674
1678
  "google/gemini-2.5-flash-lite",
@@ -1681,24 +1685,24 @@ var DEFAULT_ROUTING_CONFIG = {
1681
1685
  },
1682
1686
  COMPLEX: {
1683
1687
  primary: "google/gemini-3.1-pro",
1684
- // 1,609ms — fast flagship quality
1688
+ // 1,609ms, IQ 57 — fast flagship quality
1685
1689
  fallback: [
1686
- "google/gemini-2.5-flash",
1687
- // 1,238ms, cheap failsafe before expensive models
1688
- "google/gemini-2.5-flash-lite",
1689
- // 1,353ms, 1M context, ultra-cheap failsafe ($0.10/$0.40)
1690
1690
  "google/gemini-3-pro-preview",
1691
- // 1,352ms
1691
+ // 1,352ms, IQ 48 — quality-first fallback
1692
+ "google/gemini-3-flash-preview",
1693
+ // 1,398ms, IQ 46 — fast + smart
1694
+ "xai/grok-4-0709",
1695
+ // 1,348ms, IQ 41
1692
1696
  "google/gemini-2.5-pro",
1693
1697
  // 1,294ms
1694
- "xai/grok-4-0709",
1695
- // 1,348ms
1696
- "deepseek/deepseek-chat",
1697
- // 1,431ms
1698
1698
  "anthropic/claude-sonnet-4.6",
1699
- // 2,110ms — quality fallback
1699
+ // 2,110ms, IQ 52 — quality fallback
1700
+ "deepseek/deepseek-chat",
1701
+ // 1,431ms, IQ 32
1702
+ "google/gemini-2.5-flash",
1703
+ // 1,238ms, IQ 20 — cheap last resort
1700
1704
  "openai/gpt-5.4"
1701
- // 6,213ms — slowest but highest quality
1705
+ // 6,213ms, IQ 57 — slowest but highest quality
1702
1706
  ]
1703
1707
  },
1704
1708
  REASONING: {
@@ -5587,6 +5591,27 @@ function normalizeMessagesForThinking(messages) {
5587
5591
  });
5588
5592
  return hasChanges ? normalized : messages;
5589
5593
  }
5594
/**
 * Rewrites `blockrun/...` references inside system-message text.
 *
 * Two rewrites are applied, in order, to the string content of every message
 * whose `role` is "system":
 *   1. `blockrun/<profile>` (auto, free, eco, premium; case-insensitive) is
 *      replaced with `resolvedModel`.
 *   2. Any remaining `blockrun/` that is directly followed by a letter is
 *      removed, leaving whatever came after the prefix.
 *
 * Messages with another role, or with non-string content, are passed through
 * untouched. Neither the input array nor its message objects are mutated.
 *
 * @param {Array<object>} messages - Chat messages to scan.
 * @param {string} resolvedModel - Text substituted verbatim for each profile reference.
 * @returns {Array<object>} The same `messages` array when nothing changed;
 *   otherwise a new array in which only the rewritten messages are shallow copies.
 */
function debrandSystemMessages(messages, resolvedModel) {
  const PROFILE_NAMES = ["auto", "free", "eco", "premium"];
  const profilePattern = new RegExp(
    `\\bblockrun/(${PROFILE_NAMES.join("|")})\\b`,
    "gi"
  );
  const prefixPattern = /\bblockrun\/(?=[a-z])/gi;
  let hasChanges = false;
  const result = messages.map((msg) => {
    if (msg.role !== "system" || typeof msg.content !== "string") return msg;
    // A replacer function is used instead of a replacement string so that
    // sequences such as `$&`, `$1` or `$$` inside `resolvedModel` are inserted
    // literally rather than being expanded by String.prototype.replace.
    const content = msg.content
      .replace(profilePattern, () => resolvedModel)
      .replace(prefixPattern, "");
    if (content === msg.content) return msg;
    hasChanges = true;
    return { ...msg, content };
  });
  // Hand back the caller's array when nothing was rewritten.
  return hasChanges ? result : messages;
}
5590
5615
  function truncateMessages(messages) {
5591
5616
  if (!messages || messages.length <= MAX_MESSAGES) {
5592
5617
  return {
@@ -6351,6 +6376,12 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
6351
6376
  if (Array.isArray(parsed.messages)) {
6352
6377
  parsed.messages = normalizeMessageRoles(parsed.messages);
6353
6378
  }
6379
+ if (Array.isArray(parsed.messages)) {
6380
+ parsed.messages = debrandSystemMessages(
6381
+ parsed.messages,
6382
+ modelId
6383
+ );
6384
+ }
6354
6385
  if (Array.isArray(parsed.messages)) {
6355
6386
  const truncationResult = truncateMessages(parsed.messages);
6356
6387
  parsed.messages = truncationResult.messages;