@blockrun/clawrouter 0.12.49 → 0.12.50

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -2932,12 +2932,14 @@ var DEFAULT_ROUTING_CONFIG = {
2932
2932
  tiers: {
2933
2933
  SIMPLE: {
2934
2934
  primary: "google/gemini-2.5-flash",
2935
- // 1,238ms, 60% retention (best) — fast AND quality
2935
+ // 1,238ms, IQ 20, 60% retention (best) — fast AND quality
2936
2936
  fallback: [
2937
+ "google/gemini-3-flash-preview",
2938
+ // 1,398ms, IQ 46 — smarter fallback
2937
2939
  "deepseek/deepseek-chat",
2938
- // 1,431ms, 41% retention
2940
+ // 1,431ms, IQ 32, 41% retention
2939
2941
  "moonshot/kimi-k2.5",
2940
- // 1,646ms, strong quality
2942
+ // 1,646ms, IQ 47, strong quality
2941
2943
  "google/gemini-2.5-flash-lite",
2942
2944
  // 1,353ms, 1M context, ultra cheap ($0.10/$0.40)
2943
2945
  "xai/grok-4-fast-non-reasoning",
@@ -2948,10 +2950,12 @@ var DEFAULT_ROUTING_CONFIG = {
2948
2950
  },
2949
2951
  MEDIUM: {
2950
2952
  primary: "moonshot/kimi-k2.5",
2951
- // 1,646ms, $0.60/$3.00 — strong tool use, quality output
2953
+ // 1,646ms, IQ 47, $0.60/$3.00 — strong tool use, quality output
2952
2954
  fallback: [
2955
+ "google/gemini-3-flash-preview",
2956
+ // 1,398ms, IQ 46 — nearly same IQ, faster + cheaper
2953
2957
  "deepseek/deepseek-chat",
2954
- // 1,431ms, 41% retention
2958
+ // 1,431ms, IQ 32, 41% retention
2955
2959
  "google/gemini-2.5-flash",
2956
2960
  // 1,238ms, 60% retention
2957
2961
  "google/gemini-2.5-flash-lite",
@@ -2964,24 +2968,24 @@ var DEFAULT_ROUTING_CONFIG = {
2964
2968
  },
2965
2969
  COMPLEX: {
2966
2970
  primary: "google/gemini-3.1-pro",
2967
- // 1,609ms — fast flagship quality
2971
+ // 1,609ms, IQ 57 — fast flagship quality
2968
2972
  fallback: [
2969
- "google/gemini-2.5-flash",
2970
- // 1,238ms, cheap failsafe before expensive models
2971
- "google/gemini-2.5-flash-lite",
2972
- // 1,353ms, 1M context, ultra-cheap failsafe ($0.10/$0.40)
2973
2973
  "google/gemini-3-pro-preview",
2974
- // 1,352ms
2974
+ // 1,352ms, IQ 48 — quality-first fallback
2975
+ "google/gemini-3-flash-preview",
2976
+ // 1,398ms, IQ 46 — fast + smart
2977
+ "xai/grok-4-0709",
2978
+ // 1,348ms, IQ 41
2975
2979
  "google/gemini-2.5-pro",
2976
2980
  // 1,294ms
2977
- "xai/grok-4-0709",
2978
- // 1,348ms
2979
- "deepseek/deepseek-chat",
2980
- // 1,431ms
2981
2981
  "anthropic/claude-sonnet-4.6",
2982
- // 2,110ms — quality fallback
2982
+ // 2,110ms, IQ 52 — quality fallback
2983
+ "deepseek/deepseek-chat",
2984
+ // 1,431ms, IQ 32
2985
+ "google/gemini-2.5-flash",
2986
+ // 1,238ms, IQ 20 — cheap last resort
2983
2987
  "openai/gpt-5.4"
2984
- // 6,213ms — slowest but highest quality
2988
+ // 6,213ms, IQ 57 — slowest but highest quality
2985
2989
  ]
2986
2990
  },
2987
2991
  REASONING: {
@@ -5786,6 +5790,27 @@ function normalizeMessagesForThinking(messages) {
5786
5790
  });
5787
5791
  return hasChanges ? normalized : messages;
5788
5792
  }
5793
/**
 * Strips "blockrun/" branding from the text of system messages.
 *
 * Two rewrites are applied, in order, to every message whose `role` is
 * "system" and whose `content` is a string:
 *   1. `blockrun/<profile>` (auto, free, eco, premium; case-insensitive) is
 *      replaced with `resolvedModel`.
 *   2. Any remaining `blockrun/` prefix that is directly followed by a letter
 *      is removed (e.g. "blockrun/openai/gpt-4o" becomes "openai/gpt-4o").
 *
 * Messages with another role, or with non-string content, are passed through
 * untouched. The input array and its message objects are never mutated.
 *
 * NOTE(review): `\b` also matches before "-" and ".", so "blockrun/auto-x"
 * is rewritten to "<resolvedModel>-x" — confirm this is intended.
 *
 * @param {Array<object>} messages - Chat messages to scan.
 * @param {string} resolvedModel - Model ID substituted for profile names.
 * @returns {Array<object>} A new array when at least one message changed
 *   (unchanged messages keep their original object identity); otherwise the
 *   original `messages` array itself.
 */
function debrandSystemMessages(messages, resolvedModel) {
  const PROFILE_NAMES = ["auto", "free", "eco", "premium"];
  const profilePattern = new RegExp(
    `\\bblockrun/(${PROFILE_NAMES.join("|")})\\b`,
    "gi"
  );
  const prefixPattern = /\bblockrun\/(?=[a-z])/gi;
  // Use a replacer function rather than a replacement string so that "$&",
  // "$1", "$$" etc. inside the model ID are inserted literally instead of
  // being expanded as substitution patterns.
  const insertResolvedModel = () => resolvedModel;
  let hasChanges = false;
  const result = messages.map((msg) => {
    if (msg.role !== "system" || typeof msg.content !== "string") return msg;
    const content = msg.content
      .replace(profilePattern, insertResolvedModel)
      .replace(prefixPattern, "");
    if (content === msg.content) return msg;
    hasChanges = true;
    // Copy instead of mutating: the caller may still hold the original message.
    return { ...msg, content };
  });
  return hasChanges ? result : messages;
}
5789
5814
  function truncateMessages(messages) {
5790
5815
  if (!messages || messages.length <= MAX_MESSAGES) {
5791
5816
  return {
@@ -6550,6 +6575,12 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
6550
6575
  if (Array.isArray(parsed.messages)) {
6551
6576
  parsed.messages = normalizeMessageRoles(parsed.messages);
6552
6577
  }
6578
+ if (Array.isArray(parsed.messages)) {
6579
+ parsed.messages = debrandSystemMessages(
6580
+ parsed.messages,
6581
+ modelId
6582
+ );
6583
+ }
6553
6584
  if (Array.isArray(parsed.messages)) {
6554
6585
  const truncationResult = truncateMessages(parsed.messages);
6555
6586
  parsed.messages = truncationResult.messages;