@blockrun/clawrouter 0.8.20 → 0.8.21

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the contents of those package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -203,6 +203,10 @@ type RoutingConfig = {
203
203
  tiers: Record<Tier, TierConfig>;
204
204
  /** Tier configs for agentic mode - models that excel at multi-step tasks */
205
205
  agenticTiers?: Record<Tier, TierConfig>;
206
+ /** Tier configs for eco profile - ultra cost-optimized (blockrun/eco) */
207
+ ecoTiers?: Record<Tier, TierConfig>;
208
+ /** Tier configs for premium profile - best quality (blockrun/premium) */
209
+ premiumTiers?: Record<Tier, TierConfig>;
206
210
  overrides: OverridesConfig;
207
211
  };
208
212
 
@@ -225,7 +229,7 @@ declare function getFallbackChain(tier: Tier, tierConfigs: Record<Tier, TierConf
225
229
  * Calculate cost for a specific model (used when fallback model is used).
226
230
  * Returns updated cost fields for RoutingDecision.
227
231
  */
228
- declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number): {
232
+ declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number, routingProfile?: "free" | "eco" | "auto" | "premium"): {
229
233
  costEstimate: number;
230
234
  baselineCost: number;
231
235
  savings: number;
@@ -264,6 +268,7 @@ declare const DEFAULT_ROUTING_CONFIG: RoutingConfig;
264
268
  type RouterOptions = {
265
269
  config: RoutingConfig;
266
270
  modelPricing: Map<string, ModelPricing>;
271
+ routingProfile?: "free" | "eco" | "auto" | "premium";
267
272
  };
268
273
  /**
269
274
  * Route a request to the cheapest capable model.
package/dist/index.js CHANGED
@@ -41,16 +41,40 @@ function resolveModelAlias(model) {
41
41
  return model;
42
42
  }
43
43
  var BLOCKRUN_MODELS = [
44
- // Smart routing meta-model — proxy replaces with actual model
44
+ // Smart routing meta-models — proxy replaces with actual model
45
45
  // NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
46
46
  {
47
47
  id: "auto",
48
- name: "BlockRun Smart Router",
48
+ name: "Auto (Smart Router - Balanced)",
49
49
  inputPrice: 0,
50
50
  outputPrice: 0,
51
51
  contextWindow: 105e4,
52
52
  maxOutput: 128e3
53
53
  },
54
+ {
55
+ id: "free",
56
+ name: "Free (NVIDIA GPT-OSS-120B only)",
57
+ inputPrice: 0,
58
+ outputPrice: 0,
59
+ contextWindow: 128e3,
60
+ maxOutput: 4096
61
+ },
62
+ {
63
+ id: "eco",
64
+ name: "Eco (Smart Router - Cost Optimized)",
65
+ inputPrice: 0,
66
+ outputPrice: 0,
67
+ contextWindow: 105e4,
68
+ maxOutput: 128e3
69
+ },
70
+ {
71
+ id: "premium",
72
+ name: "Premium (Smart Router - Best Quality)",
73
+ inputPrice: 0,
74
+ outputPrice: 0,
75
+ contextWindow: 2e6,
76
+ maxOutput: 2e5
77
+ },
54
78
  // OpenAI GPT-5 Family
55
79
  {
56
80
  id: "openai/gpt-5.2",
@@ -352,8 +376,8 @@ var BLOCKRUN_MODELS = [
352
376
  {
353
377
  id: "xai/grok-4-0709",
354
378
  name: "Grok 4 (0709)",
355
- inputPrice: 3,
356
- outputPrice: 15,
379
+ inputPrice: 0.2,
380
+ outputPrice: 1.5,
357
381
  contextWindow: 131072,
358
382
  maxOutput: 16384,
359
383
  reasoning: true
@@ -909,7 +933,7 @@ function calibrateConfidence(distance, steepness) {
909
933
  }
910
934
 
911
935
  // src/router/selector.ts
912
- function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens) {
936
+ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
913
937
  const tierConfig = tierConfigs[tier];
914
938
  const model = tierConfig.primary;
915
939
  const pricing = modelPricing.get(model);
@@ -918,13 +942,13 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
918
942
  const inputCost = estimatedInputTokens / 1e6 * inputPrice;
919
943
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
920
944
  const costEstimate = inputCost + outputCost;
921
- const opusPricing = modelPricing.get("anthropic/claude-opus-4");
945
+ const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
922
946
  const opusInputPrice = opusPricing?.inputPrice ?? 0;
923
947
  const opusOutputPrice = opusPricing?.outputPrice ?? 0;
924
948
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
925
949
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
926
950
  const baselineCost = baselineInput + baselineOutput;
927
- const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
951
+ const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
928
952
  return {
929
953
  model,
930
954
  tier,
@@ -940,20 +964,20 @@ function getFallbackChain(tier, tierConfigs) {
940
964
  const config = tierConfigs[tier];
941
965
  return [config.primary, ...config.fallback];
942
966
  }
943
- function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
967
+ function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
944
968
  const pricing = modelPricing.get(model);
945
969
  const inputPrice = pricing?.inputPrice ?? 0;
946
970
  const outputPrice = pricing?.outputPrice ?? 0;
947
971
  const inputCost = estimatedInputTokens / 1e6 * inputPrice;
948
972
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
949
973
  const costEstimate = inputCost + outputCost;
950
- const opusPricing = modelPricing.get("anthropic/claude-opus-4");
974
+ const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
951
975
  const opusInputPrice = opusPricing?.inputPrice ?? 0;
952
976
  const opusOutputPrice = opusPricing?.outputPrice ?? 0;
953
977
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
954
978
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
955
979
  const baselineCost = baselineInput + baselineOutput;
956
- const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
980
+ const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
957
981
  return { costEstimate, baselineCost, savings };
958
982
  }
959
983
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
@@ -1582,15 +1606,17 @@ var DEFAULT_ROUTING_CONFIG = {
1582
1606
  // Tier boundaries on weighted score axis
1583
1607
  tierBoundaries: {
1584
1608
  simpleMedium: 0,
1585
- mediumComplex: 0.18,
1586
- complexReasoning: 0.4
1587
- // Raised from 0.25 - requires strong reasoning signals
1609
+ mediumComplex: 0.3,
1610
+ // Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
1611
+ complexReasoning: 0.5
1612
+ // Raised from 0.4 - reserve for true reasoning tasks
1588
1613
  },
1589
1614
  // Sigmoid steepness for confidence calibration
1590
1615
  confidenceSteepness: 12,
1591
1616
  // Below this confidence → ambiguous (null tier)
1592
1617
  confidenceThreshold: 0.7
1593
1618
  },
1619
+ // Auto (balanced) tier configs - current default smart routing
1594
1620
  tiers: {
1595
1621
  SIMPLE: {
1596
1622
  primary: "nvidia/kimi-k2.5",
@@ -1599,7 +1625,9 @@ var DEFAULT_ROUTING_CONFIG = {
1599
1625
  "google/gemini-2.5-flash",
1600
1626
  "nvidia/gpt-oss-120b",
1601
1627
  "nvidia/gpt-oss-20b",
1602
- "deepseek/deepseek-chat"
1628
+ "deepseek/deepseek-chat",
1629
+ "xai/grok-code-fast-1"
1630
+ // Added for better quality fallback
1603
1631
  ]
1604
1632
  },
1605
1633
  MEDIUM: {
@@ -1614,7 +1642,8 @@ var DEFAULT_ROUTING_CONFIG = {
1614
1642
  },
1615
1643
  COMPLEX: {
1616
1644
  primary: "google/gemini-2.5-pro",
1617
- fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "xai/grok-4-0709", "openai/gpt-4o"]
1645
+ fallback: ["xai/grok-4-0709", "openai/gpt-4o", "openai/gpt-5.2", "anthropic/claude-sonnet-4"]
1646
+ // Grok first for cost efficiency, Sonnet as last resort
1618
1647
  },
1619
1648
  REASONING: {
1620
1649
  primary: "xai/grok-4-1-fast-reasoning",
@@ -1628,6 +1657,52 @@ var DEFAULT_ROUTING_CONFIG = {
1628
1657
  ]
1629
1658
  }
1630
1659
  },
1660
+ // Eco tier configs - ultra cost-optimized (blockrun/eco)
1661
+ ecoTiers: {
1662
+ SIMPLE: {
1663
+ primary: "nvidia/kimi-k2.5",
1664
+ // $0.001/$0.001
1665
+ fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "nvidia/gpt-oss-20b"]
1666
+ },
1667
+ MEDIUM: {
1668
+ primary: "deepseek/deepseek-chat",
1669
+ // $0.14/$0.28
1670
+ fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
1671
+ },
1672
+ COMPLEX: {
1673
+ primary: "xai/grok-4-0709",
1674
+ // $0.20/$1.50
1675
+ fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
1676
+ },
1677
+ REASONING: {
1678
+ primary: "deepseek/deepseek-reasoner",
1679
+ // $0.55/$2.19
1680
+ fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
1681
+ }
1682
+ },
1683
+ // Premium tier configs - best quality (blockrun/premium)
1684
+ premiumTiers: {
1685
+ SIMPLE: {
1686
+ primary: "google/gemini-2.5-flash",
1687
+ // $0.075/$0.30
1688
+ fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
1689
+ },
1690
+ MEDIUM: {
1691
+ primary: "openai/gpt-4o",
1692
+ // $2.50/$10
1693
+ fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
1694
+ },
1695
+ COMPLEX: {
1696
+ primary: "anthropic/claude-opus-4.5",
1697
+ // $15/$75
1698
+ fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "google/gemini-2.5-pro"]
1699
+ },
1700
+ REASONING: {
1701
+ primary: "openai/o3",
1702
+ // $10/$40
1703
+ fallback: ["anthropic/claude-opus-4.5", "openai/o1", "google/gemini-2.5-pro"]
1704
+ }
1705
+ },
1631
1706
  // Agentic tier configs - models that excel at multi-step autonomous tasks
1632
1707
  agenticTiers: {
1633
1708
  SIMPLE: {
@@ -1669,21 +1744,34 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1669
1744
  const fullText = `${systemPrompt ?? ""} ${prompt}`;
1670
1745
  const estimatedTokens = Math.ceil(fullText.length / 4);
1671
1746
  const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
1672
- const agenticScore = ruleResult.agenticScore ?? 0;
1673
- const isAutoAgentic = agenticScore >= 0.69;
1674
- const isExplicitAgentic = config.overrides.agenticMode ?? false;
1675
- const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
1676
- const tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
1747
+ const { routingProfile } = options;
1748
+ let tierConfigs;
1749
+ let profileSuffix = "";
1750
+ if (routingProfile === "eco" && config.ecoTiers) {
1751
+ tierConfigs = config.ecoTiers;
1752
+ profileSuffix = " | eco";
1753
+ } else if (routingProfile === "premium" && config.premiumTiers) {
1754
+ tierConfigs = config.premiumTiers;
1755
+ profileSuffix = " | premium";
1756
+ } else {
1757
+ const agenticScore = ruleResult.agenticScore ?? 0;
1758
+ const isAutoAgentic = agenticScore >= 0.5;
1759
+ const isExplicitAgentic = config.overrides.agenticMode ?? false;
1760
+ const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
1761
+ tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
1762
+ profileSuffix = useAgenticTiers ? " | agentic" : "";
1763
+ }
1677
1764
  if (estimatedTokens > config.overrides.maxTokensForceComplex) {
1678
1765
  return selectModel(
1679
1766
  "COMPLEX",
1680
1767
  0.95,
1681
1768
  "rules",
1682
- `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${useAgenticTiers ? " | agentic" : ""}`,
1769
+ `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${profileSuffix}`,
1683
1770
  tierConfigs,
1684
1771
  modelPricing,
1685
1772
  estimatedTokens,
1686
- maxOutputTokens
1773
+ maxOutputTokens,
1774
+ routingProfile
1687
1775
  );
1688
1776
  }
1689
1777
  const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
@@ -1707,11 +1795,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1707
1795
  tier = minTier;
1708
1796
  }
1709
1797
  }
1710
- if (isAutoAgentic) {
1711
- reasoning += " | auto-agentic";
1712
- } else if (isExplicitAgentic) {
1713
- reasoning += " | agentic";
1714
- }
1798
+ reasoning += profileSuffix;
1715
1799
  return selectModel(
1716
1800
  tier,
1717
1801
  confidence,
@@ -1720,7 +1804,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1720
1804
  tierConfigs,
1721
1805
  modelPricing,
1722
1806
  estimatedTokens,
1723
- maxOutputTokens
1807
+ maxOutputTokens,
1808
+ routingProfile
1724
1809
  );
1725
1810
  }
1726
1811
 
@@ -1878,12 +1963,13 @@ async function getStats(days = 7) {
1878
1963
  function formatStatsAscii(stats) {
1879
1964
  const lines = [];
1880
1965
  lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
1881
- lines.push("\u2551 ClawRouter Usage Statistics \u2551");
1966
+ lines.push("\u2551 ClawRouter by BlockRun v0.8.20 \u2551");
1967
+ lines.push("\u2551 Usage Statistics \u2551");
1882
1968
  lines.push("\u2560\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2563");
1883
1969
  lines.push(`\u2551 Period: ${stats.period.padEnd(49)}\u2551`);
1884
1970
  lines.push(`\u2551 Total Requests: ${stats.totalRequests.toString().padEnd(41)}\u2551`);
1885
1971
  lines.push(`\u2551 Total Cost: $${stats.totalCost.toFixed(4).padEnd(43)}\u2551`);
1886
- lines.push(`\u2551 Baseline Cost (Opus): $${stats.totalBaselineCost.toFixed(4).padEnd(33)}\u2551`);
1972
+ lines.push(`\u2551 Baseline Cost (Opus 4.5): $${stats.totalBaselineCost.toFixed(4).padEnd(30)}\u2551`);
1887
1973
  const savingsLine = `\u2551 \u{1F4B0} Total Saved: $${stats.totalSavings.toFixed(4)} (${stats.savingsPercentage.toFixed(1)}%)`;
1888
1974
  if (stats.entriesWithBaseline < stats.totalRequests && stats.entriesWithBaseline > 0) {
1889
1975
  lines.push(savingsLine.padEnd(61) + "\u2551");
@@ -2411,7 +2497,16 @@ async function checkForUpdates() {
2411
2497
  // src/proxy.ts
2412
2498
  var BLOCKRUN_API = "https://blockrun.ai/api";
2413
2499
  var AUTO_MODEL = "blockrun/auto";
2414
- var AUTO_MODEL_SHORT = "auto";
2500
+ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
2501
+ "blockrun/free",
2502
+ "free",
2503
+ "blockrun/eco",
2504
+ "eco",
2505
+ "blockrun/auto",
2506
+ "auto",
2507
+ "blockrun/premium",
2508
+ "premium"
2509
+ ]);
2415
2510
  var FREE_MODEL = "nvidia/gpt-oss-120b";
2416
2511
  var HEARTBEAT_INTERVAL_MS = 2e3;
2417
2512
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
@@ -3093,6 +3188,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3093
3188
  let isStreaming = false;
3094
3189
  let modelId = "";
3095
3190
  let maxTokens = 4096;
3191
+ let routingProfile = null;
3096
3192
  const isChatCompletion = req.url?.includes("/chat/completions");
3097
3193
  if (isChatCompletion && body.length > 0) {
3098
3194
  try {
@@ -3108,58 +3204,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3108
3204
  const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
3109
3205
  const resolvedModel = resolveModelAlias(normalizedModel);
3110
3206
  const wasAlias = resolvedModel !== normalizedModel;
3111
- const isAutoModel = normalizedModel === AUTO_MODEL.toLowerCase() || normalizedModel === AUTO_MODEL_SHORT.toLowerCase();
3207
+ const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
3208
+ if (isRoutingProfile) {
3209
+ const profileName = normalizedModel.replace("blockrun/", "");
3210
+ routingProfile = profileName;
3211
+ }
3112
3212
  console.log(
3113
- `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}, isAuto: ${isAutoModel}`
3213
+ `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
3114
3214
  );
3115
- if (wasAlias && !isAutoModel) {
3215
+ if (wasAlias && !isRoutingProfile) {
3116
3216
  parsed.model = resolvedModel;
3117
3217
  modelId = resolvedModel;
3118
3218
  bodyModified = true;
3119
3219
  }
3120
- if (isAutoModel) {
3121
- const sessionId = getSessionId(
3122
- req.headers
3123
- );
3124
- const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
3125
- if (existingSession) {
3126
- console.log(
3127
- `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
3128
- );
3129
- parsed.model = existingSession.model;
3130
- modelId = existingSession.model;
3220
+ if (isRoutingProfile) {
3221
+ if (routingProfile === "free") {
3222
+ const freeModel = "nvidia/gpt-oss-120b";
3223
+ console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
3224
+ parsed.model = freeModel;
3225
+ modelId = freeModel;
3131
3226
  bodyModified = true;
3132
- sessionStore.touchSession(sessionId);
3227
+ await logUsage({
3228
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
3229
+ model: freeModel,
3230
+ tier: "SIMPLE",
3231
+ cost: 0,
3232
+ baselineCost: 0,
3233
+ savings: 1,
3234
+ // 100% savings
3235
+ latencyMs: 0
3236
+ });
3133
3237
  } else {
3134
- const messages = parsed.messages;
3135
- let lastUserMsg;
3136
- if (messages) {
3137
- for (let i = messages.length - 1; i >= 0; i--) {
3138
- if (messages[i].role === "user") {
3139
- lastUserMsg = messages[i];
3140
- break;
3141
- }
3142
- }
3143
- }
3144
- const systemMsg = messages?.find((m) => m.role === "system");
3145
- const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
3146
- const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
3147
- const tools = parsed.tools;
3148
- const hasTools = Array.isArray(tools) && tools.length > 0;
3149
- if (hasTools) {
3150
- console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
3151
- }
3152
- routingDecision = route(prompt, systemPrompt, maxTokens, routerOpts);
3153
- parsed.model = routingDecision.model;
3154
- modelId = routingDecision.model;
3155
- bodyModified = true;
3156
- if (sessionId) {
3157
- sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
3238
+ const sessionId = getSessionId(
3239
+ req.headers
3240
+ );
3241
+ const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
3242
+ if (existingSession) {
3158
3243
  console.log(
3159
- `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
3244
+ `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
3160
3245
  );
3246
+ parsed.model = existingSession.model;
3247
+ modelId = existingSession.model;
3248
+ bodyModified = true;
3249
+ sessionStore.touchSession(sessionId);
3250
+ } else {
3251
+ const messages = parsed.messages;
3252
+ let lastUserMsg;
3253
+ if (messages) {
3254
+ for (let i = messages.length - 1; i >= 0; i--) {
3255
+ if (messages[i].role === "user") {
3256
+ lastUserMsg = messages[i];
3257
+ break;
3258
+ }
3259
+ }
3260
+ }
3261
+ const systemMsg = messages?.find((m) => m.role === "system");
3262
+ const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
3263
+ const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
3264
+ const tools = parsed.tools;
3265
+ const hasTools = Array.isArray(tools) && tools.length > 0;
3266
+ if (hasTools) {
3267
+ console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
3268
+ }
3269
+ routingDecision = route(prompt, systemPrompt, maxTokens, {
3270
+ ...routerOpts,
3271
+ routingProfile: routingProfile ?? void 0
3272
+ });
3273
+ parsed.model = routingDecision.model;
3274
+ modelId = routingDecision.model;
3275
+ bodyModified = true;
3276
+ if (sessionId) {
3277
+ sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
3278
+ console.log(
3279
+ `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
3280
+ );
3281
+ }
3282
+ options.onRouted?.(routingDecision);
3161
3283
  }
3162
- options.onRouted?.(routingDecision);
3163
3284
  }
3164
3285
  }
3165
3286
  if (bodyModified) {
@@ -3343,7 +3464,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3343
3464
  actualModelUsed,
3344
3465
  routerOpts.modelPricing,
3345
3466
  estimatedInputTokens,
3346
- maxTokens
3467
+ maxTokens,
3468
+ routingProfile ?? void 0
3347
3469
  );
3348
3470
  routingDecision = {
3349
3471
  ...routingDecision,
@@ -3552,7 +3674,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3552
3674
  routingDecision.model,
3553
3675
  routerOpts.modelPricing,
3554
3676
  estimatedInputTokens,
3555
- maxTokens
3677
+ maxTokens,
3678
+ routingProfile ?? void 0
3556
3679
  );
3557
3680
  const costWithBuffer = accurateCosts.costEstimate * 1.2;
3558
3681
  const baselineWithBuffer = accurateCosts.baselineCost * 1.2;