@blockrun/clawrouter 0.8.20 → 0.8.22

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -203,6 +203,10 @@ type RoutingConfig = {
203
203
  tiers: Record<Tier, TierConfig>;
204
204
  /** Tier configs for agentic mode - models that excel at multi-step tasks */
205
205
  agenticTiers?: Record<Tier, TierConfig>;
206
+ /** Tier configs for eco profile - ultra cost-optimized (blockrun/eco) */
207
+ ecoTiers?: Record<Tier, TierConfig>;
208
+ /** Tier configs for premium profile - best quality (blockrun/premium) */
209
+ premiumTiers?: Record<Tier, TierConfig>;
206
210
  overrides: OverridesConfig;
207
211
  };
208
212
 
@@ -225,7 +229,7 @@ declare function getFallbackChain(tier: Tier, tierConfigs: Record<Tier, TierConf
225
229
  * Calculate cost for a specific model (used when fallback model is used).
226
230
  * Returns updated cost fields for RoutingDecision.
227
231
  */
228
- declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number): {
232
+ declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number, routingProfile?: "free" | "eco" | "auto" | "premium"): {
229
233
  costEstimate: number;
230
234
  baselineCost: number;
231
235
  savings: number;
@@ -264,6 +268,7 @@ declare const DEFAULT_ROUTING_CONFIG: RoutingConfig;
264
268
  type RouterOptions = {
265
269
  config: RoutingConfig;
266
270
  modelPricing: Map<string, ModelPricing>;
271
+ routingProfile?: "free" | "eco" | "auto" | "premium";
267
272
  };
268
273
  /**
269
274
  * Route a request to the cheapest capable model.
package/dist/index.js CHANGED
@@ -23,11 +23,12 @@ var MODEL_ALIASES = {
23
23
  grok: "xai/grok-3",
24
24
  "grok-fast": "xai/grok-4-fast-reasoning",
25
25
  "grok-code": "xai/grok-code-fast-1",
26
- // NVIDIA (free)
26
+ // NVIDIA
27
27
  nvidia: "nvidia/gpt-oss-120b",
28
28
  "gpt-120b": "nvidia/gpt-oss-120b",
29
- "gpt-20b": "nvidia/gpt-oss-20b",
30
- free: "nvidia/gpt-oss-120b"
29
+ "gpt-20b": "nvidia/gpt-oss-20b"
30
+ // Note: auto, free, eco, premium are virtual routing profiles registered in BLOCKRUN_MODELS
31
+ // They don't need aliases since they're already top-level model IDs
31
32
  };
32
33
  function resolveModelAlias(model) {
33
34
  const normalized = model.trim().toLowerCase();
@@ -41,16 +42,40 @@ function resolveModelAlias(model) {
41
42
  return model;
42
43
  }
43
44
  var BLOCKRUN_MODELS = [
44
- // Smart routing meta-model — proxy replaces with actual model
45
+ // Smart routing meta-models — proxy replaces with actual model
45
46
  // NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
46
47
  {
47
48
  id: "auto",
48
- name: "BlockRun Smart Router",
49
+ name: "Auto (Smart Router - Balanced)",
49
50
  inputPrice: 0,
50
51
  outputPrice: 0,
51
52
  contextWindow: 105e4,
52
53
  maxOutput: 128e3
53
54
  },
55
+ {
56
+ id: "free",
57
+ name: "Free (NVIDIA GPT-OSS-120B only)",
58
+ inputPrice: 0,
59
+ outputPrice: 0,
60
+ contextWindow: 128e3,
61
+ maxOutput: 4096
62
+ },
63
+ {
64
+ id: "eco",
65
+ name: "Eco (Smart Router - Cost Optimized)",
66
+ inputPrice: 0,
67
+ outputPrice: 0,
68
+ contextWindow: 105e4,
69
+ maxOutput: 128e3
70
+ },
71
+ {
72
+ id: "premium",
73
+ name: "Premium (Smart Router - Best Quality)",
74
+ inputPrice: 0,
75
+ outputPrice: 0,
76
+ contextWindow: 2e6,
77
+ maxOutput: 2e5
78
+ },
54
79
  // OpenAI GPT-5 Family
55
80
  {
56
81
  id: "openai/gpt-5.2",
@@ -352,8 +377,8 @@ var BLOCKRUN_MODELS = [
352
377
  {
353
378
  id: "xai/grok-4-0709",
354
379
  name: "Grok 4 (0709)",
355
- inputPrice: 3,
356
- outputPrice: 15,
380
+ inputPrice: 0.2,
381
+ outputPrice: 1.5,
357
382
  contextWindow: 131072,
358
383
  maxOutput: 16384,
359
384
  reasoning: true
@@ -909,7 +934,7 @@ function calibrateConfidence(distance, steepness) {
909
934
  }
910
935
 
911
936
  // src/router/selector.ts
912
- function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens) {
937
+ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
913
938
  const tierConfig = tierConfigs[tier];
914
939
  const model = tierConfig.primary;
915
940
  const pricing = modelPricing.get(model);
@@ -918,13 +943,13 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
918
943
  const inputCost = estimatedInputTokens / 1e6 * inputPrice;
919
944
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
920
945
  const costEstimate = inputCost + outputCost;
921
- const opusPricing = modelPricing.get("anthropic/claude-opus-4");
946
+ const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
922
947
  const opusInputPrice = opusPricing?.inputPrice ?? 0;
923
948
  const opusOutputPrice = opusPricing?.outputPrice ?? 0;
924
949
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
925
950
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
926
951
  const baselineCost = baselineInput + baselineOutput;
927
- const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
952
+ const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
928
953
  return {
929
954
  model,
930
955
  tier,
@@ -940,20 +965,20 @@ function getFallbackChain(tier, tierConfigs) {
940
965
  const config = tierConfigs[tier];
941
966
  return [config.primary, ...config.fallback];
942
967
  }
943
- function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
968
+ function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
944
969
  const pricing = modelPricing.get(model);
945
970
  const inputPrice = pricing?.inputPrice ?? 0;
946
971
  const outputPrice = pricing?.outputPrice ?? 0;
947
972
  const inputCost = estimatedInputTokens / 1e6 * inputPrice;
948
973
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
949
974
  const costEstimate = inputCost + outputCost;
950
- const opusPricing = modelPricing.get("anthropic/claude-opus-4");
975
+ const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
951
976
  const opusInputPrice = opusPricing?.inputPrice ?? 0;
952
977
  const opusOutputPrice = opusPricing?.outputPrice ?? 0;
953
978
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
954
979
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
955
980
  const baselineCost = baselineInput + baselineOutput;
956
- const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
981
+ const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
957
982
  return { costEstimate, baselineCost, savings };
958
983
  }
959
984
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
@@ -1582,15 +1607,17 @@ var DEFAULT_ROUTING_CONFIG = {
1582
1607
  // Tier boundaries on weighted score axis
1583
1608
  tierBoundaries: {
1584
1609
  simpleMedium: 0,
1585
- mediumComplex: 0.18,
1586
- complexReasoning: 0.4
1587
- // Raised from 0.25 - requires strong reasoning signals
1610
+ mediumComplex: 0.3,
1611
+ // Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
1612
+ complexReasoning: 0.5
1613
+ // Raised from 0.4 - reserve for true reasoning tasks
1588
1614
  },
1589
1615
  // Sigmoid steepness for confidence calibration
1590
1616
  confidenceSteepness: 12,
1591
1617
  // Below this confidence → ambiguous (null tier)
1592
1618
  confidenceThreshold: 0.7
1593
1619
  },
1620
+ // Auto (balanced) tier configs - current default smart routing
1594
1621
  tiers: {
1595
1622
  SIMPLE: {
1596
1623
  primary: "nvidia/kimi-k2.5",
@@ -1599,7 +1626,9 @@ var DEFAULT_ROUTING_CONFIG = {
1599
1626
  "google/gemini-2.5-flash",
1600
1627
  "nvidia/gpt-oss-120b",
1601
1628
  "nvidia/gpt-oss-20b",
1602
- "deepseek/deepseek-chat"
1629
+ "deepseek/deepseek-chat",
1630
+ "xai/grok-code-fast-1"
1631
+ // Added for better quality fallback
1603
1632
  ]
1604
1633
  },
1605
1634
  MEDIUM: {
@@ -1614,7 +1643,8 @@ var DEFAULT_ROUTING_CONFIG = {
1614
1643
  },
1615
1644
  COMPLEX: {
1616
1645
  primary: "google/gemini-2.5-pro",
1617
- fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "xai/grok-4-0709", "openai/gpt-4o"]
1646
+ fallback: ["xai/grok-4-0709", "openai/gpt-4o", "openai/gpt-5.2", "anthropic/claude-sonnet-4"]
1647
+ // Grok first for cost efficiency, Sonnet as last resort
1618
1648
  },
1619
1649
  REASONING: {
1620
1650
  primary: "xai/grok-4-1-fast-reasoning",
@@ -1628,6 +1658,52 @@ var DEFAULT_ROUTING_CONFIG = {
1628
1658
  ]
1629
1659
  }
1630
1660
  },
1661
+ // Eco tier configs - ultra cost-optimized (blockrun/eco)
1662
+ ecoTiers: {
1663
+ SIMPLE: {
1664
+ primary: "nvidia/kimi-k2.5",
1665
+ // $0.001/$0.001
1666
+ fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "nvidia/gpt-oss-20b"]
1667
+ },
1668
+ MEDIUM: {
1669
+ primary: "deepseek/deepseek-chat",
1670
+ // $0.14/$0.28
1671
+ fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
1672
+ },
1673
+ COMPLEX: {
1674
+ primary: "xai/grok-4-0709",
1675
+ // $0.20/$1.50
1676
+ fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
1677
+ },
1678
+ REASONING: {
1679
+ primary: "deepseek/deepseek-reasoner",
1680
+ // $0.55/$2.19
1681
+ fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
1682
+ }
1683
+ },
1684
+ // Premium tier configs - best quality (blockrun/premium)
1685
+ premiumTiers: {
1686
+ SIMPLE: {
1687
+ primary: "google/gemini-2.5-flash",
1688
+ // $0.075/$0.30
1689
+ fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
1690
+ },
1691
+ MEDIUM: {
1692
+ primary: "openai/gpt-4o",
1693
+ // $2.50/$10
1694
+ fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
1695
+ },
1696
+ COMPLEX: {
1697
+ primary: "anthropic/claude-opus-4.5",
1698
+ // $15/$75
1699
+ fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "google/gemini-2.5-pro"]
1700
+ },
1701
+ REASONING: {
1702
+ primary: "openai/o3",
1703
+ // $10/$40
1704
+ fallback: ["anthropic/claude-opus-4.5", "openai/o1", "google/gemini-2.5-pro"]
1705
+ }
1706
+ },
1631
1707
  // Agentic tier configs - models that excel at multi-step autonomous tasks
1632
1708
  agenticTiers: {
1633
1709
  SIMPLE: {
@@ -1669,21 +1745,34 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1669
1745
  const fullText = `${systemPrompt ?? ""} ${prompt}`;
1670
1746
  const estimatedTokens = Math.ceil(fullText.length / 4);
1671
1747
  const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
1672
- const agenticScore = ruleResult.agenticScore ?? 0;
1673
- const isAutoAgentic = agenticScore >= 0.69;
1674
- const isExplicitAgentic = config.overrides.agenticMode ?? false;
1675
- const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
1676
- const tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
1748
+ const { routingProfile } = options;
1749
+ let tierConfigs;
1750
+ let profileSuffix = "";
1751
+ if (routingProfile === "eco" && config.ecoTiers) {
1752
+ tierConfigs = config.ecoTiers;
1753
+ profileSuffix = " | eco";
1754
+ } else if (routingProfile === "premium" && config.premiumTiers) {
1755
+ tierConfigs = config.premiumTiers;
1756
+ profileSuffix = " | premium";
1757
+ } else {
1758
+ const agenticScore = ruleResult.agenticScore ?? 0;
1759
+ const isAutoAgentic = agenticScore >= 0.5;
1760
+ const isExplicitAgentic = config.overrides.agenticMode ?? false;
1761
+ const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
1762
+ tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
1763
+ profileSuffix = useAgenticTiers ? " | agentic" : "";
1764
+ }
1677
1765
  if (estimatedTokens > config.overrides.maxTokensForceComplex) {
1678
1766
  return selectModel(
1679
1767
  "COMPLEX",
1680
1768
  0.95,
1681
1769
  "rules",
1682
- `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${useAgenticTiers ? " | agentic" : ""}`,
1770
+ `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${profileSuffix}`,
1683
1771
  tierConfigs,
1684
1772
  modelPricing,
1685
1773
  estimatedTokens,
1686
- maxOutputTokens
1774
+ maxOutputTokens,
1775
+ routingProfile
1687
1776
  );
1688
1777
  }
1689
1778
  const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
@@ -1707,11 +1796,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1707
1796
  tier = minTier;
1708
1797
  }
1709
1798
  }
1710
- if (isAutoAgentic) {
1711
- reasoning += " | auto-agentic";
1712
- } else if (isExplicitAgentic) {
1713
- reasoning += " | agentic";
1714
- }
1799
+ reasoning += profileSuffix;
1715
1800
  return selectModel(
1716
1801
  tier,
1717
1802
  confidence,
@@ -1720,7 +1805,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1720
1805
  tierConfigs,
1721
1806
  modelPricing,
1722
1807
  estimatedTokens,
1723
- maxOutputTokens
1808
+ maxOutputTokens,
1809
+ routingProfile
1724
1810
  );
1725
1811
  }
1726
1812
 
@@ -1878,12 +1964,13 @@ async function getStats(days = 7) {
1878
1964
  function formatStatsAscii(stats) {
1879
1965
  const lines = [];
1880
1966
  lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
1881
- lines.push("\u2551 ClawRouter Usage Statistics \u2551");
1967
+ lines.push("\u2551 ClawRouter by BlockRun v0.8.20 \u2551");
1968
+ lines.push("\u2551 Usage Statistics \u2551");
1882
1969
  lines.push("\u2560\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2563");
1883
1970
  lines.push(`\u2551 Period: ${stats.period.padEnd(49)}\u2551`);
1884
1971
  lines.push(`\u2551 Total Requests: ${stats.totalRequests.toString().padEnd(41)}\u2551`);
1885
1972
  lines.push(`\u2551 Total Cost: $${stats.totalCost.toFixed(4).padEnd(43)}\u2551`);
1886
- lines.push(`\u2551 Baseline Cost (Opus): $${stats.totalBaselineCost.toFixed(4).padEnd(33)}\u2551`);
1973
+ lines.push(`\u2551 Baseline Cost (Opus 4.5): $${stats.totalBaselineCost.toFixed(4).padEnd(30)}\u2551`);
1887
1974
  const savingsLine = `\u2551 \u{1F4B0} Total Saved: $${stats.totalSavings.toFixed(4)} (${stats.savingsPercentage.toFixed(1)}%)`;
1888
1975
  if (stats.entriesWithBaseline < stats.totalRequests && stats.entriesWithBaseline > 0) {
1889
1976
  lines.push(savingsLine.padEnd(61) + "\u2551");
@@ -2411,7 +2498,16 @@ async function checkForUpdates() {
2411
2498
  // src/proxy.ts
2412
2499
  var BLOCKRUN_API = "https://blockrun.ai/api";
2413
2500
  var AUTO_MODEL = "blockrun/auto";
2414
- var AUTO_MODEL_SHORT = "auto";
2501
+ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
2502
+ "blockrun/free",
2503
+ "free",
2504
+ "blockrun/eco",
2505
+ "eco",
2506
+ "blockrun/auto",
2507
+ "auto",
2508
+ "blockrun/premium",
2509
+ "premium"
2510
+ ]);
2415
2511
  var FREE_MODEL = "nvidia/gpt-oss-120b";
2416
2512
  var HEARTBEAT_INTERVAL_MS = 2e3;
2417
2513
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
@@ -3093,6 +3189,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3093
3189
  let isStreaming = false;
3094
3190
  let modelId = "";
3095
3191
  let maxTokens = 4096;
3192
+ let routingProfile = null;
3096
3193
  const isChatCompletion = req.url?.includes("/chat/completions");
3097
3194
  if (isChatCompletion && body.length > 0) {
3098
3195
  try {
@@ -3108,58 +3205,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3108
3205
  const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
3109
3206
  const resolvedModel = resolveModelAlias(normalizedModel);
3110
3207
  const wasAlias = resolvedModel !== normalizedModel;
3111
- const isAutoModel = normalizedModel === AUTO_MODEL.toLowerCase() || normalizedModel === AUTO_MODEL_SHORT.toLowerCase();
3208
+ const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
3209
+ if (isRoutingProfile) {
3210
+ const profileName = normalizedModel.replace("blockrun/", "");
3211
+ routingProfile = profileName;
3212
+ }
3112
3213
  console.log(
3113
- `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}, isAuto: ${isAutoModel}`
3214
+ `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
3114
3215
  );
3115
- if (wasAlias && !isAutoModel) {
3216
+ if (wasAlias && !isRoutingProfile) {
3116
3217
  parsed.model = resolvedModel;
3117
3218
  modelId = resolvedModel;
3118
3219
  bodyModified = true;
3119
3220
  }
3120
- if (isAutoModel) {
3121
- const sessionId = getSessionId(
3122
- req.headers
3123
- );
3124
- const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
3125
- if (existingSession) {
3126
- console.log(
3127
- `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
3128
- );
3129
- parsed.model = existingSession.model;
3130
- modelId = existingSession.model;
3221
+ if (isRoutingProfile) {
3222
+ if (routingProfile === "free") {
3223
+ const freeModel = "nvidia/gpt-oss-120b";
3224
+ console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
3225
+ parsed.model = freeModel;
3226
+ modelId = freeModel;
3131
3227
  bodyModified = true;
3132
- sessionStore.touchSession(sessionId);
3228
+ await logUsage({
3229
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
3230
+ model: freeModel,
3231
+ tier: "SIMPLE",
3232
+ cost: 0,
3233
+ baselineCost: 0,
3234
+ savings: 1,
3235
+ // 100% savings
3236
+ latencyMs: 0
3237
+ });
3133
3238
  } else {
3134
- const messages = parsed.messages;
3135
- let lastUserMsg;
3136
- if (messages) {
3137
- for (let i = messages.length - 1; i >= 0; i--) {
3138
- if (messages[i].role === "user") {
3139
- lastUserMsg = messages[i];
3140
- break;
3141
- }
3142
- }
3143
- }
3144
- const systemMsg = messages?.find((m) => m.role === "system");
3145
- const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
3146
- const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
3147
- const tools = parsed.tools;
3148
- const hasTools = Array.isArray(tools) && tools.length > 0;
3149
- if (hasTools) {
3150
- console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
3151
- }
3152
- routingDecision = route(prompt, systemPrompt, maxTokens, routerOpts);
3153
- parsed.model = routingDecision.model;
3154
- modelId = routingDecision.model;
3155
- bodyModified = true;
3156
- if (sessionId) {
3157
- sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
3239
+ const sessionId = getSessionId(
3240
+ req.headers
3241
+ );
3242
+ const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
3243
+ if (existingSession) {
3158
3244
  console.log(
3159
- `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
3245
+ `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
3160
3246
  );
3247
+ parsed.model = existingSession.model;
3248
+ modelId = existingSession.model;
3249
+ bodyModified = true;
3250
+ sessionStore.touchSession(sessionId);
3251
+ } else {
3252
+ const messages = parsed.messages;
3253
+ let lastUserMsg;
3254
+ if (messages) {
3255
+ for (let i = messages.length - 1; i >= 0; i--) {
3256
+ if (messages[i].role === "user") {
3257
+ lastUserMsg = messages[i];
3258
+ break;
3259
+ }
3260
+ }
3261
+ }
3262
+ const systemMsg = messages?.find((m) => m.role === "system");
3263
+ const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
3264
+ const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
3265
+ const tools = parsed.tools;
3266
+ const hasTools = Array.isArray(tools) && tools.length > 0;
3267
+ if (hasTools) {
3268
+ console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
3269
+ }
3270
+ routingDecision = route(prompt, systemPrompt, maxTokens, {
3271
+ ...routerOpts,
3272
+ routingProfile: routingProfile ?? void 0
3273
+ });
3274
+ parsed.model = routingDecision.model;
3275
+ modelId = routingDecision.model;
3276
+ bodyModified = true;
3277
+ if (sessionId) {
3278
+ sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
3279
+ console.log(
3280
+ `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
3281
+ );
3282
+ }
3283
+ options.onRouted?.(routingDecision);
3161
3284
  }
3162
- options.onRouted?.(routingDecision);
3163
3285
  }
3164
3286
  }
3165
3287
  if (bodyModified) {
@@ -3343,7 +3465,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3343
3465
  actualModelUsed,
3344
3466
  routerOpts.modelPricing,
3345
3467
  estimatedInputTokens,
3346
- maxTokens
3468
+ maxTokens,
3469
+ routingProfile ?? void 0
3347
3470
  );
3348
3471
  routingDecision = {
3349
3472
  ...routingDecision,
@@ -3552,7 +3675,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3552
3675
  routingDecision.model,
3553
3676
  routerOpts.modelPricing,
3554
3677
  estimatedInputTokens,
3555
- maxTokens
3678
+ maxTokens,
3679
+ routingProfile ?? void 0
3556
3680
  );
3557
3681
  const costWithBuffer = accurateCosts.costEstimate * 1.2;
3558
3682
  const baselineWithBuffer = accurateCosts.baselineCost * 1.2;