@blockrun/clawrouter 0.8.20 → 0.8.22

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.
package/README.md CHANGED
@@ -14,7 +14,7 @@ One wallet, 30+ models, zero API keys.
14
14
  [Docs](https://blockrun.ai/docs) · [Models](https://blockrun.ai/models) · [Configuration](docs/configuration.md) · [Features](docs/features.md) · [Windows](docs/windows-installation.md) · [Troubleshooting](docs/troubleshooting.md) · [Telegram](https://t.me/blockrunAI) · [X](https://x.com/BlockRunAI)
15
15
 
16
16
  **Winner — Agentic Commerce Track** at the [USDC AI Agent Hackathon](https://x.com/USDC/status/2021625822294216977)<br>
17
- *The world's first hackathon run entirely by AI agents, powered by USDC*
17
+ _The world's first hackathon run entirely by AI agents, powered by USDC_
18
18
 
19
19
  </div>
20
20
 
@@ -44,15 +44,15 @@ One wallet, 30+ models, zero API keys.
44
44
 
45
45
  ## Quick Start (2 mins)
46
46
 
47
+ **Inspired by Andreas** — we've updated our installation script:
48
+
47
49
  ```bash
48
50
  # 1. Install with smart routing enabled by default
49
- curl -fsSL https://raw.githubusercontent.com/BlockRunAI/ClawRouter/main/scripts/reinstall.sh | bash
51
+ curl -fsSL https://blockrun.ai/ClawRouter-update | bash
52
+ openclaw gateway restart
50
53
 
51
54
  # 2. Fund your wallet with USDC on Base (address printed on install)
52
55
  # $5 is enough for thousands of requests
53
-
54
- # 3. Restart OpenClaw gateway
55
- openclaw gateway restart
56
56
  ```
57
57
 
58
58
  Done! Smart routing (`blockrun/auto`) is now your default model.
@@ -118,12 +118,12 @@ No external classifier calls. Ambiguous queries default to the MEDIUM tier (Grok
118
118
 
119
119
  ### Tier → Model Mapping
120
120
 
121
- | Tier | Primary Model | Cost/M | Savings vs Opus |
122
- | --------- | --------------------- | ------- | --------------- |
123
- | SIMPLE | nvidia/kimi-k2.5 | $0.001 | **~100%** |
124
- | MEDIUM | grok-code-fast-1 | $1.50 | **94.0%** |
125
- | COMPLEX | gemini-2.5-pro | $10.00 | **60.0%** |
126
- | REASONING | grok-4-1-fast-reasoning | $0.50 | **98.0%** |
121
+ | Tier | Primary Model | Cost/M | Savings vs Opus |
122
+ | --------- | ----------------------- | ------ | --------------- |
123
+ | SIMPLE | nvidia/kimi-k2.5 | $0.001 | **~100%** |
124
+ | MEDIUM | grok-code-fast-1 | $1.50 | **94.0%** |
125
+ | COMPLEX | gemini-2.5-pro | $10.00 | **60.0%** |
126
+ | REASONING | grok-4-1-fast-reasoning | $0.50 | **98.0%** |
127
127
 
128
128
  Special rule: 2+ reasoning markers → REASONING at 0.97 confidence.
129
129
 
@@ -137,6 +137,7 @@ ClawRouter v0.5+ includes intelligent features that work automatically:
137
137
  - **Model aliases** — `/model free`, `/model sonnet`, `/model grok`
138
138
  - **Session persistence** — pins model for multi-turn conversations
139
139
  - **Free tier fallback** — keeps working when wallet is empty
140
+ - **Auto-update check** — notifies you when a new version is available
140
141
 
141
142
  **Full details:** [docs/features.md](docs/features.md)
142
143
 
@@ -334,13 +335,19 @@ Agents shouldn't need a human to paste API keys. They should generate a wallet,
334
335
  Quick checklist:
335
336
 
336
337
  ```bash
337
- # Check version (should be 0.5.7+)
338
+ # Check version (should be 0.8.21+)
338
339
  cat ~/.openclaw/extensions/clawrouter/package.json | grep version
339
340
 
340
341
  # Check proxy running
341
342
  curl http://localhost:8402/health
343
+
344
+ # Update to latest version
345
+ curl -fsSL https://blockrun.ai/ClawRouter-update | bash
346
+ openclaw gateway restart
342
347
  ```
343
348
 
349
+ ClawRouter automatically checks for updates on startup and shows a notification if a newer version is available.
350
+
344
351
  **Full guide:** [docs/troubleshooting.md](docs/troubleshooting.md)
345
352
 
346
353
  ---
@@ -374,6 +381,7 @@ BLOCKRUN_WALLET_KEY=0x... npx tsx test-e2e.ts
374
381
  - [x] Cost tracking — /stats command with savings dashboard
375
382
  - [x] Model aliases — `/model free`, `/model sonnet`, `/model grok`, etc.
376
383
  - [x] Free tier — gpt-oss-120b for $0 when wallet is empty
384
+ - [x] Auto-update — startup version check with one-command update
377
385
  - [ ] Cascade routing — try cheap model first, escalate on low quality
378
386
  - [ ] Spend controls — daily/monthly budgets
379
387
  - [ ] Remote analytics — cost tracking at blockrun.ai
package/dist/cli.js CHANGED
@@ -439,7 +439,7 @@ function calibrateConfidence(distance, steepness) {
439
439
  }
440
440
 
441
441
  // src/router/selector.ts
442
- function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens) {
442
+ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
443
443
  const tierConfig = tierConfigs[tier];
444
444
  const model = tierConfig.primary;
445
445
  const pricing = modelPricing.get(model);
@@ -448,13 +448,13 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
448
448
  const inputCost = estimatedInputTokens / 1e6 * inputPrice;
449
449
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
450
450
  const costEstimate = inputCost + outputCost;
451
- const opusPricing = modelPricing.get("anthropic/claude-opus-4");
451
+ const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
452
452
  const opusInputPrice = opusPricing?.inputPrice ?? 0;
453
453
  const opusOutputPrice = opusPricing?.outputPrice ?? 0;
454
454
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
455
455
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
456
456
  const baselineCost = baselineInput + baselineOutput;
457
- const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
457
+ const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
458
458
  return {
459
459
  model,
460
460
  tier,
@@ -470,20 +470,20 @@ function getFallbackChain(tier, tierConfigs) {
470
470
  const config = tierConfigs[tier];
471
471
  return [config.primary, ...config.fallback];
472
472
  }
473
- function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
473
+ function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
474
474
  const pricing = modelPricing.get(model);
475
475
  const inputPrice = pricing?.inputPrice ?? 0;
476
476
  const outputPrice = pricing?.outputPrice ?? 0;
477
477
  const inputCost = estimatedInputTokens / 1e6 * inputPrice;
478
478
  const outputCost = maxOutputTokens / 1e6 * outputPrice;
479
479
  const costEstimate = inputCost + outputCost;
480
- const opusPricing = modelPricing.get("anthropic/claude-opus-4");
480
+ const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
481
481
  const opusInputPrice = opusPricing?.inputPrice ?? 0;
482
482
  const opusOutputPrice = opusPricing?.outputPrice ?? 0;
483
483
  const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
484
484
  const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
485
485
  const baselineCost = baselineInput + baselineOutput;
486
- const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
486
+ const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
487
487
  return { costEstimate, baselineCost, savings };
488
488
  }
489
489
  function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
@@ -1112,15 +1112,17 @@ var DEFAULT_ROUTING_CONFIG = {
1112
1112
  // Tier boundaries on weighted score axis
1113
1113
  tierBoundaries: {
1114
1114
  simpleMedium: 0,
1115
- mediumComplex: 0.18,
1116
- complexReasoning: 0.4
1117
- // Raised from 0.25 - requires strong reasoning signals
1115
+ mediumComplex: 0.3,
1116
+ // Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
1117
+ complexReasoning: 0.5
1118
+ // Raised from 0.4 - reserve for true reasoning tasks
1118
1119
  },
1119
1120
  // Sigmoid steepness for confidence calibration
1120
1121
  confidenceSteepness: 12,
1121
1122
  // Below this confidence → ambiguous (null tier)
1122
1123
  confidenceThreshold: 0.7
1123
1124
  },
1125
+ // Auto (balanced) tier configs - current default smart routing
1124
1126
  tiers: {
1125
1127
  SIMPLE: {
1126
1128
  primary: "nvidia/kimi-k2.5",
@@ -1129,7 +1131,9 @@ var DEFAULT_ROUTING_CONFIG = {
1129
1131
  "google/gemini-2.5-flash",
1130
1132
  "nvidia/gpt-oss-120b",
1131
1133
  "nvidia/gpt-oss-20b",
1132
- "deepseek/deepseek-chat"
1134
+ "deepseek/deepseek-chat",
1135
+ "xai/grok-code-fast-1"
1136
+ // Added for better quality fallback
1133
1137
  ]
1134
1138
  },
1135
1139
  MEDIUM: {
@@ -1144,7 +1148,8 @@ var DEFAULT_ROUTING_CONFIG = {
1144
1148
  },
1145
1149
  COMPLEX: {
1146
1150
  primary: "google/gemini-2.5-pro",
1147
- fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "xai/grok-4-0709", "openai/gpt-4o"]
1151
+ fallback: ["xai/grok-4-0709", "openai/gpt-4o", "openai/gpt-5.2", "anthropic/claude-sonnet-4"]
1152
+ // Grok first for cost efficiency, Sonnet as last resort
1148
1153
  },
1149
1154
  REASONING: {
1150
1155
  primary: "xai/grok-4-1-fast-reasoning",
@@ -1158,6 +1163,52 @@ var DEFAULT_ROUTING_CONFIG = {
1158
1163
  ]
1159
1164
  }
1160
1165
  },
1166
+ // Eco tier configs - ultra cost-optimized (blockrun/eco)
1167
+ ecoTiers: {
1168
+ SIMPLE: {
1169
+ primary: "nvidia/kimi-k2.5",
1170
+ // $0.001/$0.001
1171
+ fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "nvidia/gpt-oss-20b"]
1172
+ },
1173
+ MEDIUM: {
1174
+ primary: "deepseek/deepseek-chat",
1175
+ // $0.14/$0.28
1176
+ fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
1177
+ },
1178
+ COMPLEX: {
1179
+ primary: "xai/grok-4-0709",
1180
+ // $0.20/$1.50
1181
+ fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
1182
+ },
1183
+ REASONING: {
1184
+ primary: "deepseek/deepseek-reasoner",
1185
+ // $0.55/$2.19
1186
+ fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
1187
+ }
1188
+ },
1189
+ // Premium tier configs - best quality (blockrun/premium)
1190
+ premiumTiers: {
1191
+ SIMPLE: {
1192
+ primary: "google/gemini-2.5-flash",
1193
+ // $0.075/$0.30
1194
+ fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
1195
+ },
1196
+ MEDIUM: {
1197
+ primary: "openai/gpt-4o",
1198
+ // $2.50/$10
1199
+ fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
1200
+ },
1201
+ COMPLEX: {
1202
+ primary: "anthropic/claude-opus-4.5",
1203
+ // $15/$75
1204
+ fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "google/gemini-2.5-pro"]
1205
+ },
1206
+ REASONING: {
1207
+ primary: "openai/o3",
1208
+ // $10/$40
1209
+ fallback: ["anthropic/claude-opus-4.5", "openai/o1", "google/gemini-2.5-pro"]
1210
+ }
1211
+ },
1161
1212
  // Agentic tier configs - models that excel at multi-step autonomous tasks
1162
1213
  agenticTiers: {
1163
1214
  SIMPLE: {
@@ -1199,21 +1250,34 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1199
1250
  const fullText = `${systemPrompt ?? ""} ${prompt}`;
1200
1251
  const estimatedTokens = Math.ceil(fullText.length / 4);
1201
1252
  const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
1202
- const agenticScore = ruleResult.agenticScore ?? 0;
1203
- const isAutoAgentic = agenticScore >= 0.69;
1204
- const isExplicitAgentic = config.overrides.agenticMode ?? false;
1205
- const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
1206
- const tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
1253
+ const { routingProfile } = options;
1254
+ let tierConfigs;
1255
+ let profileSuffix = "";
1256
+ if (routingProfile === "eco" && config.ecoTiers) {
1257
+ tierConfigs = config.ecoTiers;
1258
+ profileSuffix = " | eco";
1259
+ } else if (routingProfile === "premium" && config.premiumTiers) {
1260
+ tierConfigs = config.premiumTiers;
1261
+ profileSuffix = " | premium";
1262
+ } else {
1263
+ const agenticScore = ruleResult.agenticScore ?? 0;
1264
+ const isAutoAgentic = agenticScore >= 0.5;
1265
+ const isExplicitAgentic = config.overrides.agenticMode ?? false;
1266
+ const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
1267
+ tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
1268
+ profileSuffix = useAgenticTiers ? " | agentic" : "";
1269
+ }
1207
1270
  if (estimatedTokens > config.overrides.maxTokensForceComplex) {
1208
1271
  return selectModel(
1209
1272
  "COMPLEX",
1210
1273
  0.95,
1211
1274
  "rules",
1212
- `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${useAgenticTiers ? " | agentic" : ""}`,
1275
+ `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${profileSuffix}`,
1213
1276
  tierConfigs,
1214
1277
  modelPricing,
1215
1278
  estimatedTokens,
1216
- maxOutputTokens
1279
+ maxOutputTokens,
1280
+ routingProfile
1217
1281
  );
1218
1282
  }
1219
1283
  const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
@@ -1237,11 +1301,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1237
1301
  tier = minTier;
1238
1302
  }
1239
1303
  }
1240
- if (isAutoAgentic) {
1241
- reasoning += " | auto-agentic";
1242
- } else if (isExplicitAgentic) {
1243
- reasoning += " | agentic";
1244
- }
1304
+ reasoning += profileSuffix;
1245
1305
  return selectModel(
1246
1306
  tier,
1247
1307
  confidence,
@@ -1250,7 +1310,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
1250
1310
  tierConfigs,
1251
1311
  modelPricing,
1252
1312
  estimatedTokens,
1253
- maxOutputTokens
1313
+ maxOutputTokens,
1314
+ routingProfile
1254
1315
  );
1255
1316
  }
1256
1317
 
@@ -1279,11 +1340,12 @@ var MODEL_ALIASES = {
1279
1340
  grok: "xai/grok-3",
1280
1341
  "grok-fast": "xai/grok-4-fast-reasoning",
1281
1342
  "grok-code": "xai/grok-code-fast-1",
1282
- // NVIDIA (free)
1343
+ // NVIDIA
1283
1344
  nvidia: "nvidia/gpt-oss-120b",
1284
1345
  "gpt-120b": "nvidia/gpt-oss-120b",
1285
- "gpt-20b": "nvidia/gpt-oss-20b",
1286
- free: "nvidia/gpt-oss-120b"
1346
+ "gpt-20b": "nvidia/gpt-oss-20b"
1347
+ // Note: auto, free, eco, premium are virtual routing profiles registered in BLOCKRUN_MODELS
1348
+ // They don't need aliases since they're already top-level model IDs
1287
1349
  };
1288
1350
  function resolveModelAlias(model) {
1289
1351
  const normalized = model.trim().toLowerCase();
@@ -1297,16 +1359,40 @@ function resolveModelAlias(model) {
1297
1359
  return model;
1298
1360
  }
1299
1361
  var BLOCKRUN_MODELS = [
1300
- // Smart routing meta-model — proxy replaces with actual model
1362
+ // Smart routing meta-models — proxy replaces with actual model
1301
1363
  // NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
1302
1364
  {
1303
1365
  id: "auto",
1304
- name: "BlockRun Smart Router",
1366
+ name: "Auto (Smart Router - Balanced)",
1367
+ inputPrice: 0,
1368
+ outputPrice: 0,
1369
+ contextWindow: 105e4,
1370
+ maxOutput: 128e3
1371
+ },
1372
+ {
1373
+ id: "free",
1374
+ name: "Free (NVIDIA GPT-OSS-120B only)",
1375
+ inputPrice: 0,
1376
+ outputPrice: 0,
1377
+ contextWindow: 128e3,
1378
+ maxOutput: 4096
1379
+ },
1380
+ {
1381
+ id: "eco",
1382
+ name: "Eco (Smart Router - Cost Optimized)",
1305
1383
  inputPrice: 0,
1306
1384
  outputPrice: 0,
1307
1385
  contextWindow: 105e4,
1308
1386
  maxOutput: 128e3
1309
1387
  },
1388
+ {
1389
+ id: "premium",
1390
+ name: "Premium (Smart Router - Best Quality)",
1391
+ inputPrice: 0,
1392
+ outputPrice: 0,
1393
+ contextWindow: 2e6,
1394
+ maxOutput: 2e5
1395
+ },
1310
1396
  // OpenAI GPT-5 Family
1311
1397
  {
1312
1398
  id: "openai/gpt-5.2",
@@ -1608,8 +1694,8 @@ var BLOCKRUN_MODELS = [
1608
1694
  {
1609
1695
  id: "xai/grok-4-0709",
1610
1696
  name: "Grok 4 (0709)",
1611
- inputPrice: 3,
1612
- outputPrice: 15,
1697
+ inputPrice: 0.2,
1698
+ outputPrice: 1.5,
1613
1699
  contextWindow: 131072,
1614
1700
  maxOutput: 16384,
1615
1701
  reasoning: true
@@ -2272,7 +2358,16 @@ async function checkForUpdates() {
2272
2358
  // src/proxy.ts
2273
2359
  var BLOCKRUN_API = "https://blockrun.ai/api";
2274
2360
  var AUTO_MODEL = "blockrun/auto";
2275
- var AUTO_MODEL_SHORT = "auto";
2361
+ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
2362
+ "blockrun/free",
2363
+ "free",
2364
+ "blockrun/eco",
2365
+ "eco",
2366
+ "blockrun/auto",
2367
+ "auto",
2368
+ "blockrun/premium",
2369
+ "premium"
2370
+ ]);
2276
2371
  var FREE_MODEL = "nvidia/gpt-oss-120b";
2277
2372
  var HEARTBEAT_INTERVAL_MS = 2e3;
2278
2373
  var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
@@ -2954,6 +3049,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
2954
3049
  let isStreaming = false;
2955
3050
  let modelId = "";
2956
3051
  let maxTokens = 4096;
3052
+ let routingProfile = null;
2957
3053
  const isChatCompletion = req.url?.includes("/chat/completions");
2958
3054
  if (isChatCompletion && body.length > 0) {
2959
3055
  try {
@@ -2969,58 +3065,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
2969
3065
  const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
2970
3066
  const resolvedModel = resolveModelAlias(normalizedModel);
2971
3067
  const wasAlias = resolvedModel !== normalizedModel;
2972
- const isAutoModel = normalizedModel === AUTO_MODEL.toLowerCase() || normalizedModel === AUTO_MODEL_SHORT.toLowerCase();
3068
+ const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
3069
+ if (isRoutingProfile) {
3070
+ const profileName = normalizedModel.replace("blockrun/", "");
3071
+ routingProfile = profileName;
3072
+ }
2973
3073
  console.log(
2974
- `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}, isAuto: ${isAutoModel}`
3074
+ `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
2975
3075
  );
2976
- if (wasAlias && !isAutoModel) {
3076
+ if (wasAlias && !isRoutingProfile) {
2977
3077
  parsed.model = resolvedModel;
2978
3078
  modelId = resolvedModel;
2979
3079
  bodyModified = true;
2980
3080
  }
2981
- if (isAutoModel) {
2982
- const sessionId = getSessionId(
2983
- req.headers
2984
- );
2985
- const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
2986
- if (existingSession) {
2987
- console.log(
2988
- `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
2989
- );
2990
- parsed.model = existingSession.model;
2991
- modelId = existingSession.model;
3081
+ if (isRoutingProfile) {
3082
+ if (routingProfile === "free") {
3083
+ const freeModel = "nvidia/gpt-oss-120b";
3084
+ console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
3085
+ parsed.model = freeModel;
3086
+ modelId = freeModel;
2992
3087
  bodyModified = true;
2993
- sessionStore.touchSession(sessionId);
3088
+ await logUsage({
3089
+ timestamp: (/* @__PURE__ */ new Date()).toISOString(),
3090
+ model: freeModel,
3091
+ tier: "SIMPLE",
3092
+ cost: 0,
3093
+ baselineCost: 0,
3094
+ savings: 1,
3095
+ // 100% savings
3096
+ latencyMs: 0
3097
+ });
2994
3098
  } else {
2995
- const messages = parsed.messages;
2996
- let lastUserMsg;
2997
- if (messages) {
2998
- for (let i = messages.length - 1; i >= 0; i--) {
2999
- if (messages[i].role === "user") {
3000
- lastUserMsg = messages[i];
3001
- break;
3002
- }
3003
- }
3004
- }
3005
- const systemMsg = messages?.find((m) => m.role === "system");
3006
- const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
3007
- const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
3008
- const tools = parsed.tools;
3009
- const hasTools = Array.isArray(tools) && tools.length > 0;
3010
- if (hasTools) {
3011
- console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
3012
- }
3013
- routingDecision = route(prompt, systemPrompt, maxTokens, routerOpts);
3014
- parsed.model = routingDecision.model;
3015
- modelId = routingDecision.model;
3016
- bodyModified = true;
3017
- if (sessionId) {
3018
- sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
3099
+ const sessionId = getSessionId(
3100
+ req.headers
3101
+ );
3102
+ const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
3103
+ if (existingSession) {
3019
3104
  console.log(
3020
- `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
3105
+ `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
3021
3106
  );
3107
+ parsed.model = existingSession.model;
3108
+ modelId = existingSession.model;
3109
+ bodyModified = true;
3110
+ sessionStore.touchSession(sessionId);
3111
+ } else {
3112
+ const messages = parsed.messages;
3113
+ let lastUserMsg;
3114
+ if (messages) {
3115
+ for (let i = messages.length - 1; i >= 0; i--) {
3116
+ if (messages[i].role === "user") {
3117
+ lastUserMsg = messages[i];
3118
+ break;
3119
+ }
3120
+ }
3121
+ }
3122
+ const systemMsg = messages?.find((m) => m.role === "system");
3123
+ const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
3124
+ const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
3125
+ const tools = parsed.tools;
3126
+ const hasTools = Array.isArray(tools) && tools.length > 0;
3127
+ if (hasTools) {
3128
+ console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
3129
+ }
3130
+ routingDecision = route(prompt, systemPrompt, maxTokens, {
3131
+ ...routerOpts,
3132
+ routingProfile: routingProfile ?? void 0
3133
+ });
3134
+ parsed.model = routingDecision.model;
3135
+ modelId = routingDecision.model;
3136
+ bodyModified = true;
3137
+ if (sessionId) {
3138
+ sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
3139
+ console.log(
3140
+ `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
3141
+ );
3142
+ }
3143
+ options.onRouted?.(routingDecision);
3022
3144
  }
3023
- options.onRouted?.(routingDecision);
3024
3145
  }
3025
3146
  }
3026
3147
  if (bodyModified) {
@@ -3204,7 +3325,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3204
3325
  actualModelUsed,
3205
3326
  routerOpts.modelPricing,
3206
3327
  estimatedInputTokens,
3207
- maxTokens
3328
+ maxTokens,
3329
+ routingProfile ?? void 0
3208
3330
  );
3209
3331
  routingDecision = {
3210
3332
  ...routingDecision,
@@ -3413,7 +3535,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3413
3535
  routingDecision.model,
3414
3536
  routerOpts.modelPricing,
3415
3537
  estimatedInputTokens,
3416
- maxTokens
3538
+ maxTokens,
3539
+ routingProfile ?? void 0
3417
3540
  );
3418
3541
  const costWithBuffer = accurateCosts.costEstimate * 1.2;
3419
3542
  const baselineWithBuffer = accurateCosts.baselineCost * 1.2;