@blockrun/clawrouter 0.8.20 → 0.8.22
This diff shows the content of publicly available package versions that have been released to a supported registry. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.
- package/README.md +20 -12
- package/dist/cli.js +198 -75
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +6 -1
- package/dist/index.js +201 -77
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
package/README.md
CHANGED
|
@@ -14,7 +14,7 @@ One wallet, 30+ models, zero API keys.
|
|
|
14
14
|
[Docs](https://blockrun.ai/docs) · [Models](https://blockrun.ai/models) · [Configuration](docs/configuration.md) · [Features](docs/features.md) · [Windows](docs/windows-installation.md) · [Troubleshooting](docs/troubleshooting.md) · [Telegram](https://t.me/blockrunAI) · [X](https://x.com/BlockRunAI)
|
|
15
15
|
|
|
16
16
|
**Winner — Agentic Commerce Track** at the [USDC AI Agent Hackathon](https://x.com/USDC/status/2021625822294216977)<br>
|
|
17
|
-
|
|
17
|
+
_The world's first hackathon run entirely by AI agents, powered by USDC_
|
|
18
18
|
|
|
19
19
|
</div>
|
|
20
20
|
|
|
@@ -44,15 +44,15 @@ One wallet, 30+ models, zero API keys.
|
|
|
44
44
|
|
|
45
45
|
## Quick Start (2 mins)
|
|
46
46
|
|
|
47
|
+
**Inspired by Andreas** — we've updated our installation script:
|
|
48
|
+
|
|
47
49
|
```bash
|
|
48
50
|
# 1. Install with smart routing enabled by default
|
|
49
|
-
curl -fsSL https://
|
|
51
|
+
curl -fsSL https://blockrun.ai/ClawRouter-update | bash
|
|
52
|
+
openclaw gateway restart
|
|
50
53
|
|
|
51
54
|
# 2. Fund your wallet with USDC on Base (address printed on install)
|
|
52
55
|
# $5 is enough for thousands of requests
|
|
53
|
-
|
|
54
|
-
# 3. Restart OpenClaw gateway
|
|
55
|
-
openclaw gateway restart
|
|
56
56
|
```
|
|
57
57
|
|
|
58
58
|
Done! Smart routing (`blockrun/auto`) is now your default model.
|
|
@@ -118,12 +118,12 @@ No external classifier calls. Ambiguous queries default to the MEDIUM tier (Grok
|
|
|
118
118
|
|
|
119
119
|
### Tier → Model Mapping
|
|
120
120
|
|
|
121
|
-
| Tier | Primary Model
|
|
122
|
-
| --------- |
|
|
123
|
-
| SIMPLE | nvidia/kimi-k2.5
|
|
124
|
-
| MEDIUM | grok-code-fast-1
|
|
125
|
-
| COMPLEX | gemini-2.5-pro
|
|
126
|
-
| REASONING | grok-4-1-fast-reasoning | $0.50
|
|
121
|
+
| Tier | Primary Model | Cost/M | Savings vs Opus |
|
|
122
|
+
| --------- | ----------------------- | ------ | --------------- |
|
|
123
|
+
| SIMPLE | nvidia/kimi-k2.5 | $0.001 | **~100%** |
|
|
124
|
+
| MEDIUM | grok-code-fast-1 | $1.50 | **94.0%** |
|
|
125
|
+
| COMPLEX | gemini-2.5-pro | $10.00 | **60.0%** |
|
|
126
|
+
| REASONING | grok-4-1-fast-reasoning | $0.50 | **98.0%** |
|
|
127
127
|
|
|
128
128
|
Special rule: 2+ reasoning markers → REASONING at 0.97 confidence.
|
|
129
129
|
|
|
@@ -137,6 +137,7 @@ ClawRouter v0.5+ includes intelligent features that work automatically:
|
|
|
137
137
|
- **Model aliases** — `/model free`, `/model sonnet`, `/model grok`
|
|
138
138
|
- **Session persistence** — pins model for multi-turn conversations
|
|
139
139
|
- **Free tier fallback** — keeps working when wallet is empty
|
|
140
|
+
- **Auto-update check** — notifies you when a new version is available
|
|
140
141
|
|
|
141
142
|
**Full details:** [docs/features.md](docs/features.md)
|
|
142
143
|
|
|
@@ -334,13 +335,19 @@ Agents shouldn't need a human to paste API keys. They should generate a wallet,
|
|
|
334
335
|
Quick checklist:
|
|
335
336
|
|
|
336
337
|
```bash
|
|
337
|
-
# Check version (should be 0.
|
|
338
|
+
# Check version (should be 0.8.21+)
|
|
338
339
|
cat ~/.openclaw/extensions/clawrouter/package.json | grep version
|
|
339
340
|
|
|
340
341
|
# Check proxy running
|
|
341
342
|
curl http://localhost:8402/health
|
|
343
|
+
|
|
344
|
+
# Update to latest version
|
|
345
|
+
curl -fsSL https://blockrun.ai/ClawRouter-update | bash
|
|
346
|
+
openclaw gateway restart
|
|
342
347
|
```
|
|
343
348
|
|
|
349
|
+
ClawRouter automatically checks for updates on startup and shows a notification if a newer version is available.
|
|
350
|
+
|
|
344
351
|
**Full guide:** [docs/troubleshooting.md](docs/troubleshooting.md)
|
|
345
352
|
|
|
346
353
|
---
|
|
@@ -374,6 +381,7 @@ BLOCKRUN_WALLET_KEY=0x... npx tsx test-e2e.ts
|
|
|
374
381
|
- [x] Cost tracking — /stats command with savings dashboard
|
|
375
382
|
- [x] Model aliases — `/model free`, `/model sonnet`, `/model grok`, etc.
|
|
376
383
|
- [x] Free tier — gpt-oss-120b for $0 when wallet is empty
|
|
384
|
+
- [x] Auto-update — startup version check with one-command update
|
|
377
385
|
- [ ] Cascade routing — try cheap model first, escalate on low quality
|
|
378
386
|
- [ ] Spend controls — daily/monthly budgets
|
|
379
387
|
- [ ] Remote analytics — cost tracking at blockrun.ai
|
package/dist/cli.js
CHANGED
|
@@ -439,7 +439,7 @@ function calibrateConfidence(distance, steepness) {
|
|
|
439
439
|
}
|
|
440
440
|
|
|
441
441
|
// src/router/selector.ts
|
|
442
|
-
function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens) {
|
|
442
|
+
function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
|
|
443
443
|
const tierConfig = tierConfigs[tier];
|
|
444
444
|
const model = tierConfig.primary;
|
|
445
445
|
const pricing = modelPricing.get(model);
|
|
@@ -448,13 +448,13 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
|
|
|
448
448
|
const inputCost = estimatedInputTokens / 1e6 * inputPrice;
|
|
449
449
|
const outputCost = maxOutputTokens / 1e6 * outputPrice;
|
|
450
450
|
const costEstimate = inputCost + outputCost;
|
|
451
|
-
const opusPricing = modelPricing.get("anthropic/claude-opus-4");
|
|
451
|
+
const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
|
|
452
452
|
const opusInputPrice = opusPricing?.inputPrice ?? 0;
|
|
453
453
|
const opusOutputPrice = opusPricing?.outputPrice ?? 0;
|
|
454
454
|
const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
|
|
455
455
|
const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
|
|
456
456
|
const baselineCost = baselineInput + baselineOutput;
|
|
457
|
-
const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
457
|
+
const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
458
458
|
return {
|
|
459
459
|
model,
|
|
460
460
|
tier,
|
|
@@ -470,20 +470,20 @@ function getFallbackChain(tier, tierConfigs) {
|
|
|
470
470
|
const config = tierConfigs[tier];
|
|
471
471
|
return [config.primary, ...config.fallback];
|
|
472
472
|
}
|
|
473
|
-
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
|
|
473
|
+
function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
|
|
474
474
|
const pricing = modelPricing.get(model);
|
|
475
475
|
const inputPrice = pricing?.inputPrice ?? 0;
|
|
476
476
|
const outputPrice = pricing?.outputPrice ?? 0;
|
|
477
477
|
const inputCost = estimatedInputTokens / 1e6 * inputPrice;
|
|
478
478
|
const outputCost = maxOutputTokens / 1e6 * outputPrice;
|
|
479
479
|
const costEstimate = inputCost + outputCost;
|
|
480
|
-
const opusPricing = modelPricing.get("anthropic/claude-opus-4");
|
|
480
|
+
const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
|
|
481
481
|
const opusInputPrice = opusPricing?.inputPrice ?? 0;
|
|
482
482
|
const opusOutputPrice = opusPricing?.outputPrice ?? 0;
|
|
483
483
|
const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
|
|
484
484
|
const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
|
|
485
485
|
const baselineCost = baselineInput + baselineOutput;
|
|
486
|
-
const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
486
|
+
const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
|
|
487
487
|
return { costEstimate, baselineCost, savings };
|
|
488
488
|
}
|
|
489
489
|
function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
|
|
@@ -1112,15 +1112,17 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1112
1112
|
// Tier boundaries on weighted score axis
|
|
1113
1113
|
tierBoundaries: {
|
|
1114
1114
|
simpleMedium: 0,
|
|
1115
|
-
mediumComplex: 0.
|
|
1116
|
-
|
|
1117
|
-
|
|
1115
|
+
mediumComplex: 0.3,
|
|
1116
|
+
// Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
|
|
1117
|
+
complexReasoning: 0.5
|
|
1118
|
+
// Raised from 0.4 - reserve for true reasoning tasks
|
|
1118
1119
|
},
|
|
1119
1120
|
// Sigmoid steepness for confidence calibration
|
|
1120
1121
|
confidenceSteepness: 12,
|
|
1121
1122
|
// Below this confidence → ambiguous (null tier)
|
|
1122
1123
|
confidenceThreshold: 0.7
|
|
1123
1124
|
},
|
|
1125
|
+
// Auto (balanced) tier configs - current default smart routing
|
|
1124
1126
|
tiers: {
|
|
1125
1127
|
SIMPLE: {
|
|
1126
1128
|
primary: "nvidia/kimi-k2.5",
|
|
@@ -1129,7 +1131,9 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1129
1131
|
"google/gemini-2.5-flash",
|
|
1130
1132
|
"nvidia/gpt-oss-120b",
|
|
1131
1133
|
"nvidia/gpt-oss-20b",
|
|
1132
|
-
"deepseek/deepseek-chat"
|
|
1134
|
+
"deepseek/deepseek-chat",
|
|
1135
|
+
"xai/grok-code-fast-1"
|
|
1136
|
+
// Added for better quality fallback
|
|
1133
1137
|
]
|
|
1134
1138
|
},
|
|
1135
1139
|
MEDIUM: {
|
|
@@ -1144,7 +1148,8 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1144
1148
|
},
|
|
1145
1149
|
COMPLEX: {
|
|
1146
1150
|
primary: "google/gemini-2.5-pro",
|
|
1147
|
-
fallback: ["
|
|
1151
|
+
fallback: ["xai/grok-4-0709", "openai/gpt-4o", "openai/gpt-5.2", "anthropic/claude-sonnet-4"]
|
|
1152
|
+
// Grok first for cost efficiency, Sonnet as last resort
|
|
1148
1153
|
},
|
|
1149
1154
|
REASONING: {
|
|
1150
1155
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
@@ -1158,6 +1163,52 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1158
1163
|
]
|
|
1159
1164
|
}
|
|
1160
1165
|
},
|
|
1166
|
+
// Eco tier configs - ultra cost-optimized (blockrun/eco)
|
|
1167
|
+
ecoTiers: {
|
|
1168
|
+
SIMPLE: {
|
|
1169
|
+
primary: "nvidia/kimi-k2.5",
|
|
1170
|
+
// $0.001/$0.001
|
|
1171
|
+
fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "nvidia/gpt-oss-20b"]
|
|
1172
|
+
},
|
|
1173
|
+
MEDIUM: {
|
|
1174
|
+
primary: "deepseek/deepseek-chat",
|
|
1175
|
+
// $0.14/$0.28
|
|
1176
|
+
fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
|
|
1177
|
+
},
|
|
1178
|
+
COMPLEX: {
|
|
1179
|
+
primary: "xai/grok-4-0709",
|
|
1180
|
+
// $0.20/$1.50
|
|
1181
|
+
fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
|
|
1182
|
+
},
|
|
1183
|
+
REASONING: {
|
|
1184
|
+
primary: "deepseek/deepseek-reasoner",
|
|
1185
|
+
// $0.55/$2.19
|
|
1186
|
+
fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
|
|
1187
|
+
}
|
|
1188
|
+
},
|
|
1189
|
+
// Premium tier configs - best quality (blockrun/premium)
|
|
1190
|
+
premiumTiers: {
|
|
1191
|
+
SIMPLE: {
|
|
1192
|
+
primary: "google/gemini-2.5-flash",
|
|
1193
|
+
// $0.075/$0.30
|
|
1194
|
+
fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
|
|
1195
|
+
},
|
|
1196
|
+
MEDIUM: {
|
|
1197
|
+
primary: "openai/gpt-4o",
|
|
1198
|
+
// $2.50/$10
|
|
1199
|
+
fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
|
|
1200
|
+
},
|
|
1201
|
+
COMPLEX: {
|
|
1202
|
+
primary: "anthropic/claude-opus-4.5",
|
|
1203
|
+
// $15/$75
|
|
1204
|
+
fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "google/gemini-2.5-pro"]
|
|
1205
|
+
},
|
|
1206
|
+
REASONING: {
|
|
1207
|
+
primary: "openai/o3",
|
|
1208
|
+
// $10/$40
|
|
1209
|
+
fallback: ["anthropic/claude-opus-4.5", "openai/o1", "google/gemini-2.5-pro"]
|
|
1210
|
+
}
|
|
1211
|
+
},
|
|
1161
1212
|
// Agentic tier configs - models that excel at multi-step autonomous tasks
|
|
1162
1213
|
agenticTiers: {
|
|
1163
1214
|
SIMPLE: {
|
|
@@ -1199,21 +1250,34 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1199
1250
|
const fullText = `${systemPrompt ?? ""} ${prompt}`;
|
|
1200
1251
|
const estimatedTokens = Math.ceil(fullText.length / 4);
|
|
1201
1252
|
const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
|
|
1202
|
-
const
|
|
1203
|
-
|
|
1204
|
-
|
|
1205
|
-
|
|
1206
|
-
|
|
1253
|
+
const { routingProfile } = options;
|
|
1254
|
+
let tierConfigs;
|
|
1255
|
+
let profileSuffix = "";
|
|
1256
|
+
if (routingProfile === "eco" && config.ecoTiers) {
|
|
1257
|
+
tierConfigs = config.ecoTiers;
|
|
1258
|
+
profileSuffix = " | eco";
|
|
1259
|
+
} else if (routingProfile === "premium" && config.premiumTiers) {
|
|
1260
|
+
tierConfigs = config.premiumTiers;
|
|
1261
|
+
profileSuffix = " | premium";
|
|
1262
|
+
} else {
|
|
1263
|
+
const agenticScore = ruleResult.agenticScore ?? 0;
|
|
1264
|
+
const isAutoAgentic = agenticScore >= 0.5;
|
|
1265
|
+
const isExplicitAgentic = config.overrides.agenticMode ?? false;
|
|
1266
|
+
const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
|
|
1267
|
+
tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
|
|
1268
|
+
profileSuffix = useAgenticTiers ? " | agentic" : "";
|
|
1269
|
+
}
|
|
1207
1270
|
if (estimatedTokens > config.overrides.maxTokensForceComplex) {
|
|
1208
1271
|
return selectModel(
|
|
1209
1272
|
"COMPLEX",
|
|
1210
1273
|
0.95,
|
|
1211
1274
|
"rules",
|
|
1212
|
-
`Input exceeds ${config.overrides.maxTokensForceComplex} tokens${
|
|
1275
|
+
`Input exceeds ${config.overrides.maxTokensForceComplex} tokens${profileSuffix}`,
|
|
1213
1276
|
tierConfigs,
|
|
1214
1277
|
modelPricing,
|
|
1215
1278
|
estimatedTokens,
|
|
1216
|
-
maxOutputTokens
|
|
1279
|
+
maxOutputTokens,
|
|
1280
|
+
routingProfile
|
|
1217
1281
|
);
|
|
1218
1282
|
}
|
|
1219
1283
|
const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
|
|
@@ -1237,11 +1301,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1237
1301
|
tier = minTier;
|
|
1238
1302
|
}
|
|
1239
1303
|
}
|
|
1240
|
-
|
|
1241
|
-
reasoning += " | auto-agentic";
|
|
1242
|
-
} else if (isExplicitAgentic) {
|
|
1243
|
-
reasoning += " | agentic";
|
|
1244
|
-
}
|
|
1304
|
+
reasoning += profileSuffix;
|
|
1245
1305
|
return selectModel(
|
|
1246
1306
|
tier,
|
|
1247
1307
|
confidence,
|
|
@@ -1250,7 +1310,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
|
|
|
1250
1310
|
tierConfigs,
|
|
1251
1311
|
modelPricing,
|
|
1252
1312
|
estimatedTokens,
|
|
1253
|
-
maxOutputTokens
|
|
1313
|
+
maxOutputTokens,
|
|
1314
|
+
routingProfile
|
|
1254
1315
|
);
|
|
1255
1316
|
}
|
|
1256
1317
|
|
|
@@ -1279,11 +1340,12 @@ var MODEL_ALIASES = {
|
|
|
1279
1340
|
grok: "xai/grok-3",
|
|
1280
1341
|
"grok-fast": "xai/grok-4-fast-reasoning",
|
|
1281
1342
|
"grok-code": "xai/grok-code-fast-1",
|
|
1282
|
-
// NVIDIA
|
|
1343
|
+
// NVIDIA
|
|
1283
1344
|
nvidia: "nvidia/gpt-oss-120b",
|
|
1284
1345
|
"gpt-120b": "nvidia/gpt-oss-120b",
|
|
1285
|
-
"gpt-20b": "nvidia/gpt-oss-20b"
|
|
1286
|
-
|
|
1346
|
+
"gpt-20b": "nvidia/gpt-oss-20b"
|
|
1347
|
+
// Note: auto, free, eco, premium are virtual routing profiles registered in BLOCKRUN_MODELS
|
|
1348
|
+
// They don't need aliases since they're already top-level model IDs
|
|
1287
1349
|
};
|
|
1288
1350
|
function resolveModelAlias(model) {
|
|
1289
1351
|
const normalized = model.trim().toLowerCase();
|
|
@@ -1297,16 +1359,40 @@ function resolveModelAlias(model) {
|
|
|
1297
1359
|
return model;
|
|
1298
1360
|
}
|
|
1299
1361
|
var BLOCKRUN_MODELS = [
|
|
1300
|
-
// Smart routing meta-
|
|
1362
|
+
// Smart routing meta-models — proxy replaces with actual model
|
|
1301
1363
|
// NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
|
|
1302
1364
|
{
|
|
1303
1365
|
id: "auto",
|
|
1304
|
-
name: "
|
|
1366
|
+
name: "Auto (Smart Router - Balanced)",
|
|
1367
|
+
inputPrice: 0,
|
|
1368
|
+
outputPrice: 0,
|
|
1369
|
+
contextWindow: 105e4,
|
|
1370
|
+
maxOutput: 128e3
|
|
1371
|
+
},
|
|
1372
|
+
{
|
|
1373
|
+
id: "free",
|
|
1374
|
+
name: "Free (NVIDIA GPT-OSS-120B only)",
|
|
1375
|
+
inputPrice: 0,
|
|
1376
|
+
outputPrice: 0,
|
|
1377
|
+
contextWindow: 128e3,
|
|
1378
|
+
maxOutput: 4096
|
|
1379
|
+
},
|
|
1380
|
+
{
|
|
1381
|
+
id: "eco",
|
|
1382
|
+
name: "Eco (Smart Router - Cost Optimized)",
|
|
1305
1383
|
inputPrice: 0,
|
|
1306
1384
|
outputPrice: 0,
|
|
1307
1385
|
contextWindow: 105e4,
|
|
1308
1386
|
maxOutput: 128e3
|
|
1309
1387
|
},
|
|
1388
|
+
{
|
|
1389
|
+
id: "premium",
|
|
1390
|
+
name: "Premium (Smart Router - Best Quality)",
|
|
1391
|
+
inputPrice: 0,
|
|
1392
|
+
outputPrice: 0,
|
|
1393
|
+
contextWindow: 2e6,
|
|
1394
|
+
maxOutput: 2e5
|
|
1395
|
+
},
|
|
1310
1396
|
// OpenAI GPT-5 Family
|
|
1311
1397
|
{
|
|
1312
1398
|
id: "openai/gpt-5.2",
|
|
@@ -1608,8 +1694,8 @@ var BLOCKRUN_MODELS = [
|
|
|
1608
1694
|
{
|
|
1609
1695
|
id: "xai/grok-4-0709",
|
|
1610
1696
|
name: "Grok 4 (0709)",
|
|
1611
|
-
inputPrice:
|
|
1612
|
-
outputPrice:
|
|
1697
|
+
inputPrice: 0.2,
|
|
1698
|
+
outputPrice: 1.5,
|
|
1613
1699
|
contextWindow: 131072,
|
|
1614
1700
|
maxOutput: 16384,
|
|
1615
1701
|
reasoning: true
|
|
@@ -2272,7 +2358,16 @@ async function checkForUpdates() {
|
|
|
2272
2358
|
// src/proxy.ts
|
|
2273
2359
|
var BLOCKRUN_API = "https://blockrun.ai/api";
|
|
2274
2360
|
var AUTO_MODEL = "blockrun/auto";
|
|
2275
|
-
var
|
|
2361
|
+
var ROUTING_PROFILES = /* @__PURE__ */ new Set([
|
|
2362
|
+
"blockrun/free",
|
|
2363
|
+
"free",
|
|
2364
|
+
"blockrun/eco",
|
|
2365
|
+
"eco",
|
|
2366
|
+
"blockrun/auto",
|
|
2367
|
+
"auto",
|
|
2368
|
+
"blockrun/premium",
|
|
2369
|
+
"premium"
|
|
2370
|
+
]);
|
|
2276
2371
|
var FREE_MODEL = "nvidia/gpt-oss-120b";
|
|
2277
2372
|
var HEARTBEAT_INTERVAL_MS = 2e3;
|
|
2278
2373
|
var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
|
|
@@ -2954,6 +3049,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
2954
3049
|
let isStreaming = false;
|
|
2955
3050
|
let modelId = "";
|
|
2956
3051
|
let maxTokens = 4096;
|
|
3052
|
+
let routingProfile = null;
|
|
2957
3053
|
const isChatCompletion = req.url?.includes("/chat/completions");
|
|
2958
3054
|
if (isChatCompletion && body.length > 0) {
|
|
2959
3055
|
try {
|
|
@@ -2969,58 +3065,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
2969
3065
|
const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
|
|
2970
3066
|
const resolvedModel = resolveModelAlias(normalizedModel);
|
|
2971
3067
|
const wasAlias = resolvedModel !== normalizedModel;
|
|
2972
|
-
const
|
|
3068
|
+
const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
|
|
3069
|
+
if (isRoutingProfile) {
|
|
3070
|
+
const profileName = normalizedModel.replace("blockrun/", "");
|
|
3071
|
+
routingProfile = profileName;
|
|
3072
|
+
}
|
|
2973
3073
|
console.log(
|
|
2974
|
-
`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}
|
|
3074
|
+
`[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
|
|
2975
3075
|
);
|
|
2976
|
-
if (wasAlias && !
|
|
3076
|
+
if (wasAlias && !isRoutingProfile) {
|
|
2977
3077
|
parsed.model = resolvedModel;
|
|
2978
3078
|
modelId = resolvedModel;
|
|
2979
3079
|
bodyModified = true;
|
|
2980
3080
|
}
|
|
2981
|
-
if (
|
|
2982
|
-
|
|
2983
|
-
|
|
2984
|
-
|
|
2985
|
-
|
|
2986
|
-
|
|
2987
|
-
console.log(
|
|
2988
|
-
`[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
|
|
2989
|
-
);
|
|
2990
|
-
parsed.model = existingSession.model;
|
|
2991
|
-
modelId = existingSession.model;
|
|
3081
|
+
if (isRoutingProfile) {
|
|
3082
|
+
if (routingProfile === "free") {
|
|
3083
|
+
const freeModel = "nvidia/gpt-oss-120b";
|
|
3084
|
+
console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
|
|
3085
|
+
parsed.model = freeModel;
|
|
3086
|
+
modelId = freeModel;
|
|
2992
3087
|
bodyModified = true;
|
|
2993
|
-
|
|
3088
|
+
await logUsage({
|
|
3089
|
+
timestamp: (/* @__PURE__ */ new Date()).toISOString(),
|
|
3090
|
+
model: freeModel,
|
|
3091
|
+
tier: "SIMPLE",
|
|
3092
|
+
cost: 0,
|
|
3093
|
+
baselineCost: 0,
|
|
3094
|
+
savings: 1,
|
|
3095
|
+
// 100% savings
|
|
3096
|
+
latencyMs: 0
|
|
3097
|
+
});
|
|
2994
3098
|
} else {
|
|
2995
|
-
const
|
|
2996
|
-
|
|
2997
|
-
|
|
2998
|
-
|
|
2999
|
-
|
|
3000
|
-
lastUserMsg = messages[i];
|
|
3001
|
-
break;
|
|
3002
|
-
}
|
|
3003
|
-
}
|
|
3004
|
-
}
|
|
3005
|
-
const systemMsg = messages?.find((m) => m.role === "system");
|
|
3006
|
-
const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
|
|
3007
|
-
const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
|
|
3008
|
-
const tools = parsed.tools;
|
|
3009
|
-
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
3010
|
-
if (hasTools) {
|
|
3011
|
-
console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
|
|
3012
|
-
}
|
|
3013
|
-
routingDecision = route(prompt, systemPrompt, maxTokens, routerOpts);
|
|
3014
|
-
parsed.model = routingDecision.model;
|
|
3015
|
-
modelId = routingDecision.model;
|
|
3016
|
-
bodyModified = true;
|
|
3017
|
-
if (sessionId) {
|
|
3018
|
-
sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
|
|
3099
|
+
const sessionId = getSessionId(
|
|
3100
|
+
req.headers
|
|
3101
|
+
);
|
|
3102
|
+
const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
|
|
3103
|
+
if (existingSession) {
|
|
3019
3104
|
console.log(
|
|
3020
|
-
`[ClawRouter] Session ${sessionId
|
|
3105
|
+
`[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
|
|
3021
3106
|
);
|
|
3107
|
+
parsed.model = existingSession.model;
|
|
3108
|
+
modelId = existingSession.model;
|
|
3109
|
+
bodyModified = true;
|
|
3110
|
+
sessionStore.touchSession(sessionId);
|
|
3111
|
+
} else {
|
|
3112
|
+
const messages = parsed.messages;
|
|
3113
|
+
let lastUserMsg;
|
|
3114
|
+
if (messages) {
|
|
3115
|
+
for (let i = messages.length - 1; i >= 0; i--) {
|
|
3116
|
+
if (messages[i].role === "user") {
|
|
3117
|
+
lastUserMsg = messages[i];
|
|
3118
|
+
break;
|
|
3119
|
+
}
|
|
3120
|
+
}
|
|
3121
|
+
}
|
|
3122
|
+
const systemMsg = messages?.find((m) => m.role === "system");
|
|
3123
|
+
const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
|
|
3124
|
+
const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
|
|
3125
|
+
const tools = parsed.tools;
|
|
3126
|
+
const hasTools = Array.isArray(tools) && tools.length > 0;
|
|
3127
|
+
if (hasTools) {
|
|
3128
|
+
console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
|
|
3129
|
+
}
|
|
3130
|
+
routingDecision = route(prompt, systemPrompt, maxTokens, {
|
|
3131
|
+
...routerOpts,
|
|
3132
|
+
routingProfile: routingProfile ?? void 0
|
|
3133
|
+
});
|
|
3134
|
+
parsed.model = routingDecision.model;
|
|
3135
|
+
modelId = routingDecision.model;
|
|
3136
|
+
bodyModified = true;
|
|
3137
|
+
if (sessionId) {
|
|
3138
|
+
sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
|
|
3139
|
+
console.log(
|
|
3140
|
+
`[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
|
|
3141
|
+
);
|
|
3142
|
+
}
|
|
3143
|
+
options.onRouted?.(routingDecision);
|
|
3022
3144
|
}
|
|
3023
|
-
options.onRouted?.(routingDecision);
|
|
3024
3145
|
}
|
|
3025
3146
|
}
|
|
3026
3147
|
if (bodyModified) {
|
|
@@ -3204,7 +3325,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3204
3325
|
actualModelUsed,
|
|
3205
3326
|
routerOpts.modelPricing,
|
|
3206
3327
|
estimatedInputTokens,
|
|
3207
|
-
maxTokens
|
|
3328
|
+
maxTokens,
|
|
3329
|
+
routingProfile ?? void 0
|
|
3208
3330
|
);
|
|
3209
3331
|
routingDecision = {
|
|
3210
3332
|
...routingDecision,
|
|
@@ -3413,7 +3535,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3413
3535
|
routingDecision.model,
|
|
3414
3536
|
routerOpts.modelPricing,
|
|
3415
3537
|
estimatedInputTokens,
|
|
3416
|
-
maxTokens
|
|
3538
|
+
maxTokens,
|
|
3539
|
+
routingProfile ?? void 0
|
|
3417
3540
|
);
|
|
3418
3541
|
const costWithBuffer = accurateCosts.costEstimate * 1.2;
|
|
3419
3542
|
const baselineWithBuffer = accurateCosts.baselineCost * 1.2;
|