@blockrun/clawrouter 0.12.44 → 0.12.46
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +1 -1
- package/dist/cli.js +86 -66
- package/dist/cli.js.map +1 -1
- package/dist/index.js +86 -66
- package/dist/index.js.map +1 -1
- package/package.json +1 -1
- package/scripts/benchmark.py +222 -0
package/dist/index.js
CHANGED
|
@@ -3157,63 +3157,74 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
3157
3157
|
confidenceThreshold: 0.7
|
|
3158
3158
|
},
|
|
3159
3159
|
// Auto (balanced) tier configs - current default smart routing
|
|
3160
|
+
// Benchmark-tuned 2026-03-16: latency-ranked via blockrun.ai x402 end-to-end
|
|
3160
3161
|
tiers: {
|
|
3161
3162
|
SIMPLE: {
|
|
3162
|
-
primary: "
|
|
3163
|
-
// $0.
|
|
3163
|
+
primary: "xai/grok-4-fast-non-reasoning",
|
|
3164
|
+
// 1,143ms, $0.20/$0.50 — fastest overall
|
|
3164
3165
|
fallback: [
|
|
3166
|
+
"xai/grok-3-mini",
|
|
3167
|
+
// 1,202ms, $0.30/$0.50
|
|
3165
3168
|
"google/gemini-2.5-flash",
|
|
3166
|
-
// 60% retention (best)
|
|
3169
|
+
// 1,238ms, 60% retention (best)
|
|
3167
3170
|
"google/gemini-2.5-flash-lite",
|
|
3168
|
-
// 1M context, ultra cheap ($0.10/$0.40)
|
|
3171
|
+
// 1,353ms, 1M context, ultra cheap ($0.10/$0.40)
|
|
3169
3172
|
"deepseek/deepseek-chat",
|
|
3170
|
-
// 41% retention
|
|
3173
|
+
// 1,431ms, 41% retention
|
|
3171
3174
|
"nvidia/gpt-oss-120b"
|
|
3172
|
-
// FREE fallback
|
|
3175
|
+
// 1,252ms, FREE fallback
|
|
3173
3176
|
]
|
|
3174
3177
|
},
|
|
3175
3178
|
MEDIUM: {
|
|
3176
|
-
primary: "
|
|
3177
|
-
// $0.
|
|
3179
|
+
primary: "xai/grok-4-1-fast-non-reasoning",
|
|
3180
|
+
// 1,244ms, $0.20/$0.50 — fast + tool calling
|
|
3178
3181
|
fallback: [
|
|
3179
3182
|
"deepseek/deepseek-chat",
|
|
3180
|
-
// 41% retention
|
|
3183
|
+
// 1,431ms, 41% retention
|
|
3184
|
+
"moonshot/kimi-k2.5",
|
|
3185
|
+
// 1,646ms, strong tool use quality
|
|
3181
3186
|
"google/gemini-2.5-flash",
|
|
3182
|
-
// 60% retention
|
|
3187
|
+
// 1,238ms, 60% retention
|
|
3183
3188
|
"google/gemini-2.5-flash-lite",
|
|
3184
|
-
// 1M context
|
|
3185
|
-
"xai/grok-
|
|
3186
|
-
//
|
|
3189
|
+
// 1,353ms, 1M context ($0.10/$0.40)
|
|
3190
|
+
"xai/grok-3-mini"
|
|
3191
|
+
// 1,202ms, $0.30/$0.50
|
|
3187
3192
|
]
|
|
3188
3193
|
},
|
|
3189
3194
|
COMPLEX: {
|
|
3190
3195
|
primary: "google/gemini-3.1-pro",
|
|
3191
|
-
//
|
|
3196
|
+
// 1,609ms — fast flagship quality
|
|
3192
3197
|
fallback: [
|
|
3193
3198
|
"google/gemini-2.5-flash",
|
|
3194
|
-
//
|
|
3199
|
+
// 1,238ms, cheap failsafe before expensive models
|
|
3195
3200
|
"google/gemini-2.5-flash-lite",
|
|
3196
|
-
//
|
|
3201
|
+
// 1,353ms, 1M context, ultra-cheap failsafe ($0.10/$0.40)
|
|
3197
3202
|
"google/gemini-3-pro-preview",
|
|
3198
|
-
//
|
|
3203
|
+
// 1,352ms
|
|
3199
3204
|
"google/gemini-2.5-pro",
|
|
3200
|
-
|
|
3205
|
+
// 1,294ms
|
|
3201
3206
|
"xai/grok-4-0709",
|
|
3202
|
-
|
|
3203
|
-
|
|
3204
|
-
|
|
3205
|
-
"anthropic/claude-sonnet-4.6"
|
|
3207
|
+
// 1,348ms
|
|
3208
|
+
"deepseek/deepseek-chat",
|
|
3209
|
+
// 1,431ms
|
|
3210
|
+
"anthropic/claude-sonnet-4.6",
|
|
3211
|
+
// 2,110ms — quality fallback
|
|
3212
|
+
"openai/gpt-5.4"
|
|
3213
|
+
// 6,213ms — slowest but highest quality
|
|
3206
3214
|
]
|
|
3207
3215
|
},
|
|
3208
3216
|
REASONING: {
|
|
3209
3217
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
3210
|
-
//
|
|
3218
|
+
// 1,454ms, $0.20/$0.50
|
|
3211
3219
|
fallback: [
|
|
3220
|
+
"xai/grok-4-fast-reasoning",
|
|
3221
|
+
// 1,298ms, $0.20/$0.50
|
|
3212
3222
|
"deepseek/deepseek-reasoner",
|
|
3213
|
-
//
|
|
3223
|
+
// 1,454ms, cheap reasoning
|
|
3214
3224
|
"openai/o4-mini",
|
|
3215
|
-
//
|
|
3225
|
+
// 2,328ms ($1.10/$4.40)
|
|
3216
3226
|
"openai/o3"
|
|
3227
|
+
// 2,862ms
|
|
3217
3228
|
]
|
|
3218
3229
|
}
|
|
3219
3230
|
},
|
|
@@ -3221,27 +3232,30 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
3221
3232
|
ecoTiers: {
|
|
3222
3233
|
SIMPLE: {
|
|
3223
3234
|
primary: "nvidia/gpt-oss-120b",
|
|
3224
|
-
// FREE! $0.00/$0.00
|
|
3235
|
+
// 1,252ms, FREE! $0.00/$0.00
|
|
3225
3236
|
fallback: [
|
|
3226
3237
|
"google/gemini-2.5-flash-lite",
|
|
3227
|
-
|
|
3228
|
-
"
|
|
3238
|
+
// 1,353ms, $0.10/$0.40
|
|
3239
|
+
"xai/grok-4-fast-non-reasoning",
|
|
3240
|
+
// 1,143ms, $0.20/$0.50
|
|
3241
|
+
"google/gemini-2.5-flash"
|
|
3242
|
+
// 1,238ms
|
|
3229
3243
|
]
|
|
3230
3244
|
},
|
|
3231
3245
|
MEDIUM: {
|
|
3232
3246
|
primary: "google/gemini-2.5-flash-lite",
|
|
3233
|
-
// $0.10/$0.40 - cheapest capable with 1M context
|
|
3234
|
-
fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
|
|
3247
|
+
// 1,353ms, $0.10/$0.40 - cheapest capable with 1M context
|
|
3248
|
+
fallback: ["xai/grok-4-fast-non-reasoning", "google/gemini-2.5-flash", "deepseek/deepseek-chat", "nvidia/gpt-oss-120b"]
|
|
3235
3249
|
},
|
|
3236
3250
|
COMPLEX: {
|
|
3237
3251
|
primary: "google/gemini-2.5-flash-lite",
|
|
3238
|
-
// $0.10/$0.40 - 1M context handles complexity
|
|
3239
|
-
fallback: ["google/gemini-2.5-flash", "deepseek/deepseek-chat"
|
|
3252
|
+
// 1,353ms, $0.10/$0.40 - 1M context handles complexity
|
|
3253
|
+
fallback: ["xai/grok-4-0709", "google/gemini-2.5-flash", "deepseek/deepseek-chat"]
|
|
3240
3254
|
},
|
|
3241
3255
|
REASONING: {
|
|
3242
3256
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
3243
|
-
// $0.20/$0.50
|
|
3244
|
-
fallback: ["deepseek/deepseek-reasoner"]
|
|
3257
|
+
// 1,454ms, $0.20/$0.50
|
|
3258
|
+
fallback: ["xai/grok-4-fast-reasoning", "deepseek/deepseek-reasoner"]
|
|
3245
3259
|
}
|
|
3246
3260
|
},
|
|
3247
3261
|
// Premium tier configs - best quality (blockrun/premium)
|
|
@@ -3287,57 +3301,71 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
3287
3301
|
},
|
|
3288
3302
|
REASONING: {
|
|
3289
3303
|
primary: "anthropic/claude-sonnet-4.6",
|
|
3290
|
-
// $3/$15 - best for reasoning/instructions
|
|
3304
|
+
// 2,110ms, $3/$15 - best for reasoning/instructions
|
|
3291
3305
|
fallback: [
|
|
3292
3306
|
"anthropic/claude-opus-4.6",
|
|
3293
|
-
|
|
3307
|
+
// 2,139ms
|
|
3308
|
+
"xai/grok-4-1-fast-reasoning",
|
|
3309
|
+
// 1,454ms, cheap fast reasoning
|
|
3294
3310
|
"openai/o4-mini",
|
|
3295
|
-
//
|
|
3296
|
-
"openai/o3"
|
|
3297
|
-
|
|
3311
|
+
// 2,328ms ($1.10/$4.40)
|
|
3312
|
+
"openai/o3"
|
|
3313
|
+
// 2,862ms
|
|
3298
3314
|
]
|
|
3299
3315
|
}
|
|
3300
3316
|
},
|
|
3301
3317
|
// Agentic tier configs - models that excel at multi-step autonomous tasks
|
|
3302
3318
|
agenticTiers: {
|
|
3303
3319
|
SIMPLE: {
|
|
3304
|
-
primary: "
|
|
3305
|
-
//
|
|
3320
|
+
primary: "xai/grok-4-1-fast-non-reasoning",
|
|
3321
|
+
// 1,244ms, $0.20/$0.50 — fast tool calling
|
|
3306
3322
|
fallback: [
|
|
3307
|
-
"
|
|
3308
|
-
|
|
3309
|
-
"
|
|
3323
|
+
"openai/gpt-4o-mini",
|
|
3324
|
+
// 2,764ms, $0.15/$0.60 - reliable tool compliance
|
|
3325
|
+
"moonshot/kimi-k2.5",
|
|
3326
|
+
// 1,646ms, strong tool use quality
|
|
3327
|
+
"anthropic/claude-haiku-4.5"
|
|
3328
|
+
// 2,305ms
|
|
3310
3329
|
]
|
|
3311
3330
|
},
|
|
3312
3331
|
MEDIUM: {
|
|
3313
3332
|
primary: "moonshot/kimi-k2.5",
|
|
3314
|
-
// $0.
|
|
3333
|
+
// 1,646ms, $0.60/$3.00 - strong tool use, proper function calls
|
|
3315
3334
|
fallback: [
|
|
3335
|
+
"xai/grok-4-1-fast-non-reasoning",
|
|
3336
|
+
// 1,244ms, fast fallback
|
|
3337
|
+
"openai/gpt-4o-mini",
|
|
3338
|
+
// 2,764ms, reliable tool calling
|
|
3316
3339
|
"anthropic/claude-haiku-4.5",
|
|
3317
|
-
|
|
3318
|
-
"
|
|
3340
|
+
// 2,305ms
|
|
3341
|
+
"deepseek/deepseek-chat"
|
|
3342
|
+
// 1,431ms
|
|
3319
3343
|
]
|
|
3320
3344
|
},
|
|
3321
3345
|
COMPLEX: {
|
|
3322
3346
|
primary: "anthropic/claude-sonnet-4.6",
|
|
3347
|
+
// 2,110ms — best agentic quality
|
|
3323
3348
|
fallback: [
|
|
3324
3349
|
"anthropic/claude-opus-4.6",
|
|
3325
|
-
//
|
|
3326
|
-
"openai/gpt-5.4",
|
|
3327
|
-
// Newest flagship
|
|
3350
|
+
// 2,139ms — top quality
|
|
3328
3351
|
"google/gemini-3.1-pro",
|
|
3329
|
-
//
|
|
3330
|
-
"
|
|
3331
|
-
|
|
3352
|
+
// 1,609ms
|
|
3353
|
+
"xai/grok-4-0709",
|
|
3354
|
+
// 1,348ms
|
|
3355
|
+
"openai/gpt-5.4"
|
|
3356
|
+
// 6,213ms — slow but highest quality fallback
|
|
3332
3357
|
]
|
|
3333
3358
|
},
|
|
3334
3359
|
REASONING: {
|
|
3335
3360
|
primary: "anthropic/claude-sonnet-4.6",
|
|
3336
|
-
//
|
|
3361
|
+
// 2,110ms — strong tool use + reasoning
|
|
3337
3362
|
fallback: [
|
|
3338
3363
|
"anthropic/claude-opus-4.6",
|
|
3364
|
+
// 2,139ms
|
|
3339
3365
|
"xai/grok-4-1-fast-reasoning",
|
|
3366
|
+
// 1,454ms
|
|
3340
3367
|
"deepseek/deepseek-reasoner"
|
|
3368
|
+
// 1,454ms
|
|
3341
3369
|
]
|
|
3342
3370
|
}
|
|
3343
3371
|
},
|
|
@@ -7415,17 +7443,9 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
7415
7443
|
hasTools
|
|
7416
7444
|
});
|
|
7417
7445
|
if (hasTools && routingDecision.tier === "SIMPLE") {
|
|
7418
|
-
|
|
7419
|
-
|
|
7420
|
-
|
|
7421
|
-
hasTools: false
|
|
7422
|
-
});
|
|
7423
|
-
if (simpleRoutingDecision.tier === "SIMPLE") {
|
|
7424
|
-
console.log(
|
|
7425
|
-
`[ClawRouter] SIMPLE+tools: using non-agentic model ${simpleRoutingDecision.model} (tools present but query is trivial)`
|
|
7426
|
-
);
|
|
7427
|
-
routingDecision = simpleRoutingDecision;
|
|
7428
|
-
}
|
|
7446
|
+
console.log(
|
|
7447
|
+
`[ClawRouter] SIMPLE+tools: keeping agentic model ${routingDecision.model} (tools need reliable function-call support)`
|
|
7448
|
+
);
|
|
7429
7449
|
}
|
|
7430
7450
|
if (existingSession) {
|
|
7431
7451
|
const tierRank = {
|
|
@@ -7742,7 +7762,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
7742
7762
|
} else {
|
|
7743
7763
|
modelsToTry = modelId ? [modelId] : [];
|
|
7744
7764
|
}
|
|
7745
|
-
if (!modelsToTry.includes(FREE_MODEL)) {
|
|
7765
|
+
if (!hasTools && !modelsToTry.includes(FREE_MODEL)) {
|
|
7746
7766
|
modelsToTry.push(FREE_MODEL);
|
|
7747
7767
|
}
|
|
7748
7768
|
let upstream;
|