@blockrun/clawrouter 0.12.73 → 0.12.75
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -4
- package/dist/cli.js +279 -63
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +3 -1
- package/dist/index.js +279 -63
- package/dist/index.js.map +1 -1
- package/docs/clawrouter-vs-openrouter-llm-routing-comparison.md +2 -2
- package/docs/smart-llm-router-14-dimension-classifier.md +4 -4
- package/package.json +2 -2
- package/skills/clawrouter/SKILL.md +3 -3
package/README.md
CHANGED
|
@@ -32,7 +32,7 @@ Agents can only sign transactions.<br><br>
|
|
|
32
32
|
|
|
33
33
|
</div>
|
|
34
34
|
|
|
35
|
-
> **ClawRouter** is an open-source smart LLM router that reduces AI API costs by up to 92%. It analyzes each request across 15 dimensions and routes to the cheapest capable model in under 1ms, entirely locally. ClawRouter is the only LLM router built for autonomous AI agents — it uses wallet signatures for authentication (no API keys) and USDC micropayments via the x402 protocol (no credit cards).
|
|
35
|
+
> **ClawRouter** is an open-source smart LLM router that reduces AI API costs by up to 92%. It analyzes each request across 15 dimensions and routes to the cheapest capable model in under 1ms, entirely locally. ClawRouter is the only LLM router built for autonomous AI agents — it uses wallet signatures for authentication (no API keys) and USDC micropayments via the x402 protocol (no credit cards). 55+ models from OpenAI, Anthropic, Google, xAI, DeepSeek, and more. MIT licensed.
|
|
36
36
|
|
|
37
37
|
---
|
|
38
38
|
|
|
@@ -58,7 +58,7 @@ This is the stack that lets agents operate autonomously: **x402 + USDC + local r
|
|
|
58
58
|
|
|
59
59
|
| | OpenRouter | LiteLLM | Martian | Portkey | **ClawRouter** |
|
|
60
60
|
| ---------------- | ----------------- | ---------------- | ----------------- | ----------------- | ----------------------- |
|
|
61
|
-
| **Models** | 200+ | 100+ | Smart routing | Gateway | **
|
|
61
|
+
| **Models** | 200+ | 100+ | Smart routing | Gateway | **55+** |
|
|
62
62
|
| **Routing** | Manual selection | Manual selection | Smart (closed) | Observability | **Smart (open source)** |
|
|
63
63
|
| **Auth** | Account + API key | Your API keys | Account + API key | Account + API key | **Wallet signature** |
|
|
64
64
|
| **Payment** | Credit card | BYO keys | Credit card | $49-499/mo | **USDC per-request** |
|
|
@@ -161,7 +161,7 @@ Edit existing images with `/img2img`:
|
|
|
161
161
|
|
|
162
162
|
## Models & Pricing
|
|
163
163
|
|
|
164
|
-
|
|
164
|
+
55+ models across 9 providers, one wallet. **Starting at $0.0002/request.**
|
|
165
165
|
|
|
166
166
|
> **💡 "Cost per request"** = estimated cost for a typical chat message (~500 input + 500 output tokens).
|
|
167
167
|
|
|
@@ -394,7 +394,7 @@ npm test
|
|
|
394
394
|
|
|
395
395
|
**The LLM router built for autonomous agents**
|
|
396
396
|
|
|
397
|
-
You're here.
|
|
397
|
+
You're here. 55+ models, local smart routing, x402 USDC payments — the only stack that lets agents operate independently.
|
|
398
398
|
|
|
399
399
|
`curl -fsSL https://blockrun.ai/ClawRouter-update | bash`
|
|
400
400
|
|
package/dist/cli.js
CHANGED
|
@@ -39086,7 +39086,11 @@ var RulesStrategy = class {
|
|
|
39086
39086
|
let tierConfigs;
|
|
39087
39087
|
let profileSuffix;
|
|
39088
39088
|
let profile;
|
|
39089
|
-
if (routingProfile === "
|
|
39089
|
+
if (routingProfile === "free" && config.freeTiers) {
|
|
39090
|
+
tierConfigs = config.freeTiers;
|
|
39091
|
+
profileSuffix = " | free";
|
|
39092
|
+
profile = "free";
|
|
39093
|
+
} else if (routingProfile === "eco" && config.ecoTiers) {
|
|
39090
39094
|
tierConfigs = config.ecoTiers;
|
|
39091
39095
|
profileSuffix = " | eco";
|
|
39092
39096
|
profile = "eco";
|
|
@@ -40275,39 +40279,50 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
40275
40279
|
]
|
|
40276
40280
|
}
|
|
40277
40281
|
},
|
|
40278
|
-
// Eco tier configs - absolute cheapest (blockrun/eco)
|
|
40282
|
+
// Eco tier configs - absolute cheapest, free-first (blockrun/eco)
|
|
40279
40283
|
ecoTiers: {
|
|
40280
40284
|
SIMPLE: {
|
|
40281
40285
|
primary: "nvidia/gpt-oss-120b",
|
|
40282
|
-
//
|
|
40286
|
+
// FREE! $0.00/$0.00
|
|
40283
40287
|
fallback: [
|
|
40288
|
+
"nvidia/gpt-oss-20b",
|
|
40289
|
+
// FREE — smaller, faster
|
|
40284
40290
|
"google/gemini-3.1-flash-lite",
|
|
40285
40291
|
// $0.25/$1.50 — newest flash-lite
|
|
40286
40292
|
"openai/gpt-5.4-nano",
|
|
40287
40293
|
// $0.20/$1.25 — fast nano
|
|
40288
40294
|
"google/gemini-2.5-flash-lite",
|
|
40289
|
-
//
|
|
40295
|
+
// $0.10/$0.40
|
|
40290
40296
|
"xai/grok-4-fast-non-reasoning"
|
|
40291
|
-
//
|
|
40297
|
+
// $0.20/$0.50
|
|
40292
40298
|
]
|
|
40293
40299
|
},
|
|
40294
40300
|
MEDIUM: {
|
|
40295
|
-
primary: "
|
|
40296
|
-
//
|
|
40301
|
+
primary: "nvidia/deepseek-v3.2",
|
|
40302
|
+
// FREE — DeepSeek V3.2 quality at zero cost
|
|
40297
40303
|
fallback: [
|
|
40304
|
+
"nvidia/gpt-oss-120b",
|
|
40305
|
+
// FREE fallback
|
|
40306
|
+
"google/gemini-3.1-flash-lite",
|
|
40307
|
+
// $0.25/$1.50
|
|
40298
40308
|
"openai/gpt-5.4-nano",
|
|
40299
|
-
// $0.20/$1.25
|
|
40309
|
+
// $0.20/$1.25
|
|
40300
40310
|
"google/gemini-2.5-flash-lite",
|
|
40301
|
-
//
|
|
40311
|
+
// $0.10/$0.40
|
|
40302
40312
|
"xai/grok-4-fast-non-reasoning",
|
|
40303
|
-
"google/gemini-2.5-flash"
|
|
40304
|
-
"nvidia/gpt-oss-120b"
|
|
40313
|
+
"google/gemini-2.5-flash"
|
|
40305
40314
|
]
|
|
40306
40315
|
},
|
|
40307
40316
|
COMPLEX: {
|
|
40308
|
-
primary: "
|
|
40309
|
-
//
|
|
40317
|
+
primary: "nvidia/nemotron-ultra-253b",
|
|
40318
|
+
// FREE — 253B reasoning model
|
|
40310
40319
|
fallback: [
|
|
40320
|
+
"nvidia/mistral-large-3-675b",
|
|
40321
|
+
// FREE — 675B brute-force
|
|
40322
|
+
"nvidia/deepseek-v3.2",
|
|
40323
|
+
// FREE
|
|
40324
|
+
"google/gemini-3.1-flash-lite",
|
|
40325
|
+
// $0.25/$1.50
|
|
40311
40326
|
"google/gemini-2.5-flash-lite",
|
|
40312
40327
|
"xai/grok-4-0709",
|
|
40313
40328
|
"google/gemini-2.5-flash",
|
|
@@ -40316,8 +40331,13 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
40316
40331
|
},
|
|
40317
40332
|
REASONING: {
|
|
40318
40333
|
primary: "xai/grok-4-1-fast-reasoning",
|
|
40319
|
-
//
|
|
40320
|
-
fallback: [
|
|
40334
|
+
// $0.20/$0.50
|
|
40335
|
+
fallback: [
|
|
40336
|
+
"xai/grok-4-fast-reasoning",
|
|
40337
|
+
"nvidia/nemotron-ultra-253b",
|
|
40338
|
+
// FREE reasoning fallback
|
|
40339
|
+
"deepseek/deepseek-reasoner"
|
|
40340
|
+
]
|
|
40321
40341
|
}
|
|
40322
40342
|
},
|
|
40323
40343
|
// Premium tier configs - best quality (blockrun/premium)
|
|
@@ -40431,6 +40451,73 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
40431
40451
|
]
|
|
40432
40452
|
}
|
|
40433
40453
|
},
|
|
40454
|
+
// Free tier configs - NVIDIA free models, smart-routed by task type (blockrun/free)
|
|
40455
|
+
freeTiers: {
|
|
40456
|
+
SIMPLE: {
|
|
40457
|
+
primary: "nvidia/gpt-oss-20b",
|
|
40458
|
+
// Fastest: small 20B for simple tasks
|
|
40459
|
+
fallback: [
|
|
40460
|
+
"nvidia/gpt-oss-120b",
|
|
40461
|
+
// Solid general-purpose
|
|
40462
|
+
"nvidia/nemotron-super-49b",
|
|
40463
|
+
// Thinking mode
|
|
40464
|
+
"nvidia/llama-4-maverick",
|
|
40465
|
+
// MoE broad coverage
|
|
40466
|
+
"nvidia/glm-4.7"
|
|
40467
|
+
// Thinking mode
|
|
40468
|
+
]
|
|
40469
|
+
},
|
|
40470
|
+
MEDIUM: {
|
|
40471
|
+
primary: "nvidia/deepseek-v3.2",
|
|
40472
|
+
// DeepSeek V3.2 quality, zero cost
|
|
40473
|
+
fallback: [
|
|
40474
|
+
"nvidia/gpt-oss-120b",
|
|
40475
|
+
// Strong 120B general-purpose
|
|
40476
|
+
"nvidia/nemotron-super-49b",
|
|
40477
|
+
// Thinking mode
|
|
40478
|
+
"nvidia/mistral-large-3-675b",
|
|
40479
|
+
// Largest Mistral
|
|
40480
|
+
"nvidia/llama-4-maverick",
|
|
40481
|
+
// MoE breadth
|
|
40482
|
+
"nvidia/glm-4.7"
|
|
40483
|
+
// Thinking mode
|
|
40484
|
+
]
|
|
40485
|
+
},
|
|
40486
|
+
COMPLEX: {
|
|
40487
|
+
primary: "nvidia/nemotron-ultra-253b",
|
|
40488
|
+
// Strongest free: 253B reasoning
|
|
40489
|
+
fallback: [
|
|
40490
|
+
"nvidia/mistral-large-3-675b",
|
|
40491
|
+
// 675B massive params
|
|
40492
|
+
"nvidia/deepseek-v3.2",
|
|
40493
|
+
// V3.2 quality
|
|
40494
|
+
"nvidia/nemotron-3-super-120b",
|
|
40495
|
+
// Thinking mode MoE
|
|
40496
|
+
"nvidia/qwen3-coder-480b",
|
|
40497
|
+
// 480B MoE for code-heavy tasks
|
|
40498
|
+
"nvidia/devstral-2-123b",
|
|
40499
|
+
// Coding-focused
|
|
40500
|
+
"nvidia/gpt-oss-120b"
|
|
40501
|
+
// Last resort
|
|
40502
|
+
]
|
|
40503
|
+
},
|
|
40504
|
+
REASONING: {
|
|
40505
|
+
primary: "nvidia/nemotron-ultra-253b",
|
|
40506
|
+
// Best free reasoning: 253B
|
|
40507
|
+
fallback: [
|
|
40508
|
+
"nvidia/nemotron-3-super-120b",
|
|
40509
|
+
// Thinking mode MoE
|
|
40510
|
+
"nvidia/nemotron-super-49b",
|
|
40511
|
+
// Thinking mode
|
|
40512
|
+
"nvidia/deepseek-v3.2",
|
|
40513
|
+
// DeepSeek reasoning
|
|
40514
|
+
"nvidia/mistral-large-3-675b",
|
|
40515
|
+
// Brute-force params
|
|
40516
|
+
"nvidia/glm-4.7"
|
|
40517
|
+
// GLM thinking mode
|
|
40518
|
+
]
|
|
40519
|
+
}
|
|
40520
|
+
},
|
|
40434
40521
|
overrides: {
|
|
40435
40522
|
maxTokensForceComplex: 1e5,
|
|
40436
40523
|
structuredOutputMinTier: "MEDIUM",
|
|
@@ -40510,9 +40597,27 @@ var MODEL_ALIASES = {
|
|
|
40510
40597
|
// delisted 2026-03-12
|
|
40511
40598
|
"xai/grok-3-fast": "xai/grok-4-fast-reasoning",
|
|
40512
40599
|
// delisted (too expensive)
|
|
40513
|
-
// NVIDIA
|
|
40600
|
+
// NVIDIA — existing alias kept for backward compat
|
|
40514
40601
|
nvidia: "nvidia/gpt-oss-120b",
|
|
40515
40602
|
"gpt-120b": "nvidia/gpt-oss-120b",
|
|
40603
|
+
"gpt-20b": "nvidia/gpt-oss-20b",
|
|
40604
|
+
// Free model aliases — "-free" suffix for models with paid twins
|
|
40605
|
+
"deepseek-free": "nvidia/deepseek-v3.2",
|
|
40606
|
+
"mistral-free": "nvidia/mistral-large-3-675b",
|
|
40607
|
+
"glm-free": "nvidia/glm-4.7",
|
|
40608
|
+
"llama-free": "nvidia/llama-4-maverick",
|
|
40609
|
+
// Bare-name aliases for unique free models
|
|
40610
|
+
nemotron: "nvidia/nemotron-ultra-253b",
|
|
40611
|
+
"nemotron-ultra": "nvidia/nemotron-ultra-253b",
|
|
40612
|
+
"nemotron-253b": "nvidia/nemotron-ultra-253b",
|
|
40613
|
+
"nemotron-super": "nvidia/nemotron-super-49b",
|
|
40614
|
+
"nemotron-49b": "nvidia/nemotron-super-49b",
|
|
40615
|
+
"nemotron-120b": "nvidia/nemotron-3-super-120b",
|
|
40616
|
+
devstral: "nvidia/devstral-2-123b",
|
|
40617
|
+
"devstral-2": "nvidia/devstral-2-123b",
|
|
40618
|
+
"qwen-coder": "nvidia/qwen3-coder-480b",
|
|
40619
|
+
"qwen-coder-free": "nvidia/qwen3-coder-480b",
|
|
40620
|
+
maverick: "nvidia/llama-4-maverick",
|
|
40516
40621
|
// MiniMax
|
|
40517
40622
|
minimax: "minimax/minimax-m2.7",
|
|
40518
40623
|
"minimax-m2.7": "minimax/minimax-m2.7",
|
|
@@ -40559,11 +40664,11 @@ var BLOCKRUN_MODELS = [
|
|
|
40559
40664
|
},
|
|
40560
40665
|
{
|
|
40561
40666
|
id: "free",
|
|
40562
|
-
name: "Free (
|
|
40667
|
+
name: "Free (Smart Router - 11 NVIDIA Models)",
|
|
40563
40668
|
inputPrice: 0,
|
|
40564
40669
|
outputPrice: 0,
|
|
40565
|
-
contextWindow:
|
|
40566
|
-
maxOutput:
|
|
40670
|
+
contextWindow: 131072,
|
|
40671
|
+
maxOutput: 16384
|
|
40567
40672
|
},
|
|
40568
40673
|
{
|
|
40569
40674
|
id: "eco",
|
|
@@ -41068,18 +41173,116 @@ var BLOCKRUN_MODELS = [
|
|
|
41068
41173
|
agentic: true,
|
|
41069
41174
|
toolCalling: true
|
|
41070
41175
|
},
|
|
41071
|
-
// NVIDIA - Free
|
|
41176
|
+
// NVIDIA - Free models (hosted by NVIDIA, billingMode: "free" on server)
|
|
41177
|
+
// toolCalling intentionally omitted on all free models: structured function
|
|
41178
|
+
// calling support unverified. Excluded from tool-heavy routing paths.
|
|
41072
41179
|
{
|
|
41073
41180
|
id: "nvidia/gpt-oss-120b",
|
|
41074
|
-
name: "
|
|
41181
|
+
name: "[Free] GPT-OSS 120B",
|
|
41075
41182
|
version: "120b",
|
|
41076
41183
|
inputPrice: 0,
|
|
41077
41184
|
outputPrice: 0,
|
|
41078
41185
|
contextWindow: 128e3,
|
|
41079
41186
|
maxOutput: 16384
|
|
41080
|
-
// toolCalling intentionally omitted: free model, structured function
|
|
41081
|
-
// calling support unverified. Excluded from tool-heavy routing paths.
|
|
41082
41187
|
},
|
|
41188
|
+
{
|
|
41189
|
+
id: "nvidia/gpt-oss-20b",
|
|
41190
|
+
name: "[Free] GPT-OSS 20B",
|
|
41191
|
+
version: "20b",
|
|
41192
|
+
inputPrice: 0,
|
|
41193
|
+
outputPrice: 0,
|
|
41194
|
+
contextWindow: 128e3,
|
|
41195
|
+
maxOutput: 16384
|
|
41196
|
+
},
|
|
41197
|
+
{
|
|
41198
|
+
id: "nvidia/nemotron-ultra-253b",
|
|
41199
|
+
name: "[Free] Nemotron Ultra 253B",
|
|
41200
|
+
version: "253b",
|
|
41201
|
+
inputPrice: 0,
|
|
41202
|
+
outputPrice: 0,
|
|
41203
|
+
contextWindow: 131072,
|
|
41204
|
+
maxOutput: 16384,
|
|
41205
|
+
reasoning: true
|
|
41206
|
+
},
|
|
41207
|
+
{
|
|
41208
|
+
id: "nvidia/nemotron-3-super-120b",
|
|
41209
|
+
name: "[Free] Nemotron 3 Super 120B",
|
|
41210
|
+
version: "3-super-120b",
|
|
41211
|
+
inputPrice: 0,
|
|
41212
|
+
outputPrice: 0,
|
|
41213
|
+
contextWindow: 131072,
|
|
41214
|
+
maxOutput: 16384,
|
|
41215
|
+
reasoning: true
|
|
41216
|
+
},
|
|
41217
|
+
{
|
|
41218
|
+
id: "nvidia/nemotron-super-49b",
|
|
41219
|
+
name: "[Free] Nemotron Super 49B",
|
|
41220
|
+
version: "super-49b",
|
|
41221
|
+
inputPrice: 0,
|
|
41222
|
+
outputPrice: 0,
|
|
41223
|
+
contextWindow: 131072,
|
|
41224
|
+
maxOutput: 16384,
|
|
41225
|
+
reasoning: true
|
|
41226
|
+
},
|
|
41227
|
+
{
|
|
41228
|
+
id: "nvidia/deepseek-v3.2",
|
|
41229
|
+
name: "[Free] DeepSeek V3.2",
|
|
41230
|
+
version: "v3.2",
|
|
41231
|
+
inputPrice: 0,
|
|
41232
|
+
outputPrice: 0,
|
|
41233
|
+
contextWindow: 131072,
|
|
41234
|
+
maxOutput: 16384,
|
|
41235
|
+
reasoning: true
|
|
41236
|
+
},
|
|
41237
|
+
{
|
|
41238
|
+
id: "nvidia/mistral-large-3-675b",
|
|
41239
|
+
name: "[Free] Mistral Large 675B",
|
|
41240
|
+
version: "3-675b",
|
|
41241
|
+
inputPrice: 0,
|
|
41242
|
+
outputPrice: 0,
|
|
41243
|
+
contextWindow: 131072,
|
|
41244
|
+
maxOutput: 16384,
|
|
41245
|
+
reasoning: true
|
|
41246
|
+
},
|
|
41247
|
+
{
|
|
41248
|
+
id: "nvidia/qwen3-coder-480b",
|
|
41249
|
+
name: "[Free] Qwen3 Coder 480B",
|
|
41250
|
+
version: "480b",
|
|
41251
|
+
inputPrice: 0,
|
|
41252
|
+
outputPrice: 0,
|
|
41253
|
+
contextWindow: 131072,
|
|
41254
|
+
maxOutput: 16384
|
|
41255
|
+
},
|
|
41256
|
+
{
|
|
41257
|
+
id: "nvidia/devstral-2-123b",
|
|
41258
|
+
name: "[Free] Devstral 2 123B",
|
|
41259
|
+
version: "2-123b",
|
|
41260
|
+
inputPrice: 0,
|
|
41261
|
+
outputPrice: 0,
|
|
41262
|
+
contextWindow: 131072,
|
|
41263
|
+
maxOutput: 16384
|
|
41264
|
+
},
|
|
41265
|
+
{
|
|
41266
|
+
id: "nvidia/glm-4.7",
|
|
41267
|
+
name: "[Free] GLM-4.7",
|
|
41268
|
+
version: "4.7",
|
|
41269
|
+
inputPrice: 0,
|
|
41270
|
+
outputPrice: 0,
|
|
41271
|
+
contextWindow: 131072,
|
|
41272
|
+
maxOutput: 16384,
|
|
41273
|
+
reasoning: true
|
|
41274
|
+
},
|
|
41275
|
+
{
|
|
41276
|
+
id: "nvidia/llama-4-maverick",
|
|
41277
|
+
name: "[Free] Llama 4 Maverick",
|
|
41278
|
+
version: "4-maverick",
|
|
41279
|
+
inputPrice: 0,
|
|
41280
|
+
outputPrice: 0,
|
|
41281
|
+
contextWindow: 131072,
|
|
41282
|
+
maxOutput: 16384,
|
|
41283
|
+
reasoning: true
|
|
41284
|
+
},
|
|
41285
|
+
// NVIDIA - Paid models
|
|
41083
41286
|
{
|
|
41084
41287
|
id: "nvidia/kimi-k2.5",
|
|
41085
41288
|
name: "NVIDIA Kimi K2.5",
|
|
@@ -46656,11 +46859,36 @@ var ROUTING_PROFILES = /* @__PURE__ */ new Set([
|
|
|
46656
46859
|
"premium"
|
|
46657
46860
|
]);
|
|
46658
46861
|
var FREE_MODEL = "nvidia/gpt-oss-120b";
|
|
46862
|
+
var FREE_MODELS = /* @__PURE__ */ new Set([
|
|
46863
|
+
"nvidia/gpt-oss-120b",
|
|
46864
|
+
"nvidia/gpt-oss-20b",
|
|
46865
|
+
"nvidia/nemotron-ultra-253b",
|
|
46866
|
+
"nvidia/nemotron-3-super-120b",
|
|
46867
|
+
"nvidia/nemotron-super-49b",
|
|
46868
|
+
"nvidia/deepseek-v3.2",
|
|
46869
|
+
"nvidia/mistral-large-3-675b",
|
|
46870
|
+
"nvidia/qwen3-coder-480b",
|
|
46871
|
+
"nvidia/devstral-2-123b",
|
|
46872
|
+
"nvidia/glm-4.7",
|
|
46873
|
+
"nvidia/llama-4-maverick"
|
|
46874
|
+
]);
|
|
46659
46875
|
var FREE_TIER_CONFIGS = {
|
|
46660
|
-
SIMPLE: {
|
|
46661
|
-
|
|
46662
|
-
|
|
46663
|
-
|
|
46876
|
+
SIMPLE: {
|
|
46877
|
+
primary: "nvidia/gpt-oss-20b",
|
|
46878
|
+
fallback: ["nvidia/gpt-oss-120b", "nvidia/nemotron-super-49b"]
|
|
46879
|
+
},
|
|
46880
|
+
MEDIUM: {
|
|
46881
|
+
primary: "nvidia/deepseek-v3.2",
|
|
46882
|
+
fallback: ["nvidia/gpt-oss-120b", "nvidia/nemotron-super-49b"]
|
|
46883
|
+
},
|
|
46884
|
+
COMPLEX: {
|
|
46885
|
+
primary: "nvidia/nemotron-ultra-253b",
|
|
46886
|
+
fallback: ["nvidia/mistral-large-3-675b", "nvidia/deepseek-v3.2", "nvidia/gpt-oss-120b"]
|
|
46887
|
+
},
|
|
46888
|
+
REASONING: {
|
|
46889
|
+
primary: "nvidia/nemotron-ultra-253b",
|
|
46890
|
+
fallback: ["nvidia/nemotron-3-super-120b", "nvidia/deepseek-v3.2"]
|
|
46891
|
+
}
|
|
46664
46892
|
};
|
|
46665
46893
|
var freeRequestCount = 0;
|
|
46666
46894
|
var MAX_MESSAGES = 200;
|
|
@@ -48672,30 +48900,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48672
48900
|
modelId = resolvedModel;
|
|
48673
48901
|
}
|
|
48674
48902
|
if (isRoutingProfile) {
|
|
48675
|
-
|
|
48676
|
-
const freeModel = "nvidia/gpt-oss-120b";
|
|
48677
|
-
console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
|
|
48678
|
-
parsed.model = freeModel;
|
|
48679
|
-
modelId = freeModel;
|
|
48680
|
-
bodyModified = true;
|
|
48681
|
-
freeRequestCount++;
|
|
48682
|
-
if (freeRequestCount % 5 === 0) {
|
|
48683
|
-
balanceFallbackNotice = `> **\u{1F4A1} Tip:** Not satisfied with free model quality? Fund your wallet to unlock deepseek-chat, gemini-flash, and 30+ premium models \u2014 starting at $0.001/request.
|
|
48684
|
-
|
|
48685
|
-
`;
|
|
48686
|
-
}
|
|
48687
|
-
routingDecision = {
|
|
48688
|
-
model: freeModel,
|
|
48689
|
-
tier: "SIMPLE",
|
|
48690
|
-
confidence: 1,
|
|
48691
|
-
method: "rules",
|
|
48692
|
-
reasoning: "free profile",
|
|
48693
|
-
costEstimate: 0,
|
|
48694
|
-
baselineCost: 0,
|
|
48695
|
-
savings: 1,
|
|
48696
|
-
tierConfigs: FREE_TIER_CONFIGS
|
|
48697
|
-
};
|
|
48698
|
-
} else {
|
|
48903
|
+
{
|
|
48699
48904
|
effectiveSessionId = getSessionId(req.headers) ?? deriveSessionId(parsedMessages);
|
|
48700
48905
|
const existingSession = effectiveSessionId ? sessionStore.getSession(effectiveSessionId) : void 0;
|
|
48701
48906
|
const rawPrompt = lastUserMsg?.content;
|
|
@@ -48811,6 +49016,14 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48811
49016
|
}
|
|
48812
49017
|
}
|
|
48813
49018
|
options.onRouted?.(routingDecision);
|
|
49019
|
+
if (routingProfile === "free") {
|
|
49020
|
+
freeRequestCount++;
|
|
49021
|
+
if (freeRequestCount % 5 === 0) {
|
|
49022
|
+
balanceFallbackNotice = `> **\u{1F4A1} Tip:** Free tier gives you 11 NVIDIA models. Want Claude, GPT-5, or Gemini? Fund your wallet \u2014 starting at $0.001/request.
|
|
49023
|
+
|
|
49024
|
+
`;
|
|
49025
|
+
}
|
|
49026
|
+
}
|
|
48814
49027
|
}
|
|
48815
49028
|
}
|
|
48816
49029
|
if (!effectiveSessionId && parsedMessages.length > 0) {
|
|
@@ -48906,7 +49119,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48906
49119
|
}
|
|
48907
49120
|
deduplicator.markInflight(dedupKey);
|
|
48908
49121
|
let estimatedCostMicros;
|
|
48909
|
-
let isFreeModel = modelId
|
|
49122
|
+
let isFreeModel = FREE_MODELS.has(modelId ?? "");
|
|
48910
49123
|
if (modelId && !options.skipBalanceCheck && !isFreeModel) {
|
|
48911
49124
|
const estimated = estimateAmount(modelId, body.length, maxTokens);
|
|
48912
49125
|
if (estimated) {
|
|
@@ -48915,13 +49128,16 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48915
49128
|
const sufficiency = await balanceMonitor.checkSufficient(bufferedCostMicros);
|
|
48916
49129
|
if (sufficiency.info.isEmpty || !sufficiency.sufficient) {
|
|
48917
49130
|
const originalModel = modelId;
|
|
49131
|
+
const fallbackTier = routingDecision?.tier ?? "SIMPLE";
|
|
49132
|
+
const freeTierConfig = FREE_TIER_CONFIGS[fallbackTier];
|
|
49133
|
+
const freeModel = freeTierConfig.primary;
|
|
48918
49134
|
console.log(
|
|
48919
|
-
`[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${
|
|
49135
|
+
`[ClawRouter] Wallet ${sufficiency.info.isEmpty ? "empty" : "insufficient"} (${sufficiency.info.balanceUSD}), falling back to free model: ${freeModel} (tier: ${fallbackTier}, requested: ${originalModel})`
|
|
48920
49136
|
);
|
|
48921
|
-
modelId =
|
|
49137
|
+
modelId = freeModel;
|
|
48922
49138
|
isFreeModel = true;
|
|
48923
49139
|
const parsed = JSON.parse(body.toString());
|
|
48924
|
-
parsed.model =
|
|
49140
|
+
parsed.model = freeModel;
|
|
48925
49141
|
body = Buffer.from(JSON.stringify(parsed));
|
|
48926
49142
|
balanceFallbackNotice = sufficiency.info.isEmpty ? `> **\u26A0\uFE0F Wallet empty** \u2014 using free model. Fund your wallet to use ${originalModel}.
|
|
48927
49143
|
|
|
@@ -48930,7 +49146,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48930
49146
|
`;
|
|
48931
49147
|
freeRequestCount++;
|
|
48932
49148
|
if (freeRequestCount % 5 === 0) {
|
|
48933
|
-
balanceFallbackNotice = `> **\u{1F4A1} Tip:**
|
|
49149
|
+
balanceFallbackNotice = `> **\u{1F4A1} Tip:** Free tier gives you 11 NVIDIA models. Want Claude, GPT-5, or Gemini? Fund your wallet \u2014 starting at $0.001/request.
|
|
48934
49150
|
|
|
48935
49151
|
`;
|
|
48936
49152
|
}
|
|
@@ -48978,7 +49194,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
48978
49194
|
const isComplexOrAgentic = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING";
|
|
48979
49195
|
if (isComplexOrAgentic) {
|
|
48980
49196
|
const canAffordAnyNonFreeModel = BLOCKRUN_MODELS.some((m) => {
|
|
48981
|
-
if (m.id
|
|
49197
|
+
if (FREE_MODELS.has(m.id)) return false;
|
|
48982
49198
|
const est = estimateAmount(m.id, body.length, maxTokens);
|
|
48983
49199
|
return est !== void 0 && Number(est) / 1e6 <= remainingUsd;
|
|
48984
49200
|
});
|
|
@@ -49003,7 +49219,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49003
49219
|
deduplicator.removeInflight(dedupKey);
|
|
49004
49220
|
return;
|
|
49005
49221
|
}
|
|
49006
|
-
} else if (!routingDecision && modelId && modelId
|
|
49222
|
+
} else if (!routingDecision && modelId && !FREE_MODELS.has(modelId)) {
|
|
49007
49223
|
const est = estimateAmount(modelId, body.length, maxTokens);
|
|
49008
49224
|
const canAfford = !est || Number(est) / 1e6 <= remainingUsd;
|
|
49009
49225
|
if (!canAfford) {
|
|
@@ -49144,14 +49360,14 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
49144
49360
|
const remainingUsd = options.maxCostPerRunUsd - runCostUsd;
|
|
49145
49361
|
const beforeFilter = [...modelsToTry];
|
|
49146
49362
|
modelsToTry = modelsToTry.filter((m) => {
|
|
49147
|
-
if (m
|
|
49363
|
+
if (FREE_MODELS.has(m)) return true;
|
|
49148
49364
|
const est = estimateAmount(m, body.length, maxTokens);
|
|
49149
49365
|
if (!est) return true;
|
|
49150
49366
|
return Number(est) / 1e6 <= remainingUsd;
|
|
49151
49367
|
});
|
|
49152
49368
|
const excluded = beforeFilter.filter((m) => !modelsToTry.includes(m));
|
|
49153
49369
|
const isComplexOrAgenticFilter = hasTools || routingDecision?.tier === "COMPLEX" || routingDecision?.tier === "REASONING" || routingDecision === void 0;
|
|
49154
|
-
const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => m
|
|
49370
|
+
const filteredToFreeOnly = modelsToTry.length > 0 && modelsToTry.every((m) => FREE_MODELS.has(m));
|
|
49155
49371
|
if (isComplexOrAgenticFilter && filteredToFreeOnly) {
|
|
49156
49372
|
const budgetSummary = `$${Math.max(0, remainingUsd).toFixed(4)} remaining (limit: $${options.maxCostPerRunUsd})`;
|
|
49157
49373
|
console.log(
|
|
@@ -49189,7 +49405,7 @@ data: [DONE]
|
|
|
49189
49405
|
`[ClawRouter] Budget downgrade (${budgetSummary}): excluded ${excluded.join(", ")}`
|
|
49190
49406
|
);
|
|
49191
49407
|
const fromModel = excluded[0];
|
|
49192
|
-
const usingFree = modelsToTry.length === 1 && modelsToTry[0]
|
|
49408
|
+
const usingFree = modelsToTry.length === 1 && FREE_MODELS.has(modelsToTry[0]);
|
|
49193
49409
|
if (usingFree) {
|
|
49194
49410
|
budgetDowngradeNotice = `> **\u26A0\uFE0F Budget cap reached** ($${runCostUsd.toFixed(4)}/$${options.maxCostPerRunUsd}) \u2014 downgraded to free model. Quality may be reduced. Increase \`maxCostPerRun\` to continue with ${fromModel}.
|
|
49195
49411
|
|
|
@@ -49243,7 +49459,7 @@ data: [DONE]
|
|
|
49243
49459
|
upstream = result.response;
|
|
49244
49460
|
actualModelUsed = tryModel;
|
|
49245
49461
|
console.log(`[ClawRouter] Success with model: ${tryModel}`);
|
|
49246
|
-
if (options.maxCostPerRunUsd && effectiveSessionId && tryModel
|
|
49462
|
+
if (options.maxCostPerRunUsd && effectiveSessionId && !FREE_MODELS.has(tryModel)) {
|
|
49247
49463
|
const costEst = estimateAmount(tryModel, body.length, maxTokens);
|
|
49248
49464
|
if (costEst) {
|
|
49249
49465
|
sessionStore.addSessionCost(effectiveSessionId, BigInt(costEst));
|
|
@@ -49263,7 +49479,7 @@ data: [DONE]
|
|
|
49263
49479
|
const isPaymentErr = /payment.*verification.*failed|payment.*settlement.*failed|insufficient.*funds|transaction_simulation_failed/i.test(
|
|
49264
49480
|
result.errorBody || ""
|
|
49265
49481
|
);
|
|
49266
|
-
if (isPaymentErr && tryModel
|
|
49482
|
+
if (isPaymentErr && !FREE_MODELS.has(tryModel) && !isLastAttempt) {
|
|
49267
49483
|
failedAttempts.push({
|
|
49268
49484
|
...failedAttempts[failedAttempts.length - 1],
|
|
49269
49485
|
reason: "payment_error"
|