@blockrun/clawrouter 0.12.63 → 0.12.65
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +55 -55
- package/dist/cli.js +50 -14
- package/dist/cli.js.map +1 -1
- package/dist/index.js +57 -16
- package/dist/index.js.map +1 -1
- package/docs/anthropic-cost-savings.md +90 -85
- package/docs/architecture.md +12 -12
- package/docs/{blog-openclaw-cost-overruns.md → clawrouter-cuts-llm-api-costs-500x.md} +27 -27
- package/docs/clawrouter-vs-openrouter-llm-routing-comparison.md +280 -0
- package/docs/configuration.md +2 -2
- package/docs/image-generation.md +39 -39
- package/docs/{blog-benchmark-2026-03.md → llm-router-benchmark-46-models-sub-1ms-routing.md} +61 -64
- package/docs/routing-profiles.md +6 -6
- package/docs/{technical-routing-2026-03.md → smart-llm-router-14-dimension-classifier.md} +29 -28
- package/docs/worker-network.md +438 -347
- package/package.json +3 -2
- package/scripts/reinstall.sh +31 -6
- package/scripts/update.sh +6 -1
- package/docs/assets/blockrun-248-day-cost-overrun-problem.png +0 -0
- package/docs/assets/blockrun-clawrouter-7-layer-token-compression-openclaw.png +0 -0
- package/docs/assets/blockrun-clawrouter-observation-compression-97-percent-token-savings.png +0 -0
- package/docs/assets/blockrun-clawrouter-openclaw-agentic-proxy-architecture.png +0 -0
- package/docs/assets/blockrun-clawrouter-openclaw-automatic-tier-routing-model-selection.png +0 -0
- package/docs/assets/blockrun-clawrouter-openclaw-error-classification-retry-storm-prevention.png +0 -0
- package/docs/assets/blockrun-clawrouter-openclaw-session-memory-journaling-vs-context-compounding.png +0 -0
- package/docs/assets/blockrun-clawrouter-vs-openclaw-standalone-comparison-production-safety.png +0 -0
- package/docs/assets/blockrun-clawrouter-x402-usdc-micropayment-wallet-budget-control.png +0 -0
- package/docs/assets/blockrun-openclaw-inference-layer-blind-spots.png +0 -0
- package/docs/plans/2026-02-03-smart-routing-design.md +0 -267
- package/docs/plans/2026-02-13-e2e-docker-deployment.md +0 -1260
- package/docs/plans/2026-02-28-worker-network.md +0 -947
- package/docs/plans/2026-03-18-error-classification.md +0 -574
- package/docs/plans/2026-03-19-exclude-models.md +0 -538
- package/docs/vs-openrouter.md +0 -157
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@blockrun/clawrouter",
|
|
3
|
-
"version": "0.12.63",
|
|
3
|
+
"version": "0.12.65",
|
|
4
4
|
"description": "Smart LLM router — save 85% on inference costs. 46+ models, one wallet, x402 micropayments.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"main": "dist/index.js",
|
|
@@ -21,7 +21,8 @@
|
|
|
21
21
|
},
|
|
22
22
|
"files": [
|
|
23
23
|
"dist",
|
|
24
|
-
"docs",
|
|
24
|
+
"docs/*.md",
|
|
25
|
+
"docs/*.png",
|
|
25
26
|
"scripts",
|
|
26
27
|
"skills",
|
|
27
28
|
"openclaw.plugin.json"
|
package/scripts/reinstall.sh
CHANGED
|
@@ -127,6 +127,11 @@ echo "→ Cleaning config entries..."
|
|
|
127
127
|
node -e "
|
|
128
128
|
const f = require('os').homedir() + '/.openclaw/openclaw.json';
|
|
129
129
|
const fs = require('fs');
|
|
130
|
+
function atomicWrite(filePath, data) {
|
|
131
|
+
const tmpPath = filePath + '.tmp.' + process.pid;
|
|
132
|
+
fs.writeFileSync(tmpPath, data);
|
|
133
|
+
fs.renameSync(tmpPath, filePath);
|
|
134
|
+
}
|
|
130
135
|
if (!fs.existsSync(f)) {
|
|
131
136
|
console.log(' No openclaw.json found, skipping');
|
|
132
137
|
process.exit(0);
|
|
@@ -167,7 +172,7 @@ if (c.agents?.defaults?.models) {
|
|
|
167
172
|
}
|
|
168
173
|
}
|
|
169
174
|
}
|
|
170
|
-
|
|
175
|
+
atomicWrite(f, JSON.stringify(c, null, 2));
|
|
171
176
|
console.log(' Config cleaned');
|
|
172
177
|
"
|
|
173
178
|
|
|
@@ -177,7 +182,7 @@ kill_port_processes 8402
|
|
|
177
182
|
|
|
178
183
|
# 3.1. Remove stale models.json so it gets regenerated with apiKey
|
|
179
184
|
echo "→ Cleaning models cache..."
|
|
180
|
-
rm -f ~/.openclaw/agents
|
|
185
|
+
rm -f ~/.openclaw/agents/*/agent/models.json 2>/dev/null || true
|
|
181
186
|
|
|
182
187
|
# 4. Inject auth profile (ensures blockrun provider is recognized)
|
|
183
188
|
echo "→ Injecting auth profile..."
|
|
@@ -187,6 +192,11 @@ const fs = require('fs');
|
|
|
187
192
|
const path = require('path');
|
|
188
193
|
const authDir = path.join(os.homedir(), '.openclaw', 'agents', 'main', 'agent');
|
|
189
194
|
const authPath = path.join(authDir, 'auth-profiles.json');
|
|
195
|
+
function atomicWrite(filePath, data) {
|
|
196
|
+
const tmpPath = filePath + '.tmp.' + process.pid;
|
|
197
|
+
fs.writeFileSync(tmpPath, data);
|
|
198
|
+
fs.renameSync(tmpPath, filePath);
|
|
199
|
+
}
|
|
190
200
|
|
|
191
201
|
// Create directory if needed
|
|
192
202
|
fs.mkdirSync(authDir, { recursive: true });
|
|
@@ -216,7 +226,7 @@ if (!store.profiles[profileKey]) {
|
|
|
216
226
|
provider: 'blockrun',
|
|
217
227
|
key: 'x402-proxy-handles-auth'
|
|
218
228
|
};
|
|
219
|
-
|
|
229
|
+
atomicWrite(authPath, JSON.stringify(store, null, 2));
|
|
220
230
|
console.log(' Auth profile created');
|
|
221
231
|
} else {
|
|
222
232
|
console.log(' Auth profile already exists');
|
|
@@ -230,6 +240,11 @@ const os = require('os');
|
|
|
230
240
|
const fs = require('fs');
|
|
231
241
|
const path = require('path');
|
|
232
242
|
const configPath = path.join(os.homedir(), '.openclaw', 'openclaw.json');
|
|
243
|
+
function atomicWrite(filePath, data) {
|
|
244
|
+
const tmpPath = filePath + '.tmp.' + process.pid;
|
|
245
|
+
fs.writeFileSync(tmpPath, data);
|
|
246
|
+
fs.renameSync(tmpPath, filePath);
|
|
247
|
+
}
|
|
233
248
|
|
|
234
249
|
if (fs.existsSync(configPath)) {
|
|
235
250
|
try {
|
|
@@ -244,7 +259,7 @@ if (fs.existsSync(configPath)) {
|
|
|
244
259
|
}
|
|
245
260
|
|
|
246
261
|
if (changed) {
|
|
247
|
-
|
|
262
|
+
atomicWrite(configPath, JSON.stringify(config, null, 2));
|
|
248
263
|
}
|
|
249
264
|
} catch (e) {
|
|
250
265
|
console.log(' Could not update config:', e.message);
|
|
@@ -288,6 +303,11 @@ node -e "
|
|
|
288
303
|
const os = require('os');
|
|
289
304
|
const fs = require('fs');
|
|
290
305
|
const path = require('path');
|
|
306
|
+
function atomicWrite(filePath, data) {
|
|
307
|
+
const tmpPath = filePath + '.tmp.' + process.pid;
|
|
308
|
+
fs.writeFileSync(tmpPath, data);
|
|
309
|
+
fs.renameSync(tmpPath, filePath);
|
|
310
|
+
}
|
|
291
311
|
|
|
292
312
|
const configPath = path.join(os.homedir(), '.openclaw', 'openclaw.json');
|
|
293
313
|
if (!fs.existsSync(configPath)) {
|
|
@@ -365,7 +385,7 @@ try {
|
|
|
365
385
|
console.log(' Allowlist already up to date');
|
|
366
386
|
}
|
|
367
387
|
if (changed) {
|
|
368
|
-
|
|
388
|
+
atomicWrite(configPath, JSON.stringify(config, null, 2));
|
|
369
389
|
}
|
|
370
390
|
} catch (err) {
|
|
371
391
|
console.log(' Could not update config:', err.message);
|
|
@@ -379,6 +399,11 @@ const os = require('os');
|
|
|
379
399
|
const fs = require('fs');
|
|
380
400
|
const path = require('path');
|
|
381
401
|
const configPath = path.join(os.homedir(), '.openclaw', 'openclaw.json');
|
|
402
|
+
function atomicWrite(filePath, data) {
|
|
403
|
+
const tmpPath = filePath + '.tmp.' + process.pid;
|
|
404
|
+
fs.writeFileSync(tmpPath, data);
|
|
405
|
+
fs.renameSync(tmpPath, filePath);
|
|
406
|
+
}
|
|
382
407
|
|
|
383
408
|
if (fs.existsSync(configPath)) {
|
|
384
409
|
try {
|
|
@@ -396,7 +421,7 @@ if (fs.existsSync(configPath)) {
|
|
|
396
421
|
console.log(' Plugin already in allow list');
|
|
397
422
|
}
|
|
398
423
|
|
|
399
|
-
|
|
424
|
+
atomicWrite(configPath, JSON.stringify(config, null, 2));
|
|
400
425
|
} catch (e) {
|
|
401
426
|
console.log(' Could not update config:', e.message);
|
|
402
427
|
}
|
package/scripts/update.sh
CHANGED
|
@@ -218,6 +218,11 @@ const fs = require('fs');
|
|
|
218
218
|
const path = require('path');
|
|
219
219
|
const authDir = path.join(os.homedir(), '.openclaw', 'agents', 'main', 'agent');
|
|
220
220
|
const authPath = path.join(authDir, 'auth-profiles.json');
|
|
221
|
+
function atomicWrite(filePath, data) {
|
|
222
|
+
const tmpPath = filePath + '.tmp.' + process.pid;
|
|
223
|
+
fs.writeFileSync(tmpPath, data);
|
|
224
|
+
fs.renameSync(tmpPath, filePath);
|
|
225
|
+
}
|
|
221
226
|
|
|
222
227
|
fs.mkdirSync(authDir, { recursive: true });
|
|
223
228
|
|
|
@@ -232,7 +237,7 @@ if (fs.existsSync(authPath)) {
|
|
|
232
237
|
const profileKey = 'blockrun:default';
|
|
233
238
|
if (!store.profiles[profileKey]) {
|
|
234
239
|
store.profiles[profileKey] = { type: 'api_key', provider: 'blockrun', key: 'x402-proxy-handles-auth' };
|
|
235
|
-
|
|
240
|
+
atomicWrite(authPath, JSON.stringify(store, null, 2));
|
|
236
241
|
console.log(' Auth profile created');
|
|
237
242
|
} else {
|
|
238
243
|
console.log(' Auth profile already exists');
|
|
Binary file
|
|
Binary file
|
package/docs/assets/blockrun-clawrouter-observation-compression-97-percent-token-savings.png
DELETED
|
Binary file
|
|
Binary file
|
|
Binary file
|
package/docs/assets/blockrun-clawrouter-openclaw-error-classification-retry-storm-prevention.png
DELETED
|
Binary file
|
|
Binary file
|
package/docs/assets/blockrun-clawrouter-vs-openclaw-standalone-comparison-production-safety.png
DELETED
|
Binary file
|
|
Binary file
|
|
Binary file
|
|
@@ -1,267 +0,0 @@
|
|
|
1
|
-
# ClawRouter: Client-Side Smart Routing Design
|
|
2
|
-
|
|
3
|
-
> **Status: Implemented (v2)** — Weighted scoring engine shipped in [`src/router/`](../../src/router/). This document is the design record.
|
|
4
|
-
|
|
5
|
-
## Problem
|
|
6
|
-
|
|
7
|
-
Simple queries go to Claude Opus at $75/M output tokens when Gemini Flash could handle them at $0.60/M. No cost-aware model selection.
|
|
8
|
-
|
|
9
|
-
Phase 1 solved API key management (one wallet for 30+ models). Phase 2 solves cost optimization by routing queries to the cheapest capable model.
|
|
10
|
-
|
|
11
|
-
## Why Client-Side
|
|
12
|
-
|
|
13
|
-
Every existing smart router (OpenRouter, LiteLLM, etc.) runs server-side. The routing logic is proprietary — users can't see why a model was chosen or customize the rules.
|
|
14
|
-
|
|
15
|
-
BlockRun's structural advantage: **x402 per-model transparent pricing**. Each model has an independent price visible in the 402 response. This means the routing decision can live in the open-source plugin where it's inspectable, customizable, and auditable.
|
|
16
|
-
|
|
17
|
-
| | Server-side (OpenRouter) | Client-side (ClawRouter) |
|
|
18
|
-
| ------------- | ------------------------ | ------------------------------- |
|
|
19
|
-
| Routing logic | Proprietary black box | Open-source in plugin |
|
|
20
|
-
| Pricing | Bundled, opaque | Per-model, transparent via x402 |
|
|
21
|
-
| Customization | None | Operators edit config |
|
|
22
|
-
| Trust model | "Trust us" | "Read the code" |
|
|
23
|
-
|
|
24
|
-
## Research Summary
|
|
25
|
-
|
|
26
|
-
Analyzed 9 open-source smart routing implementations. Three classification approaches emerged:
|
|
27
|
-
|
|
28
|
-
1. **Pure heuristic** (keyword + length + regex) — Zero cost, < 1ms, but brittle
|
|
29
|
-
2. **Small LLM classifier** (DistilBERT, Granite 350M, 8B model) — Better accuracy, 20-500ms overhead
|
|
30
|
-
3. **Hybrid** (rules first, LLM only for ambiguous cases) — Best of both worlds
|
|
31
|
-
|
|
32
|
-
The hybrid approach (from octoroute, smart-router) handles 70-80% of requests via rules in < 1ms, and only sends ambiguous cases to a cheap LLM classifier. This is what we implemented.
|
|
33
|
-
|
|
34
|
-
## Architecture
|
|
35
|
-
|
|
36
|
-
```
|
|
37
|
-
OpenClaw Agent
|
|
38
|
-
|
|
|
39
|
-
v
|
|
40
|
-
┌─────────────────────────────────────────────────┐
|
|
41
|
-
│ ClawRouter (src/router/) │
|
|
42
|
-
│ │
|
|
43
|
-
│ ┌─────────────────────────────────────────────┐ │
|
|
44
|
-
│ │ Step 1: Weighted Scoring Engine (< 1ms) │ │
|
|
45
|
-
│ │ • 14 scoring dimensions, each [-1, 1] │ │
|
|
46
|
-
│ │ • Weighted sum → float score │ │
|
|
47
|
-
│ │ • Sigmoid confidence calibration │ │
|
|
48
|
-
│ │ • Returns: tier or null (ambiguous) │ │
|
|
49
|
-
│ └─────────────────────┬───────────────────────┘ │
|
|
50
|
-
│ | │
|
|
51
|
-
│ ┌─────────────┴──────────────┐ │
|
|
52
|
-
│ | | │
|
|
53
|
-
│ confident ambiguous │
|
|
54
|
-
│ (conf >= 0.70) (conf < 0.70) │
|
|
55
|
-
│ | | │
|
|
56
|
-
│ | ┌─────────────────────────┴────────┐ │
|
|
57
|
-
│ | │ Step 2: LLM Classifier (~200ms) │ │
|
|
58
|
-
│ | │ • Send to gemini-flash (cheapest)│ │
|
|
59
|
-
│ | │ • "Classify: SIMPLE/MEDIUM/..." │ │
|
|
60
|
-
│ | │ • Cache classification result │ │
|
|
61
|
-
│ | └─────────────────────────┬────────┘ │
|
|
62
|
-
│ | | │
|
|
63
|
-
│ └────────────┬───────────────┘ │
|
|
64
|
-
│ | │
|
|
65
|
-
│ ┌────────────────────┴────────────────────────┐ │
|
|
66
|
-
│ │ Step 3: Tier → Model Selection │ │
|
|
67
|
-
│ │ • Look up cheapest model for tier │ │
|
|
68
|
-
│ │ • Calculate cost estimate + savings │ │
|
|
69
|
-
│ └────────────────────┬────────────────────────┘ │
|
|
70
|
-
│ | │
|
|
71
|
-
│ ┌────────────────────┴────────────────────────┐ │
|
|
72
|
-
│ │ Step 4: RoutingDecision metadata │ │
|
|
73
|
-
│ │ { model, tier, confidence, reasoning } │ │
|
|
74
|
-
│ └────────────────────┬────────────────────────┘ │
|
|
75
|
-
│ | │
|
|
76
|
-
└───────────────────────┼─────────────────────────┘
|
|
77
|
-
|
|
|
78
|
-
v
|
|
79
|
-
BlockRun API (x402)
|
|
80
|
-
|
|
|
81
|
-
v
|
|
82
|
-
LLM Provider
|
|
83
|
-
```
|
|
84
|
-
|
|
85
|
-
## Classification Tiers
|
|
86
|
-
|
|
87
|
-
Four tiers. REASONING is distinct from COMPLEX because reasoning tasks need different models (o3, gemini-pro) than general complex tasks (claude-opus-4, gpt-4o).
|
|
88
|
-
|
|
89
|
-
| Tier | Description | Example Queries |
|
|
90
|
-
| ------------- | ------------------------------------------------ | ------------------------------------------------------------------ |
|
|
91
|
-
| **SIMPLE** | Short factual Q&A, translations, definitions | "What's the capital of France?", "Translate hello to Spanish" |
|
|
92
|
-
| **MEDIUM** | Summaries, explanations, moderate code | "Summarize this article", "Write a Python function to sort a list" |
|
|
93
|
-
| **COMPLEX** | Multi-step code, system design, creative writing | "Build a React component with tests", "Design a REST API" |
|
|
94
|
-
| **REASONING** | Proofs, multi-step logic, mathematical reasoning | "Prove this theorem", "Solve step by step", "Debug this algorithm" |
|
|
95
|
-
|
|
96
|
-
## Weighted Scoring Engine (v2)
|
|
97
|
-
|
|
98
|
-
Implemented in [`src/router/rules.ts`](../../src/router/rules.ts).
|
|
99
|
-
|
|
100
|
-
14 dimensions, each scored in [-1, 1] and multiplied by a learned weight:
|
|
101
|
-
|
|
102
|
-
| Dimension | Weight | Signal |
|
|
103
|
-
| -------------------- | ------ | ---------------------------------------- |
|
|
104
|
-
| Reasoning markers | 0.18 | "prove", "theorem", "step by step" |
|
|
105
|
-
| Code presence | 0.15 | "function", "async", "import", "```" |
|
|
106
|
-
| Simple indicators | 0.12 | "what is", "define", "translate" |
|
|
107
|
-
| Multi-step patterns | 0.12 | "first...then", "step 1", numbered lists |
|
|
108
|
-
| Technical terms | 0.10 | "algorithm", "kubernetes", "distributed" |
|
|
109
|
-
| Token count | 0.08 | short (<50) vs long (>500) |
|
|
110
|
-
| Creative markers | 0.05 | "story", "poem", "brainstorm" |
|
|
111
|
-
| Question complexity | 0.05 | 4+ question marks |
|
|
112
|
-
| Constraint count | 0.04 | "at most", "O(n)", "maximum" |
|
|
113
|
-
| Imperative verbs | 0.03 | "build", "create", "implement" |
|
|
114
|
-
| Output format | 0.03 | "json", "yaml", "schema" |
|
|
115
|
-
| Domain specificity | 0.02 | "quantum", "fpga", "genomics" |
|
|
116
|
-
| Reference complexity | 0.02 | "the docs", "the api", "above" |
|
|
117
|
-
| Negation complexity | 0.01 | "don't", "avoid", "without" |
|
|
118
|
-
|
|
119
|
-
Weighted score maps to a tier via configurable boundaries. Confidence is calibrated using a sigmoid function — distance from the nearest tier boundary determines how sure the classifier is.
|
|
120
|
-
|
|
121
|
-
### Tier Boundaries
|
|
122
|
-
|
|
123
|
-
```
|
|
124
|
-
Score < 0.00 → SIMPLE
|
|
125
|
-
Score 0.00-0.15 → MEDIUM
|
|
126
|
-
Score 0.15-0.25 → COMPLEX
|
|
127
|
-
Score > 0.25 → REASONING
|
|
128
|
-
```
|
|
129
|
-
|
|
130
|
-
### Sigmoid Confidence Calibration
|
|
131
|
-
|
|
132
|
-
```typescript
|
|
133
|
-
function calibrateConfidence(distance: number, steepness: number): number {
|
|
134
|
-
return 1 / (1 + Math.exp(-steepness * distance));
|
|
135
|
-
}
|
|
136
|
-
// steepness = 12 (tuned)
|
|
137
|
-
// distance = how far the score is from the nearest tier boundary
|
|
138
|
-
// Near boundary → confidence ~0.50 → triggers LLM fallback
|
|
139
|
-
// Far from boundary → confidence ~0.95+ → confident classification
|
|
140
|
-
```
|
|
141
|
-
|
|
142
|
-
### Special Case Overrides
|
|
143
|
-
|
|
144
|
-
| Condition | Override | Reason |
|
|
145
|
-
| --------------------------------------------- | ------------------------------------- | -------------------------------------- |
|
|
146
|
-
| 2+ reasoning markers | Force REASONING at >= 0.85 confidence | Reasoning markers are strong signals |
|
|
147
|
-
| Input > 100K tokens | Force COMPLEX tier | Large context = expensive regardless |
|
|
148
|
-
| System prompt contains "JSON" or "structured" | Minimum MEDIUM tier | Structured output needs capable models |
|
|
149
|
-
|
|
150
|
-
## LLM Classifier (Fallback)
|
|
151
|
-
|
|
152
|
-
Implemented in [`src/router/llm-classifier.ts`](../../src/router/llm-classifier.ts).
|
|
153
|
-
|
|
154
|
-
When weighted scoring confidence is below 0.70, sends a classification request to the cheapest available model.
|
|
155
|
-
|
|
156
|
-
### Implementation Details
|
|
157
|
-
|
|
158
|
-
- **Model**: `google/gemini-2.5-flash` ($0.15/$0.60 per M tokens)
|
|
159
|
-
- **Max tokens**: 10 (one word response)
|
|
160
|
-
- **Temperature**: 0 (deterministic)
|
|
161
|
-
- **Prompt truncation**: First 500 characters
|
|
162
|
-
- **Cost per classification**: ~$0.00003
|
|
163
|
-
- **Latency**: ~200-400ms
|
|
164
|
-
- **Parsing**: Word-boundary regex matching for SIMPLE/MEDIUM/COMPLEX/REASONING
|
|
165
|
-
- **Fallback on parse failure**: Default to MEDIUM
|
|
166
|
-
- **Cache**: In-memory Map, TTL 1 hour, prunes at 1000 entries
|
|
167
|
-
|
|
168
|
-
## Tier → Model Mapping
|
|
169
|
-
|
|
170
|
-
Implemented in [`src/router/selector.ts`](../../src/router/selector.ts) and [`src/router/config.ts`](../../src/router/config.ts).
|
|
171
|
-
|
|
172
|
-
| Tier | Primary Model | Cost (output per M) | Fallback Chain |
|
|
173
|
-
| ------------- | --------------------------- | ------------------- | ---------------------------------- |
|
|
174
|
-
| **SIMPLE** | `google/gemini-2.5-flash` | $0.60 | deepseek-chat → gpt-4o-mini |
|
|
175
|
-
| **MEDIUM** | `deepseek/deepseek-chat` | $0.42 | gemini-flash → gpt-4o-mini |
|
|
176
|
-
| **COMPLEX** | `anthropic/claude-opus-4.5` | $25.00 | gpt-4o → gemini-2.5-pro |
|
|
177
|
-
| **REASONING** | `openai/o3` | $8.00 | gemini-2.5-pro → claude-sonnet-4.6 |
|
|
178
|
-
|
|
179
|
-
### Cost Savings (vs Claude Opus at $75/M)
|
|
180
|
-
|
|
181
|
-
| Tier | % of Traffic | Output $/M | Savings |
|
|
182
|
-
| ---------------- | ------------ | ------------ | --------------- |
|
|
183
|
-
| SIMPLE | 40% | $0.60 | **99% cheaper** |
|
|
184
|
-
| MEDIUM | 30% | $0.42 | **99% cheaper** |
|
|
185
|
-
| COMPLEX | 20% | $25.00 | best quality |
|
|
186
|
-
| REASONING | 10% | $8.00 | **89% cheaper** |
|
|
187
|
-
| **Weighted avg** | | **$6.17/M** | **92% savings** |
|
|
188
|
-
|
|
189
|
-
## RoutingDecision Object
|
|
190
|
-
|
|
191
|
-
Defined in [`src/router/types.ts`](../../src/router/types.ts).
|
|
192
|
-
|
|
193
|
-
```typescript
|
|
194
|
-
type RoutingDecision = {
|
|
195
|
-
model: string; // "deepseek/deepseek-chat"
|
|
196
|
-
tier: Tier; // "MEDIUM"
|
|
197
|
-
confidence: number; // 0.82
|
|
198
|
-
method: "rules" | "llm"; // How the decision was made
|
|
199
|
-
reasoning: string; // "score=-0.200 | short (8 tokens), simple indicator (what is)"
|
|
200
|
-
costEstimate: number; // 0.0004
|
|
201
|
-
baselineCost: number; // 0.3073 (what Claude Opus would have cost)
|
|
202
|
-
savings: number; // 0.992 (0-1)
|
|
203
|
-
};
|
|
204
|
-
```
|
|
205
|
-
|
|
206
|
-
## E2E Test Results
|
|
207
|
-
|
|
208
|
-
19 tests, 0 failures. See [`test/e2e.ts`](../../test/e2e.ts).
|
|
209
|
-
|
|
210
|
-
```
|
|
211
|
-
═══ Rule-Based Classifier ═══
|
|
212
|
-
|
|
213
|
-
Simple queries:
|
|
214
|
-
✓ "What is the capital of France?" → SIMPLE (score=-0.200)
|
|
215
|
-
✓ "Hello" → SIMPLE (score=-0.200)
|
|
216
|
-
✓ "Define photosynthesis" → SIMPLE (score=-0.125)
|
|
217
|
-
✓ "Translate hello to Spanish" → SIMPLE (score=-0.200)
|
|
218
|
-
✓ "Yes or no: is the sky blue?" → SIMPLE (score=-0.200)
|
|
219
|
-
|
|
220
|
-
Complex queries (correctly deferred to classifier):
|
|
221
|
-
✓ Kanban board → AMBIGUOUS (score=0.090, conf=0.673)
|
|
222
|
-
✓ Distributed trading → AMBIGUOUS (score=0.127, conf=0.569)
|
|
223
|
-
|
|
224
|
-
Reasoning queries:
|
|
225
|
-
✓ "Prove sqrt(2) irrational" → REASONING (score=0.180, conf=0.973)
|
|
226
|
-
✓ "Derive time complexity" → REASONING (score=0.186, conf=0.973)
|
|
227
|
-
✓ "Chain of thought proof" → REASONING (score=0.180, conf=0.973)
|
|
228
|
-
|
|
229
|
-
═══ Full Router ═══
|
|
230
|
-
|
|
231
|
-
✓ Simple factual → google/gemini-2.5-flash (SIMPLE, rules) saved=99.2%
|
|
232
|
-
✓ Greeting → google/gemini-2.5-flash (SIMPLE, rules) saved=99.2%
|
|
233
|
-
✓ Math proof → openai/o3 (REASONING, rules) saved=89.3%
|
|
234
|
-
|
|
235
|
-
═══════════════════════════════════
|
|
236
|
-
19 passed, 0 failed
|
|
237
|
-
═══════════════════════════════════
|
|
238
|
-
```
|
|
239
|
-
|
|
240
|
-
## File Structure
|
|
241
|
-
|
|
242
|
-
```
|
|
243
|
-
src/
|
|
244
|
-
├── index.ts # Plugin entry — register() + activate()
|
|
245
|
-
├── provider.ts # Registers "blockrun" provider in OpenClaw
|
|
246
|
-
├── proxy.ts # Local HTTP proxy — routing + x402 payment
|
|
247
|
-
├── models.ts # 30+ model definitions with pricing
|
|
248
|
-
├── auth.ts # Wallet key resolution (env, config, prompt)
|
|
249
|
-
├── logger.ts # JSON lines usage logger
|
|
250
|
-
├── types.ts # OpenClaw plugin type definitions
|
|
251
|
-
└── router/
|
|
252
|
-
├── index.ts # route() entry point
|
|
253
|
-
├── rules.ts # Weighted classifier (14 dimensions, sigmoid confidence)
|
|
254
|
-
├── llm-classifier.ts # LLM fallback (gemini-flash, cached)
|
|
255
|
-
├── selector.ts # Tier → model selection + cost calculation
|
|
256
|
-
├── config.ts # Default routing configuration
|
|
257
|
-
└── types.ts # RoutingDecision, Tier, ScoringResult
|
|
258
|
-
```
|
|
259
|
-
|
|
260
|
-
## Not Implemented (Future)
|
|
261
|
-
|
|
262
|
-
- **KNN fallback** — Embedding-based classifier to replace LLM fallback (<5ms vs ~200ms)
|
|
263
|
-
- **Cascade routing** — Try cheaper model first, escalate on low quality (AutoMix-inspired)
|
|
264
|
-
- **Graceful fallback** — Auto-switch on rate limit or provider error using per-tier fallback chains
|
|
265
|
-
- **Spend controls** — Daily/monthly budgets, server-side enforcement
|
|
266
|
-
- **Quality feedback loop** — Learning from past routing decisions to improve accuracy
|
|
267
|
-
- **Conversation context** — Current design is per-message. Future: track conversation complexity over time
|