@blockrun/clawrouter 0.12.63 → 0.12.65

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (34)
  1. package/README.md +55 -55
  2. package/dist/cli.js +50 -14
  3. package/dist/cli.js.map +1 -1
  4. package/dist/index.js +57 -16
  5. package/dist/index.js.map +1 -1
  6. package/docs/anthropic-cost-savings.md +90 -85
  7. package/docs/architecture.md +12 -12
  8. package/docs/{blog-openclaw-cost-overruns.md → clawrouter-cuts-llm-api-costs-500x.md} +27 -27
  9. package/docs/clawrouter-vs-openrouter-llm-routing-comparison.md +280 -0
  10. package/docs/configuration.md +2 -2
  11. package/docs/image-generation.md +39 -39
  12. package/docs/{blog-benchmark-2026-03.md → llm-router-benchmark-46-models-sub-1ms-routing.md} +61 -64
  13. package/docs/routing-profiles.md +6 -6
  14. package/docs/{technical-routing-2026-03.md → smart-llm-router-14-dimension-classifier.md} +29 -28
  15. package/docs/worker-network.md +438 -347
  16. package/package.json +3 -2
  17. package/scripts/reinstall.sh +31 -6
  18. package/scripts/update.sh +6 -1
  19. package/docs/assets/blockrun-248-day-cost-overrun-problem.png +0 -0
  20. package/docs/assets/blockrun-clawrouter-7-layer-token-compression-openclaw.png +0 -0
  21. package/docs/assets/blockrun-clawrouter-observation-compression-97-percent-token-savings.png +0 -0
  22. package/docs/assets/blockrun-clawrouter-openclaw-agentic-proxy-architecture.png +0 -0
  23. package/docs/assets/blockrun-clawrouter-openclaw-automatic-tier-routing-model-selection.png +0 -0
  24. package/docs/assets/blockrun-clawrouter-openclaw-error-classification-retry-storm-prevention.png +0 -0
  25. package/docs/assets/blockrun-clawrouter-openclaw-session-memory-journaling-vs-context-compounding.png +0 -0
  26. package/docs/assets/blockrun-clawrouter-vs-openclaw-standalone-comparison-production-safety.png +0 -0
  27. package/docs/assets/blockrun-clawrouter-x402-usdc-micropayment-wallet-budget-control.png +0 -0
  28. package/docs/assets/blockrun-openclaw-inference-layer-blind-spots.png +0 -0
  29. package/docs/plans/2026-02-03-smart-routing-design.md +0 -267
  30. package/docs/plans/2026-02-13-e2e-docker-deployment.md +0 -1260
  31. package/docs/plans/2026-02-28-worker-network.md +0 -947
  32. package/docs/plans/2026-03-18-error-classification.md +0 -574
  33. package/docs/plans/2026-03-19-exclude-models.md +0 -538
  34. package/docs/vs-openrouter.md +0 -157
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@blockrun/clawrouter",
3
- "version": "0.12.63",
3
+ "version": "0.12.65",
4
4
  "description": "Smart LLM router — save 85% on inference costs. 46+ models, one wallet, x402 micropayments.",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -21,7 +21,8 @@
21
21
  },
22
22
  "files": [
23
23
  "dist",
24
- "docs",
24
+ "docs/*.md",
25
+ "docs/*.png",
25
26
  "scripts",
26
27
  "skills",
27
28
  "openclaw.plugin.json"
package/scripts/reinstall.sh CHANGED
@@ -127,6 +127,11 @@ echo "→ Cleaning config entries..."
127
127
  node -e "
128
128
  const f = require('os').homedir() + '/.openclaw/openclaw.json';
129
129
  const fs = require('fs');
130
+ function atomicWrite(filePath, data) {
131
+ const tmpPath = filePath + '.tmp.' + process.pid;
132
+ fs.writeFileSync(tmpPath, data);
133
+ fs.renameSync(tmpPath, filePath);
134
+ }
130
135
  if (!fs.existsSync(f)) {
131
136
  console.log(' No openclaw.json found, skipping');
132
137
  process.exit(0);
@@ -167,7 +172,7 @@ if (c.agents?.defaults?.models) {
167
172
  }
168
173
  }
169
174
  }
170
- fs.writeFileSync(f, JSON.stringify(c, null, 2));
175
+ atomicWrite(f, JSON.stringify(c, null, 2));
171
176
  console.log(' Config cleaned');
172
177
  "
173
178
 
@@ -177,7 +182,7 @@ kill_port_processes 8402
177
182
 
178
183
  # 3.1. Remove stale models.json so it gets regenerated with apiKey
179
184
  echo "→ Cleaning models cache..."
180
- rm -f ~/.openclaw/agents/main/agent/models.json 2>/dev/null || true
185
+ rm -f ~/.openclaw/agents/*/agent/models.json 2>/dev/null || true
181
186
 
182
187
  # 4. Inject auth profile (ensures blockrun provider is recognized)
183
188
  echo "→ Injecting auth profile..."
@@ -187,6 +192,11 @@ const fs = require('fs');
187
192
  const path = require('path');
188
193
  const authDir = path.join(os.homedir(), '.openclaw', 'agents', 'main', 'agent');
189
194
  const authPath = path.join(authDir, 'auth-profiles.json');
195
+ function atomicWrite(filePath, data) {
196
+ const tmpPath = filePath + '.tmp.' + process.pid;
197
+ fs.writeFileSync(tmpPath, data);
198
+ fs.renameSync(tmpPath, filePath);
199
+ }
190
200
 
191
201
  // Create directory if needed
192
202
  fs.mkdirSync(authDir, { recursive: true });
@@ -216,7 +226,7 @@ if (!store.profiles[profileKey]) {
216
226
  provider: 'blockrun',
217
227
  key: 'x402-proxy-handles-auth'
218
228
  };
219
- fs.writeFileSync(authPath, JSON.stringify(store, null, 2));
229
+ atomicWrite(authPath, JSON.stringify(store, null, 2));
220
230
  console.log(' Auth profile created');
221
231
  } else {
222
232
  console.log(' Auth profile already exists');
@@ -230,6 +240,11 @@ const os = require('os');
230
240
  const fs = require('fs');
231
241
  const path = require('path');
232
242
  const configPath = path.join(os.homedir(), '.openclaw', 'openclaw.json');
243
+ function atomicWrite(filePath, data) {
244
+ const tmpPath = filePath + '.tmp.' + process.pid;
245
+ fs.writeFileSync(tmpPath, data);
246
+ fs.renameSync(tmpPath, filePath);
247
+ }
233
248
 
234
249
  if (fs.existsSync(configPath)) {
235
250
  try {
@@ -244,7 +259,7 @@ if (fs.existsSync(configPath)) {
244
259
  }
245
260
 
246
261
  if (changed) {
247
- fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
262
+ atomicWrite(configPath, JSON.stringify(config, null, 2));
248
263
  }
249
264
  } catch (e) {
250
265
  console.log(' Could not update config:', e.message);
@@ -288,6 +303,11 @@ node -e "
288
303
  const os = require('os');
289
304
  const fs = require('fs');
290
305
  const path = require('path');
306
+ function atomicWrite(filePath, data) {
307
+ const tmpPath = filePath + '.tmp.' + process.pid;
308
+ fs.writeFileSync(tmpPath, data);
309
+ fs.renameSync(tmpPath, filePath);
310
+ }
291
311
 
292
312
  const configPath = path.join(os.homedir(), '.openclaw', 'openclaw.json');
293
313
  if (!fs.existsSync(configPath)) {
@@ -365,7 +385,7 @@ try {
365
385
  console.log(' Allowlist already up to date');
366
386
  }
367
387
  if (changed) {
368
- fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
388
+ atomicWrite(configPath, JSON.stringify(config, null, 2));
369
389
  }
370
390
  } catch (err) {
371
391
  console.log(' Could not update config:', err.message);
@@ -379,6 +399,11 @@ const os = require('os');
379
399
  const fs = require('fs');
380
400
  const path = require('path');
381
401
  const configPath = path.join(os.homedir(), '.openclaw', 'openclaw.json');
402
+ function atomicWrite(filePath, data) {
403
+ const tmpPath = filePath + '.tmp.' + process.pid;
404
+ fs.writeFileSync(tmpPath, data);
405
+ fs.renameSync(tmpPath, filePath);
406
+ }
382
407
 
383
408
  if (fs.existsSync(configPath)) {
384
409
  try {
@@ -396,7 +421,7 @@ if (fs.existsSync(configPath)) {
396
421
  console.log(' Plugin already in allow list');
397
422
  }
398
423
 
399
- fs.writeFileSync(configPath, JSON.stringify(config, null, 2));
424
+ atomicWrite(configPath, JSON.stringify(config, null, 2));
400
425
  } catch (e) {
401
426
  console.log(' Could not update config:', e.message);
402
427
  }
package/scripts/update.sh CHANGED
@@ -218,6 +218,11 @@ const fs = require('fs');
218
218
  const path = require('path');
219
219
  const authDir = path.join(os.homedir(), '.openclaw', 'agents', 'main', 'agent');
220
220
  const authPath = path.join(authDir, 'auth-profiles.json');
221
+ function atomicWrite(filePath, data) {
222
+ const tmpPath = filePath + '.tmp.' + process.pid;
223
+ fs.writeFileSync(tmpPath, data);
224
+ fs.renameSync(tmpPath, filePath);
225
+ }
221
226
 
222
227
  fs.mkdirSync(authDir, { recursive: true });
223
228
 
@@ -232,7 +237,7 @@ if (fs.existsSync(authPath)) {
232
237
  const profileKey = 'blockrun:default';
233
238
  if (!store.profiles[profileKey]) {
234
239
  store.profiles[profileKey] = { type: 'api_key', provider: 'blockrun', key: 'x402-proxy-handles-auth' };
235
- fs.writeFileSync(authPath, JSON.stringify(store, null, 2));
240
+ atomicWrite(authPath, JSON.stringify(store, null, 2));
236
241
  console.log(' Auth profile created');
237
242
  } else {
238
243
  console.log(' Auth profile already exists');
package/docs/plans/2026-02-03-smart-routing-design.md DELETED
@@ -1,267 +0,0 @@
1
- # ClawRouter: Client-Side Smart Routing Design
2
-
3
- > **Status: Implemented (v2)** — Weighted scoring engine shipped in [`src/router/`](../../src/router/). This document is the design record.
4
-
5
- ## Problem
6
-
7
- Simple queries go to Claude Opus at $75/M output tokens when Gemini Flash could handle them at $0.60/M. There is no cost-aware model selection.
8
-
9
- Phase 1 solved API key management (one wallet for 30+ models). Phase 2 solves cost optimization by routing queries to the cheapest capable model.
10
-
11
- ## Why Client-Side
12
-
13
- Every existing smart router (OpenRouter, LiteLLM, etc.) runs server-side. The routing logic is proprietary — users can't see why a model was chosen or customize the rules.
14
-
15
- BlockRun's structural advantage: **x402 per-model transparent pricing**. Each model has an independent price visible in the 402 response. This means the routing decision can live in the open-source plugin where it's inspectable, customizable, and auditable.
16
-
17
- | | Server-side (OpenRouter) | Client-side (ClawRouter) |
18
- | ------------- | ------------------------ | ------------------------------- |
19
- | Routing logic | Proprietary black box | Open-source in plugin |
20
- | Pricing | Bundled, opaque | Per-model, transparent via x402 |
21
- | Customization | None | Operators edit config |
22
- | Trust model | "Trust us" | "Read the code" |
23
-
24
- ## Research Summary
25
-
26
- Analyzed 9 open-source smart routing implementations. Three classification approaches emerged:
27
-
28
- 1. **Pure heuristic** (keyword + length + regex) — Zero cost, < 1ms, but brittle
29
- 2. **Small LLM classifier** (DistilBERT, Granite 350M, 8B model) — Better accuracy, 20-500ms overhead
30
- 3. **Hybrid** (rules first, LLM only for ambiguous cases) — Best of both worlds
31
-
32
- The hybrid approach (from octoroute, smart-router) handles 70-80% of requests via rules in < 1ms, and only sends ambiguous cases to a cheap LLM classifier. This is what we implemented.
33
-
34
- ## Architecture
35
-
36
- ```
37
- OpenClaw Agent
38
- |
39
- v
40
- ┌─────────────────────────────────────────────────┐
41
- │ ClawRouter (src/router/) │
42
- │ │
43
- │ ┌─────────────────────────────────────────────┐ │
44
- │ │ Step 1: Weighted Scoring Engine (< 1ms) │ │
45
- │ │ • 14 scoring dimensions, each [-1, 1] │ │
46
- │ │ • Weighted sum → float score │ │
47
- │ │ • Sigmoid confidence calibration │ │
48
- │ │ • Returns: tier or null (ambiguous) │ │
49
- │ └─────────────────────┬───────────────────────┘ │
50
- │ | │
51
- │ ┌─────────────┴──────────────┐ │
52
- │ | | │
53
- │ confident ambiguous │
54
- │ (conf >= 0.70) (conf < 0.70) │
55
- │ | | │
56
- │ | ┌─────────────────────────┴────────┐ │
57
- │ | │ Step 2: LLM Classifier (~200ms) │ │
58
- │ | │ • Send to gemini-flash (cheapest)│ │
59
- │ | │ • "Classify: SIMPLE/MEDIUM/..." │ │
60
- │ | │ • Cache classification result │ │
61
- │ | └─────────────────────────┬────────┘ │
62
- │ | | │
63
- │ └────────────┬───────────────┘ │
64
- │ | │
65
- │ ┌────────────────────┴────────────────────────┐ │
66
- │ │ Step 3: Tier → Model Selection │ │
67
- │ │ • Look up cheapest model for tier │ │
68
- │ │ • Calculate cost estimate + savings │ │
69
- │ └────────────────────┬────────────────────────┘ │
70
- │ | │
71
- │ ┌────────────────────┴────────────────────────┐ │
72
- │ │ Step 4: RoutingDecision metadata │ │
73
- │ │ { model, tier, confidence, reasoning } │ │
74
- │ └────────────────────┬────────────────────────┘ │
75
- │ | │
76
- └───────────────────────┼─────────────────────────┘
77
- |
78
- v
79
- BlockRun API (x402)
80
- |
81
- v
82
- LLM Provider
83
- ```
84
-
85
- ## Classification Tiers
86
-
87
- Four tiers. REASONING is distinct from COMPLEX because reasoning tasks need different models (o3, gemini-pro) than general complex tasks (claude-opus-4, gpt-4o).
88
-
89
- | Tier | Description | Example Queries |
90
- | ------------- | ------------------------------------------------ | ------------------------------------------------------------------ |
91
- | **SIMPLE** | Short factual Q&A, translations, definitions | "What's the capital of France?", "Translate hello to Spanish" |
92
- | **MEDIUM** | Summaries, explanations, moderate code | "Summarize this article", "Write a Python function to sort a list" |
93
- | **COMPLEX** | Multi-step code, system design, creative writing | "Build a React component with tests", "Design a REST API" |
94
- | **REASONING** | Proofs, multi-step logic, mathematical reasoning | "Prove this theorem", "Solve step by step", "Debug this algorithm" |
95
-
96
- ## Weighted Scoring Engine (v2)
97
-
98
- Implemented in [`src/router/rules.ts`](../../src/router/rules.ts).
99
-
100
- 14 dimensions, each scored in [-1, 1] and multiplied by a learned weight:
101
-
102
- | Dimension | Weight | Signal |
103
- | -------------------- | ------ | ---------------------------------------- |
104
- | Reasoning markers | 0.18 | "prove", "theorem", "step by step" |
105
- | Code presence | 0.15 | "function", "async", "import", "```" |
106
- | Simple indicators | 0.12 | "what is", "define", "translate" |
107
- | Multi-step patterns | 0.12 | "first...then", "step 1", numbered lists |
108
- | Technical terms | 0.10 | "algorithm", "kubernetes", "distributed" |
109
- | Token count | 0.08 | short (<50) vs long (>500) |
110
- | Creative markers | 0.05 | "story", "poem", "brainstorm" |
111
- | Question complexity | 0.05 | 4+ question marks |
112
- | Constraint count | 0.04 | "at most", "O(n)", "maximum" |
113
- | Imperative verbs | 0.03 | "build", "create", "implement" |
114
- | Output format | 0.03 | "json", "yaml", "schema" |
115
- | Domain specificity | 0.02 | "quantum", "fpga", "genomics" |
116
- | Reference complexity | 0.02 | "the docs", "the api", "above" |
117
- | Negation complexity | 0.01 | "don't", "avoid", "without" |
118
-
119
- Weighted score maps to a tier via configurable boundaries. Confidence is calibrated using a sigmoid function — distance from the nearest tier boundary determines how sure the classifier is.
120
-
121
- ### Tier Boundaries
122
-
123
- ```
124
- Score < 0.00 → SIMPLE
125
- Score 0.00-0.15 → MEDIUM
126
- Score 0.15-0.25 → COMPLEX
127
- Score > 0.25 → REASONING
128
- ```
129
-
130
- ### Sigmoid Confidence Calibration
131
-
132
- ```typescript
133
- function calibrateConfidence(distance: number, steepness: number): number {
134
- return 1 / (1 + Math.exp(-steepness * distance));
135
- }
136
- // steepness = 12 (tuned)
137
- // distance = how far the score is from the nearest tier boundary
138
- // Near boundary → confidence ~0.50 → triggers LLM fallback
139
- // Far from boundary → confidence ~0.95+ → confident classification
140
- ```
141
-
142
- ### Special Case Overrides
143
-
144
- | Condition | Override | Reason |
145
- | --------------------------------------------- | ------------------------------------- | -------------------------------------- |
146
- | 2+ reasoning markers | Force REASONING at >= 0.85 confidence | Reasoning markers are strong signals |
147
- | Input > 100K tokens | Force COMPLEX tier | Large context = expensive regardless |
148
- | System prompt contains "JSON" or "structured" | Minimum MEDIUM tier | Structured output needs capable models |
149
-
150
- ## LLM Classifier (Fallback)
151
-
152
- Implemented in [`src/router/llm-classifier.ts`](../../src/router/llm-classifier.ts).
153
-
154
- When weighted scoring confidence is below 0.70, sends a classification request to the cheapest available model.
155
-
156
- ### Implementation Details
157
-
158
- - **Model**: `google/gemini-2.5-flash` ($0.15/$0.60 per M tokens)
159
- - **Max tokens**: 10 (one word response)
160
- - **Temperature**: 0 (deterministic)
161
- - **Prompt truncation**: First 500 characters
162
- - **Cost per classification**: ~$0.00003
163
- - **Latency**: ~200-400ms
164
- - **Parsing**: Word-boundary regex matching for SIMPLE/MEDIUM/COMPLEX/REASONING
165
- - **Fallback on parse failure**: Default to MEDIUM
166
- - **Cache**: In-memory Map, TTL 1 hour, prunes at 1000 entries
167
-
168
- ## Tier → Model Mapping
169
-
170
- Implemented in [`src/router/selector.ts`](../../src/router/selector.ts) and [`src/router/config.ts`](../../src/router/config.ts).
171
-
172
- | Tier | Primary Model | Cost (output per M) | Fallback Chain |
173
- | ------------- | --------------------------- | ------------------- | ---------------------------------- |
174
- | **SIMPLE** | `google/gemini-2.5-flash` | $0.60 | deepseek-chat → gpt-4o-mini |
175
- | **MEDIUM** | `deepseek/deepseek-chat` | $0.42 | gemini-flash → gpt-4o-mini |
176
- | **COMPLEX** | `anthropic/claude-opus-4.5` | $25.00 | gpt-4o → gemini-2.5-pro |
177
- | **REASONING** | `openai/o3` | $8.00 | gemini-2.5-pro → claude-sonnet-4.6 |
178
-
179
- ### Cost Savings (vs Claude Opus at $75/M)
180
-
181
- | Tier | % of Traffic | Output $/M | Savings |
182
- | ---------------- | ------------ | ------------ | --------------- |
183
- | SIMPLE | 40% | $0.60 | **99% cheaper** |
184
- | MEDIUM | 30% | $0.42 | **99% cheaper** |
185
- | COMPLEX | 20% | $25.00 | best quality |
186
- | REASONING | 10% | $8.00 | **89% cheaper** |
187
- | **Weighted avg** | | **$6.17/M** | **92% savings** |
188
-
189
- ## RoutingDecision Object
190
-
191
- Defined in [`src/router/types.ts`](../../src/router/types.ts).
192
-
193
- ```typescript
194
- type RoutingDecision = {
195
- model: string; // "deepseek/deepseek-chat"
196
- tier: Tier; // "MEDIUM"
197
- confidence: number; // 0.82
198
- method: "rules" | "llm"; // How the decision was made
199
- reasoning: string; // "score=-0.200 | short (8 tokens), simple indicator (what is)"
200
- costEstimate: number; // 0.0004
201
- baselineCost: number; // 0.3073 (what Claude Opus would have cost)
202
- savings: number; // 0.992 (0-1)
203
- };
204
- ```
205
-
206
- ## E2E Test Results
207
-
208
- 19 tests, 0 failures. See [`test/e2e.ts`](../../test/e2e.ts).
209
-
210
- ```
211
- ═══ Rule-Based Classifier ═══
212
-
213
- Simple queries:
214
- ✓ "What is the capital of France?" → SIMPLE (score=-0.200)
215
- ✓ "Hello" → SIMPLE (score=-0.200)
216
- ✓ "Define photosynthesis" → SIMPLE (score=-0.125)
217
- ✓ "Translate hello to Spanish" → SIMPLE (score=-0.200)
218
- ✓ "Yes or no: is the sky blue?" → SIMPLE (score=-0.200)
219
-
220
- Complex queries (correctly deferred to classifier):
221
- ✓ Kanban board → AMBIGUOUS (score=0.090, conf=0.673)
222
- ✓ Distributed trading → AMBIGUOUS (score=0.127, conf=0.569)
223
-
224
- Reasoning queries:
225
- ✓ "Prove sqrt(2) irrational" → REASONING (score=0.180, conf=0.973)
226
- ✓ "Derive time complexity" → REASONING (score=0.186, conf=0.973)
227
- ✓ "Chain of thought proof" → REASONING (score=0.180, conf=0.973)
228
-
229
- ═══ Full Router ═══
230
-
231
- ✓ Simple factual → google/gemini-2.5-flash (SIMPLE, rules) saved=99.2%
232
- ✓ Greeting → google/gemini-2.5-flash (SIMPLE, rules) saved=99.2%
233
- ✓ Math proof → openai/o3 (REASONING, rules) saved=89.3%
234
-
235
- ═══════════════════════════════════
236
- 19 passed, 0 failed
237
- ═══════════════════════════════════
238
- ```
239
-
240
- ## File Structure
241
-
242
- ```
243
- src/
244
- ├── index.ts # Plugin entry — register() + activate()
245
- ├── provider.ts # Registers "blockrun" provider in OpenClaw
246
- ├── proxy.ts # Local HTTP proxy — routing + x402 payment
247
- ├── models.ts # 30+ model definitions with pricing
248
- ├── auth.ts # Wallet key resolution (env, config, prompt)
249
- ├── logger.ts # JSON lines usage logger
250
- ├── types.ts # OpenClaw plugin type definitions
251
- └── router/
252
- ├── index.ts # route() entry point
253
- ├── rules.ts # Weighted classifier (14 dimensions, sigmoid confidence)
254
- ├── llm-classifier.ts # LLM fallback (gemini-flash, cached)
255
- ├── selector.ts # Tier → model selection + cost calculation
256
- ├── config.ts # Default routing configuration
257
- └── types.ts # RoutingDecision, Tier, ScoringResult
258
- ```
259
-
260
- ## Not Implemented (Future)
261
-
262
- - **KNN fallback** — Embedding-based classifier to replace LLM fallback (<5ms vs ~200ms)
263
- - **Cascade routing** — Try cheaper model first, escalate on low quality (AutoMix-inspired)
264
- - **Graceful fallback** — Auto-switch on rate limit or provider error using per-tier fallback chains
265
- - **Spend controls** — Daily/monthly budgets, server-side enforcement
266
- - **Quality feedback loop** — Learning from past routing decisions to improve accuracy
267
- - **Conversation context** — Current design is per-message. Future: track conversation complexity over time