@blockrun/clawrouter 0.9.8 → 0.9.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -71,7 +71,7 @@ Choose your routing strategy with `/model <profile>`:
71
71
 
72
72
  **Other shortcuts:**
73
73
 
74
- - **Model aliases:** `/model sonnet`, `/model grok`, `/model gpt5`, `/model o3`
74
+ - **Model aliases:** `/model br-sonnet`, `/model grok`, `/model gpt5`, `/model o3`
75
75
  - **Specific models:** `blockrun/openai/gpt-4o` or `blockrun/anthropic/claude-sonnet-4`
76
76
  - **Bring your wallet:** `export BLOCKRUN_WALLET_KEY=0x...`
77
77
 
@@ -150,7 +150,7 @@ ClawRouter v0.5+ includes intelligent features that work automatically:
150
150
  - **Agentic auto-detect** — routes multi-step tasks to Kimi K2.5
151
151
  - **Tool detection** — auto-switches when `tools` array present
152
152
  - **Context-aware** — filters models that can't handle your context size
153
- - **Model aliases** — `/model free`, `/model sonnet`, `/model grok`
153
+ - **Model aliases** — `/model free`, `/model br-sonnet`, `/model grok`
154
154
  - **Session persistence** — pins model for multi-turn conversations
155
155
  - **Free tier fallback** — keeps working when wallet is empty
156
156
  - **Auto-update check** — notifies you when a new version is available
@@ -315,11 +315,12 @@ const decision = route("Prove sqrt(2) is irrational", ...);
315
315
 
316
316
  ---
317
317
 
318
- ## Performance Optimizations (v0.3)
318
+ ## Performance Optimizations (v0.3+)
319
319
 
320
320
  - **SSE heartbeat**: Sends headers + heartbeat immediately, preventing upstream timeouts
321
321
  - **Response dedup**: SHA-256 hash → 30s cache, prevents double-charge on retries
322
322
  - **Payment pre-auth**: Caches 402 params, pre-signs USDC, skips 402 round trip (~200ms saved)
323
+ - **Response cache**: Caches LLM responses with a 10-minute default TTL, saving cost on repeated queries
323
324
 
324
325
  ---
325
326
 
@@ -362,6 +363,20 @@ Based on [50+ OpenClaw issues](https://github.com/openclaw/openclaw/issues?q=ope
362
363
 
363
364
  ---
364
365
 
366
+ ## Why we built this
367
+
368
+ - **Agents need to pay and get paid — without humans in the loop.** Today's AI infra requires accounts, API keys, manual billing. But an agent spawning 50 sub-agents shouldn't need a human to provision 50 keys. An agent completing a bounty shouldn't wait for someone to invoice and collect.
369
+
370
+ - **Payment IS authentication.** A wallet signature proves you can pay — no shared secrets that leak into prompts, no accounts to create, no keys to rotate.
371
+
372
+ - **Agents should control their own money.** Non-custodial means the agent holds the keys. No platform can freeze funds or change terms overnight.
373
+
374
+ - **Cost optimization should be automatic.** Agents shouldn't overpay $25/M for "what is 2+2". Smart routing to the cheapest capable model saves 92% on typical workloads.
375
+
376
+ The result: an agent can generate a wallet, receive funds, call any model, pay per-request, and earn money — all programmatically. **This is agentic commerce.**
377
+
378
+ ---
379
+
365
380
  ## Troubleshooting
366
381
 
367
382
  Quick checklist:
@@ -422,15 +437,26 @@ Your wallet key remains at `~/.openclaw/blockrun/wallet.key` — back it up befo
422
437
  - [x] Context-aware routing — filter out models that can't handle context size
423
438
  - [x] Session persistence — pin model for multi-turn conversations
424
439
  - [x] Cost tracking — /stats command with savings dashboard
425
- - [x] Model aliases — `/model free`, `/model sonnet`, `/model grok`, etc.
440
+ - [x] Model aliases — `/model free`, `/model br-sonnet`, `/model grok`, etc.
426
441
  - [x] Free tier — gpt-oss-120b for $0 when wallet is empty
427
442
  - [x] Auto-update — startup version check with one-command update
443
+ - [x] Response cache — LiteLLM-inspired caching for repeated requests
428
444
  - [ ] Cascade routing — try cheap model first, escalate on low quality
429
445
  - [ ] Spend controls — daily/monthly budgets
430
446
  - [ ] Remote analytics — cost tracking at blockrun.ai
431
447
 
432
448
  ---
433
449
 
450
+ ## Support / talk with founders
451
+
452
+ - [Schedule Demo 👋](https://calendly.com/vickyfu9/30min)
453
+ - [Community Telegram 💭](https://t.me/blockrunAI)
454
+ - [X / Twitter 🐦](https://x.com/BlockRunAI)
455
+ - Telegram 📱 [@bc1max](https://t.me/bc1max)
456
+ - Our email ✉️ vicky@blockrun.ai
457
+
458
+ ---
459
+
434
460
  ## License
435
461
 
436
462
  MIT
package/dist/cli.js CHANGED
@@ -1199,27 +1199,43 @@ var DEFAULT_ROUTING_CONFIG = {
1199
1199
  }
1200
1200
  },
1201
1201
  // Premium tier configs - best quality (blockrun/premium)
1202
- // kimi=coding, sonnet=reasoning/instructions, opus=heavy lifting/architecture/audits
1202
+ // codex=complex coding, kimi=simple coding, sonnet=reasoning/instructions, opus=architecture/PM/audits
1203
1203
  premiumTiers: {
1204
1204
  SIMPLE: {
1205
1205
  primary: "moonshot/kimi-k2.5",
1206
- // $0.50/$2.40 - good for coding
1206
+ // $0.50/$2.40 - good for simple coding
1207
1207
  fallback: ["anthropic/claude-haiku-4.5", "google/gemini-2.5-flash", "xai/grok-code-fast-1"]
1208
1208
  },
1209
1209
  MEDIUM: {
1210
1210
  primary: "anthropic/claude-sonnet-4",
1211
1211
  // $3/$15 - reasoning/instructions
1212
- fallback: ["moonshot/kimi-k2.5", "google/gemini-2.5-pro", "xai/grok-4-0709"]
1212
+ fallback: [
1213
+ "openai/gpt-5.2-codex",
1214
+ "moonshot/kimi-k2.5",
1215
+ "google/gemini-2.5-pro",
1216
+ "xai/grok-4-0709"
1217
+ ]
1213
1218
  },
1214
1219
  COMPLEX: {
1215
- primary: "anthropic/claude-opus-4.5",
1216
- // $5/$25 - architecture, audits, heavy lifting
1217
- fallback: ["anthropic/claude-sonnet-4", "google/gemini-3-pro-preview", "moonshot/kimi-k2.5"]
1220
+ primary: "openai/gpt-5.2-codex",
1221
+ // $2.50/$12 - complex coding (78% cost savings vs Opus)
1222
+ fallback: [
1223
+ "anthropic/claude-opus-4.6",
1224
+ "anthropic/claude-opus-4.5",
1225
+ "anthropic/claude-sonnet-4",
1226
+ "google/gemini-3-pro-preview",
1227
+ "moonshot/kimi-k2.5"
1228
+ ]
1218
1229
  },
1219
1230
  REASONING: {
1220
1231
  primary: "anthropic/claude-sonnet-4",
1221
1232
  // $3/$15 - best for reasoning/instructions
1222
- fallback: ["anthropic/claude-opus-4.5", "openai/o3", "xai/grok-4-1-fast-reasoning"]
1233
+ fallback: [
1234
+ "anthropic/claude-opus-4.6",
1235
+ "anthropic/claude-opus-4.5",
1236
+ "openai/o3",
1237
+ "xai/grok-4-1-fast-reasoning"
1238
+ ]
1223
1239
  }
1224
1240
  },
1225
1241
  // Agentic tier configs - models that excel at multi-step autonomous tasks
@@ -1241,7 +1257,7 @@ var DEFAULT_ROUTING_CONFIG = {
1241
1257
  COMPLEX: {
1242
1258
  primary: "anthropic/claude-sonnet-4",
1243
1259
  fallback: [
1244
- "anthropic/claude-opus-4.5",
1260
+ "anthropic/claude-opus-4.6",
1245
1261
  // Latest Opus - best agentic
1246
1262
  "openai/gpt-5.2",
1247
1263
  "google/gemini-3-pro-preview",
@@ -1252,7 +1268,7 @@ var DEFAULT_ROUTING_CONFIG = {
1252
1268
  primary: "anthropic/claude-sonnet-4",
1253
1269
  // Strong tool use + reasoning for agentic tasks
1254
1270
  fallback: [
1255
- "anthropic/claude-opus-4.5",
1271
+ "anthropic/claude-opus-4.6",
1256
1272
  "xai/grok-4-fast-reasoning",
1257
1273
  "moonshot/kimi-k2.5",
1258
1274
  "deepseek/deepseek-reasoner"
@@ -1343,12 +1359,16 @@ var MODEL_ALIASES = {
1343
1359
  // Claude
1344
1360
  claude: "anthropic/claude-sonnet-4",
1345
1361
  sonnet: "anthropic/claude-sonnet-4",
1346
- opus: "anthropic/claude-opus-4",
1362
+ opus: "anthropic/claude-opus-4.6",
1363
+ // Updated to latest Opus 4.6
1364
+ "opus-46": "anthropic/claude-opus-4.6",
1365
+ "opus-45": "anthropic/claude-opus-4.5",
1347
1366
  haiku: "anthropic/claude-haiku-4.5",
1348
1367
  // OpenAI
1349
1368
  gpt: "openai/gpt-4o",
1350
1369
  gpt4: "openai/gpt-4o",
1351
1370
  gpt5: "openai/gpt-5.2",
1371
+ codex: "openai/gpt-5.2-codex",
1352
1372
  mini: "openai/gpt-4o-mini",
1353
1373
  o3: "openai/o3",
1354
1374
  // DeepSeek
@@ -1453,6 +1473,16 @@ var BLOCKRUN_MODELS = [
1453
1473
  maxOutput: 128e3,
1454
1474
  reasoning: true
1455
1475
  },
1476
+ // OpenAI Codex Family
1477
+ {
1478
+ id: "openai/gpt-5.2-codex",
1479
+ name: "GPT-5.2 Codex",
1480
+ inputPrice: 2.5,
1481
+ outputPrice: 12,
1482
+ contextWindow: 128e3,
1483
+ maxOutput: 32e3,
1484
+ agentic: true
1485
+ },
1456
1486
  // OpenAI GPT-4 Family
1457
1487
  {
1458
1488
  id: "openai/gpt-4.1",
@@ -1558,6 +1588,17 @@ var BLOCKRUN_MODELS = [
1558
1588
  reasoning: true,
1559
1589
  agentic: true
1560
1590
  },
1591
+ {
1592
+ id: "anthropic/claude-opus-4.6",
1593
+ name: "Claude Opus 4.6",
1594
+ inputPrice: 5,
1595
+ outputPrice: 25,
1596
+ contextWindow: 2e5,
1597
+ maxOutput: 64e3,
1598
+ reasoning: true,
1599
+ vision: true,
1600
+ agentic: true
1601
+ },
1561
1602
  // Google
1562
1603
  {
1563
1604
  id: "google/gemini-3-pro-preview",
@@ -2035,6 +2076,203 @@ var RequestDeduplicator = class {
2035
2076
  }
2036
2077
  };
2037
2078
 
2079
+ // src/response-cache.ts
2080
+ import { createHash as createHash2 } from "crypto";
2081
+ var DEFAULT_CONFIG = {
2082
+ maxSize: 200,
2083
+ defaultTTL: 600,
2084
+ maxItemSize: 1048576,
2085
+ // 1MB
2086
+ enabled: true
2087
+ };
2088
+ function canonicalize2(obj) {
2089
+ if (obj === null || typeof obj !== "object") {
2090
+ return obj;
2091
+ }
2092
+ if (Array.isArray(obj)) {
2093
+ return obj.map(canonicalize2);
2094
+ }
2095
+ const sorted = {};
2096
+ for (const key of Object.keys(obj).sort()) {
2097
+ sorted[key] = canonicalize2(obj[key]);
2098
+ }
2099
+ return sorted;
2100
+ }
2101
+ var TIMESTAMP_PATTERN2 = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;
2102
+ function normalizeForCache(obj) {
2103
+ const result = {};
2104
+ for (const [key, value] of Object.entries(obj)) {
2105
+ if (["stream", "user", "request_id", "x-request-id"].includes(key)) {
2106
+ continue;
2107
+ }
2108
+ if (key === "messages" && Array.isArray(value)) {
2109
+ result[key] = value.map((msg) => {
2110
+ if (typeof msg === "object" && msg !== null) {
2111
+ const m = msg;
2112
+ if (typeof m.content === "string") {
2113
+ return { ...m, content: m.content.replace(TIMESTAMP_PATTERN2, "") };
2114
+ }
2115
+ }
2116
+ return msg;
2117
+ });
2118
+ } else {
2119
+ result[key] = value;
2120
+ }
2121
+ }
2122
+ return result;
2123
+ }
2124
+ var ResponseCache = class {
2125
+ cache = /* @__PURE__ */ new Map();
2126
+ expirationHeap = [];
2127
+ config;
2128
+ // Stats for monitoring
2129
+ stats = {
2130
+ hits: 0,
2131
+ misses: 0,
2132
+ evictions: 0
2133
+ };
2134
+ constructor(config = {}) {
2135
+ const filtered = Object.fromEntries(
2136
+ Object.entries(config).filter(([, v]) => v !== void 0)
2137
+ );
2138
+ this.config = { ...DEFAULT_CONFIG, ...filtered };
2139
+ }
2140
+ /**
2141
+ * Generate cache key from request body.
2142
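+ * Hashes the canonicalized (key-sorted) request body, excluding `stream`, `user`, `request_id` and `x-request-id`; a leading timestamp prefix is stripped from string message content first.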
2143
+ */
2144
+ static generateKey(body) {
2145
+ try {
2146
+ const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
2147
+ const normalized = normalizeForCache(parsed);
2148
+ const canonical = canonicalize2(normalized);
2149
+ const keyContent = JSON.stringify(canonical);
2150
+ return createHash2("sha256").update(keyContent).digest("hex").slice(0, 32);
2151
+ } catch {
2152
+ const content = typeof body === "string" ? body : body.toString();
2153
+ return createHash2("sha256").update(content).digest("hex").slice(0, 32);
2154
+ }
2155
+ }
2156
+ /**
2157
+ * Check if caching is enabled for this request.
2158
+ * Respects cache control headers and request params.
2159
+ */
2160
+ shouldCache(body, headers) {
2161
+ if (!this.config.enabled) return false;
2162
+ if (headers?.["cache-control"]?.includes("no-cache")) {
2163
+ return false;
2164
+ }
2165
+ try {
2166
+ const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
2167
+ if (parsed.cache === false || parsed.no_cache === true) {
2168
+ return false;
2169
+ }
2170
+ } catch {
2171
+ }
2172
+ return true;
2173
+ }
2174
+ /**
2175
+ * Get cached response if available and not expired.
2176
+ */
2177
+ get(key) {
2178
+ const entry = this.cache.get(key);
2179
+ if (!entry) {
2180
+ this.stats.misses++;
2181
+ return void 0;
2182
+ }
2183
+ if (Date.now() > entry.expiresAt) {
2184
+ this.cache.delete(key);
2185
+ this.stats.misses++;
2186
+ return void 0;
2187
+ }
2188
+ this.stats.hits++;
2189
+ return entry;
2190
+ }
2191
+ /**
2192
+ * Cache a response with optional custom TTL.
2193
+ */
2194
+ set(key, response, ttlSeconds) {
2195
+ if (!this.config.enabled || this.config.maxSize <= 0) return;
2196
+ if (response.body.length > this.config.maxItemSize) {
2197
+ console.log(`[ResponseCache] Skipping cache - item too large: ${response.body.length} bytes`);
2198
+ return;
2199
+ }
2200
+ if (response.status >= 400) {
2201
+ return;
2202
+ }
2203
+ if (this.cache.size >= this.config.maxSize) {
2204
+ this.evict();
2205
+ }
2206
+ const now = Date.now();
2207
+ const ttl = ttlSeconds ?? this.config.defaultTTL;
2208
+ const expiresAt = now + ttl * 1e3;
2209
+ const entry = {
2210
+ ...response,
2211
+ cachedAt: now,
2212
+ expiresAt
2213
+ };
2214
+ this.cache.set(key, entry);
2215
+ this.expirationHeap.push({ expiresAt, key });
2216
+ }
2217
+ /**
2218
+ * Evict expired entries first, then the entries closest to expiry, until there is room.
2219
+ */
2220
+ evict() {
2221
+ const now = Date.now();
2222
+ this.expirationHeap.sort((a, b) => a.expiresAt - b.expiresAt);
2223
+ while (this.expirationHeap.length > 0) {
2224
+ const oldest = this.expirationHeap[0];
2225
+ const entry = this.cache.get(oldest.key);
2226
+ if (!entry || entry.expiresAt !== oldest.expiresAt) {
2227
+ this.expirationHeap.shift();
2228
+ continue;
2229
+ }
2230
+ if (oldest.expiresAt <= now) {
2231
+ this.cache.delete(oldest.key);
2232
+ this.expirationHeap.shift();
2233
+ this.stats.evictions++;
2234
+ } else {
2235
+ break;
2236
+ }
2237
+ }
2238
+ while (this.cache.size >= this.config.maxSize && this.expirationHeap.length > 0) {
2239
+ const oldest = this.expirationHeap.shift();
2240
+ if (this.cache.has(oldest.key)) {
2241
+ this.cache.delete(oldest.key);
2242
+ this.stats.evictions++;
2243
+ }
2244
+ }
2245
+ }
2246
+ /**
2247
+ * Get cache statistics.
2248
+ */
2249
+ getStats() {
2250
+ const total = this.stats.hits + this.stats.misses;
2251
+ const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) + "%" : "0%";
2252
+ return {
2253
+ size: this.cache.size,
2254
+ maxSize: this.config.maxSize,
2255
+ hits: this.stats.hits,
2256
+ misses: this.stats.misses,
2257
+ evictions: this.stats.evictions,
2258
+ hitRate
2259
+ };
2260
+ }
2261
+ /**
2262
+ * Clear all cached entries.
2263
+ */
2264
+ clear() {
2265
+ this.cache.clear();
2266
+ this.expirationHeap = [];
2267
+ }
2268
+ /**
2269
+ * Check if cache is enabled.
2270
+ */
2271
+ isEnabled() {
2272
+ return this.config.enabled;
2273
+ }
2274
+ };
2275
+
2038
2276
  // src/balance.ts
2039
2277
  import { createPublicClient, http, erc20Abi } from "viem";
2040
2278
  import { base } from "viem/chains";
@@ -3491,6 +3729,7 @@ async function startProxy(options) {
3491
3729
  modelPricing
3492
3730
  };
3493
3731
  const deduplicator = new RequestDeduplicator();
3732
+ const responseCache = new ResponseCache(options.cacheConfig);
3494
3733
  const sessionStore = new SessionStore(options.sessionConfig);
3495
3734
  const connections = /* @__PURE__ */ new Set();
3496
3735
  const server = createServer(async (req, res) => {
@@ -3531,6 +3770,15 @@ async function startProxy(options) {
3531
3770
  res.end(JSON.stringify(response));
3532
3771
  return;
3533
3772
  }
3773
+ if (req.url === "/cache" || req.url?.startsWith("/cache?")) {
3774
+ const stats = responseCache.getStats();
3775
+ res.writeHead(200, {
3776
+ "Content-Type": "application/json",
3777
+ "Cache-Control": "no-cache"
3778
+ });
3779
+ res.end(JSON.stringify(stats, null, 2));
3780
+ return;
3781
+ }
3534
3782
  if (req.url === "/stats" || req.url?.startsWith("/stats?")) {
3535
3783
  try {
3536
3784
  const url = new URL(req.url, "http://localhost");
@@ -3577,7 +3825,8 @@ async function startProxy(options) {
3577
3825
  routerOpts,
3578
3826
  deduplicator,
3579
3827
  balanceMonitor,
3580
- sessionStore
3828
+ sessionStore,
3829
+ responseCache
3581
3830
  );
3582
3831
  } catch (err) {
3583
3832
  const error = err instanceof Error ? err : new Error(String(err));
@@ -3778,7 +4027,7 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
3778
4027
  };
3779
4028
  }
3780
4029
  }
3781
- async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore) {
4030
+ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore, responseCache) {
3782
4031
  const startTime = Date.now();
3783
4032
  const upstreamUrl = `${apiBase}${req.url}`;
3784
4033
  const bodyChunks = [];
@@ -3946,6 +4195,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3946
4195
  );
3947
4196
  }
3948
4197
  }
4198
+ const cacheKey = ResponseCache.generateKey(body);
4199
+ const reqHeaders = {};
4200
+ for (const [key, value] of Object.entries(req.headers)) {
4201
+ if (typeof value === "string") reqHeaders[key] = value;
4202
+ }
4203
+ if (responseCache.shouldCache(body, reqHeaders)) {
4204
+ const cachedResponse = responseCache.get(cacheKey);
4205
+ if (cachedResponse) {
4206
+ console.log(`[ClawRouter] Cache HIT for ${cachedResponse.model} (saved API call)`);
4207
+ res.writeHead(cachedResponse.status, cachedResponse.headers);
4208
+ res.end(cachedResponse.body);
4209
+ return;
4210
+ }
4211
+ }
3949
4212
  const dedupKey = RequestDeduplicator.hash(body);
3950
4213
  const cached = deduplicator.getCached(dedupKey);
3951
4214
  if (cached) {
@@ -4298,12 +4561,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4298
4561
  }
4299
4562
  }
4300
4563
  res.end();
4564
+ const responseBody = Buffer.concat(responseChunks);
4301
4565
  deduplicator.complete(dedupKey, {
4302
4566
  status: upstream.status,
4303
4567
  headers: responseHeaders,
4304
- body: Buffer.concat(responseChunks),
4568
+ body: responseBody,
4305
4569
  completedAt: Date.now()
4306
4570
  });
4571
+ if (upstream.status === 200 && responseCache.shouldCache(body)) {
4572
+ responseCache.set(cacheKey, {
4573
+ body: responseBody,
4574
+ status: upstream.status,
4575
+ headers: responseHeaders,
4576
+ model: modelId
4577
+ });
4578
+ console.log(`[ClawRouter] Cached response for ${modelId} (${responseBody.length} bytes)`);
4579
+ }
4307
4580
  }
4308
4581
  if (estimatedCostMicros !== void 0) {
4309
4582
  balanceMonitor.deductEstimated(estimatedCostMicros);