@blockrun/clawrouter 0.9.7 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -71,7 +71,7 @@ Choose your routing strategy with `/model <profile>`:
71
71
 
72
72
  **Other shortcuts:**
73
73
 
74
- - **Model aliases:** `/model sonnet`, `/model grok`, `/model gpt5`, `/model o3`
74
+ - **Model aliases:** `/model br-sonnet`, `/model grok`, `/model gpt5`, `/model o3`
75
75
  - **Specific models:** `blockrun/openai/gpt-4o` or `blockrun/anthropic/claude-sonnet-4`
76
76
  - **Bring your wallet:** `export BLOCKRUN_WALLET_KEY=0x...`
77
77
 
@@ -150,7 +150,7 @@ ClawRouter v0.5+ includes intelligent features that work automatically:
150
150
  - **Agentic auto-detect** — routes multi-step tasks to Kimi K2.5
151
151
  - **Tool detection** — auto-switches when `tools` array present
152
152
  - **Context-aware** — filters models that can't handle your context size
153
- - **Model aliases** — `/model free`, `/model sonnet`, `/model grok`
153
+ - **Model aliases** — `/model free`, `/model br-sonnet`, `/model grok`
154
154
  - **Session persistence** — pins model for multi-turn conversations
155
155
  - **Free tier fallback** — keeps working when wallet is empty
156
156
  - **Auto-update check** — notifies you when a new version is available
@@ -315,11 +315,12 @@ const decision = route("Prove sqrt(2) is irrational", ...);
315
315
 
316
316
  ---
317
317
 
318
- ## Performance Optimizations (v0.3)
318
+ ## Performance Optimizations (v0.3+)
319
319
 
320
320
  - **SSE heartbeat**: Sends headers + heartbeat immediately, preventing upstream timeouts
321
321
  - **Response dedup**: SHA-256 hash → 30s cache, prevents double-charge on retries
322
322
  - **Payment pre-auth**: Caches 402 params, pre-signs USDC, skips 402 round trip (~200ms saved)
323
+ - **Response cache**: LLM response caching with 10-minute TTL, saves cost on repeated queries
323
324
 
324
325
  ---
325
326
 
@@ -422,9 +423,10 @@ Your wallet key remains at `~/.openclaw/blockrun/wallet.key` — back it up befo
422
423
  - [x] Context-aware routing — filter out models that can't handle context size
423
424
  - [x] Session persistence — pin model for multi-turn conversations
424
425
  - [x] Cost tracking — /stats command with savings dashboard
425
- - [x] Model aliases — `/model free`, `/model sonnet`, `/model grok`, etc.
426
+ - [x] Model aliases — `/model free`, `/model br-sonnet`, `/model grok`, etc.
426
427
  - [x] Free tier — gpt-oss-120b for $0 when wallet is empty
427
428
  - [x] Auto-update — startup version check with one-command update
429
+ - [x] Response cache — LiteLLM-inspired caching for repeated requests
428
430
  - [ ] Cascade routing — try cheap model first, escalate on low quality
429
431
  - [ ] Spend controls — daily/monthly budgets
430
432
  - [ ] Remote analytics — cost tracking at blockrun.ai
package/dist/cli.js CHANGED
@@ -1199,37 +1199,42 @@ var DEFAULT_ROUTING_CONFIG = {
1199
1199
  }
1200
1200
  },
1201
1201
  // Premium tier configs - best quality (blockrun/premium)
1202
+ // codex=complex coding, kimi=simple coding, sonnet=reasoning/instructions, opus=architecture/PM/audits
1202
1203
  premiumTiers: {
1203
1204
  SIMPLE: {
1204
- primary: "google/gemini-2.5-flash",
1205
- // $0.075/$0.30
1206
- fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
1205
+ primary: "moonshot/kimi-k2.5",
1206
+ // $0.50/$2.40 - good for simple coding
1207
+ fallback: ["anthropic/claude-haiku-4.5", "google/gemini-2.5-flash", "xai/grok-code-fast-1"]
1207
1208
  },
1208
1209
  MEDIUM: {
1209
- primary: "openai/gpt-4o",
1210
- // $2.50/$10
1211
- fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
1210
+ primary: "anthropic/claude-sonnet-4",
1211
+ // $3/$15 - reasoning/instructions
1212
+ fallback: [
1213
+ "openai/gpt-5.2-codex",
1214
+ "moonshot/kimi-k2.5",
1215
+ "google/gemini-2.5-pro",
1216
+ "xai/grok-4-0709"
1217
+ ]
1212
1218
  },
1213
1219
  COMPLEX: {
1214
- primary: "anthropic/claude-opus-4.5",
1215
- // $5/$25 - Latest Opus
1220
+ primary: "openai/gpt-5.2-codex",
1221
+ // $2.50/$12 - complex coding (78% cost savings vs Opus)
1216
1222
  fallback: [
1217
- "openai/gpt-5.2-pro",
1218
- // $21/$168 - Latest GPT pro
1223
+ "anthropic/claude-opus-4.6",
1224
+ "anthropic/claude-opus-4.5",
1225
+ "anthropic/claude-sonnet-4",
1219
1226
  "google/gemini-3-pro-preview",
1220
- // Latest Gemini
1221
- "openai/gpt-5.2",
1222
- "anthropic/claude-sonnet-4"
1227
+ "moonshot/kimi-k2.5"
1223
1228
  ]
1224
1229
  },
1225
1230
  REASONING: {
1226
- primary: "openai/o3",
1227
- // $2/$8 - Best value reasoning
1231
+ primary: "anthropic/claude-sonnet-4",
1232
+ // $3/$15 - best for reasoning/instructions
1228
1233
  fallback: [
1229
- "openai/o4-mini",
1230
- // Latest o-series
1234
+ "anthropic/claude-opus-4.6",
1231
1235
  "anthropic/claude-opus-4.5",
1232
- "google/gemini-3-pro-preview"
1236
+ "openai/o3",
1237
+ "xai/grok-4-1-fast-reasoning"
1233
1238
  ]
1234
1239
  }
1235
1240
  },
@@ -1252,7 +1257,7 @@ var DEFAULT_ROUTING_CONFIG = {
1252
1257
  COMPLEX: {
1253
1258
  primary: "anthropic/claude-sonnet-4",
1254
1259
  fallback: [
1255
- "anthropic/claude-opus-4.5",
1260
+ "anthropic/claude-opus-4.6",
1256
1261
  // Latest Opus - best agentic
1257
1262
  "openai/gpt-5.2",
1258
1263
  "google/gemini-3-pro-preview",
@@ -1263,7 +1268,7 @@ var DEFAULT_ROUTING_CONFIG = {
1263
1268
  primary: "anthropic/claude-sonnet-4",
1264
1269
  // Strong tool use + reasoning for agentic tasks
1265
1270
  fallback: [
1266
- "anthropic/claude-opus-4.5",
1271
+ "anthropic/claude-opus-4.6",
1267
1272
  "xai/grok-4-fast-reasoning",
1268
1273
  "moonshot/kimi-k2.5",
1269
1274
  "deepseek/deepseek-reasoner"
@@ -1354,12 +1359,16 @@ var MODEL_ALIASES = {
1354
1359
  // Claude
1355
1360
  claude: "anthropic/claude-sonnet-4",
1356
1361
  sonnet: "anthropic/claude-sonnet-4",
1357
- opus: "anthropic/claude-opus-4",
1362
+ opus: "anthropic/claude-opus-4.6",
1363
+ // Updated to latest Opus 4.6
1364
+ "opus-46": "anthropic/claude-opus-4.6",
1365
+ "opus-45": "anthropic/claude-opus-4.5",
1358
1366
  haiku: "anthropic/claude-haiku-4.5",
1359
1367
  // OpenAI
1360
1368
  gpt: "openai/gpt-4o",
1361
1369
  gpt4: "openai/gpt-4o",
1362
1370
  gpt5: "openai/gpt-5.2",
1371
+ codex: "openai/gpt-5.2-codex",
1363
1372
  mini: "openai/gpt-4o-mini",
1364
1373
  o3: "openai/o3",
1365
1374
  // DeepSeek
@@ -1464,6 +1473,16 @@ var BLOCKRUN_MODELS = [
1464
1473
  maxOutput: 128e3,
1465
1474
  reasoning: true
1466
1475
  },
1476
+ // OpenAI Codex Family
1477
+ {
1478
+ id: "openai/gpt-5.2-codex",
1479
+ name: "GPT-5.2 Codex",
1480
+ inputPrice: 2.5,
1481
+ outputPrice: 12,
1482
+ contextWindow: 128e3,
1483
+ maxOutput: 32e3,
1484
+ agentic: true
1485
+ },
1467
1486
  // OpenAI GPT-4 Family
1468
1487
  {
1469
1488
  id: "openai/gpt-4.1",
@@ -1569,6 +1588,17 @@ var BLOCKRUN_MODELS = [
1569
1588
  reasoning: true,
1570
1589
  agentic: true
1571
1590
  },
1591
+ {
1592
+ id: "anthropic/claude-opus-4.6",
1593
+ name: "Claude Opus 4.6",
1594
+ inputPrice: 5,
1595
+ outputPrice: 25,
1596
+ contextWindow: 2e5,
1597
+ maxOutput: 64e3,
1598
+ reasoning: true,
1599
+ vision: true,
1600
+ agentic: true
1601
+ },
1572
1602
  // Google
1573
1603
  {
1574
1604
  id: "google/gemini-3-pro-preview",
@@ -2046,6 +2076,203 @@ var RequestDeduplicator = class {
2046
2076
  }
2047
2077
  };
2048
2078
 
2079
+ // src/response-cache.ts
2080
+ import { createHash as createHash2 } from "crypto";
2081
/**
 * Baseline settings for ResponseCache. Caller-supplied options are spread on
 * top of these values, so the object is frozen to keep an accidental write
 * from changing the defaults of every cache created afterwards.
 */
var DEFAULT_CONFIG = Object.freeze({
  // Maximum number of entries held at once.
  maxSize: 200,
  // Entry lifetime in seconds (multiplied by 1000 when an entry is stored).
  defaultTTL: 600,
  // Largest response body that will be stored (1MB); compared against body.length.
  maxItemSize: 1048576,
  // Master switch: when false nothing is stored and shouldCache() returns false.
  enabled: true
});
2088
/**
 * Return a copy of a JSON-compatible value in which every plain object has its
 * keys in sorted order, so that JSON.stringify() of the result is independent
 * of the key order in the input.
 *
 * Primitives and null are returned unchanged; arrays keep their element order
 * and have each element canonicalized.
 *
 * @param {*} obj - Value to canonicalize (typically the result of JSON.parse).
 * @returns {*} The canonicalized copy, or `obj` itself when it is not an object.
 */
function canonicalize2(obj) {
  if (obj === null || typeof obj !== "object") {
    return obj;
  }
  if (Array.isArray(obj)) {
    return obj.map((item) => canonicalize2(item));
  }
  // Object.fromEntries defines own data properties. A plain `target[key] = v`
  // would treat a "__proto__" key (which JSON.parse yields as an own property)
  // as a prototype assignment and silently drop it from the canonical form.
  return Object.fromEntries(
    Object.keys(obj).sort().map((key) => [key, canonicalize2(obj[key])])
  );
}
2101
// Matches a leading "[Ddd YYYY-MM-DD HH:MM ZONE] " style prefix at the very
// start of a string (three word characters, a date, a time, a word, then any
// trailing whitespace).
var TIMESTAMP_PATTERN2 = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;

/**
 * Produce the subset of a parsed request body that is used to build the
 * cache key.
 *
 * - Top-level `stream`, `user`, `request_id` and `x-request-id` are omitted.
 * - For a top-level `messages` array, every message object whose `content` is
 *   a string has a leading timestamp prefix (TIMESTAMP_PATTERN2) removed.
 *   Other messages are passed through untouched.
 * - All remaining top-level fields are copied as-is (shallow).
 *
 * NOTE(review): because `stream` is omitted, a streaming and a non-streaming
 * request with otherwise equal bodies normalize to the same value — confirm
 * the caller can serve either from the same cached response.
 *
 * @param {object} obj - Parsed request body. Passing null/undefined throws a
 *   TypeError (from Object.entries), which callers are expected to handle.
 * @returns {object} A new object; the input is not mutated.
 */
function normalizeForCache(obj) {
  const ignoredFields = new Set(["stream", "user", "request_id", "x-request-id"]);
  const stripTimestamp = (msg) => {
    const hasStringContent =
      typeof msg === "object" && msg !== null && typeof msg.content === "string";
    return hasStringContent
      ? { ...msg, content: msg.content.replace(TIMESTAMP_PATTERN2, "") }
      : msg;
  };
  const kept = [];
  for (const [key, value] of Object.entries(obj)) {
    if (ignoredFields.has(key)) {
      continue;
    }
    const isMessageList = key === "messages" && Array.isArray(value);
    kept.push([key, isMessageList ? value.map(stripTimestamp) : value]);
  }
  // Object.fromEntries creates own properties, so a "__proto__" key in the
  // body is kept as data instead of replacing the result's prototype.
  return Object.fromEntries(kept);
}
2124
/**
 * In-memory cache of upstream responses keyed by a hash of the request body.
 *
 * Entries live in `cache` (key -> response + cachedAt/expiresAt). Every store
 * also appends an `{ expiresAt, key }` record to `expirationHeap`, which
 * `evict()` sorts to find expired / soonest-expiring entries. A heap record is
 * "stale" when its key is gone from `cache` or was re-stored with a different
 * `expiresAt`; stale records are skipped and periodically compacted so the
 * array cannot grow without bound.
 */
var ResponseCache = class {
  cache = /* @__PURE__ */ new Map();
  expirationHeap = [];
  config;
  // Stats for monitoring
  stats = {
    hits: 0,
    misses: 0,
    evictions: 0
  };
  /**
   * @param {object} [config] - Partial overrides for DEFAULT_CONFIG
   *   (maxSize, defaultTTL, maxItemSize, enabled). Properties whose value is
   *   `undefined` are ignored so they do not mask a default.
   */
  constructor(config = {}) {
    const overrides = Object.fromEntries(
      Object.entries(config ?? {}).filter(([, v]) => v !== void 0)
    );
    this.config = { ...DEFAULT_CONFIG, ...overrides };
  }
  /**
   * Generate cache key from request body.
   *
   * The body is parsed as JSON, normalized (normalizeForCache), canonicalized
   * (canonicalize2) and hashed. If any of those steps throws, the raw body
   * text is hashed instead.
   *
   * @param {string|Buffer} body - Raw request body.
   * @returns {string} First 32 hex characters of the SHA-256 digest.
   */
  static generateKey(body) {
    const text = typeof body === "string" ? body : body.toString();
    let keyContent;
    try {
      keyContent = JSON.stringify(canonicalize2(normalizeForCache(JSON.parse(text))));
    } catch {
      // Not JSON (or not an object): fall back to hashing the body verbatim.
      keyContent = text;
    }
    return createHash2("sha256").update(keyContent).digest("hex").slice(0, 32);
  }
  /**
   * Check if caching is enabled for this request.
   *
   * Returns false when the cache is disabled, when the `cache-control`
   * request header contains `no-cache` or `no-store` (case-insensitive), or
   * when the JSON body has `cache: false` or `no_cache: true`.
   *
   * @param {string|Buffer} body - Raw request body.
   * @param {Record<string, string|string[]>} [headers] - Request headers with
   *   lower-case names.
   * @returns {boolean}
   */
  shouldCache(body, headers) {
    if (!this.config.enabled) return false;
    const rawDirective = headers?.["cache-control"];
    const directive = (Array.isArray(rawDirective) ? rawDirective.join(",") : rawDirective) ?? "";
    if (typeof directive === "string") {
      const lowered = directive.toLowerCase();
      if (lowered.includes("no-cache") || lowered.includes("no-store")) {
        return false;
      }
    }
    try {
      const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
      if (parsed?.cache === false || parsed?.no_cache === true) {
        return false;
      }
    } catch {
      // Deliberate best-effort: an unparseable body carries no opt-out flag.
    }
    return true;
  }
  /**
   * Get cached response if available and not expired.
   *
   * Updates the hit/miss counters; an expired entry is removed and counted as
   * a miss.
   *
   * @param {string} key - Key produced by generateKey().
   * @returns {object|undefined} The stored entry (response fields plus
   *   cachedAt/expiresAt), or undefined.
   */
  get(key) {
    const entry = this.cache.get(key);
    if (entry && Date.now() <= entry.expiresAt) {
      this.stats.hits++;
      return entry;
    }
    if (entry) {
      this.cache.delete(key);
    }
    this.stats.misses++;
    return void 0;
  }
  /**
   * Cache a response with optional custom TTL.
   *
   * Nothing is stored when the cache is disabled, `maxSize` is not positive,
   * the body exceeds `maxItemSize`, the status is >= 400, or the effective
   * TTL is not a positive finite number.
   *
   * @param {string} key - Key produced by generateKey().
   * @param {{body: {length: number}, status: number}} response - Response to
   *   store; all of its own properties are copied into the entry.
   * @param {number} [ttlSeconds] - Lifetime in seconds; defaults to
   *   config.defaultTTL.
   */
  set(key, response, ttlSeconds) {
    if (!this.config.enabled || this.config.maxSize <= 0) return;
    if (response.body.length > this.config.maxItemSize) {
      console.log(`[ResponseCache] Skipping cache - item too large: ${response.body.length} bytes`);
      return;
    }
    if (response.status >= 400) {
      return;
    }
    const ttl = ttlSeconds ?? this.config.defaultTTL;
    // A NaN TTL would yield an entry that never expires and that evict()
    // could never match (NaN !== NaN), so reject it up front.
    if (!Number.isFinite(ttl) || ttl <= 0) {
      return;
    }
    // Overwriting an existing key does not grow the cache, so only make room
    // when a genuinely new key arrives at capacity.
    if (!this.cache.has(key) && this.cache.size >= this.config.maxSize) {
      this.evict();
    }
    const now = Date.now();
    const expiresAt = now + ttl * 1e3;
    this.cache.delete(key);
    this.cache.set(key, { ...response, cachedAt: now, expiresAt });
    this.expirationHeap.push({ expiresAt, key });
    // Re-stored and expired keys leave stale records behind; compact once
    // they outnumber what a full cache could legitimately need.
    if (this.expirationHeap.length > 2 * this.config.maxSize) {
      this.compactHeap();
    }
  }
  /**
   * Evict expired and oldest entries to make room.
   *
   * Walks the heap in expiry order: every live entry that has expired is
   * removed, then soonest-expiring live entries are removed until the cache
   * holds fewer than `maxSize` entries. Stale heap records are discarded
   * without touching the cache or the eviction counter.
   */
  evict() {
    const now = Date.now();
    const heap = this.expirationHeap;
    heap.sort((a, b) => a.expiresAt - b.expiresAt);
    let consumed = 0;
    for (; consumed < heap.length; consumed++) {
      const { key, expiresAt } = heap[consumed];
      const live = this.cache.get(key);
      if (!live || live.expiresAt !== expiresAt) {
        continue;
      }
      if (expiresAt > now && this.cache.size < this.config.maxSize) {
        break;
      }
      this.cache.delete(key);
      this.stats.evictions++;
    }
    // Drop everything examined in one pass instead of shift() per record.
    heap.splice(0, consumed);
    // Safety net: if the heap could not account for every entry, fall back
    // to insertion order so the size bound still holds.
    for (const key of this.cache.keys()) {
      if (this.cache.size < this.config.maxSize) break;
      this.cache.delete(key);
      this.stats.evictions++;
    }
  }
  /**
   * Remove heap records that no longer describe a live cache entry, keeping
   * at most one record per key.
   */
  compactHeap() {
    const seen = /* @__PURE__ */ new Set();
    this.expirationHeap = this.expirationHeap.filter(({ key, expiresAt }) => {
      if (seen.has(key) || this.cache.get(key)?.expiresAt !== expiresAt) {
        return false;
      }
      seen.add(key);
      return true;
    });
  }
  /**
   * Get cache statistics.
   *
   * @returns {{size: number, maxSize: number, hits: number, misses: number,
   *   evictions: number, hitRate: string}} `hitRate` is a percentage string
   *   with one decimal place, or "0%" before any lookup.
   */
  getStats() {
    const { hits, misses, evictions } = this.stats;
    const lookups = hits + misses;
    const hitRate = lookups > 0 ? (hits / lookups * 100).toFixed(1) + "%" : "0%";
    return {
      size: this.cache.size,
      maxSize: this.config.maxSize,
      hits,
      misses,
      evictions,
      hitRate
    };
  }
  /**
   * Clear all cached entries. Counters in `stats` are left as they are.
   */
  clear() {
    this.cache.clear();
    this.expirationHeap = [];
  }
  /**
   * Check if cache is enabled.
   *
   * @returns {boolean}
   */
  isEnabled() {
    return this.config.enabled;
  }
};
2275
+
2049
2276
  // src/balance.ts
2050
2277
  import { createPublicClient, http, erc20Abi } from "viem";
2051
2278
  import { base } from "viem/chains";
@@ -3502,6 +3729,7 @@ async function startProxy(options) {
3502
3729
  modelPricing
3503
3730
  };
3504
3731
  const deduplicator = new RequestDeduplicator();
3732
+ const responseCache = new ResponseCache(options.cacheConfig);
3505
3733
  const sessionStore = new SessionStore(options.sessionConfig);
3506
3734
  const connections = /* @__PURE__ */ new Set();
3507
3735
  const server = createServer(async (req, res) => {
@@ -3542,6 +3770,15 @@ async function startProxy(options) {
3542
3770
  res.end(JSON.stringify(response));
3543
3771
  return;
3544
3772
  }
3773
+ if (req.url === "/cache" || req.url?.startsWith("/cache?")) {
3774
+ const stats = responseCache.getStats();
3775
+ res.writeHead(200, {
3776
+ "Content-Type": "application/json",
3777
+ "Cache-Control": "no-cache"
3778
+ });
3779
+ res.end(JSON.stringify(stats, null, 2));
3780
+ return;
3781
+ }
3545
3782
  if (req.url === "/stats" || req.url?.startsWith("/stats?")) {
3546
3783
  try {
3547
3784
  const url = new URL(req.url, "http://localhost");
@@ -3588,7 +3825,8 @@ async function startProxy(options) {
3588
3825
  routerOpts,
3589
3826
  deduplicator,
3590
3827
  balanceMonitor,
3591
- sessionStore
3828
+ sessionStore,
3829
+ responseCache
3592
3830
  );
3593
3831
  } catch (err) {
3594
3832
  const error = err instanceof Error ? err : new Error(String(err));
@@ -3789,7 +4027,7 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
3789
4027
  };
3790
4028
  }
3791
4029
  }
3792
- async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore) {
4030
+ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore, responseCache) {
3793
4031
  const startTime = Date.now();
3794
4032
  const upstreamUrl = `${apiBase}${req.url}`;
3795
4033
  const bodyChunks = [];
@@ -3957,6 +4195,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
3957
4195
  );
3958
4196
  }
3959
4197
  }
4198
+ const cacheKey = ResponseCache.generateKey(body);
4199
+ const reqHeaders = {};
4200
+ for (const [key, value] of Object.entries(req.headers)) {
4201
+ if (typeof value === "string") reqHeaders[key] = value;
4202
+ }
4203
+ if (responseCache.shouldCache(body, reqHeaders)) {
4204
+ const cachedResponse = responseCache.get(cacheKey);
4205
+ if (cachedResponse) {
4206
+ console.log(`[ClawRouter] Cache HIT for ${cachedResponse.model} (saved API call)`);
4207
+ res.writeHead(cachedResponse.status, cachedResponse.headers);
4208
+ res.end(cachedResponse.body);
4209
+ return;
4210
+ }
4211
+ }
3960
4212
  const dedupKey = RequestDeduplicator.hash(body);
3961
4213
  const cached = deduplicator.getCached(dedupKey);
3962
4214
  if (cached) {
@@ -4309,12 +4561,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
4309
4561
  }
4310
4562
  }
4311
4563
  res.end();
4564
+ const responseBody = Buffer.concat(responseChunks);
4312
4565
  deduplicator.complete(dedupKey, {
4313
4566
  status: upstream.status,
4314
4567
  headers: responseHeaders,
4315
- body: Buffer.concat(responseChunks),
4568
+ body: responseBody,
4316
4569
  completedAt: Date.now()
4317
4570
  });
4571
+ if (upstream.status === 200 && responseCache.shouldCache(body)) {
4572
+ responseCache.set(cacheKey, {
4573
+ body: responseBody,
4574
+ status: upstream.status,
4575
+ headers: responseHeaders,
4576
+ model: modelId
4577
+ });
4578
+ console.log(`[ClawRouter] Cached response for ${modelId} (${responseBody.length} bytes)`);
4579
+ }
4318
4580
  }
4319
4581
  if (estimatedCostMicros !== void 0) {
4320
4582
  balanceMonitor.deductEstimated(estimatedCostMicros);