@blockrun/clawrouter 0.9.8 → 0.9.10
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +30 -4
- package/dist/cli.js +286 -13
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +92 -1
- package/dist/index.js +295 -17
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -71,7 +71,7 @@ Choose your routing strategy with `/model <profile>`:
|
|
|
71
71
|
|
|
72
72
|
**Other shortcuts:**
|
|
73
73
|
|
|
74
|
-
- **Model aliases:** `/model sonnet`, `/model grok`, `/model gpt5`, `/model o3`
|
|
74
|
+
- **Model aliases:** `/model br-sonnet`, `/model grok`, `/model gpt5`, `/model o3`
|
|
75
75
|
- **Specific models:** `blockrun/openai/gpt-4o` or `blockrun/anthropic/claude-sonnet-4`
|
|
76
76
|
- **Bring your wallet:** `export BLOCKRUN_WALLET_KEY=0x...`
|
|
77
77
|
|
|
@@ -150,7 +150,7 @@ ClawRouter v0.5+ includes intelligent features that work automatically:
|
|
|
150
150
|
- **Agentic auto-detect** — routes multi-step tasks to Kimi K2.5
|
|
151
151
|
- **Tool detection** — auto-switches when `tools` array present
|
|
152
152
|
- **Context-aware** — filters models that can't handle your context size
|
|
153
|
-
- **Model aliases** — `/model free`, `/model sonnet`, `/model grok`
|
|
153
|
+
- **Model aliases** — `/model free`, `/model br-sonnet`, `/model grok`
|
|
154
154
|
- **Session persistence** — pins model for multi-turn conversations
|
|
155
155
|
- **Free tier fallback** — keeps working when wallet is empty
|
|
156
156
|
- **Auto-update check** — notifies you when a new version is available
|
|
@@ -315,11 +315,12 @@ const decision = route("Prove sqrt(2) is irrational", ...);
|
|
|
315
315
|
|
|
316
316
|
---
|
|
317
317
|
|
|
318
|
-
## Performance Optimizations (v0.3)
|
|
318
|
+
## Performance Optimizations (v0.3+)
|
|
319
319
|
|
|
320
320
|
- **SSE heartbeat**: Sends headers + heartbeat immediately, preventing upstream timeouts
|
|
321
321
|
- **Response dedup**: SHA-256 hash → 30s cache, prevents double-charge on retries
|
|
322
322
|
- **Payment pre-auth**: Caches 402 params, pre-signs USDC, skips 402 round trip (~200ms saved)
|
|
323
|
+
- **Response cache**: LLM response caching with 10-minute TTL, saves cost on repeated queries
|
|
323
324
|
|
|
324
325
|
---
|
|
325
326
|
|
|
@@ -362,6 +363,20 @@ Based on [50+ OpenClaw issues](https://github.com/openclaw/openclaw/issues?q=ope
|
|
|
362
363
|
|
|
363
364
|
---
|
|
364
365
|
|
|
366
|
+
## Why we built this
|
|
367
|
+
|
|
368
|
+
- **Agents need to pay and get paid — without humans in the loop.** Today's AI infra requires accounts, API keys, manual billing. But an agent spawning 50 sub-agents shouldn't need a human to provision 50 keys. An agent completing a bounty shouldn't wait for someone to invoice and collect.
|
|
369
|
+
|
|
370
|
+
- **Payment IS authentication.** A wallet signature proves you can pay — no shared secrets that leak into prompts, no accounts to create, no keys to rotate.
|
|
371
|
+
|
|
372
|
+
- **Agents should control their own money.** Non-custodial means the agent holds the keys. No platform can freeze funds or change terms overnight.
|
|
373
|
+
|
|
374
|
+
- **Cost optimization should be automatic.** Agents shouldn't overpay $25/M for "what is 2+2". Smart routing to the cheapest capable model saves 92% on typical workloads.
|
|
375
|
+
|
|
376
|
+
The result: an agent can generate a wallet, receive funds, call any model, pay per-request, and earn money — all programmatically. **This is agentic commerce.**
|
|
377
|
+
|
|
378
|
+
---
|
|
379
|
+
|
|
365
380
|
## Troubleshooting
|
|
366
381
|
|
|
367
382
|
Quick checklist:
|
|
@@ -422,15 +437,26 @@ Your wallet key remains at `~/.openclaw/blockrun/wallet.key` — back it up befo
|
|
|
422
437
|
- [x] Context-aware routing — filter out models that can't handle context size
|
|
423
438
|
- [x] Session persistence — pin model for multi-turn conversations
|
|
424
439
|
- [x] Cost tracking — /stats command with savings dashboard
|
|
425
|
-
- [x] Model aliases — `/model free`, `/model sonnet`, `/model grok`, etc.
|
|
440
|
+
- [x] Model aliases — `/model free`, `/model br-sonnet`, `/model grok`, etc.
|
|
426
441
|
- [x] Free tier — gpt-oss-120b for $0 when wallet is empty
|
|
427
442
|
- [x] Auto-update — startup version check with one-command update
|
|
443
|
+
- [x] Response cache — LiteLLM-inspired caching for repeated requests
|
|
428
444
|
- [ ] Cascade routing — try cheap model first, escalate on low quality
|
|
429
445
|
- [ ] Spend controls — daily/monthly budgets
|
|
430
446
|
- [ ] Remote analytics — cost tracking at blockrun.ai
|
|
431
447
|
|
|
432
448
|
---
|
|
433
449
|
|
|
450
|
+
## Support / talk with founders
|
|
451
|
+
|
|
452
|
+
- [Schedule Demo 👋](https://calendly.com/vickyfu9/30min)
|
|
453
|
+
- [Community Telegram 💭](https://t.me/blockrunAI)
|
|
454
|
+
- [X / Twitter 🐦](https://x.com/BlockRunAI)
|
|
455
|
+
- Telegram 📱 [@bc1max](https://t.me/bc1max)
|
|
456
|
+
- Our email ✉️ vicky@blockrun.ai
|
|
457
|
+
|
|
458
|
+
---
|
|
459
|
+
|
|
434
460
|
## License
|
|
435
461
|
|
|
436
462
|
MIT
|
package/dist/cli.js
CHANGED
|
@@ -1199,27 +1199,43 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1199
1199
|
}
|
|
1200
1200
|
},
|
|
1201
1201
|
// Premium tier configs - best quality (blockrun/premium)
|
|
1202
|
-
// kimi=coding, sonnet=reasoning/instructions, opus=
|
|
1202
|
+
// codex=complex coding, kimi=simple coding, sonnet=reasoning/instructions, opus=architecture/PM/audits
|
|
1203
1203
|
premiumTiers: {
|
|
1204
1204
|
SIMPLE: {
|
|
1205
1205
|
primary: "moonshot/kimi-k2.5",
|
|
1206
|
-
// $0.50/$2.40 - good for coding
|
|
1206
|
+
// $0.50/$2.40 - good for simple coding
|
|
1207
1207
|
fallback: ["anthropic/claude-haiku-4.5", "google/gemini-2.5-flash", "xai/grok-code-fast-1"]
|
|
1208
1208
|
},
|
|
1209
1209
|
MEDIUM: {
|
|
1210
1210
|
primary: "anthropic/claude-sonnet-4",
|
|
1211
1211
|
// $3/$15 - reasoning/instructions
|
|
1212
|
-
fallback: [
|
|
1212
|
+
fallback: [
|
|
1213
|
+
"openai/gpt-5.2-codex",
|
|
1214
|
+
"moonshot/kimi-k2.5",
|
|
1215
|
+
"google/gemini-2.5-pro",
|
|
1216
|
+
"xai/grok-4-0709"
|
|
1217
|
+
]
|
|
1213
1218
|
},
|
|
1214
1219
|
COMPLEX: {
|
|
1215
|
-
primary: "
|
|
1216
|
-
// $
|
|
1217
|
-
fallback: [
|
|
1220
|
+
primary: "openai/gpt-5.2-codex",
|
|
1221
|
+
// $2.50/$10 - complex coding (78% cost savings vs Opus)
|
|
1222
|
+
fallback: [
|
|
1223
|
+
"anthropic/claude-opus-4.6",
|
|
1224
|
+
"anthropic/claude-opus-4.5",
|
|
1225
|
+
"anthropic/claude-sonnet-4",
|
|
1226
|
+
"google/gemini-3-pro-preview",
|
|
1227
|
+
"moonshot/kimi-k2.5"
|
|
1228
|
+
]
|
|
1218
1229
|
},
|
|
1219
1230
|
REASONING: {
|
|
1220
1231
|
primary: "anthropic/claude-sonnet-4",
|
|
1221
1232
|
// $3/$15 - best for reasoning/instructions
|
|
1222
|
-
fallback: [
|
|
1233
|
+
fallback: [
|
|
1234
|
+
"anthropic/claude-opus-4.6",
|
|
1235
|
+
"anthropic/claude-opus-4.5",
|
|
1236
|
+
"openai/o3",
|
|
1237
|
+
"xai/grok-4-1-fast-reasoning"
|
|
1238
|
+
]
|
|
1223
1239
|
}
|
|
1224
1240
|
},
|
|
1225
1241
|
// Agentic tier configs - models that excel at multi-step autonomous tasks
|
|
@@ -1241,7 +1257,7 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1241
1257
|
COMPLEX: {
|
|
1242
1258
|
primary: "anthropic/claude-sonnet-4",
|
|
1243
1259
|
fallback: [
|
|
1244
|
-
"anthropic/claude-opus-4.
|
|
1260
|
+
"anthropic/claude-opus-4.6",
|
|
1245
1261
|
// Latest Opus - best agentic
|
|
1246
1262
|
"openai/gpt-5.2",
|
|
1247
1263
|
"google/gemini-3-pro-preview",
|
|
@@ -1252,7 +1268,7 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1252
1268
|
primary: "anthropic/claude-sonnet-4",
|
|
1253
1269
|
// Strong tool use + reasoning for agentic tasks
|
|
1254
1270
|
fallback: [
|
|
1255
|
-
"anthropic/claude-opus-4.
|
|
1271
|
+
"anthropic/claude-opus-4.6",
|
|
1256
1272
|
"xai/grok-4-fast-reasoning",
|
|
1257
1273
|
"moonshot/kimi-k2.5",
|
|
1258
1274
|
"deepseek/deepseek-reasoner"
|
|
@@ -1343,12 +1359,16 @@ var MODEL_ALIASES = {
|
|
|
1343
1359
|
// Claude
|
|
1344
1360
|
claude: "anthropic/claude-sonnet-4",
|
|
1345
1361
|
sonnet: "anthropic/claude-sonnet-4",
|
|
1346
|
-
opus: "anthropic/claude-opus-4",
|
|
1362
|
+
opus: "anthropic/claude-opus-4.6",
|
|
1363
|
+
// Updated to latest Opus 4.6
|
|
1364
|
+
"opus-46": "anthropic/claude-opus-4.6",
|
|
1365
|
+
"opus-45": "anthropic/claude-opus-4.5",
|
|
1347
1366
|
haiku: "anthropic/claude-haiku-4.5",
|
|
1348
1367
|
// OpenAI
|
|
1349
1368
|
gpt: "openai/gpt-4o",
|
|
1350
1369
|
gpt4: "openai/gpt-4o",
|
|
1351
1370
|
gpt5: "openai/gpt-5.2",
|
|
1371
|
+
codex: "openai/gpt-5.2-codex",
|
|
1352
1372
|
mini: "openai/gpt-4o-mini",
|
|
1353
1373
|
o3: "openai/o3",
|
|
1354
1374
|
// DeepSeek
|
|
@@ -1453,6 +1473,16 @@ var BLOCKRUN_MODELS = [
|
|
|
1453
1473
|
maxOutput: 128e3,
|
|
1454
1474
|
reasoning: true
|
|
1455
1475
|
},
|
|
1476
|
+
// OpenAI Codex Family
|
|
1477
|
+
{
|
|
1478
|
+
id: "openai/gpt-5.2-codex",
|
|
1479
|
+
name: "GPT-5.2 Codex",
|
|
1480
|
+
inputPrice: 2.5,
|
|
1481
|
+
outputPrice: 12,
|
|
1482
|
+
contextWindow: 128e3,
|
|
1483
|
+
maxOutput: 32e3,
|
|
1484
|
+
agentic: true
|
|
1485
|
+
},
|
|
1456
1486
|
// OpenAI GPT-4 Family
|
|
1457
1487
|
{
|
|
1458
1488
|
id: "openai/gpt-4.1",
|
|
@@ -1558,6 +1588,17 @@ var BLOCKRUN_MODELS = [
|
|
|
1558
1588
|
reasoning: true,
|
|
1559
1589
|
agentic: true
|
|
1560
1590
|
},
|
|
1591
|
+
{
|
|
1592
|
+
id: "anthropic/claude-opus-4.6",
|
|
1593
|
+
name: "Claude Opus 4.6",
|
|
1594
|
+
inputPrice: 5,
|
|
1595
|
+
outputPrice: 25,
|
|
1596
|
+
contextWindow: 2e5,
|
|
1597
|
+
maxOutput: 64e3,
|
|
1598
|
+
reasoning: true,
|
|
1599
|
+
vision: true,
|
|
1600
|
+
agentic: true
|
|
1601
|
+
},
|
|
1561
1602
|
// Google
|
|
1562
1603
|
{
|
|
1563
1604
|
id: "google/gemini-3-pro-preview",
|
|
@@ -2035,6 +2076,203 @@ var RequestDeduplicator = class {
|
|
|
2035
2076
|
}
|
|
2036
2077
|
};
|
|
2037
2078
|
|
|
2079
|
+
// src/response-cache.ts
|
|
2080
|
+
import { createHash as createHash2 } from "crypto";
|
|
2081
|
+
var DEFAULT_CONFIG = {
|
|
2082
|
+
maxSize: 200,
|
|
2083
|
+
defaultTTL: 600,
|
|
2084
|
+
maxItemSize: 1048576,
|
|
2085
|
+
// 1MB
|
|
2086
|
+
enabled: true
|
|
2087
|
+
};
|
|
2088
|
+
function canonicalize2(obj) {
|
|
2089
|
+
if (obj === null || typeof obj !== "object") {
|
|
2090
|
+
return obj;
|
|
2091
|
+
}
|
|
2092
|
+
if (Array.isArray(obj)) {
|
|
2093
|
+
return obj.map(canonicalize2);
|
|
2094
|
+
}
|
|
2095
|
+
const sorted = {};
|
|
2096
|
+
for (const key of Object.keys(obj).sort()) {
|
|
2097
|
+
sorted[key] = canonicalize2(obj[key]);
|
|
2098
|
+
}
|
|
2099
|
+
return sorted;
|
|
2100
|
+
}
|
|
2101
|
+
var TIMESTAMP_PATTERN2 = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;
|
|
2102
|
+
function normalizeForCache(obj) {
|
|
2103
|
+
const result = {};
|
|
2104
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
2105
|
+
if (["stream", "user", "request_id", "x-request-id"].includes(key)) {
|
|
2106
|
+
continue;
|
|
2107
|
+
}
|
|
2108
|
+
if (key === "messages" && Array.isArray(value)) {
|
|
2109
|
+
result[key] = value.map((msg) => {
|
|
2110
|
+
if (typeof msg === "object" && msg !== null) {
|
|
2111
|
+
const m = msg;
|
|
2112
|
+
if (typeof m.content === "string") {
|
|
2113
|
+
return { ...m, content: m.content.replace(TIMESTAMP_PATTERN2, "") };
|
|
2114
|
+
}
|
|
2115
|
+
}
|
|
2116
|
+
return msg;
|
|
2117
|
+
});
|
|
2118
|
+
} else {
|
|
2119
|
+
result[key] = value;
|
|
2120
|
+
}
|
|
2121
|
+
}
|
|
2122
|
+
return result;
|
|
2123
|
+
}
|
|
2124
|
+
var ResponseCache = class {
|
|
2125
|
+
cache = /* @__PURE__ */ new Map();
|
|
2126
|
+
expirationHeap = [];
|
|
2127
|
+
config;
|
|
2128
|
+
// Stats for monitoring
|
|
2129
|
+
stats = {
|
|
2130
|
+
hits: 0,
|
|
2131
|
+
misses: 0,
|
|
2132
|
+
evictions: 0
|
|
2133
|
+
};
|
|
2134
|
+
constructor(config = {}) {
|
|
2135
|
+
const filtered = Object.fromEntries(
|
|
2136
|
+
Object.entries(config).filter(([, v]) => v !== void 0)
|
|
2137
|
+
);
|
|
2138
|
+
this.config = { ...DEFAULT_CONFIG, ...filtered };
|
|
2139
|
+
}
|
|
2140
|
+
/**
|
|
2141
|
+
* Generate cache key from request body.
|
|
2142
|
+
* Hashes: model + messages + temperature + max_tokens + other params
|
|
2143
|
+
*/
|
|
2144
|
+
static generateKey(body) {
|
|
2145
|
+
try {
|
|
2146
|
+
const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
|
|
2147
|
+
const normalized = normalizeForCache(parsed);
|
|
2148
|
+
const canonical = canonicalize2(normalized);
|
|
2149
|
+
const keyContent = JSON.stringify(canonical);
|
|
2150
|
+
return createHash2("sha256").update(keyContent).digest("hex").slice(0, 32);
|
|
2151
|
+
} catch {
|
|
2152
|
+
const content = typeof body === "string" ? body : body.toString();
|
|
2153
|
+
return createHash2("sha256").update(content).digest("hex").slice(0, 32);
|
|
2154
|
+
}
|
|
2155
|
+
}
|
|
2156
|
+
/**
|
|
2157
|
+
* Check if caching is enabled for this request.
|
|
2158
|
+
* Respects cache control headers and request params.
|
|
2159
|
+
*/
|
|
2160
|
+
shouldCache(body, headers) {
|
|
2161
|
+
if (!this.config.enabled) return false;
|
|
2162
|
+
if (headers?.["cache-control"]?.includes("no-cache")) {
|
|
2163
|
+
return false;
|
|
2164
|
+
}
|
|
2165
|
+
try {
|
|
2166
|
+
const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
|
|
2167
|
+
if (parsed.cache === false || parsed.no_cache === true) {
|
|
2168
|
+
return false;
|
|
2169
|
+
}
|
|
2170
|
+
} catch {
|
|
2171
|
+
}
|
|
2172
|
+
return true;
|
|
2173
|
+
}
|
|
2174
|
+
/**
|
|
2175
|
+
* Get cached response if available and not expired.
|
|
2176
|
+
*/
|
|
2177
|
+
get(key) {
|
|
2178
|
+
const entry = this.cache.get(key);
|
|
2179
|
+
if (!entry) {
|
|
2180
|
+
this.stats.misses++;
|
|
2181
|
+
return void 0;
|
|
2182
|
+
}
|
|
2183
|
+
if (Date.now() > entry.expiresAt) {
|
|
2184
|
+
this.cache.delete(key);
|
|
2185
|
+
this.stats.misses++;
|
|
2186
|
+
return void 0;
|
|
2187
|
+
}
|
|
2188
|
+
this.stats.hits++;
|
|
2189
|
+
return entry;
|
|
2190
|
+
}
|
|
2191
|
+
/**
|
|
2192
|
+
* Cache a response with optional custom TTL.
|
|
2193
|
+
*/
|
|
2194
|
+
set(key, response, ttlSeconds) {
|
|
2195
|
+
if (!this.config.enabled || this.config.maxSize <= 0) return;
|
|
2196
|
+
if (response.body.length > this.config.maxItemSize) {
|
|
2197
|
+
console.log(`[ResponseCache] Skipping cache - item too large: ${response.body.length} bytes`);
|
|
2198
|
+
return;
|
|
2199
|
+
}
|
|
2200
|
+
if (response.status >= 400) {
|
|
2201
|
+
return;
|
|
2202
|
+
}
|
|
2203
|
+
if (this.cache.size >= this.config.maxSize) {
|
|
2204
|
+
this.evict();
|
|
2205
|
+
}
|
|
2206
|
+
const now = Date.now();
|
|
2207
|
+
const ttl = ttlSeconds ?? this.config.defaultTTL;
|
|
2208
|
+
const expiresAt = now + ttl * 1e3;
|
|
2209
|
+
const entry = {
|
|
2210
|
+
...response,
|
|
2211
|
+
cachedAt: now,
|
|
2212
|
+
expiresAt
|
|
2213
|
+
};
|
|
2214
|
+
this.cache.set(key, entry);
|
|
2215
|
+
this.expirationHeap.push({ expiresAt, key });
|
|
2216
|
+
}
|
|
2217
|
+
/**
|
|
2218
|
+
* Evict expired and oldest entries to make room.
|
|
2219
|
+
*/
|
|
2220
|
+
evict() {
|
|
2221
|
+
const now = Date.now();
|
|
2222
|
+
this.expirationHeap.sort((a, b) => a.expiresAt - b.expiresAt);
|
|
2223
|
+
while (this.expirationHeap.length > 0) {
|
|
2224
|
+
const oldest = this.expirationHeap[0];
|
|
2225
|
+
const entry = this.cache.get(oldest.key);
|
|
2226
|
+
if (!entry || entry.expiresAt !== oldest.expiresAt) {
|
|
2227
|
+
this.expirationHeap.shift();
|
|
2228
|
+
continue;
|
|
2229
|
+
}
|
|
2230
|
+
if (oldest.expiresAt <= now) {
|
|
2231
|
+
this.cache.delete(oldest.key);
|
|
2232
|
+
this.expirationHeap.shift();
|
|
2233
|
+
this.stats.evictions++;
|
|
2234
|
+
} else {
|
|
2235
|
+
break;
|
|
2236
|
+
}
|
|
2237
|
+
}
|
|
2238
|
+
while (this.cache.size >= this.config.maxSize && this.expirationHeap.length > 0) {
|
|
2239
|
+
const oldest = this.expirationHeap.shift();
|
|
2240
|
+
if (this.cache.has(oldest.key)) {
|
|
2241
|
+
this.cache.delete(oldest.key);
|
|
2242
|
+
this.stats.evictions++;
|
|
2243
|
+
}
|
|
2244
|
+
}
|
|
2245
|
+
}
|
|
2246
|
+
/**
|
|
2247
|
+
* Get cache statistics.
|
|
2248
|
+
*/
|
|
2249
|
+
getStats() {
|
|
2250
|
+
const total = this.stats.hits + this.stats.misses;
|
|
2251
|
+
const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) + "%" : "0%";
|
|
2252
|
+
return {
|
|
2253
|
+
size: this.cache.size,
|
|
2254
|
+
maxSize: this.config.maxSize,
|
|
2255
|
+
hits: this.stats.hits,
|
|
2256
|
+
misses: this.stats.misses,
|
|
2257
|
+
evictions: this.stats.evictions,
|
|
2258
|
+
hitRate
|
|
2259
|
+
};
|
|
2260
|
+
}
|
|
2261
|
+
/**
|
|
2262
|
+
* Clear all cached entries.
|
|
2263
|
+
*/
|
|
2264
|
+
clear() {
|
|
2265
|
+
this.cache.clear();
|
|
2266
|
+
this.expirationHeap = [];
|
|
2267
|
+
}
|
|
2268
|
+
/**
|
|
2269
|
+
* Check if cache is enabled.
|
|
2270
|
+
*/
|
|
2271
|
+
isEnabled() {
|
|
2272
|
+
return this.config.enabled;
|
|
2273
|
+
}
|
|
2274
|
+
};
|
|
2275
|
+
|
|
2038
2276
|
// src/balance.ts
|
|
2039
2277
|
import { createPublicClient, http, erc20Abi } from "viem";
|
|
2040
2278
|
import { base } from "viem/chains";
|
|
@@ -3491,6 +3729,7 @@ async function startProxy(options) {
|
|
|
3491
3729
|
modelPricing
|
|
3492
3730
|
};
|
|
3493
3731
|
const deduplicator = new RequestDeduplicator();
|
|
3732
|
+
const responseCache = new ResponseCache(options.cacheConfig);
|
|
3494
3733
|
const sessionStore = new SessionStore(options.sessionConfig);
|
|
3495
3734
|
const connections = /* @__PURE__ */ new Set();
|
|
3496
3735
|
const server = createServer(async (req, res) => {
|
|
@@ -3531,6 +3770,15 @@ async function startProxy(options) {
|
|
|
3531
3770
|
res.end(JSON.stringify(response));
|
|
3532
3771
|
return;
|
|
3533
3772
|
}
|
|
3773
|
+
if (req.url === "/cache" || req.url?.startsWith("/cache?")) {
|
|
3774
|
+
const stats = responseCache.getStats();
|
|
3775
|
+
res.writeHead(200, {
|
|
3776
|
+
"Content-Type": "application/json",
|
|
3777
|
+
"Cache-Control": "no-cache"
|
|
3778
|
+
});
|
|
3779
|
+
res.end(JSON.stringify(stats, null, 2));
|
|
3780
|
+
return;
|
|
3781
|
+
}
|
|
3534
3782
|
if (req.url === "/stats" || req.url?.startsWith("/stats?")) {
|
|
3535
3783
|
try {
|
|
3536
3784
|
const url = new URL(req.url, "http://localhost");
|
|
@@ -3577,7 +3825,8 @@ async function startProxy(options) {
|
|
|
3577
3825
|
routerOpts,
|
|
3578
3826
|
deduplicator,
|
|
3579
3827
|
balanceMonitor,
|
|
3580
|
-
sessionStore
|
|
3828
|
+
sessionStore,
|
|
3829
|
+
responseCache
|
|
3581
3830
|
);
|
|
3582
3831
|
} catch (err) {
|
|
3583
3832
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
@@ -3778,7 +4027,7 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
|
|
|
3778
4027
|
};
|
|
3779
4028
|
}
|
|
3780
4029
|
}
|
|
3781
|
-
async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore) {
|
|
4030
|
+
async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore, responseCache) {
|
|
3782
4031
|
const startTime = Date.now();
|
|
3783
4032
|
const upstreamUrl = `${apiBase}${req.url}`;
|
|
3784
4033
|
const bodyChunks = [];
|
|
@@ -3946,6 +4195,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3946
4195
|
);
|
|
3947
4196
|
}
|
|
3948
4197
|
}
|
|
4198
|
+
const cacheKey = ResponseCache.generateKey(body);
|
|
4199
|
+
const reqHeaders = {};
|
|
4200
|
+
for (const [key, value] of Object.entries(req.headers)) {
|
|
4201
|
+
if (typeof value === "string") reqHeaders[key] = value;
|
|
4202
|
+
}
|
|
4203
|
+
if (responseCache.shouldCache(body, reqHeaders)) {
|
|
4204
|
+
const cachedResponse = responseCache.get(cacheKey);
|
|
4205
|
+
if (cachedResponse) {
|
|
4206
|
+
console.log(`[ClawRouter] Cache HIT for ${cachedResponse.model} (saved API call)`);
|
|
4207
|
+
res.writeHead(cachedResponse.status, cachedResponse.headers);
|
|
4208
|
+
res.end(cachedResponse.body);
|
|
4209
|
+
return;
|
|
4210
|
+
}
|
|
4211
|
+
}
|
|
3949
4212
|
const dedupKey = RequestDeduplicator.hash(body);
|
|
3950
4213
|
const cached = deduplicator.getCached(dedupKey);
|
|
3951
4214
|
if (cached) {
|
|
@@ -4298,12 +4561,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
4298
4561
|
}
|
|
4299
4562
|
}
|
|
4300
4563
|
res.end();
|
|
4564
|
+
const responseBody = Buffer.concat(responseChunks);
|
|
4301
4565
|
deduplicator.complete(dedupKey, {
|
|
4302
4566
|
status: upstream.status,
|
|
4303
4567
|
headers: responseHeaders,
|
|
4304
|
-
body: Buffer.concat(responseChunks),
|
|
4568
|
+
body: responseBody,
|
|
4305
4569
|
completedAt: Date.now()
|
|
4306
4570
|
});
|
|
4571
|
+
if (upstream.status === 200 && responseCache.shouldCache(body)) {
|
|
4572
|
+
responseCache.set(cacheKey, {
|
|
4573
|
+
body: responseBody,
|
|
4574
|
+
status: upstream.status,
|
|
4575
|
+
headers: responseHeaders,
|
|
4576
|
+
model: modelId
|
|
4577
|
+
});
|
|
4578
|
+
console.log(`[ClawRouter] Cached response for ${modelId} (${responseBody.length} bytes)`);
|
|
4579
|
+
}
|
|
4307
4580
|
}
|
|
4308
4581
|
if (estimatedCostMicros !== void 0) {
|
|
4309
4582
|
balanceMonitor.deductEstimated(estimatedCostMicros);
|