@blockrun/clawrouter 0.9.7 → 0.9.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +6 -4
- package/dist/cli.js +286 -24
- package/dist/cli.js.map +1 -1
- package/dist/index.d.ts +92 -1
- package/dist/index.js +295 -28
- package/dist/index.js.map +1 -1
- package/package.json +5 -2
package/README.md
CHANGED
|
@@ -71,7 +71,7 @@ Choose your routing strategy with `/model <profile>`:
|
|
|
71
71
|
|
|
72
72
|
**Other shortcuts:**
|
|
73
73
|
|
|
74
|
-
- **Model aliases:** `/model sonnet`, `/model grok`, `/model gpt5`, `/model o3`
|
|
74
|
+
- **Model aliases:** `/model br-sonnet`, `/model grok`, `/model gpt5`, `/model o3`
|
|
75
75
|
- **Specific models:** `blockrun/openai/gpt-4o` or `blockrun/anthropic/claude-sonnet-4`
|
|
76
76
|
- **Bring your wallet:** `export BLOCKRUN_WALLET_KEY=0x...`
|
|
77
77
|
|
|
@@ -150,7 +150,7 @@ ClawRouter v0.5+ includes intelligent features that work automatically:
|
|
|
150
150
|
- **Agentic auto-detect** — routes multi-step tasks to Kimi K2.5
|
|
151
151
|
- **Tool detection** — auto-switches when `tools` array present
|
|
152
152
|
- **Context-aware** — filters models that can't handle your context size
|
|
153
|
-
- **Model aliases** — `/model free`, `/model sonnet`, `/model grok`
|
|
153
|
+
- **Model aliases** — `/model free`, `/model br-sonnet`, `/model grok`
|
|
154
154
|
- **Session persistence** — pins model for multi-turn conversations
|
|
155
155
|
- **Free tier fallback** — keeps working when wallet is empty
|
|
156
156
|
- **Auto-update check** — notifies you when a new version is available
|
|
@@ -315,11 +315,12 @@ const decision = route("Prove sqrt(2) is irrational", ...);
|
|
|
315
315
|
|
|
316
316
|
---
|
|
317
317
|
|
|
318
|
-
## Performance Optimizations (v0.3)
|
|
318
|
+
## Performance Optimizations (v0.3+)
|
|
319
319
|
|
|
320
320
|
- **SSE heartbeat**: Sends headers + heartbeat immediately, preventing upstream timeouts
|
|
321
321
|
- **Response dedup**: SHA-256 hash → 30s cache, prevents double-charge on retries
|
|
322
322
|
- **Payment pre-auth**: Caches 402 params, pre-signs USDC, skips 402 round trip (~200ms saved)
|
|
323
|
+
- **Response cache**: LLM response caching with 10-minute TTL, saves cost on repeated queries
|
|
323
324
|
|
|
324
325
|
---
|
|
325
326
|
|
|
@@ -422,9 +423,10 @@ Your wallet key remains at `~/.openclaw/blockrun/wallet.key` — back it up befo
|
|
|
422
423
|
- [x] Context-aware routing — filter out models that can't handle context size
|
|
423
424
|
- [x] Session persistence — pin model for multi-turn conversations
|
|
424
425
|
- [x] Cost tracking — /stats command with savings dashboard
|
|
425
|
-
- [x] Model aliases — `/model free`, `/model sonnet`, `/model grok`, etc.
|
|
426
|
+
- [x] Model aliases — `/model free`, `/model br-sonnet`, `/model grok`, etc.
|
|
426
427
|
- [x] Free tier — gpt-oss-120b for $0 when wallet is empty
|
|
427
428
|
- [x] Auto-update — startup version check with one-command update
|
|
429
|
+
- [x] Response cache — LiteLLM-inspired caching for repeated requests
|
|
428
430
|
- [ ] Cascade routing — try cheap model first, escalate on low quality
|
|
429
431
|
- [ ] Spend controls — daily/monthly budgets
|
|
430
432
|
- [ ] Remote analytics — cost tracking at blockrun.ai
|
package/dist/cli.js
CHANGED
|
@@ -1199,37 +1199,42 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1199
1199
|
}
|
|
1200
1200
|
},
|
|
1201
1201
|
// Premium tier configs - best quality (blockrun/premium)
|
|
1202
|
+
// codex=complex coding, kimi=simple coding, sonnet=reasoning/instructions, opus=architecture/PM/audits
|
|
1202
1203
|
premiumTiers: {
|
|
1203
1204
|
SIMPLE: {
|
|
1204
|
-
primary: "
|
|
1205
|
-
// $0.
|
|
1206
|
-
fallback: ["
|
|
1205
|
+
primary: "moonshot/kimi-k2.5",
|
|
1206
|
+
// $0.50/$2.40 - good for simple coding
|
|
1207
|
+
fallback: ["anthropic/claude-haiku-4.5", "google/gemini-2.5-flash", "xai/grok-code-fast-1"]
|
|
1207
1208
|
},
|
|
1208
1209
|
MEDIUM: {
|
|
1209
|
-
primary: "
|
|
1210
|
-
// $
|
|
1211
|
-
fallback: [
|
|
1210
|
+
primary: "anthropic/claude-sonnet-4",
|
|
1211
|
+
// $3/$15 - reasoning/instructions
|
|
1212
|
+
fallback: [
|
|
1213
|
+
"openai/gpt-5.2-codex",
|
|
1214
|
+
"moonshot/kimi-k2.5",
|
|
1215
|
+
"google/gemini-2.5-pro",
|
|
1216
|
+
"xai/grok-4-0709"
|
|
1217
|
+
]
|
|
1212
1218
|
},
|
|
1213
1219
|
COMPLEX: {
|
|
1214
|
-
primary: "
|
|
1215
|
-
// $
|
|
1220
|
+
primary: "openai/gpt-5.2-codex",
|
|
1221
|
+
// $2.50/$10 - complex coding (78% cost savings vs Opus)
|
|
1216
1222
|
fallback: [
|
|
1217
|
-
"
|
|
1218
|
-
|
|
1223
|
+
"anthropic/claude-opus-4.6",
|
|
1224
|
+
"anthropic/claude-opus-4.5",
|
|
1225
|
+
"anthropic/claude-sonnet-4",
|
|
1219
1226
|
"google/gemini-3-pro-preview",
|
|
1220
|
-
|
|
1221
|
-
"openai/gpt-5.2",
|
|
1222
|
-
"anthropic/claude-sonnet-4"
|
|
1227
|
+
"moonshot/kimi-k2.5"
|
|
1223
1228
|
]
|
|
1224
1229
|
},
|
|
1225
1230
|
REASONING: {
|
|
1226
|
-
primary: "
|
|
1227
|
-
// $
|
|
1231
|
+
primary: "anthropic/claude-sonnet-4",
|
|
1232
|
+
// $3/$15 - best for reasoning/instructions
|
|
1228
1233
|
fallback: [
|
|
1229
|
-
"
|
|
1230
|
-
// Latest o-series
|
|
1234
|
+
"anthropic/claude-opus-4.6",
|
|
1231
1235
|
"anthropic/claude-opus-4.5",
|
|
1232
|
-
"
|
|
1236
|
+
"openai/o3",
|
|
1237
|
+
"xai/grok-4-1-fast-reasoning"
|
|
1233
1238
|
]
|
|
1234
1239
|
}
|
|
1235
1240
|
},
|
|
@@ -1252,7 +1257,7 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1252
1257
|
COMPLEX: {
|
|
1253
1258
|
primary: "anthropic/claude-sonnet-4",
|
|
1254
1259
|
fallback: [
|
|
1255
|
-
"anthropic/claude-opus-4.
|
|
1260
|
+
"anthropic/claude-opus-4.6",
|
|
1256
1261
|
// Latest Opus - best agentic
|
|
1257
1262
|
"openai/gpt-5.2",
|
|
1258
1263
|
"google/gemini-3-pro-preview",
|
|
@@ -1263,7 +1268,7 @@ var DEFAULT_ROUTING_CONFIG = {
|
|
|
1263
1268
|
primary: "anthropic/claude-sonnet-4",
|
|
1264
1269
|
// Strong tool use + reasoning for agentic tasks
|
|
1265
1270
|
fallback: [
|
|
1266
|
-
"anthropic/claude-opus-4.
|
|
1271
|
+
"anthropic/claude-opus-4.6",
|
|
1267
1272
|
"xai/grok-4-fast-reasoning",
|
|
1268
1273
|
"moonshot/kimi-k2.5",
|
|
1269
1274
|
"deepseek/deepseek-reasoner"
|
|
@@ -1354,12 +1359,16 @@ var MODEL_ALIASES = {
|
|
|
1354
1359
|
// Claude
|
|
1355
1360
|
claude: "anthropic/claude-sonnet-4",
|
|
1356
1361
|
sonnet: "anthropic/claude-sonnet-4",
|
|
1357
|
-
opus: "anthropic/claude-opus-4",
|
|
1362
|
+
opus: "anthropic/claude-opus-4.6",
|
|
1363
|
+
// Updated to latest Opus 4.6
|
|
1364
|
+
"opus-46": "anthropic/claude-opus-4.6",
|
|
1365
|
+
"opus-45": "anthropic/claude-opus-4.5",
|
|
1358
1366
|
haiku: "anthropic/claude-haiku-4.5",
|
|
1359
1367
|
// OpenAI
|
|
1360
1368
|
gpt: "openai/gpt-4o",
|
|
1361
1369
|
gpt4: "openai/gpt-4o",
|
|
1362
1370
|
gpt5: "openai/gpt-5.2",
|
|
1371
|
+
codex: "openai/gpt-5.2-codex",
|
|
1363
1372
|
mini: "openai/gpt-4o-mini",
|
|
1364
1373
|
o3: "openai/o3",
|
|
1365
1374
|
// DeepSeek
|
|
@@ -1464,6 +1473,16 @@ var BLOCKRUN_MODELS = [
|
|
|
1464
1473
|
maxOutput: 128e3,
|
|
1465
1474
|
reasoning: true
|
|
1466
1475
|
},
|
|
1476
|
+
// OpenAI Codex Family
|
|
1477
|
+
{
|
|
1478
|
+
id: "openai/gpt-5.2-codex",
|
|
1479
|
+
name: "GPT-5.2 Codex",
|
|
1480
|
+
inputPrice: 2.5,
|
|
1481
|
+
outputPrice: 12,
|
|
1482
|
+
contextWindow: 128e3,
|
|
1483
|
+
maxOutput: 32e3,
|
|
1484
|
+
agentic: true
|
|
1485
|
+
},
|
|
1467
1486
|
// OpenAI GPT-4 Family
|
|
1468
1487
|
{
|
|
1469
1488
|
id: "openai/gpt-4.1",
|
|
@@ -1569,6 +1588,17 @@ var BLOCKRUN_MODELS = [
|
|
|
1569
1588
|
reasoning: true,
|
|
1570
1589
|
agentic: true
|
|
1571
1590
|
},
|
|
1591
|
+
{
|
|
1592
|
+
id: "anthropic/claude-opus-4.6",
|
|
1593
|
+
name: "Claude Opus 4.6",
|
|
1594
|
+
inputPrice: 5,
|
|
1595
|
+
outputPrice: 25,
|
|
1596
|
+
contextWindow: 2e5,
|
|
1597
|
+
maxOutput: 64e3,
|
|
1598
|
+
reasoning: true,
|
|
1599
|
+
vision: true,
|
|
1600
|
+
agentic: true
|
|
1601
|
+
},
|
|
1572
1602
|
// Google
|
|
1573
1603
|
{
|
|
1574
1604
|
id: "google/gemini-3-pro-preview",
|
|
@@ -2046,6 +2076,203 @@ var RequestDeduplicator = class {
|
|
|
2046
2076
|
}
|
|
2047
2077
|
};
|
|
2048
2078
|
|
|
2079
|
+
// src/response-cache.ts
|
|
2080
|
+
import { createHash as createHash2 } from "crypto";
|
|
2081
|
+
var DEFAULT_CONFIG = {
|
|
2082
|
+
maxSize: 200,
|
|
2083
|
+
defaultTTL: 600,
|
|
2084
|
+
maxItemSize: 1048576,
|
|
2085
|
+
// 1MB
|
|
2086
|
+
enabled: true
|
|
2087
|
+
};
|
|
2088
|
+
function canonicalize2(obj) {
|
|
2089
|
+
if (obj === null || typeof obj !== "object") {
|
|
2090
|
+
return obj;
|
|
2091
|
+
}
|
|
2092
|
+
if (Array.isArray(obj)) {
|
|
2093
|
+
return obj.map(canonicalize2);
|
|
2094
|
+
}
|
|
2095
|
+
const sorted = {};
|
|
2096
|
+
for (const key of Object.keys(obj).sort()) {
|
|
2097
|
+
sorted[key] = canonicalize2(obj[key]);
|
|
2098
|
+
}
|
|
2099
|
+
return sorted;
|
|
2100
|
+
}
|
|
2101
|
+
var TIMESTAMP_PATTERN2 = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;
|
|
2102
|
+
function normalizeForCache(obj) {
|
|
2103
|
+
const result = {};
|
|
2104
|
+
for (const [key, value] of Object.entries(obj)) {
|
|
2105
|
+
if (["stream", "user", "request_id", "x-request-id"].includes(key)) {
|
|
2106
|
+
continue;
|
|
2107
|
+
}
|
|
2108
|
+
if (key === "messages" && Array.isArray(value)) {
|
|
2109
|
+
result[key] = value.map((msg) => {
|
|
2110
|
+
if (typeof msg === "object" && msg !== null) {
|
|
2111
|
+
const m = msg;
|
|
2112
|
+
if (typeof m.content === "string") {
|
|
2113
|
+
return { ...m, content: m.content.replace(TIMESTAMP_PATTERN2, "") };
|
|
2114
|
+
}
|
|
2115
|
+
}
|
|
2116
|
+
return msg;
|
|
2117
|
+
});
|
|
2118
|
+
} else {
|
|
2119
|
+
result[key] = value;
|
|
2120
|
+
}
|
|
2121
|
+
}
|
|
2122
|
+
return result;
|
|
2123
|
+
}
|
|
2124
|
+
var ResponseCache = class {
|
|
2125
|
+
cache = /* @__PURE__ */ new Map();
|
|
2126
|
+
expirationHeap = [];
|
|
2127
|
+
config;
|
|
2128
|
+
// Stats for monitoring
|
|
2129
|
+
stats = {
|
|
2130
|
+
hits: 0,
|
|
2131
|
+
misses: 0,
|
|
2132
|
+
evictions: 0
|
|
2133
|
+
};
|
|
2134
|
+
constructor(config = {}) {
|
|
2135
|
+
const filtered = Object.fromEntries(
|
|
2136
|
+
Object.entries(config).filter(([, v]) => v !== void 0)
|
|
2137
|
+
);
|
|
2138
|
+
this.config = { ...DEFAULT_CONFIG, ...filtered };
|
|
2139
|
+
}
|
|
2140
|
+
/**
|
|
2141
|
+
* Generate cache key from request body.
|
|
2142
|
+
* Hashes: model + messages + temperature + max_tokens + other params
|
|
2143
|
+
*/
|
|
2144
|
+
static generateKey(body) {
|
|
2145
|
+
try {
|
|
2146
|
+
const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
|
|
2147
|
+
const normalized = normalizeForCache(parsed);
|
|
2148
|
+
const canonical = canonicalize2(normalized);
|
|
2149
|
+
const keyContent = JSON.stringify(canonical);
|
|
2150
|
+
return createHash2("sha256").update(keyContent).digest("hex").slice(0, 32);
|
|
2151
|
+
} catch {
|
|
2152
|
+
const content = typeof body === "string" ? body : body.toString();
|
|
2153
|
+
return createHash2("sha256").update(content).digest("hex").slice(0, 32);
|
|
2154
|
+
}
|
|
2155
|
+
}
|
|
2156
|
+
/**
|
|
2157
|
+
* Check if caching is enabled for this request.
|
|
2158
|
+
* Respects cache control headers and request params.
|
|
2159
|
+
*/
|
|
2160
|
+
shouldCache(body, headers) {
|
|
2161
|
+
if (!this.config.enabled) return false;
|
|
2162
|
+
if (headers?.["cache-control"]?.includes("no-cache")) {
|
|
2163
|
+
return false;
|
|
2164
|
+
}
|
|
2165
|
+
try {
|
|
2166
|
+
const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
|
|
2167
|
+
if (parsed.cache === false || parsed.no_cache === true) {
|
|
2168
|
+
return false;
|
|
2169
|
+
}
|
|
2170
|
+
} catch {
|
|
2171
|
+
}
|
|
2172
|
+
return true;
|
|
2173
|
+
}
|
|
2174
|
+
/**
|
|
2175
|
+
* Get cached response if available and not expired.
|
|
2176
|
+
*/
|
|
2177
|
+
get(key) {
|
|
2178
|
+
const entry = this.cache.get(key);
|
|
2179
|
+
if (!entry) {
|
|
2180
|
+
this.stats.misses++;
|
|
2181
|
+
return void 0;
|
|
2182
|
+
}
|
|
2183
|
+
if (Date.now() > entry.expiresAt) {
|
|
2184
|
+
this.cache.delete(key);
|
|
2185
|
+
this.stats.misses++;
|
|
2186
|
+
return void 0;
|
|
2187
|
+
}
|
|
2188
|
+
this.stats.hits++;
|
|
2189
|
+
return entry;
|
|
2190
|
+
}
|
|
2191
|
+
/**
|
|
2192
|
+
* Cache a response with optional custom TTL.
|
|
2193
|
+
*/
|
|
2194
|
+
set(key, response, ttlSeconds) {
|
|
2195
|
+
if (!this.config.enabled || this.config.maxSize <= 0) return;
|
|
2196
|
+
if (response.body.length > this.config.maxItemSize) {
|
|
2197
|
+
console.log(`[ResponseCache] Skipping cache - item too large: ${response.body.length} bytes`);
|
|
2198
|
+
return;
|
|
2199
|
+
}
|
|
2200
|
+
if (response.status >= 400) {
|
|
2201
|
+
return;
|
|
2202
|
+
}
|
|
2203
|
+
if (this.cache.size >= this.config.maxSize) {
|
|
2204
|
+
this.evict();
|
|
2205
|
+
}
|
|
2206
|
+
const now = Date.now();
|
|
2207
|
+
const ttl = ttlSeconds ?? this.config.defaultTTL;
|
|
2208
|
+
const expiresAt = now + ttl * 1e3;
|
|
2209
|
+
const entry = {
|
|
2210
|
+
...response,
|
|
2211
|
+
cachedAt: now,
|
|
2212
|
+
expiresAt
|
|
2213
|
+
};
|
|
2214
|
+
this.cache.set(key, entry);
|
|
2215
|
+
this.expirationHeap.push({ expiresAt, key });
|
|
2216
|
+
}
|
|
2217
|
+
/**
|
|
2218
|
+
* Evict expired and oldest entries to make room.
|
|
2219
|
+
*/
|
|
2220
|
+
evict() {
|
|
2221
|
+
const now = Date.now();
|
|
2222
|
+
this.expirationHeap.sort((a, b) => a.expiresAt - b.expiresAt);
|
|
2223
|
+
while (this.expirationHeap.length > 0) {
|
|
2224
|
+
const oldest = this.expirationHeap[0];
|
|
2225
|
+
const entry = this.cache.get(oldest.key);
|
|
2226
|
+
if (!entry || entry.expiresAt !== oldest.expiresAt) {
|
|
2227
|
+
this.expirationHeap.shift();
|
|
2228
|
+
continue;
|
|
2229
|
+
}
|
|
2230
|
+
if (oldest.expiresAt <= now) {
|
|
2231
|
+
this.cache.delete(oldest.key);
|
|
2232
|
+
this.expirationHeap.shift();
|
|
2233
|
+
this.stats.evictions++;
|
|
2234
|
+
} else {
|
|
2235
|
+
break;
|
|
2236
|
+
}
|
|
2237
|
+
}
|
|
2238
|
+
while (this.cache.size >= this.config.maxSize && this.expirationHeap.length > 0) {
|
|
2239
|
+
const oldest = this.expirationHeap.shift();
|
|
2240
|
+
if (this.cache.has(oldest.key)) {
|
|
2241
|
+
this.cache.delete(oldest.key);
|
|
2242
|
+
this.stats.evictions++;
|
|
2243
|
+
}
|
|
2244
|
+
}
|
|
2245
|
+
}
|
|
2246
|
+
/**
|
|
2247
|
+
* Get cache statistics.
|
|
2248
|
+
*/
|
|
2249
|
+
getStats() {
|
|
2250
|
+
const total = this.stats.hits + this.stats.misses;
|
|
2251
|
+
const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) + "%" : "0%";
|
|
2252
|
+
return {
|
|
2253
|
+
size: this.cache.size,
|
|
2254
|
+
maxSize: this.config.maxSize,
|
|
2255
|
+
hits: this.stats.hits,
|
|
2256
|
+
misses: this.stats.misses,
|
|
2257
|
+
evictions: this.stats.evictions,
|
|
2258
|
+
hitRate
|
|
2259
|
+
};
|
|
2260
|
+
}
|
|
2261
|
+
/**
|
|
2262
|
+
* Clear all cached entries.
|
|
2263
|
+
*/
|
|
2264
|
+
clear() {
|
|
2265
|
+
this.cache.clear();
|
|
2266
|
+
this.expirationHeap = [];
|
|
2267
|
+
}
|
|
2268
|
+
/**
|
|
2269
|
+
* Check if cache is enabled.
|
|
2270
|
+
*/
|
|
2271
|
+
isEnabled() {
|
|
2272
|
+
return this.config.enabled;
|
|
2273
|
+
}
|
|
2274
|
+
};
|
|
2275
|
+
|
|
2049
2276
|
// src/balance.ts
|
|
2050
2277
|
import { createPublicClient, http, erc20Abi } from "viem";
|
|
2051
2278
|
import { base } from "viem/chains";
|
|
@@ -3502,6 +3729,7 @@ async function startProxy(options) {
|
|
|
3502
3729
|
modelPricing
|
|
3503
3730
|
};
|
|
3504
3731
|
const deduplicator = new RequestDeduplicator();
|
|
3732
|
+
const responseCache = new ResponseCache(options.cacheConfig);
|
|
3505
3733
|
const sessionStore = new SessionStore(options.sessionConfig);
|
|
3506
3734
|
const connections = /* @__PURE__ */ new Set();
|
|
3507
3735
|
const server = createServer(async (req, res) => {
|
|
@@ -3542,6 +3770,15 @@ async function startProxy(options) {
|
|
|
3542
3770
|
res.end(JSON.stringify(response));
|
|
3543
3771
|
return;
|
|
3544
3772
|
}
|
|
3773
|
+
if (req.url === "/cache" || req.url?.startsWith("/cache?")) {
|
|
3774
|
+
const stats = responseCache.getStats();
|
|
3775
|
+
res.writeHead(200, {
|
|
3776
|
+
"Content-Type": "application/json",
|
|
3777
|
+
"Cache-Control": "no-cache"
|
|
3778
|
+
});
|
|
3779
|
+
res.end(JSON.stringify(stats, null, 2));
|
|
3780
|
+
return;
|
|
3781
|
+
}
|
|
3545
3782
|
if (req.url === "/stats" || req.url?.startsWith("/stats?")) {
|
|
3546
3783
|
try {
|
|
3547
3784
|
const url = new URL(req.url, "http://localhost");
|
|
@@ -3588,7 +3825,8 @@ async function startProxy(options) {
|
|
|
3588
3825
|
routerOpts,
|
|
3589
3826
|
deduplicator,
|
|
3590
3827
|
balanceMonitor,
|
|
3591
|
-
sessionStore
|
|
3828
|
+
sessionStore,
|
|
3829
|
+
responseCache
|
|
3592
3830
|
);
|
|
3593
3831
|
} catch (err) {
|
|
3594
3832
|
const error = err instanceof Error ? err : new Error(String(err));
|
|
@@ -3789,7 +4027,7 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
|
|
|
3789
4027
|
};
|
|
3790
4028
|
}
|
|
3791
4029
|
}
|
|
3792
|
-
async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore) {
|
|
4030
|
+
async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore, responseCache) {
|
|
3793
4031
|
const startTime = Date.now();
|
|
3794
4032
|
const upstreamUrl = `${apiBase}${req.url}`;
|
|
3795
4033
|
const bodyChunks = [];
|
|
@@ -3957,6 +4195,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
3957
4195
|
);
|
|
3958
4196
|
}
|
|
3959
4197
|
}
|
|
4198
|
+
const cacheKey = ResponseCache.generateKey(body);
|
|
4199
|
+
const reqHeaders = {};
|
|
4200
|
+
for (const [key, value] of Object.entries(req.headers)) {
|
|
4201
|
+
if (typeof value === "string") reqHeaders[key] = value;
|
|
4202
|
+
}
|
|
4203
|
+
if (responseCache.shouldCache(body, reqHeaders)) {
|
|
4204
|
+
const cachedResponse = responseCache.get(cacheKey);
|
|
4205
|
+
if (cachedResponse) {
|
|
4206
|
+
console.log(`[ClawRouter] Cache HIT for ${cachedResponse.model} (saved API call)`);
|
|
4207
|
+
res.writeHead(cachedResponse.status, cachedResponse.headers);
|
|
4208
|
+
res.end(cachedResponse.body);
|
|
4209
|
+
return;
|
|
4210
|
+
}
|
|
4211
|
+
}
|
|
3960
4212
|
const dedupKey = RequestDeduplicator.hash(body);
|
|
3961
4213
|
const cached = deduplicator.getCached(dedupKey);
|
|
3962
4214
|
if (cached) {
|
|
@@ -4309,12 +4561,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
|
|
|
4309
4561
|
}
|
|
4310
4562
|
}
|
|
4311
4563
|
res.end();
|
|
4564
|
+
const responseBody = Buffer.concat(responseChunks);
|
|
4312
4565
|
deduplicator.complete(dedupKey, {
|
|
4313
4566
|
status: upstream.status,
|
|
4314
4567
|
headers: responseHeaders,
|
|
4315
|
-
body:
|
|
4568
|
+
body: responseBody,
|
|
4316
4569
|
completedAt: Date.now()
|
|
4317
4570
|
});
|
|
4571
|
+
if (upstream.status === 200 && responseCache.shouldCache(body)) {
|
|
4572
|
+
responseCache.set(cacheKey, {
|
|
4573
|
+
body: responseBody,
|
|
4574
|
+
status: upstream.status,
|
|
4575
|
+
headers: responseHeaders,
|
|
4576
|
+
model: modelId
|
|
4577
|
+
});
|
|
4578
|
+
console.log(`[ClawRouter] Cached response for ${modelId} (${responseBody.length} bytes)`);
|
|
4579
|
+
}
|
|
4318
4580
|
}
|
|
4319
4581
|
if (estimatedCostMicros !== void 0) {
|
|
4320
4582
|
balanceMonitor.deductEstimated(estimatedCostMicros);
|