npm - @blockrun/clawrouter - Versions diffs - 0.9.7 → 0.9.9 - Mend

@blockrun/clawrouter 0.9.7 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

package/README.md CHANGED

@@ -71,7 +71,7 @@ Choose your routing strategy with `/model <profile>`:
 **Other shortcuts:**
-- **Model aliases:** `/model sonnet`, `/model grok`, `/model gpt5`, `/model o3`
+- **Model aliases:** `/model br-sonnet`, `/model grok`, `/model gpt5`, `/model o3`
 - **Specific models:** `blockrun/openai/gpt-4o` or `blockrun/anthropic/claude-sonnet-4`
 - **Bring your wallet:** `export BLOCKRUN_WALLET_KEY=0x...`
@@ -150,7 +150,7 @@ ClawRouter v0.5+ includes intelligent features that work automatically:
 - **Agentic auto-detect** — routes multi-step tasks to Kimi K2.5
 - **Tool detection** — auto-switches when `tools` array present
 - **Context-aware** — filters models that can't handle your context size
-- **Model aliases** — `/model free`, `/model sonnet`, `/model grok`
+- **Model aliases** — `/model free`, `/model br-sonnet`, `/model grok`
 - **Session persistence** — pins model for multi-turn conversations
 - **Free tier fallback** — keeps working when wallet is empty
 - **Auto-update check** — notifies you when a new version is available
@@ -315,11 +315,12 @@ const decision = route("Prove sqrt(2) is irrational", ...);
 ---
-## Performance Optimizations (v0.3)
+## Performance Optimizations (v0.3+)
 - **SSE heartbeat**: Sends headers + heartbeat immediately, preventing upstream timeouts
 - **Response dedup**: SHA-256 hash → 30s cache, prevents double-charge on retries
 - **Payment pre-auth**: Caches 402 params, pre-signs USDC, skips 402 round trip (~200ms saved)
+- **Response cache**: LLM response caching with 10-minute TTL, saves cost on repeated queries
 ---
@@ -422,9 +423,10 @@ Your wallet key remains at `~/.openclaw/blockrun/wallet.key` — back it up befo
 - [x] Context-aware routing — filter out models that can't handle context size
 - [x] Session persistence — pin model for multi-turn conversations
 - [x] Cost tracking — /stats command with savings dashboard
-- [x] Model aliases — `/model free`, `/model sonnet`, `/model grok`, etc.
+- [x] Model aliases — `/model free`, `/model br-sonnet`, `/model grok`, etc.
 - [x] Free tier — gpt-oss-120b for $0 when wallet is empty
 - [x] Auto-update — startup version check with one-command update
+- [x] Response cache — LiteLLM-inspired caching for repeated requests
 - [ ] Cascade routing — try cheap model first, escalate on low quality
 - [ ] Spend controls — daily/monthly budgets
 - [ ] Remote analytics — cost tracking at blockrun.ai

package/dist/cli.js CHANGED

@@ -1199,37 +1199,42 @@ var DEFAULT_ROUTING_CONFIG = {
     }
   },
   // Premium tier configs - best quality (blockrun/premium)
+  // codex=complex coding, kimi=simple coding, sonnet=reasoning/instructions, opus=architecture/PM/audits
   premiumTiers: {
     SIMPLE: {
-      primary: "google/gemini-2.5-flash",
-      // $0.075/$0.30
-      fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
+      primary: "moonshot/kimi-k2.5",
+      // $0.50/$2.40 - good for simple coding
+      fallback: ["anthropic/claude-haiku-4.5", "google/gemini-2.5-flash", "xai/grok-code-fast-1"]
     },
     MEDIUM: {
-      primary: "openai/gpt-4o",
-      // $2.50/$10
-      fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
+      primary: "anthropic/claude-sonnet-4",
+      // $3/$15 - reasoning/instructions
+      fallback: [
+        "openai/gpt-5.2-codex",
+        "moonshot/kimi-k2.5",
+        "google/gemini-2.5-pro",
+        "xai/grok-4-0709"
+      ]
     },
     COMPLEX: {
-      primary: "anthropic/claude-opus-4.5",
-      // $5/$25 - Latest Opus
+      primary: "openai/gpt-5.2-codex",
+      // $2.50/$10 - complex coding (78% cost savings vs Opus)
       fallback: [
-        "openai/gpt-5.2-pro",
-        // $21/$168 - Latest GPT pro
+        "anthropic/claude-opus-4.6",
+        "anthropic/claude-opus-4.5",
+        "anthropic/claude-sonnet-4",
         "google/gemini-3-pro-preview",
-        // Latest Gemini
-        "openai/gpt-5.2",
-        "anthropic/claude-sonnet-4"
+        "moonshot/kimi-k2.5"
       ]
     },
     REASONING: {
-      primary: "openai/o3",
-      // $2/$8 - Best value reasoning
+      primary: "anthropic/claude-sonnet-4",
+      // $3/$15 - best for reasoning/instructions
       fallback: [
-        "openai/o4-mini",
-        // Latest o-series
+        "anthropic/claude-opus-4.6",
         "anthropic/claude-opus-4.5",
-        "google/gemini-3-pro-preview"
+        "openai/o3",
+        "xai/grok-4-1-fast-reasoning"
       ]
     }
   },
@@ -1252,7 +1257,7 @@ var DEFAULT_ROUTING_CONFIG = {
     COMPLEX: {
       primary: "anthropic/claude-sonnet-4",
       fallback: [
-        "anthropic/claude-opus-4.5",
+        "anthropic/claude-opus-4.6",
         // Latest Opus - best agentic
         "openai/gpt-5.2",
         "google/gemini-3-pro-preview",
@@ -1263,7 +1268,7 @@ var DEFAULT_ROUTING_CONFIG = {
       primary: "anthropic/claude-sonnet-4",
       // Strong tool use + reasoning for agentic tasks
       fallback: [
-        "anthropic/claude-opus-4.5",
+        "anthropic/claude-opus-4.6",
         "xai/grok-4-fast-reasoning",
         "moonshot/kimi-k2.5",
         "deepseek/deepseek-reasoner"
@@ -1354,12 +1359,16 @@ var MODEL_ALIASES = {
   // Claude
   claude: "anthropic/claude-sonnet-4",
   sonnet: "anthropic/claude-sonnet-4",
-  opus: "anthropic/claude-opus-4",
+  opus: "anthropic/claude-opus-4.6",
+  // Updated to latest Opus 4.6
+  "opus-46": "anthropic/claude-opus-4.6",
+  "opus-45": "anthropic/claude-opus-4.5",
   haiku: "anthropic/claude-haiku-4.5",
   // OpenAI
   gpt: "openai/gpt-4o",
   gpt4: "openai/gpt-4o",
   gpt5: "openai/gpt-5.2",
+  codex: "openai/gpt-5.2-codex",
   mini: "openai/gpt-4o-mini",
   o3: "openai/o3",
   // DeepSeek
@@ -1464,6 +1473,16 @@ var BLOCKRUN_MODELS = [
     maxOutput: 128e3,
     reasoning: true
   },
+  // OpenAI Codex Family
+  {
+    id: "openai/gpt-5.2-codex",
+    name: "GPT-5.2 Codex",
+    inputPrice: 2.5,
+    outputPrice: 12,
+    contextWindow: 128e3,
+    maxOutput: 32e3,
+    agentic: true
+  },
   // OpenAI GPT-4 Family
   {
     id: "openai/gpt-4.1",
@@ -1569,6 +1588,17 @@ var BLOCKRUN_MODELS = [
     reasoning: true,
     agentic: true
   },
+  {
+    id: "anthropic/claude-opus-4.6",
+    name: "Claude Opus 4.6",
+    inputPrice: 5,
+    outputPrice: 25,
+    contextWindow: 2e5,
+    maxOutput: 64e3,
+    reasoning: true,
+    vision: true,
+    agentic: true
+  },
   // Google
   {
     id: "google/gemini-3-pro-preview",
@@ -2046,6 +2076,203 @@ var RequestDeduplicator = class {
   }
 };
+// src/response-cache.ts
+import { createHash as createHash2 } from "crypto";
+var DEFAULT_CONFIG = {
+  maxSize: 200,
+  defaultTTL: 600,
+  maxItemSize: 1048576,
+  // 1MB
+  enabled: true
+};
+function canonicalize2(obj) {
+  if (obj === null || typeof obj !== "object") {
+    return obj;
+  }
+  if (Array.isArray(obj)) {
+    return obj.map(canonicalize2);
+  }
+  const sorted = {};
+  for (const key of Object.keys(obj).sort()) {
+    sorted[key] = canonicalize2(obj[key]);
+  }
+  return sorted;
+}
+var TIMESTAMP_PATTERN2 = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;
+function normalizeForCache(obj) {
+  const result = {};
+  for (const [key, value] of Object.entries(obj)) {
+    if (["stream", "user", "request_id", "x-request-id"].includes(key)) {
+      continue;
+    }
+    if (key === "messages" && Array.isArray(value)) {
+      result[key] = value.map((msg) => {
+        if (typeof msg === "object" && msg !== null) {
+          const m = msg;
+          if (typeof m.content === "string") {
+            return { ...m, content: m.content.replace(TIMESTAMP_PATTERN2, "") };
+          }
+        }
+        return msg;
+      });
+    } else {
+      result[key] = value;
+    }
+  }
+  return result;
+}
+var ResponseCache = class {
+  cache = /* @__PURE__ */ new Map();
+  expirationHeap = [];
+  config;
+  // Stats for monitoring
+  stats = {
+    hits: 0,
+    misses: 0,
+    evictions: 0
+  };
+  constructor(config = {}) {
+    const filtered = Object.fromEntries(
+      Object.entries(config).filter(([, v]) => v !== void 0)
+    );
+    this.config = { ...DEFAULT_CONFIG, ...filtered };
+  }
+  /**
+   * Generate cache key from request body.
+   * Hashes: model + messages + temperature + max_tokens + other params
+   */
+  static generateKey(body) {
+    try {
+      const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
+      const normalized = normalizeForCache(parsed);
+      const canonical = canonicalize2(normalized);
+      const keyContent = JSON.stringify(canonical);
+      return createHash2("sha256").update(keyContent).digest("hex").slice(0, 32);
+    } catch {
+      const content = typeof body === "string" ? body : body.toString();
+      return createHash2("sha256").update(content).digest("hex").slice(0, 32);
+    }
+  }
+  /**
+   * Check if caching is enabled for this request.
+   * Respects cache control headers and request params.
+   */
+  shouldCache(body, headers) {
+    if (!this.config.enabled) return false;
+    if (headers?.["cache-control"]?.includes("no-cache")) {
+      return false;
+    }
+    try {
+      const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
+      if (parsed.cache === false || parsed.no_cache === true) {
+        return false;
+      }
+    } catch {
+    }
+    return true;
+  }
+  /**
+   * Get cached response if available and not expired.
+   */
+  get(key) {
+    const entry = this.cache.get(key);
+    if (!entry) {
+      this.stats.misses++;
+      return void 0;
+    }
+    if (Date.now() > entry.expiresAt) {
+      this.cache.delete(key);
+      this.stats.misses++;
+      return void 0;
+    }
+    this.stats.hits++;
+    return entry;
+  }
+  /**
+   * Cache a response with optional custom TTL.
+   */
+  set(key, response, ttlSeconds) {
+    if (!this.config.enabled || this.config.maxSize <= 0) return;
+    if (response.body.length > this.config.maxItemSize) {
+      console.log(`[ResponseCache] Skipping cache - item too large: ${response.body.length} bytes`);
+      return;
+    }
+    if (response.status >= 400) {
+      return;
+    }
+    if (this.cache.size >= this.config.maxSize) {
+      this.evict();
+    }
+    const now = Date.now();
+    const ttl = ttlSeconds ?? this.config.defaultTTL;
+    const expiresAt = now + ttl * 1e3;
+    const entry = {
+      ...response,
+      cachedAt: now,
+      expiresAt
+    };
+    this.cache.set(key, entry);
+    this.expirationHeap.push({ expiresAt, key });
+  }
+  /**
+   * Evict expired and oldest entries to make room.
+   */
+  evict() {
+    const now = Date.now();
+    this.expirationHeap.sort((a, b) => a.expiresAt - b.expiresAt);
+    while (this.expirationHeap.length > 0) {
+      const oldest = this.expirationHeap[0];
+      const entry = this.cache.get(oldest.key);
+      if (!entry || entry.expiresAt !== oldest.expiresAt) {
+        this.expirationHeap.shift();
+        continue;
+      }
+      if (oldest.expiresAt <= now) {
+        this.cache.delete(oldest.key);
+        this.expirationHeap.shift();
+        this.stats.evictions++;
+      } else {
+        break;
+      }
+    }
+    while (this.cache.size >= this.config.maxSize && this.expirationHeap.length > 0) {
+      const oldest = this.expirationHeap.shift();
+      if (this.cache.has(oldest.key)) {
+        this.cache.delete(oldest.key);
+        this.stats.evictions++;
+      }
+    }
+  }
+  /**
+   * Get cache statistics.
+   */
+  getStats() {
+    const total = this.stats.hits + this.stats.misses;
+    const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) + "%" : "0%";
+    return {
+      size: this.cache.size,
+      maxSize: this.config.maxSize,
+      hits: this.stats.hits,
+      misses: this.stats.misses,
+      evictions: this.stats.evictions,
+      hitRate
+    };
+  }
+  /**
+   * Clear all cached entries.
+   */
+  clear() {
+    this.cache.clear();
+    this.expirationHeap = [];
+  }
+  /**
+   * Check if cache is enabled.
+   */
+  isEnabled() {
+    return this.config.enabled;
+  }
+};
 // src/balance.ts
 import { createPublicClient, http, erc20Abi } from "viem";
 import { base } from "viem/chains";
@@ -3502,6 +3729,7 @@ async function startProxy(options) {
     modelPricing
   };
   const deduplicator = new RequestDeduplicator();
+  const responseCache = new ResponseCache(options.cacheConfig);
   const sessionStore = new SessionStore(options.sessionConfig);
   const connections = /* @__PURE__ */ new Set();
   const server = createServer(async (req, res) => {
@@ -3542,6 +3770,15 @@ async function startProxy(options) {
       res.end(JSON.stringify(response));
       return;
     }
+    if (req.url === "/cache" || req.url?.startsWith("/cache?")) {
+      const stats = responseCache.getStats();
+      res.writeHead(200, {
+        "Content-Type": "application/json",
+        "Cache-Control": "no-cache"
+      });
+      res.end(JSON.stringify(stats, null, 2));
+      return;
+    }
     if (req.url === "/stats" || req.url?.startsWith("/stats?")) {
       try {
         const url = new URL(req.url, "http://localhost");
@@ -3588,7 +3825,8 @@ async function startProxy(options) {
         routerOpts,
         deduplicator,
         balanceMonitor,
-        sessionStore
+        sessionStore,
+        responseCache
       );
     } catch (err) {
       const error = err instanceof Error ? err : new Error(String(err));
@@ -3789,7 +4027,7 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
     };
   }
 }
-async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore) {
+async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore, responseCache) {
   const startTime = Date.now();
   const upstreamUrl = `${apiBase}${req.url}`;
   const bodyChunks = [];
@@ -3957,6 +4195,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
       );
     }
   }
+  const cacheKey = ResponseCache.generateKey(body);
+  const reqHeaders = {};
+  for (const [key, value] of Object.entries(req.headers)) {
+    if (typeof value === "string") reqHeaders[key] = value;
+  }
+  if (responseCache.shouldCache(body, reqHeaders)) {
+    const cachedResponse = responseCache.get(cacheKey);
+    if (cachedResponse) {
+      console.log(`[ClawRouter] Cache HIT for ${cachedResponse.model} (saved API call)`);
+      res.writeHead(cachedResponse.status, cachedResponse.headers);
+      res.end(cachedResponse.body);
+      return;
+    }
+  }
   const dedupKey = RequestDeduplicator.hash(body);
   const cached = deduplicator.getCached(dedupKey);
   if (cached) {
@@ -4309,12 +4561,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
         }
       }
       res.end();
+      const responseBody = Buffer.concat(responseChunks);
       deduplicator.complete(dedupKey, {
         status: upstream.status,
         headers: responseHeaders,
-        body: Buffer.concat(responseChunks),
+        body: responseBody,
         completedAt: Date.now()
       });
+      if (upstream.status === 200 && responseCache.shouldCache(body)) {
+        responseCache.set(cacheKey, {
+          body: responseBody,
+          status: upstream.status,
+          headers: responseHeaders,
+          model: modelId
+        });
+        console.log(`[ClawRouter] Cached response for ${modelId} (${responseBody.length} bytes)`);
+      }
     }
     if (estimatedCostMicros !== void 0) {
       balanceMonitor.deductEstimated(estimatedCostMicros);