npm - @blockrun/clawrouter - Versions diffs - 0.9.7 → 0.9.9 - Mend

@blockrun/clawrouter 0.9.7 → 0.9.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/dist/index.d.ts CHANGED Viewed

@@ -281,6 +281,91 @@ type RouterOptions = {
  */
 declare function route(prompt: string, systemPrompt: string | undefined, maxOutputTokens: number, options: RouterOptions): RoutingDecision;
+/**
+ * Response Cache for LLM Completions
+ *
+ * Caches LLM responses by request hash (model + messages + params).
+ * Inspired by LiteLLM's caching system. Returns cached responses for
+ * identical requests, saving both cost and latency.
+ *
+ * Features:
+ * - TTL-based expiration (default 10 minutes)
+ * - Earliest-expiry eviction when cache is full (lookups do not refresh an entry)
+ * - Size limits per item (1MB max by default)
+ * - Expiration records kept in an array that is sorted by expiry when eviction runs
+ */
+type CachedLLMResponse = {
+    body: Buffer; // response bytes replayed verbatim on a cache hit
+    status: number; // HTTP status stored with the response; responses with status >= 400 are never stored
+    headers: Record<string, string>; // response headers replayed on a cache hit
+    model: string; // model id recorded when the response was cached
+    cachedAt: number; // Date.now() (milliseconds) at the time of caching
+    expiresAt: number; // cachedAt + TTL in milliseconds; lookups after this instant miss
+};
+type ResponseCacheConfig = {
+    /** Maximum number of cached responses. Default: 200. A value <= 0 turns set() into a no-op. */
+    maxSize?: number;
+    /** Default TTL in seconds, used when set() is called without ttlSeconds. Default: 600 (10 minutes) */
+    defaultTTL?: number;
+    /** Maximum body size per cached item in bytes; larger bodies are skipped. Default: 1MB (1048576) */
+    maxItemSize?: number;
+    /** Enable/disable cache. Default: true. Properties left undefined fall back to their defaults. */
+    enabled?: boolean;
+};
+declare class ResponseCache {
+    private cache; // cached entries keyed by cache key
+    private expirationHeap; // { expiresAt, key } records consulted during eviction
+    private config; // built-in defaults overlaid with the constructor argument
+    private stats; // hit / miss / eviction counters
+    constructor(config?: ResponseCacheConfig);
+    /**
+     * Generate a cache key: SHA-256 of the normalized, key-sorted JSON body, truncated to 32 hex chars.
+     * Ignores stream/user/request_id/x-request-id; bodies that cannot be parsed are hashed as raw text.
+     */
+    static generateKey(body: Buffer | string): string;
+    /**
+     * Decide whether this request may use the cache. Returns false when the cache is disabled, the
+     * `cache-control` header contains "no-cache", or the body sets `cache: false` / `no_cache: true`.
+     */
+    shouldCache(body: Buffer | string, headers?: Record<string, string>): boolean;
+    /**
+     * Get cached response if present and not expired; updates the hit/miss counters either way.
+     */
+    get(key: string): CachedLLMResponse | undefined;
+    /**
+     * Cache a response (ttlSeconds defaults to defaultTTL); skipped when disabled, oversized, or status >= 400.
+     */
+    set(key: string, response: {
+        body: Buffer;
+        status: number;
+        headers: Record<string, string>;
+        model: string;
+    }, ttlSeconds?: number): void;
+    /**
+     * Remove expired entries, then earliest-expiring entries until the cache is below maxSize.
+     */
+    private evict;
+    /**
+     * Get cache statistics (current size, configured limit, counters, and hit rate).
+     */
+    getStats(): {
+        size: number;
+        maxSize: number;
+        hits: number;
+        misses: number;
+        evictions: number;
+        hitRate: string; // percentage with one decimal place, e.g. "50.0%"; "0%" before any lookup
+    };
+    /**
+     * Clear all cached entries and expiration records; the statistics counters are left as they are.
+     */
+    clear(): void;
+    /**
+     * Check if cache is enabled (returns the configured `enabled` flag).
+     */
+    isEnabled(): boolean;
+}
 /**
  * Balance Monitor for ClawRouter
  *
@@ -521,6 +606,12 @@ type ProxyOptions = {
      * Set to 0 to compress all requests.
      */
     compressionThresholdKB?: number;
+    /**
+     * Response caching config. When enabled, identical requests return
+     * cached responses instead of making new API calls.
+     * Default: enabled with 10 minute TTL, 200 max entries.
+     */
+    cacheConfig?: ResponseCacheConfig;
     onReady?: (port: number) => void;
     onError?: (error: Error) => void;
     onPayment?: (info: {
@@ -917,4 +1008,4 @@ declare function formatStatsAscii(stats: AggregatedStats): string;
 declare const plugin: OpenClawPluginDefinition;
-export { type AggregatedStats, BALANCE_THRESHOLDS, BLOCKRUN_MODELS, type BalanceInfo, BalanceMonitor, type CachedPaymentParams, type CachedResponse, DEFAULT_RETRY_CONFIG, DEFAULT_ROUTING_CONFIG, DEFAULT_SESSION_CONFIG, type DailyStats, EmptyWalletError, InsufficientFundsError, type InsufficientFundsInfo, type LowBalanceInfo, MODEL_ALIASES, OPENCLAW_MODELS, PaymentCache, type PaymentFetchResult, type PreAuthParams, type ProxyHandle, type ProxyOptions, RequestDeduplicator, type RetryConfig, type RoutingConfig, type RoutingDecision, RpcError, type SessionConfig, type SessionEntry, SessionStore, type SufficiencyResult, type Tier, type UsageEntry, blockrunProvider, buildProviderModels, calculateModelCost, createPaymentFetch, plugin as default, fetchWithRetry, formatStatsAscii, getAgenticModels, getFallbackChain, getFallbackChainFiltered, getModelContextWindow, getProxyPort, getSessionId, getStats, isAgenticModel, isBalanceError, isEmptyWalletError, isInsufficientFundsError, isRetryable, isRpcError, logUsage, resolveModelAlias, route, startProxy };
+export { type AggregatedStats, BALANCE_THRESHOLDS, BLOCKRUN_MODELS, type BalanceInfo, BalanceMonitor, type CachedLLMResponse, type CachedPaymentParams, type CachedResponse, DEFAULT_RETRY_CONFIG, DEFAULT_ROUTING_CONFIG, DEFAULT_SESSION_CONFIG, type DailyStats, EmptyWalletError, InsufficientFundsError, type InsufficientFundsInfo, type LowBalanceInfo, MODEL_ALIASES, OPENCLAW_MODELS, PaymentCache, type PaymentFetchResult, type PreAuthParams, type ProxyHandle, type ProxyOptions, RequestDeduplicator, ResponseCache, type ResponseCacheConfig, type RetryConfig, type RoutingConfig, type RoutingDecision, RpcError, type SessionConfig, type SessionEntry, SessionStore, type SufficiencyResult, type Tier, type UsageEntry, blockrunProvider, buildProviderModels, calculateModelCost, createPaymentFetch, plugin as default, fetchWithRetry, formatStatsAscii, getAgenticModels, getFallbackChain, getFallbackChainFiltered, getModelContextWindow, getProxyPort, getSessionId, getStats, isAgenticModel, isBalanceError, isEmptyWalletError, isInsufficientFundsError, isRetryable, isRpcError, logUsage, resolveModelAlias, route, startProxy };

package/dist/index.js CHANGED Viewed

@@ -3,12 +3,16 @@ var MODEL_ALIASES = {
   // Claude
   claude: "anthropic/claude-sonnet-4",
   sonnet: "anthropic/claude-sonnet-4",
-  opus: "anthropic/claude-opus-4",
+  opus: "anthropic/claude-opus-4.6",
+  // Updated to latest Opus 4.6
+  "opus-46": "anthropic/claude-opus-4.6",
+  "opus-45": "anthropic/claude-opus-4.5",
   haiku: "anthropic/claude-haiku-4.5",
   // OpenAI
   gpt: "openai/gpt-4o",
   gpt4: "openai/gpt-4o",
   gpt5: "openai/gpt-5.2",
+  codex: "openai/gpt-5.2-codex",
   mini: "openai/gpt-4o-mini",
   o3: "openai/o3",
   // DeepSeek
@@ -113,6 +117,16 @@ var BLOCKRUN_MODELS = [
     maxOutput: 128e3,
     reasoning: true
   },
+  // OpenAI Codex Family
+  {
+    id: "openai/gpt-5.2-codex",
+    name: "GPT-5.2 Codex",
+    inputPrice: 2.5,
+    outputPrice: 12,
+    contextWindow: 128e3,
+    maxOutput: 32e3,
+    agentic: true
+  },
   // OpenAI GPT-4 Family
   {
     id: "openai/gpt-4.1",
@@ -218,6 +232,17 @@ var BLOCKRUN_MODELS = [
     reasoning: true,
     agentic: true
   },
+  {
+    id: "anthropic/claude-opus-4.6",
+    name: "Claude Opus 4.6",
+    inputPrice: 5,
+    outputPrice: 25,
+    contextWindow: 2e5,
+    maxOutput: 64e3,
+    reasoning: true,
+    vision: true,
+    agentic: true
+  },
   // Google
   {
     id: "google/gemini-3-pro-preview",
@@ -1645,37 +1670,42 @@ var DEFAULT_ROUTING_CONFIG = {
     }
   },
   // Premium tier configs - best quality (blockrun/premium)
+  // codex=complex coding, kimi=simple coding, sonnet=reasoning/instructions, opus=architecture/PM/audits
   premiumTiers: {
     SIMPLE: {
-      primary: "google/gemini-2.5-flash",
-      // $0.075/$0.30
-      fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
+      primary: "moonshot/kimi-k2.5",
+      // $0.50/$2.40 - good for simple coding
+      fallback: ["anthropic/claude-haiku-4.5", "google/gemini-2.5-flash", "xai/grok-code-fast-1"]
     },
     MEDIUM: {
-      primary: "openai/gpt-4o",
-      // $2.50/$10
-      fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
+      primary: "anthropic/claude-sonnet-4",
+      // $3/$15 - reasoning/instructions
+      fallback: [
+        "openai/gpt-5.2-codex",
+        "moonshot/kimi-k2.5",
+        "google/gemini-2.5-pro",
+        "xai/grok-4-0709"
+      ]
     },
     COMPLEX: {
-      primary: "anthropic/claude-opus-4.5",
-      // $5/$25 - Latest Opus
+      primary: "openai/gpt-5.2-codex",
+      // $2.50/$10 - complex coding (78% cost savings vs Opus)
       fallback: [
-        "openai/gpt-5.2-pro",
-        // $21/$168 - Latest GPT pro
+        "anthropic/claude-opus-4.6",
+        "anthropic/claude-opus-4.5",
+        "anthropic/claude-sonnet-4",
         "google/gemini-3-pro-preview",
-        // Latest Gemini
-        "openai/gpt-5.2",
-        "anthropic/claude-sonnet-4"
+        "moonshot/kimi-k2.5"
       ]
     },
     REASONING: {
-      primary: "openai/o3",
-      // $2/$8 - Best value reasoning
+      primary: "anthropic/claude-sonnet-4",
+      // $3/$15 - best for reasoning/instructions
       fallback: [
-        "openai/o4-mini",
-        // Latest o-series
+        "anthropic/claude-opus-4.6",
         "anthropic/claude-opus-4.5",
-        "google/gemini-3-pro-preview"
+        "openai/o3",
+        "xai/grok-4-1-fast-reasoning"
       ]
     }
   },
@@ -1698,7 +1728,7 @@ var DEFAULT_ROUTING_CONFIG = {
     COMPLEX: {
       primary: "anthropic/claude-sonnet-4",
       fallback: [
-        "anthropic/claude-opus-4.5",
+        "anthropic/claude-opus-4.6",
         // Latest Opus - best agentic
         "openai/gpt-5.2",
         "google/gemini-3-pro-preview",
@@ -1709,7 +1739,7 @@ var DEFAULT_ROUTING_CONFIG = {
       primary: "anthropic/claude-sonnet-4",
       // Strong tool use + reasoning for agentic tasks
       fallback: [
-        "anthropic/claude-opus-4.5",
+        "anthropic/claude-opus-4.6",
         "xai/grok-4-fast-reasoning",
         "moonshot/kimi-k2.5",
         "deepseek/deepseek-reasoner"
@@ -2139,6 +2169,203 @@ var RequestDeduplicator = class {
   }
 };
+// src/response-cache.ts
+import { createHash as createHash2 } from "crypto";
+var DEFAULT_CONFIG = { // fallback values for any ResponseCache option the caller leaves unset
+  maxSize: 200, // maximum number of cached entries
+  defaultTTL: 600, // seconds (multiplied by 1000 when computing expiresAt)
+  maxItemSize: 1048576,
+  // 1MB (1024 * 1024 bytes), compared against the response body length
+  enabled: true
+};
+function canonicalize2(obj) { // Recursively rebuild a value with object keys in sorted order, so key order does not affect the serialized form.
+  if (obj === null || typeof obj !== "object") { // primitives and null are returned as-is
+    return obj;
+  }
+  if (Array.isArray(obj)) { // arrays keep their element order; only the elements are canonicalized
+    return obj.map(canonicalize2);
+  }
+  const sorted = {};
+  for (const key of Object.keys(obj).sort()) { // default string sort of the object's own enumerable keys
+    sorted[key] = canonicalize2(obj[key]);
+  }
+  return sorted;
+}
+var TIMESTAMP_PATTERN2 = /^\[\w{3}\s+\d{4}-\d{2}-\d{2}\s+\d{2}:\d{2}\s+\w+\]\s*/;
+function normalizeForCache(obj) { // Shallow copy of a parsed request body with fields that should not affect the cache key removed.
+  const result = {};
+  for (const [key, value] of Object.entries(obj)) {
+    if (["stream", "user", "request_id", "x-request-id"].includes(key)) { // NOTE(review): dropping "stream" makes streaming and non-streaming requests share one key — confirm a cached body suits both
+      continue;
+    }
+    if (key === "messages" && Array.isArray(value)) {
+      result[key] = value.map((msg) => {
+        if (typeof msg === "object" && msg !== null) {
+          const m = msg;
+          if (typeof m.content === "string") { // only string content is rewritten; any other content shape passes through unchanged
+            return { ...m, content: m.content.replace(TIMESTAMP_PATTERN2, "") }; // strips one leading "[Www YYYY-MM-DD HH:MM ZONE] " style prefix so it does not change the key
+          }
+        }
+        return msg;
+      });
+    } else {
+      result[key] = value; // every other top-level field is part of the key as given
+    }
+  }
+  return result;
+}
+var ResponseCache = class {
+  cache = /* @__PURE__ */ new Map(); // cache key -> entry (response fields plus cachedAt / expiresAt)
+  expirationHeap = []; // plain array of { expiresAt, key } records; sorted by expiresAt inside evict()
+  config;
+  // Hit / miss / eviction counters reported by getStats()
+  stats = {
+    hits: 0,
+    misses: 0,
+    evictions: 0
+  };
+  constructor(config = {}) {
+    const filtered = Object.fromEntries(
+      Object.entries(config).filter(([, v]) => v !== void 0) // drop undefined values so they cannot override a default
+    );
+    this.config = { ...DEFAULT_CONFIG, ...filtered };
+  }
+  /**
+   * Generate a cache key: SHA-256 of the normalized, key-sorted JSON body, truncated to 32 hex chars.
+   * Bodies that fail to parse or normalize are hashed as raw text instead.
+   */
+  static generateKey(body) {
+    try {
+      const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
+      const normalized = normalizeForCache(parsed);
+      const canonical = canonicalize2(normalized);
+      const keyContent = JSON.stringify(canonical);
+      return createHash2("sha256").update(keyContent).digest("hex").slice(0, 32);
+    } catch { // any failure above falls back to hashing the body text unchanged
+      const content = typeof body === "string" ? body : body.toString();
+      return createHash2("sha256").update(content).digest("hex").slice(0, 32);
+    }
+  }
+  /**
+   * Decide whether this request may use the cache. Returns false when the cache is disabled, the
+   * `cache-control` header contains "no-cache", or the body sets `cache: false` / `no_cache: true`.
+   */
+  shouldCache(body, headers) {
+    if (!this.config.enabled) return false;
+    if (headers?.["cache-control"]?.includes("no-cache")) { // looks up the lowercase header name only; other directives are not inspected
+      return false;
+    }
+    try {
+      const parsed = JSON.parse(typeof body === "string" ? body : body.toString());
+      if (parsed.cache === false || parsed.no_cache === true) {
+        return false;
+      }
+    } catch { // opt-out flags could not be read from the body; the request is treated as cacheable
+    }
+    return true;
+  }
+  /**
+   * Get cached response if present and not expired; updates the hit/miss counters either way.
+   */
+  get(key) {
+    const entry = this.cache.get(key);
+    if (!entry) {
+      this.stats.misses++;
+      return void 0;
+    }
+    if (Date.now() > entry.expiresAt) {
+      this.cache.delete(key); // the matching expirationHeap record stays behind until evict() discards it
+      this.stats.misses++;
+      return void 0;
+    }
+    this.stats.hits++;
+    return entry;
+  }
+  /**
+   * Cache a response (ttlSeconds defaults to defaultTTL); skipped when disabled, oversized, or status >= 400.
+   */
+  set(key, response, ttlSeconds) {
+    if (!this.config.enabled || this.config.maxSize <= 0) return;
+    if (response.body.length > this.config.maxItemSize) {
+      console.log(`[ResponseCache] Skipping cache - item too large: ${response.body.length} bytes`);
+      return;
+    }
+    if (response.status >= 400) { // error responses are never stored
+      return;
+    }
+    if (this.cache.size >= this.config.maxSize) { // NOTE(review): also runs when `key` is already cached, where overwriting would not grow the map
+      this.evict();
+    }
+    const now = Date.now();
+    const ttl = ttlSeconds ?? this.config.defaultTTL;
+    const expiresAt = now + ttl * 1e3; // TTL is in seconds, timestamps are in milliseconds
+    const entry = {
+      ...response,
+      cachedAt: now,
+      expiresAt
+    };
+    this.cache.set(key, entry);
+    this.expirationHeap.push({ expiresAt, key }); // NOTE(review): records are only removed in evict()/clear(), so this array grows while the cache stays below maxSize — confirm acceptable
+  }
+  /**
+   * Remove expired entries, then earliest-expiring entries until the cache is below maxSize.
+   */
+  evict() {
+    const now = Date.now();
+    this.expirationHeap.sort((a, b) => a.expiresAt - b.expiresAt); // earliest expiry first
+    while (this.expirationHeap.length > 0) {
+      const oldest = this.expirationHeap[0];
+      const entry = this.cache.get(oldest.key);
+      if (!entry || entry.expiresAt !== oldest.expiresAt) { // stale record: the key was removed or re-cached with a different expiry
+        this.expirationHeap.shift();
+        continue;
+      }
+      if (oldest.expiresAt <= now) { // entry has expired: remove it and count an eviction
+        this.cache.delete(oldest.key);
+        this.expirationHeap.shift();
+        this.stats.evictions++;
+      } else {
+        break; // records are sorted, so everything after this one expires later
+      }
+    }
+    while (this.cache.size >= this.config.maxSize && this.expirationHeap.length > 0) { // still full: drop entries in expiry order
+      const oldest = this.expirationHeap.shift();
+      if (this.cache.has(oldest.key)) { // NOTE(review): unlike the loop above there is no expiresAt match check, so a stale record could evict a re-cached entry — confirm intended
+        this.cache.delete(oldest.key);
+        this.stats.evictions++;
+      }
+    }
+  }
+  /**
+   * Get cache statistics (current size, configured limit, counters, and hit rate).
+   */
+  getStats() {
+    const total = this.stats.hits + this.stats.misses;
+    const hitRate = total > 0 ? (this.stats.hits / total * 100).toFixed(1) + "%" : "0%"; // one-decimal percentage string; "0%" before any lookup
+    return {
+      size: this.cache.size,
+      maxSize: this.config.maxSize,
+      hits: this.stats.hits,
+      misses: this.stats.misses,
+      evictions: this.stats.evictions,
+      hitRate
+    };
+  }
+  /**
+   * Clear all cached entries and expiration records; the statistics counters are left as they are.
+   */
+  clear() {
+    this.cache.clear();
+    this.expirationHeap = [];
+  }
+  /**
+   * Check if cache is enabled (returns the configured `enabled` flag).
+   */
+  isEnabled() {
+    return this.config.enabled;
+  }
+};
 // src/balance.ts
 import { createPublicClient, http, erc20Abi } from "viem";
 import { base } from "viem/chains";
@@ -3642,6 +3869,7 @@ async function startProxy(options) {
     modelPricing
   };
   const deduplicator = new RequestDeduplicator();
+  const responseCache = new ResponseCache(options.cacheConfig);
   const sessionStore = new SessionStore(options.sessionConfig);
   const connections = /* @__PURE__ */ new Set();
   const server = createServer(async (req, res) => {
@@ -3682,6 +3910,15 @@ async function startProxy(options) {
       res.end(JSON.stringify(response));
       return;
     }
+    if (req.url === "/cache" || req.url?.startsWith("/cache?")) {
+      const stats = responseCache.getStats();
+      res.writeHead(200, {
+        "Content-Type": "application/json",
+        "Cache-Control": "no-cache"
+      });
+      res.end(JSON.stringify(stats, null, 2));
+      return;
+    }
     if (req.url === "/stats" || req.url?.startsWith("/stats?")) {
       try {
         const url = new URL(req.url, "http://localhost");
@@ -3728,7 +3965,8 @@ async function startProxy(options) {
         routerOpts,
         deduplicator,
         balanceMonitor,
-        sessionStore
+        sessionStore,
+        responseCache
       );
     } catch (err) {
       const error = err instanceof Error ? err : new Error(String(err));
@@ -3929,7 +4167,7 @@ async function tryModelRequest(upstreamUrl, method, headers, body, modelId, maxT
     };
   }
 }
-async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore) {
+async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, deduplicator, balanceMonitor, sessionStore, responseCache) {
   const startTime = Date.now();
   const upstreamUrl = `${apiBase}${req.url}`;
   const bodyChunks = [];
@@ -4097,6 +4335,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
       );
     }
   }
+  const cacheKey = ResponseCache.generateKey(body);
+  const reqHeaders = {};
+  for (const [key, value] of Object.entries(req.headers)) {
+    if (typeof value === "string") reqHeaders[key] = value;
+  }
+  if (responseCache.shouldCache(body, reqHeaders)) {
+    const cachedResponse = responseCache.get(cacheKey);
+    if (cachedResponse) {
+      console.log(`[ClawRouter] Cache HIT for ${cachedResponse.model} (saved API call)`);
+      res.writeHead(cachedResponse.status, cachedResponse.headers);
+      res.end(cachedResponse.body);
+      return;
+    }
+  }
   const dedupKey = RequestDeduplicator.hash(body);
   const cached = deduplicator.getCached(dedupKey);
   if (cached) {
@@ -4449,12 +4701,22 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
         }
       }
       res.end();
+      const responseBody = Buffer.concat(responseChunks);
       deduplicator.complete(dedupKey, {
         status: upstream.status,
         headers: responseHeaders,
-        body: Buffer.concat(responseChunks),
+        body: responseBody,
         completedAt: Date.now()
       });
+      if (upstream.status === 200 && responseCache.shouldCache(body)) {
+        responseCache.set(cacheKey, {
+          body: responseBody,
+          status: upstream.status,
+          headers: responseHeaders,
+          model: modelId
+        });
+        console.log(`[ClawRouter] Cached response for ${modelId} (${responseBody.length} bytes)`);
+      }
     }
     if (estimatedCostMicros !== void 0) {
       balanceMonitor.deductEstimated(estimatedCostMicros);
@@ -4761,9 +5023,9 @@ function injectModelsConfig(logger) {
     { id: "eco", alias: "eco" },
     { id: "premium", alias: "premium" },
     { id: "free", alias: "free" },
-    { id: "sonnet", alias: "sonnet" },
-    { id: "opus", alias: "opus" },
-    { id: "haiku", alias: "haiku" },
+    { id: "sonnet", alias: "br-sonnet" },
+    { id: "opus", alias: "br-opus" },
+    { id: "haiku", alias: "br-haiku" },
     { id: "gpt5", alias: "gpt5" },
     { id: "mini", alias: "mini" },
     { id: "grok-fast", alias: "grok-fast" },
@@ -4789,9 +5051,13 @@ function injectModelsConfig(logger) {
   }
   for (const m of KEY_MODEL_ALIASES) {
     const fullId = `blockrun/${m.id}`;
-    if (!allowlist[fullId]) {
+    const existing = allowlist[fullId];
+    if (!existing) {
       allowlist[fullId] = { alias: m.alias };
       needsWrite = true;
+    } else if (existing.alias !== m.alias) {
+      existing.alias = m.alias;
+      needsWrite = true;
     }
   }
   if (needsWrite) {
@@ -5125,6 +5391,7 @@ export {
   OPENCLAW_MODELS,
   PaymentCache,
   RequestDeduplicator,
+  ResponseCache,
   RpcError,
   SessionStore,
   blockrunProvider,