npm - @blockrun/clawrouter - Versions diffs - 0.8.20 → 0.8.21 - Mend

@blockrun/clawrouter 0.8.20 → 0.8.21

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.

Files changed (7)

package/dist/index.d.ts (changed)

@@ -203,6 +203,10 @@ type RoutingConfig = {
     tiers: Record<Tier, TierConfig>;
     /** Tier configs for agentic mode - models that excel at multi-step tasks */
     agenticTiers?: Record<Tier, TierConfig>;
+    /** Tier configs for eco profile - ultra cost-optimized (blockrun/eco) */
+    ecoTiers?: Record<Tier, TierConfig>;
+    /** Tier configs for premium profile - best quality (blockrun/premium) */
+    premiumTiers?: Record<Tier, TierConfig>;
     overrides: OverridesConfig;
 };
@@ -225,7 +229,7 @@ declare function getFallbackChain(tier: Tier, tierConfigs: Record<Tier, TierConf
  * Calculate cost for a specific model (used when fallback model is used).
  * Returns updated cost fields for RoutingDecision.
  */
-declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number): {
+declare function calculateModelCost(model: string, modelPricing: Map<string, ModelPricing>, estimatedInputTokens: number, maxOutputTokens: number, routingProfile?: "free" | "eco" | "auto" | "premium"): {
     costEstimate: number;
     baselineCost: number;
     savings: number;
@@ -264,6 +268,7 @@ declare const DEFAULT_ROUTING_CONFIG: RoutingConfig;
 type RouterOptions = {
     config: RoutingConfig;
     modelPricing: Map<string, ModelPricing>;
+    routingProfile?: "free" | "eco" | "auto" | "premium";
 };
 /**
  * Route a request to the cheapest capable model.

package/dist/index.js (changed)

@@ -41,16 +41,40 @@ function resolveModelAlias(model) {
   return model;
 }
 var BLOCKRUN_MODELS = [
-  // Smart routing meta-model — proxy replaces with actual model
+  // Smart routing meta-models — proxy replaces with actual model
   // NOTE: Model IDs are WITHOUT provider prefix (OpenClaw adds "blockrun/" automatically)
   {
     id: "auto",
-    name: "BlockRun Smart Router",
+    name: "Auto (Smart Router - Balanced)",
     inputPrice: 0,
     outputPrice: 0,
     contextWindow: 105e4,
     maxOutput: 128e3
   },
+  {
+    id: "free",
+    name: "Free (NVIDIA GPT-OSS-120B only)",
+    inputPrice: 0,
+    outputPrice: 0,
+    contextWindow: 128e3,
+    maxOutput: 4096
+  },
+  {
+    id: "eco",
+    name: "Eco (Smart Router - Cost Optimized)",
+    inputPrice: 0,
+    outputPrice: 0,
+    contextWindow: 105e4,
+    maxOutput: 128e3
+  },
+  {
+    id: "premium",
+    name: "Premium (Smart Router - Best Quality)",
+    inputPrice: 0,
+    outputPrice: 0,
+    contextWindow: 2e6,
+    maxOutput: 2e5
+  },
   // OpenAI GPT-5 Family
   {
     id: "openai/gpt-5.2",
@@ -352,8 +376,8 @@ var BLOCKRUN_MODELS = [
   {
     id: "xai/grok-4-0709",
     name: "Grok 4 (0709)",
-    inputPrice: 3,
-    outputPrice: 15,
+    inputPrice: 0.2,
+    outputPrice: 1.5,
     contextWindow: 131072,
     maxOutput: 16384,
     reasoning: true
@@ -909,7 +933,7 @@ function calibrateConfidence(distance, steepness) {
 }
 // src/router/selector.ts
-function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens) {
+function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
   const tierConfig = tierConfigs[tier];
   const model = tierConfig.primary;
   const pricing = modelPricing.get(model);
@@ -918,13 +942,13 @@ function selectModel(tier, confidence, method, reasoning, tierConfigs, modelPric
   const inputCost = estimatedInputTokens / 1e6 * inputPrice;
   const outputCost = maxOutputTokens / 1e6 * outputPrice;
   const costEstimate = inputCost + outputCost;
-  const opusPricing = modelPricing.get("anthropic/claude-opus-4");
+  const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
   const opusInputPrice = opusPricing?.inputPrice ?? 0;
   const opusOutputPrice = opusPricing?.outputPrice ?? 0;
   const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
   const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
   const baselineCost = baselineInput + baselineOutput;
-  const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
+  const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
   return {
     model,
     tier,
@@ -940,20 +964,20 @@ function getFallbackChain(tier, tierConfigs) {
   const config = tierConfigs[tier];
   return [config.primary, ...config.fallback];
 }
-function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens) {
+function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutputTokens, routingProfile) {
   const pricing = modelPricing.get(model);
   const inputPrice = pricing?.inputPrice ?? 0;
   const outputPrice = pricing?.outputPrice ?? 0;
   const inputCost = estimatedInputTokens / 1e6 * inputPrice;
   const outputCost = maxOutputTokens / 1e6 * outputPrice;
   const costEstimate = inputCost + outputCost;
-  const opusPricing = modelPricing.get("anthropic/claude-opus-4");
+  const opusPricing = modelPricing.get("anthropic/claude-opus-4.5");
   const opusInputPrice = opusPricing?.inputPrice ?? 0;
   const opusOutputPrice = opusPricing?.outputPrice ?? 0;
   const baselineInput = estimatedInputTokens / 1e6 * opusInputPrice;
   const baselineOutput = maxOutputTokens / 1e6 * opusOutputPrice;
   const baselineCost = baselineInput + baselineOutput;
-  const savings = baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
+  const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
   return { costEstimate, baselineCost, savings };
 }
 function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
@@ -1582,15 +1606,17 @@ var DEFAULT_ROUTING_CONFIG = {
     // Tier boundaries on weighted score axis
     tierBoundaries: {
       simpleMedium: 0,
-      mediumComplex: 0.18,
-      complexReasoning: 0.4
-      // Raised from 0.25 - requires strong reasoning signals
+      mediumComplex: 0.3,
+      // Raised from 0.18 - prevent simple tasks from reaching expensive COMPLEX tier
+      complexReasoning: 0.5
+      // Raised from 0.4 - reserve for true reasoning tasks
     },
     // Sigmoid steepness for confidence calibration
     confidenceSteepness: 12,
     // Below this confidence → ambiguous (null tier)
     confidenceThreshold: 0.7
   },
+  // Auto (balanced) tier configs - current default smart routing
   tiers: {
     SIMPLE: {
       primary: "nvidia/kimi-k2.5",
@@ -1599,7 +1625,9 @@ var DEFAULT_ROUTING_CONFIG = {
         "google/gemini-2.5-flash",
         "nvidia/gpt-oss-120b",
         "nvidia/gpt-oss-20b",
-        "deepseek/deepseek-chat"
+        "deepseek/deepseek-chat",
+        "xai/grok-code-fast-1"
+        // Added for better quality fallback
       ]
     },
     MEDIUM: {
@@ -1614,7 +1642,8 @@ var DEFAULT_ROUTING_CONFIG = {
     },
     COMPLEX: {
       primary: "google/gemini-2.5-pro",
-      fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "xai/grok-4-0709", "openai/gpt-4o"]
+      fallback: ["xai/grok-4-0709", "openai/gpt-4o", "openai/gpt-5.2", "anthropic/claude-sonnet-4"]
+      // Grok first for cost efficiency, Sonnet as last resort
     },
     REASONING: {
       primary: "xai/grok-4-1-fast-reasoning",
@@ -1628,6 +1657,52 @@ var DEFAULT_ROUTING_CONFIG = {
       ]
     }
   },
+  // Eco tier configs - ultra cost-optimized (blockrun/eco)
+  ecoTiers: {
+    SIMPLE: {
+      primary: "nvidia/kimi-k2.5",
+      // $0.001/$0.001
+      fallback: ["deepseek/deepseek-chat", "nvidia/gpt-oss-120b", "nvidia/gpt-oss-20b"]
+    },
+    MEDIUM: {
+      primary: "deepseek/deepseek-chat",
+      // $0.14/$0.28
+      fallback: ["xai/grok-code-fast-1", "google/gemini-2.5-flash", "nvidia/kimi-k2.5"]
+    },
+    COMPLEX: {
+      primary: "xai/grok-4-0709",
+      // $0.20/$1.50
+      fallback: ["deepseek/deepseek-chat", "google/gemini-2.5-flash", "openai/gpt-4o-mini"]
+    },
+    REASONING: {
+      primary: "deepseek/deepseek-reasoner",
+      // $0.55/$2.19
+      fallback: ["xai/grok-4-fast-reasoning", "moonshot/kimi-k2.5"]
+    }
+  },
+  // Premium tier configs - best quality (blockrun/premium)
+  premiumTiers: {
+    SIMPLE: {
+      primary: "google/gemini-2.5-flash",
+      // $0.075/$0.30
+      fallback: ["openai/gpt-4o-mini", "anthropic/claude-haiku-4.5", "moonshot/kimi-k2.5"]
+    },
+    MEDIUM: {
+      primary: "openai/gpt-4o",
+      // $2.50/$10
+      fallback: ["google/gemini-2.5-pro", "anthropic/claude-sonnet-4", "xai/grok-4-0709"]
+    },
+    COMPLEX: {
+      primary: "anthropic/claude-opus-4.5",
+      // $15/$75
+      fallback: ["openai/gpt-5.2", "anthropic/claude-sonnet-4", "google/gemini-2.5-pro"]
+    },
+    REASONING: {
+      primary: "openai/o3",
+      // $10/$40
+      fallback: ["anthropic/claude-opus-4.5", "openai/o1", "google/gemini-2.5-pro"]
+    }
+  },
   // Agentic tier configs - models that excel at multi-step autonomous tasks
   agenticTiers: {
     SIMPLE: {
@@ -1669,21 +1744,34 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
   const fullText = `${systemPrompt ?? ""} ${prompt}`;
   const estimatedTokens = Math.ceil(fullText.length / 4);
   const ruleResult = classifyByRules(prompt, systemPrompt, estimatedTokens, config.scoring);
-  const agenticScore = ruleResult.agenticScore ?? 0;
-  const isAutoAgentic = agenticScore >= 0.69;
-  const isExplicitAgentic = config.overrides.agenticMode ?? false;
-  const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
-  const tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
+  const { routingProfile } = options;
+  let tierConfigs;
+  let profileSuffix = "";
+  if (routingProfile === "eco" && config.ecoTiers) {
+    tierConfigs = config.ecoTiers;
+    profileSuffix = " | eco";
+  } else if (routingProfile === "premium" && config.premiumTiers) {
+    tierConfigs = config.premiumTiers;
+    profileSuffix = " | premium";
+  } else {
+    const agenticScore = ruleResult.agenticScore ?? 0;
+    const isAutoAgentic = agenticScore >= 0.5;
+    const isExplicitAgentic = config.overrides.agenticMode ?? false;
+    const useAgenticTiers = (isAutoAgentic || isExplicitAgentic) && config.agenticTiers != null;
+    tierConfigs = useAgenticTiers ? config.agenticTiers : config.tiers;
+    profileSuffix = useAgenticTiers ? " | agentic" : "";
+  }
   if (estimatedTokens > config.overrides.maxTokensForceComplex) {
     return selectModel(
       "COMPLEX",
       0.95,
       "rules",
-      `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${useAgenticTiers ? " | agentic" : ""}`,
+      `Input exceeds ${config.overrides.maxTokensForceComplex} tokens${profileSuffix}`,
       tierConfigs,
       modelPricing,
       estimatedTokens,
-      maxOutputTokens
+      maxOutputTokens,
+      routingProfile
     );
   }
   const hasStructuredOutput = systemPrompt ? /json|structured|schema/i.test(systemPrompt) : false;
@@ -1707,11 +1795,7 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
       tier = minTier;
     }
   }
-  if (isAutoAgentic) {
-    reasoning += " | auto-agentic";
-  } else if (isExplicitAgentic) {
-    reasoning += " | agentic";
-  }
+  reasoning += profileSuffix;
   return selectModel(
     tier,
     confidence,
@@ -1720,7 +1804,8 @@ function route(prompt, systemPrompt, maxOutputTokens, options) {
     tierConfigs,
     modelPricing,
     estimatedTokens,
-    maxOutputTokens
+    maxOutputTokens,
+    routingProfile
   );
 }
@@ -1878,12 +1963,13 @@ async function getStats(days = 7) {
 function formatStatsAscii(stats) {
   const lines = [];
   lines.push("\u2554\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2557");
-  lines.push("\u2551              ClawRouter Usage Statistics                   \u2551");
+  lines.push("\u2551          ClawRouter by BlockRun v0.8.20                    \u2551");
+  lines.push("\u2551                Usage Statistics                            \u2551");
   lines.push("\u2560\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2550\u2563");
   lines.push(`\u2551  Period: ${stats.period.padEnd(49)}\u2551`);
   lines.push(`\u2551  Total Requests: ${stats.totalRequests.toString().padEnd(41)}\u2551`);
   lines.push(`\u2551  Total Cost: $${stats.totalCost.toFixed(4).padEnd(43)}\u2551`);
-  lines.push(`\u2551  Baseline Cost (Opus): $${stats.totalBaselineCost.toFixed(4).padEnd(33)}\u2551`);
+  lines.push(`\u2551  Baseline Cost (Opus 4.5): $${stats.totalBaselineCost.toFixed(4).padEnd(30)}\u2551`);
   const savingsLine = `\u2551  \u{1F4B0} Total Saved: $${stats.totalSavings.toFixed(4)} (${stats.savingsPercentage.toFixed(1)}%)`;
   if (stats.entriesWithBaseline < stats.totalRequests && stats.entriesWithBaseline > 0) {
     lines.push(savingsLine.padEnd(61) + "\u2551");
@@ -2411,7 +2497,16 @@ async function checkForUpdates() {
 // src/proxy.ts
 var BLOCKRUN_API = "https://blockrun.ai/api";
 var AUTO_MODEL = "blockrun/auto";
-var AUTO_MODEL_SHORT = "auto";
+var ROUTING_PROFILES = /* @__PURE__ */ new Set([
+  "blockrun/free",
+  "free",
+  "blockrun/eco",
+  "eco",
+  "blockrun/auto",
+  "auto",
+  "blockrun/premium",
+  "premium"
+]);
 var FREE_MODEL = "nvidia/gpt-oss-120b";
 var HEARTBEAT_INTERVAL_MS = 2e3;
 var DEFAULT_REQUEST_TIMEOUT_MS = 18e4;
@@ -3093,6 +3188,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
   let isStreaming = false;
   let modelId = "";
   let maxTokens = 4096;
+  let routingProfile = null;
   const isChatCompletion = req.url?.includes("/chat/completions");
   if (isChatCompletion && body.length > 0) {
     try {
@@ -3108,58 +3204,83 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
       const normalizedModel = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
       const resolvedModel = resolveModelAlias(normalizedModel);
       const wasAlias = resolvedModel !== normalizedModel;
-      const isAutoModel = normalizedModel === AUTO_MODEL.toLowerCase() || normalizedModel === AUTO_MODEL_SHORT.toLowerCase();
+      const isRoutingProfile = ROUTING_PROFILES.has(normalizedModel);
+      if (isRoutingProfile) {
+        const profileName = normalizedModel.replace("blockrun/", "");
+        routingProfile = profileName;
+      }
       console.log(
-        `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}, isAuto: ${isAutoModel}`
+        `[ClawRouter] Received model: "${parsed.model}" -> normalized: "${normalizedModel}"${wasAlias ? ` -> alias: "${resolvedModel}"` : ""}${routingProfile ? `, profile: ${routingProfile}` : ""}`
       );
-      if (wasAlias && !isAutoModel) {
+      if (wasAlias && !isRoutingProfile) {
         parsed.model = resolvedModel;
         modelId = resolvedModel;
         bodyModified = true;
       }
-      if (isAutoModel) {
-        const sessionId = getSessionId(
-          req.headers
-        );
-        const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
-        if (existingSession) {
-          console.log(
-            `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
-          );
-          parsed.model = existingSession.model;
-          modelId = existingSession.model;
+      if (isRoutingProfile) {
+        if (routingProfile === "free") {
+          const freeModel = "nvidia/gpt-oss-120b";
+          console.log(`[ClawRouter] Free profile - using ${freeModel} directly`);
+          parsed.model = freeModel;
+          modelId = freeModel;
           bodyModified = true;
-          sessionStore.touchSession(sessionId);
+          await logUsage({
+            timestamp: (/* @__PURE__ */ new Date()).toISOString(),
+            model: freeModel,
+            tier: "SIMPLE",
+            cost: 0,
+            baselineCost: 0,
+            savings: 1,
+            // 100% savings
+            latencyMs: 0
+          });
         } else {
-          const messages = parsed.messages;
-          let lastUserMsg;
-          if (messages) {
-            for (let i = messages.length - 1; i >= 0; i--) {
-              if (messages[i].role === "user") {
-                lastUserMsg = messages[i];
-                break;
-              }
-            }
-          }
-          const systemMsg = messages?.find((m) => m.role === "system");
-          const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
-          const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
-          const tools = parsed.tools;
-          const hasTools = Array.isArray(tools) && tools.length > 0;
-          if (hasTools) {
-            console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
-          }
-          routingDecision = route(prompt, systemPrompt, maxTokens, routerOpts);
-          parsed.model = routingDecision.model;
-          modelId = routingDecision.model;
-          bodyModified = true;
-          if (sessionId) {
-            sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
+          const sessionId = getSessionId(
+            req.headers
+          );
+          const existingSession = sessionId ? sessionStore.getSession(sessionId) : void 0;
+          if (existingSession) {
             console.log(
-              `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
+              `[ClawRouter] Session ${sessionId?.slice(0, 8)}... using pinned model: ${existingSession.model}`
             );
+            parsed.model = existingSession.model;
+            modelId = existingSession.model;
+            bodyModified = true;
+            sessionStore.touchSession(sessionId);
+          } else {
+            const messages = parsed.messages;
+            let lastUserMsg;
+            if (messages) {
+              for (let i = messages.length - 1; i >= 0; i--) {
+                if (messages[i].role === "user") {
+                  lastUserMsg = messages[i];
+                  break;
+                }
+              }
+            }
+            const systemMsg = messages?.find((m) => m.role === "system");
+            const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
+            const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
+            const tools = parsed.tools;
+            const hasTools = Array.isArray(tools) && tools.length > 0;
+            if (hasTools) {
+              console.log(`[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`);
+            }
+            routingDecision = route(prompt, systemPrompt, maxTokens, {
+              ...routerOpts,
+              routingProfile: routingProfile ?? void 0
+            });
+            parsed.model = routingDecision.model;
+            modelId = routingDecision.model;
+            bodyModified = true;
+            if (sessionId) {
+              sessionStore.setSession(sessionId, routingDecision.model, routingDecision.tier);
+              console.log(
+                `[ClawRouter] Session ${sessionId.slice(0, 8)}... pinned to model: ${routingDecision.model}`
+              );
+            }
+            options.onRouted?.(routingDecision);
           }
-          options.onRouted?.(routingDecision);
         }
       }
       if (bodyModified) {
@@ -3343,7 +3464,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
         actualModelUsed,
         routerOpts.modelPricing,
         estimatedInputTokens,
-        maxTokens
+        maxTokens,
+        routingProfile ?? void 0
       );
       routingDecision = {
         ...routingDecision,
@@ -3552,7 +3674,8 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
       routingDecision.model,
       routerOpts.modelPricing,
       estimatedInputTokens,
-      maxTokens
+      maxTokens,
+      routingProfile ?? void 0
     );
     const costWithBuffer = accurateCosts.costEstimate * 1.2;
     const baselineWithBuffer = accurateCosts.baselineCost * 1.2;