npm - @blockrun/clawrouter - Versions diffs - 0.10.13 → 0.10.15 - Mend

@blockrun/clawrouter 0.10.13 → 0.10.15

This diff compares the contents of two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (6) hide show

package/dist/cli.js CHANGED Viewed

@@ -476,7 +476,8 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
       tier: "REASONING",
       confidence: Math.max(confidence2, 0.85),
       signals,
-      agenticScore
+      agenticScore,
+      dimensions
     };
   }
   const { simpleMedium, mediumComplex, complexReasoning } = config.tierBoundaries;
@@ -500,9 +501,9 @@ function classifyByRules(prompt, systemPrompt, estimatedTokens, config) {
   }
   const confidence = calibrateConfidence(distanceFromBoundary, config.confidenceSteepness);
   if (confidence < config.confidenceThreshold) {
-    return { score: weightedScore, tier: null, confidence, signals, agenticScore };
+    return { score: weightedScore, tier: null, confidence, signals, agenticScore, dimensions };
   }
-  return { score: weightedScore, tier, confidence, signals, agenticScore };
+  return { score: weightedScore, tier, confidence, signals, agenticScore, dimensions };
 }
 function calibrateConfidence(distance, steepness) {
   return 1 / (1 + Math.exp(-steepness * distance));
@@ -558,6 +559,11 @@ function calculateModelCost(model, modelPricing, estimatedInputTokens, maxOutput
   const savings = routingProfile === "premium" ? 0 : baselineCost > 0 ? Math.max(0, (baselineCost - costEstimate) / baselineCost) : 0;
   return { costEstimate, baselineCost, savings };
 }
+function filterByToolCalling(models, hasTools, supportsToolCalling2) {
+  if (!hasTools) return models;
+  const filtered = models.filter(supportsToolCalling2);
+  return filtered.length > 0 ? filtered : models;
+}
 function getFallbackChainFiltered(tier, tierConfigs, estimatedTotalTokens, getContextWindow) {
   const fullChain = getFallbackChain(tier, tierConfigs);
   const filtered = fullChain.filter((modelId) => {
@@ -1613,12 +1619,12 @@ var DEFAULT_ROUTING_CONFIG = {
       ]
     },
     MEDIUM: {
-      primary: "xai/grok-code-fast-1",
-      // Code specialist, $0.20/$1.50
+      primary: "moonshot/kimi-k2.5",
+      // $0.50/$2.40 - strong tool use, proper function call format
       fallback: [
+        "deepseek/deepseek-chat",
         "google/gemini-2.5-flash-lite",
         // 1M context, ultra cheap ($0.10/$0.40)
-        "deepseek/deepseek-chat",
         "xai/grok-4-1-fast-non-reasoning"
         // Upgraded Grok 4.1
       ]
@@ -1684,7 +1690,7 @@ var DEFAULT_ROUTING_CONFIG = {
       fallback: [
         "anthropic/claude-haiku-4.5",
         "google/gemini-2.5-flash-lite",
-        "xai/grok-code-fast-1"
+        "deepseek/deepseek-chat"
       ]
     },
     MEDIUM: {
@@ -1735,9 +1741,9 @@ var DEFAULT_ROUTING_CONFIG = {
       ]
     },
     MEDIUM: {
-      primary: "xai/grok-code-fast-1",
-      // Code specialist for agentic coding
-      fallback: ["moonshot/kimi-k2.5", "anthropic/claude-haiku-4.5", "claude-sonnet-4"]
+      primary: "moonshot/kimi-k2.5",
+      // $0.50/$2.40 - strong tool use, handles function calls correctly
+      fallback: ["anthropic/claude-haiku-4.5", "deepseek/deepseek-chat", "xai/grok-4-1-fast-non-reasoning"]
     },
     COMPLEX: {
       primary: "anthropic/claude-sonnet-4.6",
@@ -1960,7 +1966,8 @@ var BLOCKRUN_MODELS = [
     maxOutput: 128e3,
     reasoning: true,
     vision: true,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   {
     id: "openai/gpt-5-mini",
@@ -1969,7 +1976,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.25,
     outputPrice: 2,
     contextWindow: 2e5,
-    maxOutput: 65536
+    maxOutput: 65536,
+    toolCalling: true
   },
   {
     id: "openai/gpt-5-nano",
@@ -1978,7 +1986,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.05,
     outputPrice: 0.4,
     contextWindow: 128e3,
-    maxOutput: 32768
+    maxOutput: 32768,
+    toolCalling: true
   },
   {
     id: "openai/gpt-5.2-pro",
@@ -1988,7 +1997,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 168,
     contextWindow: 4e5,
     maxOutput: 128e3,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   // OpenAI Codex Family
   {
@@ -1999,7 +2009,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 14,
     contextWindow: 128e3,
     maxOutput: 32e3,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   // OpenAI GPT-4 Family
   {
@@ -2010,7 +2021,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 8,
     contextWindow: 128e3,
     maxOutput: 16384,
-    vision: true
+    vision: true,
+    toolCalling: true
   },
   {
     id: "openai/gpt-4.1-mini",
@@ -2019,7 +2031,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.4,
     outputPrice: 1.6,
     contextWindow: 128e3,
-    maxOutput: 16384
+    maxOutput: 16384,
+    toolCalling: true
   },
   {
     id: "openai/gpt-4.1-nano",
@@ -2028,7 +2041,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.1,
     outputPrice: 0.4,
     contextWindow: 128e3,
-    maxOutput: 16384
+    maxOutput: 16384,
+    toolCalling: true
   },
   {
     id: "openai/gpt-4o",
@@ -2039,7 +2053,8 @@ var BLOCKRUN_MODELS = [
     contextWindow: 128e3,
     maxOutput: 16384,
     vision: true,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   {
     id: "openai/gpt-4o-mini",
@@ -2048,7 +2063,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.15,
     outputPrice: 0.6,
     contextWindow: 128e3,
-    maxOutput: 16384
+    maxOutput: 16384,
+    toolCalling: true
   },
   // OpenAI O-series (Reasoning)
   {
@@ -2059,7 +2075,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 60,
     contextWindow: 2e5,
     maxOutput: 1e5,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   {
     id: "openai/o1-mini",
@@ -2069,7 +2086,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 4.4,
     contextWindow: 128e3,
     maxOutput: 65536,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   {
     id: "openai/o3",
@@ -2079,7 +2097,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 8,
     contextWindow: 2e5,
     maxOutput: 1e5,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   {
     id: "openai/o3-mini",
@@ -2089,7 +2108,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 4.4,
     contextWindow: 128e3,
     maxOutput: 65536,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   {
     id: "openai/o4-mini",
@@ -2099,7 +2119,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 4.4,
     contextWindow: 128e3,
     maxOutput: 65536,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   // Anthropic - all Claude models excel at agentic workflows
   // Use newest versions (4.6) with full provider prefix
@@ -2111,7 +2132,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 5,
     contextWindow: 2e5,
     maxOutput: 8192,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   {
     id: "anthropic/claude-sonnet-4.6",
@@ -2122,7 +2144,8 @@ var BLOCKRUN_MODELS = [
     contextWindow: 2e5,
     maxOutput: 64e3,
     reasoning: true,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   {
     id: "anthropic/claude-opus-4.6",
@@ -2133,7 +2156,8 @@ var BLOCKRUN_MODELS = [
     contextWindow: 2e5,
     maxOutput: 32e3,
     reasoning: true,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   // Google
   {
@@ -2145,7 +2169,8 @@ var BLOCKRUN_MODELS = [
     contextWindow: 105e4,
     maxOutput: 65536,
     reasoning: true,
-    vision: true
+    vision: true,
+    toolCalling: true
   },
   {
     id: "google/gemini-3-pro-preview",
@@ -2156,7 +2181,8 @@ var BLOCKRUN_MODELS = [
     contextWindow: 105e4,
     maxOutput: 65536,
     reasoning: true,
-    vision: true
+    vision: true,
+    toolCalling: true
   },
   {
     id: "google/gemini-3-flash-preview",
@@ -2166,7 +2192,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 3,
     contextWindow: 1e6,
     maxOutput: 65536,
-    vision: true
+    vision: true,
+    toolCalling: true
   },
   {
     id: "google/gemini-2.5-pro",
@@ -2177,7 +2204,8 @@ var BLOCKRUN_MODELS = [
     contextWindow: 105e4,
     maxOutput: 65536,
     reasoning: true,
-    vision: true
+    vision: true,
+    toolCalling: true
   },
   {
     id: "google/gemini-2.5-flash",
@@ -2186,7 +2214,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.3,
     outputPrice: 2.5,
     contextWindow: 1e6,
-    maxOutput: 65536
+    maxOutput: 65536,
+    toolCalling: true
   },
   {
     id: "google/gemini-2.5-flash-lite",
@@ -2195,7 +2224,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.1,
     outputPrice: 0.4,
     contextWindow: 1e6,
-    maxOutput: 65536
+    maxOutput: 65536,
+    toolCalling: true
   },
   // DeepSeek
   {
@@ -2205,7 +2235,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.28,
     outputPrice: 0.42,
     contextWindow: 128e3,
-    maxOutput: 8192
+    maxOutput: 8192,
+    toolCalling: true
   },
   {
     id: "deepseek/deepseek-reasoner",
@@ -2215,7 +2246,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 0.42,
     contextWindow: 128e3,
     maxOutput: 8192,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   // Moonshot / Kimi - optimized for agentic workflows
   {
@@ -2228,7 +2260,8 @@ var BLOCKRUN_MODELS = [
     maxOutput: 8192,
     reasoning: true,
     vision: true,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   // xAI / Grok
   {
@@ -2239,7 +2272,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 15,
     contextWindow: 131072,
     maxOutput: 16384,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   // grok-3-fast removed - too expensive ($5/$25), use grok-4-fast instead
   {
@@ -2249,7 +2283,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.3,
     outputPrice: 0.5,
     contextWindow: 131072,
-    maxOutput: 16384
+    maxOutput: 16384,
+    toolCalling: true
   },
   // xAI Grok 4 Family - Ultra-cheap fast models
   {
@@ -2260,7 +2295,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 0.5,
     contextWindow: 131072,
     maxOutput: 16384,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   {
     id: "xai/grok-4-fast-non-reasoning",
@@ -2269,7 +2305,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.2,
     outputPrice: 0.5,
     contextWindow: 131072,
-    maxOutput: 16384
+    maxOutput: 16384,
+    toolCalling: true
   },
   {
     id: "xai/grok-4-1-fast-reasoning",
@@ -2279,7 +2316,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 0.5,
     contextWindow: 131072,
     maxOutput: 16384,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   {
     id: "xai/grok-4-1-fast-non-reasoning",
@@ -2288,7 +2326,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.2,
     outputPrice: 0.5,
     contextWindow: 131072,
-    maxOutput: 16384
+    maxOutput: 16384,
+    toolCalling: true
   },
   {
     id: "xai/grok-code-fast-1",
@@ -2297,9 +2336,10 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.2,
     outputPrice: 1.5,
     contextWindow: 131072,
-    maxOutput: 16384,
-    agentic: true
-    // Good for coding tasks
+    maxOutput: 16384
+    // toolCalling intentionally omitted: outputs tool calls as plain text JSON,
+    // not OpenAI-compatible structured function calls. Will be skipped when
+    // request has tools to prevent the "talking to itself" bug.
   },
   {
     id: "xai/grok-4-0709",
@@ -2309,7 +2349,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 1.5,
     contextWindow: 131072,
     maxOutput: 16384,
-    reasoning: true
+    reasoning: true,
+    toolCalling: true
   },
   {
     id: "xai/grok-2-vision",
@@ -2319,7 +2360,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 10,
     contextWindow: 131072,
     maxOutput: 16384,
-    vision: true
+    vision: true,
+    toolCalling: true
   },
   // MiniMax
   {
@@ -2331,7 +2373,8 @@ var BLOCKRUN_MODELS = [
     contextWindow: 204800,
     maxOutput: 16384,
     reasoning: true,
-    agentic: true
+    agentic: true,
+    toolCalling: true
   },
   // NVIDIA - Free/cheap models
   {
@@ -2342,6 +2385,8 @@ var BLOCKRUN_MODELS = [
     outputPrice: 0,
     contextWindow: 128e3,
     maxOutput: 16384
+    // toolCalling intentionally omitted: free model, structured function
+    // calling support unverified. Excluded from tool-heavy routing paths.
   },
   {
     id: "nvidia/kimi-k2.5",
@@ -2350,7 +2395,8 @@ var BLOCKRUN_MODELS = [
     inputPrice: 0.55,
     outputPrice: 2.5,
     contextWindow: 262144,
-    maxOutput: 16384
+    maxOutput: 16384,
+    toolCalling: true
   }
 ];
 function toOpenClawModel(m) {
@@ -2379,6 +2425,11 @@ var OPENCLAW_MODELS = [
   ...BLOCKRUN_MODELS.map(toOpenClawModel),
   ...ALIAS_MODELS
 ];
+function supportsToolCalling(modelId) {
+  const normalized = modelId.replace("blockrun/", "");
+  const model = BLOCKRUN_MODELS.find((m) => m.id === normalized);
+  return model?.toolCalling ?? false;
+}
 function getModelContextWindow(modelId) {
   const normalized = modelId.replace("blockrun/", "");
   const model = BLOCKRUN_MODELS.find((m) => m.id === normalized);
@@ -5032,6 +5083,7 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
   const originalContextSizeKB = Math.ceil(body.length / 1024);
   const debugMode = req.headers["x-clawrouter-debug"] !== "false";
   let routingDecision;
+  let hasTools = false;
   let isStreaming = false;
   let modelId = "";
   let maxTokens = 4096;
@@ -5046,10 +5098,11 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
       modelId = parsed.model || "";
       maxTokens = parsed.max_tokens || 4096;
       let bodyModified = false;
-      if (sessionId && Array.isArray(parsed.messages)) {
-        const messages = parsed.messages;
-        const lastUserMsg = [...messages].reverse().find((m) => m.role === "user");
-        const lastContent = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
+      const parsedMessages = Array.isArray(parsed.messages) ? parsed.messages : [];
+      const lastUserMsg = [...parsedMessages].reverse().find((m) => m.role === "user");
+      const lastContent = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
+      if (sessionId && parsedMessages.length > 0) {
+        const messages = parsedMessages;
         if (sessionJournal.needsContext(lastContent)) {
           const journalText = sessionJournal.format(sessionId);
           if (journalText) {
@@ -5070,6 +5123,106 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
           }
         }
       }
+      if (lastContent.startsWith("/debug")) {
+        const debugPrompt = lastContent.slice("/debug".length).trim() || "hello";
+        const messages = parsed.messages;
+        const systemMsg = messages?.find((m) => m.role === "system");
+        const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
+        const fullText = `${systemPrompt ?? ""} ${debugPrompt}`;
+        const estimatedTokens = Math.ceil(fullText.length / 4);
+        const normalizedModel2 = typeof parsed.model === "string" ? parsed.model.trim().toLowerCase() : "";
+        const profileName = normalizedModel2.replace("blockrun/", "");
+        const debugProfile = ["free", "eco", "auto", "premium"].includes(profileName) ? profileName : "auto";
+        const scoring = classifyByRules(
+          debugPrompt,
+          systemPrompt,
+          estimatedTokens,
+          DEFAULT_ROUTING_CONFIG.scoring
+        );
+        const debugRouting = route(debugPrompt, systemPrompt, maxTokens, {
+          ...routerOpts,
+          routingProfile: debugProfile
+        });
+        const dimLines = (scoring.dimensions ?? []).map((d) => {
+          const nameStr = (d.name + ":").padEnd(24);
+          const scoreStr = d.score.toFixed(2).padStart(6);
+          const sigStr = d.signal ? `  [${d.signal}]` : "";
+          return `  ${nameStr}${scoreStr}${sigStr}`;
+        }).join("\n");
+        const sess = sessionId ? sessionStore.getSession(sessionId) : void 0;
+        const sessLine = sess ? `Session: ${sessionId.slice(0, 8)}... \u2192 pinned: ${sess.model} (${sess.requestCount} requests)` : sessionId ? `Session: ${sessionId.slice(0, 8)}... \u2192 no pinned model` : "Session: none";
+        const { simpleMedium, mediumComplex, complexReasoning } = DEFAULT_ROUTING_CONFIG.scoring.tierBoundaries;
+        const debugText = [
+          "ClawRouter Debug",
+          "",
+          `Profile: ${debugProfile} | Tier: ${debugRouting.tier} | Model: ${debugRouting.model}`,
+          `Confidence: ${debugRouting.confidence.toFixed(2)} | Cost: $${debugRouting.costEstimate.toFixed(4)} | Savings: ${(debugRouting.savings * 100).toFixed(0)}%`,
+          `Reasoning: ${debugRouting.reasoning}`,
+          "",
+          `Scoring (weighted: ${scoring.score.toFixed(3)})`,
+          dimLines,
+          "",
+          `Tier Boundaries: SIMPLE <${simpleMedium.toFixed(2)} | MEDIUM <${mediumComplex.toFixed(2)} | COMPLEX <${complexReasoning.toFixed(2)} | REASONING >=${complexReasoning.toFixed(2)}`,
+          "",
+          sessLine
+        ].join("\n");
+        const completionId = `chatcmpl-debug-${Date.now()}`;
+        const timestamp = Math.floor(Date.now() / 1e3);
+        const syntheticResponse = {
+          id: completionId,
+          object: "chat.completion",
+          created: timestamp,
+          model: "clawrouter/debug",
+          choices: [
+            {
+              index: 0,
+              message: { role: "assistant", content: debugText },
+              finish_reason: "stop"
+            }
+          ],
+          usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 }
+        };
+        if (isStreaming) {
+          res.writeHead(200, {
+            "Content-Type": "text/event-stream",
+            "Cache-Control": "no-cache",
+            Connection: "keep-alive"
+          });
+          const sseChunk = {
+            id: completionId,
+            object: "chat.completion.chunk",
+            created: timestamp,
+            model: "clawrouter/debug",
+            choices: [
+              {
+                index: 0,
+                delta: { role: "assistant", content: debugText },
+                finish_reason: null
+              }
+            ]
+          };
+          const sseDone = {
+            id: completionId,
+            object: "chat.completion.chunk",
+            created: timestamp,
+            model: "clawrouter/debug",
+            choices: [{ index: 0, delta: {}, finish_reason: "stop" }]
+          };
+          res.write(`data: ${JSON.stringify(sseChunk)}
+`);
+          res.write(`data: ${JSON.stringify(sseDone)}
+`);
+          res.write("data: [DONE]\n\n");
+          res.end();
+        } else {
+          res.writeHead(200, { "Content-Type": "application/json" });
+          res.end(JSON.stringify(syntheticResponse));
+        }
+        console.log(`[ClawRouter] /debug command \u2192 ${debugRouting.tier} | ${debugRouting.model}`);
+        return;
+      }
       if (parsed.stream === true) {
         parsed.stream = false;
         bodyModified = true;
@@ -5124,20 +5277,20 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
             sessionStore.touchSession(sessionId2);
           } else {
             const messages = parsed.messages;
-            let lastUserMsg;
+            let lastUserMsg2;
             if (messages) {
               for (let i = messages.length - 1; i >= 0; i--) {
                 if (messages[i].role === "user") {
-                  lastUserMsg = messages[i];
+                  lastUserMsg2 = messages[i];
                   break;
                 }
               }
             }
             const systemMsg = messages?.find((m) => m.role === "system");
-            const prompt = typeof lastUserMsg?.content === "string" ? lastUserMsg.content : "";
+            const prompt = typeof lastUserMsg2?.content === "string" ? lastUserMsg2.content : "";
             const systemPrompt = typeof systemMsg?.content === "string" ? systemMsg.content : void 0;
             const tools = parsed.tools;
-            const hasTools = Array.isArray(tools) && tools.length > 0;
+            hasTools = Array.isArray(tools) && tools.length > 0;
             if (hasTools && tools) {
               console.log(
                 `[ClawRouter] Tools detected (${tools.length}), agentic mode via keywords`
@@ -5354,7 +5507,14 @@ async function proxyRequest(req, res, apiBase, payFetch, options, routerOpts, de
           `[ClawRouter] Context filter (~${estimatedTotalTokens} tokens): excluded ${contextExcluded.join(", ")}`
         );
       }
-      modelsToTry = contextFiltered.slice(0, MAX_FALLBACK_ATTEMPTS);
+      const toolFiltered = filterByToolCalling(contextFiltered, hasTools, supportsToolCalling);
+      const toolExcluded = contextFiltered.filter((m) => !toolFiltered.includes(m));
+      if (toolExcluded.length > 0) {
+        console.log(
+          `[ClawRouter] Tool-calling filter: excluded ${toolExcluded.join(", ")} (no structured function call support)`
+        );
+      }
+      modelsToTry = toolFiltered.slice(0, MAX_FALLBACK_ATTEMPTS);
       modelsToTry = prioritizeNonRateLimited(modelsToTry);
     } else {
       if (modelId && modelId !== FREE_MODEL) {