npm - @elvatis_com/openclaw-cli-bridge-elvatis - Versions diffs - 2.10.0 → 2.10.1 - Mend

@elvatis_com/openclaw-cli-bridge-elvatis 2.10.0 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7) hide show

package/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
-**Current version:** `2.10.0`
+**Current version:** `2.10.1`
 ---
@@ -406,6 +406,11 @@ npm run ci          # lint + typecheck + test
 ## Changelog
+### v2.10.1
+- **feat:** smart tool-routing — tool-heavy requests (>8 tools) auto-route to Haiku instead of Sonnet. Haiku handles tool calls in ~11s vs Sonnet's 80-120s (with intermittent hangs). Sonnet is preserved for reasoning/text responses.
+- **fix:** reduce stale-output timeout 120s→60s — faster fallback when Sonnet goes silent
+- **feat:** per-model spawn logging with prompt size for debugging
 ### v2.10.0
 - **fix:** cap effective timeout at 580s (under gateway's 600s `idleTimeoutSeconds`) so bridge fallback fires BEFORE gateway kills the request — eliminates the race condition where both compete to handle the timeout
 - **fix:** reduce Sonnet base timeout 420s→300s, Opus 420s→360s — ensures fallback triggers faster for stuck CLI sessions

package/SKILL.md CHANGED Viewed

@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
 See `README.md` for full configuration reference and architecture diagram.
-**Version:** 2.10.0
+**Version:** 2.10.1

package/openclaw.plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "openclaw-cli-bridge-elvatis",
   "slug": "openclaw-cli-bridge-elvatis",
   "name": "OpenClaw CLI Bridge",
-  "version": "2.10.0",
+  "version": "2.10.1",
   "license": "MIT",
   "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
   "providers": [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
-  "version": "2.10.0",
+  "version": "2.10.1",
   "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
   "type": "module",
   "openclaw": {

package/src/cli-runner.ts CHANGED Viewed

@@ -566,6 +566,7 @@ export async function runClaude(
     : prompt;
   const cwd = workdir ?? homedir();
+  debugLog("CLAUDE", `spawn ${model}`, { promptLen: effectivePrompt.length, promptKB: Math.round(effectivePrompt.length / 1024), cwd, timeoutMs: Math.round(timeoutMs / 1000) });
   const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
   // On 401: attempt one token refresh + retry before giving up.

package/src/config.ts CHANGED Viewed

@@ -57,7 +57,7 @@ export const TIMEOUT_GRACE_MS = 5_000;
  * assume it's stuck and SIGTERM early. 0 = disabled.
  * Prevents waiting the full timeout when Claude CLI hangs silently.
  */
-export const STALE_OUTPUT_TIMEOUT_MS = 120_000; // 2 min of silence → kill
+export const STALE_OUTPUT_TIMEOUT_MS = 60_000; // 1 min of silence → kill (Sonnet goes silent when rate-limited)
 /** Max messages to include in the prompt sent to CLI subprocesses. */
 export const MAX_MESSAGES = 20;
@@ -71,6 +71,13 @@ export const MAX_MESSAGES_HEAVY_TOOLS = 12;
 /** Tool count threshold that triggers reduced message limit. */
 export const TOOL_HEAVY_THRESHOLD = 10;
+/**
+ * Tool count threshold that triggers smart routing to a faster model.
+ * When Sonnet receives a request with more than this many tools, route to Haiku instead.
+ * Haiku handles tool calls in ~11s vs Sonnet's 80-120s (and Sonnet hangs intermittently).
+ */
+export const TOOL_ROUTING_THRESHOLD = 8;
 /** Max characters per message content before truncation. */
 export const MAX_MSG_CHARS = 4_000;

package/src/proxy-server.ts CHANGED Viewed

@@ -32,6 +32,7 @@ import {
   BITNET_MAX_MESSAGES,
   BITNET_SYSTEM_PROMPT,
   DEFAULT_MODEL_TIMEOUTS,
+  TOOL_ROUTING_THRESHOLD,
 } from "./config.js";
 import { debugLog, DEBUG_LOG_PATH } from "./debug-log.js";
@@ -790,6 +791,18 @@ async function handleRequest(
     // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
     let result: CliToolResult;
     let usedModel = model;
+    // ── Smart tool routing: heavy tool requests → Haiku for speed ──────────
+    // Sonnet hangs intermittently on large tool prompts (20KB+, 21 tools).
+    // Haiku handles tool calls in ~11s vs Sonnet's 80-120s (when it works).
+    // Route tool-heavy requests directly to Haiku, keep Sonnet for reasoning.
+    if (hasTools && tools!.length > TOOL_ROUTING_THRESHOLD && model === "cli-claude/claude-sonnet-4-6") {
+      const toolModel = "cli-claude/claude-haiku-4-5";
+      opts.log(`[cli-bridge] tool-routing: ${model} → ${toolModel} (${tools!.length} tools)`);
+      debugLog("TOOL-ROUTE", `${model} → ${toolModel}`, { tools: tools!.length, threshold: TOOL_ROUTING_THRESHOLD });
+      usedModel = toolModel;
+    }
     const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
     // ── Provider session: ensure a persistent session for this model ────────
@@ -843,12 +856,12 @@ async function handleRequest(
     const cliStart = Date.now();
     try {
-      result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
+      result = await routeToCliRunner(usedModel, cleanMessages, effectiveTimeout, routeOpts);
       const latencyMs = Date.now() - cliStart;
       const estCompletionTokens = estimateTokens(result.content ?? "");
-      metrics.recordRequest(model, latencyMs, true, estPromptTokens, estCompletionTokens, promptPreview);
+      metrics.recordRequest(usedModel, latencyMs, true, estPromptTokens, estCompletionTokens, promptPreview);
       providerSessions.recordRun(session.id, false);
-      debugLog("OK", `${model} completed in ${(latencyMs / 1000).toFixed(1)}s`, { toolCalls: result.tool_calls?.length ?? 0, contentLen: result.content?.length ?? 0 });
+      debugLog("OK", `${usedModel} completed in ${(latencyMs / 1000).toFixed(1)}s`, { toolCalls: result.tool_calls?.length ?? 0, contentLen: result.content?.length ?? 0 });
     } catch (err) {
       const primaryDuration = Date.now() - cliStart;
       const msg = (err as Error).message;