@elvatis_com/openclaw-cli-bridge-elvatis 2.10.0 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
4
4
 
5
- **Current version:** `2.10.0`
5
+ **Current version:** `2.10.1`
6
6
 
7
7
  ---
8
8
 
@@ -406,6 +406,11 @@ npm run ci # lint + typecheck + test
406
406
 
407
407
  ## Changelog
408
408
 
409
+ ### v2.10.1
410
+ - **feat:** smart tool-routing — tool-heavy requests (>8 tools) auto-route to Haiku instead of Sonnet. Haiku handles tool calls in ~11s vs Sonnet's 80-120s (with intermittent hangs). Sonnet is preserved for reasoning/text responses.
411
+ - **fix:** reduce stale-output timeout 120s→60s — faster fallback when Sonnet goes silent
412
+ - **feat:** per-model spawn logging with prompt size for debugging
413
+
409
414
  ### v2.10.0
410
415
  - **fix:** cap effective timeout at 580s (under gateway's 600s `idleTimeoutSeconds`) so bridge fallback fires BEFORE gateway kills the request — eliminates the race condition where both compete to handle the timeout
411
416
  - **fix:** reduce Sonnet base timeout 420s→300s, Opus 420s→360s — ensures fallback triggers faster for stuck CLI sessions
package/SKILL.md CHANGED
@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
68
68
 
69
69
  See `README.md` for full configuration reference and architecture diagram.
70
70
 
71
- **Version:** 2.10.0
71
+ **Version:** 2.10.1
@@ -2,7 +2,7 @@
2
2
  "id": "openclaw-cli-bridge-elvatis",
3
3
  "slug": "openclaw-cli-bridge-elvatis",
4
4
  "name": "OpenClaw CLI Bridge",
5
- "version": "2.10.0",
5
+ "version": "2.10.1",
6
6
  "license": "MIT",
7
7
  "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
8
8
  "providers": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
3
- "version": "2.10.0",
3
+ "version": "2.10.1",
4
4
  "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
5
5
  "type": "module",
6
6
  "openclaw": {
package/src/cli-runner.ts CHANGED
@@ -566,6 +566,7 @@ export async function runClaude(
566
566
  : prompt;
567
567
 
568
568
  const cwd = workdir ?? homedir();
569
+ debugLog("CLAUDE", `spawn ${model}`, { promptLen: effectivePrompt.length, promptKB: Math.round(effectivePrompt.length / 1024), cwd, timeoutMs: Math.round(timeoutMs / 1000) });
569
570
  const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
570
571
 
571
572
  // On 401: attempt one token refresh + retry before giving up.
package/src/config.ts CHANGED
@@ -57,7 +57,7 @@ export const TIMEOUT_GRACE_MS = 5_000;
57
57
  * assume it's stuck and SIGTERM early. 0 = disabled.
58
58
  * Prevents waiting the full timeout when Claude CLI hangs silently.
59
59
  */
60
- export const STALE_OUTPUT_TIMEOUT_MS = 120_000; // 2 min of silence → kill
60
+ export const STALE_OUTPUT_TIMEOUT_MS = 60_000; // 1 min of silence → kill (Sonnet goes silent when rate-limited)
61
61
 
62
62
  /** Max messages to include in the prompt sent to CLI subprocesses. */
63
63
  export const MAX_MESSAGES = 20;
@@ -71,6 +71,13 @@ export const MAX_MESSAGES_HEAVY_TOOLS = 12;
71
71
  /** Tool count threshold that triggers reduced message limit. */
72
72
  export const TOOL_HEAVY_THRESHOLD = 10;
73
73
 
74
+ /**
75
+ * Tool count threshold that triggers smart routing to a faster model.
76
+ * When Sonnet receives a request with more than this many tools, route to Haiku instead.
77
+ * Haiku handles tool calls in ~11s vs Sonnet's 80-120s (and Sonnet hangs intermittently).
78
+ */
79
+ export const TOOL_ROUTING_THRESHOLD = 8;
80
+
74
81
  /** Max characters per message content before truncation. */
75
82
  export const MAX_MSG_CHARS = 4_000;
76
83
 
@@ -32,6 +32,7 @@ import {
32
32
  BITNET_MAX_MESSAGES,
33
33
  BITNET_SYSTEM_PROMPT,
34
34
  DEFAULT_MODEL_TIMEOUTS,
35
+ TOOL_ROUTING_THRESHOLD,
35
36
  } from "./config.js";
36
37
  import { debugLog, DEBUG_LOG_PATH } from "./debug-log.js";
37
38
 
@@ -790,6 +791,18 @@ async function handleRequest(
790
791
  // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
791
792
  let result: CliToolResult;
792
793
  let usedModel = model;
794
+
795
+ // ── Smart tool routing: heavy tool requests → Haiku for speed ──────────
796
+ // Sonnet hangs intermittently on large tool prompts (20KB+, 21 tools).
797
+ // Haiku handles tool calls in ~11s vs Sonnet's 80-120s (when it works).
798
+ // Route tool-heavy requests directly to Haiku, keep Sonnet for reasoning.
799
+ if (hasTools && tools!.length > TOOL_ROUTING_THRESHOLD && model === "cli-claude/claude-sonnet-4-6") {
800
+ const toolModel = "cli-claude/claude-haiku-4-5";
801
+ opts.log(`[cli-bridge] tool-routing: ${model} → ${toolModel} (${tools!.length} tools)`);
802
+ debugLog("TOOL-ROUTE", `${model} → ${toolModel}`, { tools: tools!.length, threshold: TOOL_ROUTING_THRESHOLD });
803
+ usedModel = toolModel;
804
+ }
805
+
793
806
  const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
794
807
 
795
808
  // ── Provider session: ensure a persistent session for this model ────────
@@ -843,12 +856,12 @@ async function handleRequest(
843
856
 
844
857
  const cliStart = Date.now();
845
858
  try {
846
- result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
859
+ result = await routeToCliRunner(usedModel, cleanMessages, effectiveTimeout, routeOpts);
847
860
  const latencyMs = Date.now() - cliStart;
848
861
  const estCompletionTokens = estimateTokens(result.content ?? "");
849
- metrics.recordRequest(model, latencyMs, true, estPromptTokens, estCompletionTokens, promptPreview);
862
+ metrics.recordRequest(usedModel, latencyMs, true, estPromptTokens, estCompletionTokens, promptPreview);
850
863
  providerSessions.recordRun(session.id, false);
851
- debugLog("OK", `${model} completed in ${(latencyMs / 1000).toFixed(1)}s`, { toolCalls: result.tool_calls?.length ?? 0, contentLen: result.content?.length ?? 0 });
864
+ debugLog("OK", `${usedModel} completed in ${(latencyMs / 1000).toFixed(1)}s`, { toolCalls: result.tool_calls?.length ?? 0, contentLen: result.content?.length ?? 0 });
852
865
  } catch (err) {
853
866
  const primaryDuration = Date.now() - cliStart;
854
867
  const msg = (err as Error).message;