@elvatis_com/openclaw-cli-bridge-elvatis 2.9.0 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
4
4
 
5
- **Current version:** `2.9.0`
5
+ **Current version:** `2.10.1`
6
6
 
7
7
  ---
8
8
 
@@ -406,6 +406,23 @@ npm run ci # lint + typecheck + test
406
406
 
407
407
  ## Changelog
408
408
 
409
+ ### v2.10.1
410
+ - **feat:** smart tool-routing — tool-heavy requests (>8 tools) auto-route to Haiku instead of Sonnet. Haiku handles tool calls in ~11s vs Sonnet's 80-120s (with intermittent hangs). Sonnet is preserved for reasoning/text responses.
411
+ - **fix:** reduce stale-output timeout 120s→60s — faster fallback when Sonnet goes silent
412
+ - **feat:** per-model spawn logging with prompt size for debugging
413
+
414
+ ### v2.10.0
415
+ - **fix:** cap effective timeout at 580s (under gateway's 600s `idleTimeoutSeconds`) so bridge fallback fires BEFORE gateway kills the request — eliminates the race condition where both compete to handle the timeout
416
+ - **fix:** reduce Sonnet base timeout 420s→300s, Opus 420s→360s — ensures fallback triggers faster for stuck CLI sessions
417
+ - **feat:** compact tool schema mode — when >8 tools, compress definitions to name+params only, cutting prompt size ~60%
418
+ - **feat:** stale-output detection — if CLI produces no stdout for 120s, SIGTERM early instead of waiting full timeout
419
+ - **feat:** adaptive message limits — reduce history from 20→12 messages when >10 tools to keep prompts smaller
420
+ - **feat:** file-based debug log at `~/.openclaw/cli-bridge/debug.log` — `tail -f` for real-time request lifecycle visibility
421
+ - **feat:** SSE progress comments every 30s so the webchat connection stays informed during long CLI runs
422
+ - **feat:** SSE fallback notification — visible comment when a model times out and the bridge retries with fallback
423
+ - **fix:** rescue tool_calls embedded inside content strings — handles models that wrap `{"tool_calls":[...]}` inside a `{"content":"..."}` wrapper
424
+ - **fix:** parse robustness — debug logging on all parse paths to diagnose raw-JSON-instead-of-tool-calls issues
425
+
409
426
  ### v2.9.0
410
427
  - **feat:** enhanced `/status` dashboard with 5 new panels:
411
428
  - **Active Requests**: live in-flight requests with model, elapsed time, message/tool count, prompt preview
package/SKILL.md CHANGED
@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
68
68
 
69
69
  See `README.md` for full configuration reference and architecture diagram.
70
70
 
71
- **Version:** 2.9.0
71
+ **Version:** 2.10.1
@@ -2,7 +2,7 @@
2
2
  "id": "openclaw-cli-bridge-elvatis",
3
3
  "slug": "openclaw-cli-bridge-elvatis",
4
4
  "name": "OpenClaw CLI Bridge",
5
- "version": "2.9.0",
5
+ "version": "2.10.1",
6
6
  "license": "MIT",
7
7
  "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
8
8
  "providers": [
@@ -43,8 +43,8 @@
43
43
  "type": "number"
44
44
  },
45
45
  "default": {
46
- "cli-claude/claude-opus-4-6": 420000,
47
- "cli-claude/claude-sonnet-4-6": 420000,
46
+ "cli-claude/claude-opus-4-6": 360000,
47
+ "cli-claude/claude-sonnet-4-6": 300000,
48
48
  "cli-claude/claude-haiku-4-5": 120000,
49
49
  "cli-gemini/gemini-2.5-pro": 300000,
50
50
  "cli-gemini/gemini-2.5-flash": 180000,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
3
- "version": "2.9.0",
3
+ "version": "2.10.1",
4
4
  "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
5
5
  "type": "module",
6
6
  "openclaw": {
package/src/cli-runner.ts CHANGED
@@ -30,11 +30,15 @@ import {
30
30
  } from "./tool-protocol.js";
31
31
  import {
32
32
  MAX_MESSAGES,
33
+ MAX_MESSAGES_HEAVY_TOOLS,
34
+ TOOL_HEAVY_THRESHOLD,
33
35
  MAX_MSG_CHARS,
34
36
  DEFAULT_CLI_TIMEOUT_MS,
35
37
  TIMEOUT_GRACE_MS,
36
38
  MEDIA_TMP_DIR,
39
+ STALE_OUTPUT_TIMEOUT_MS,
37
40
  } from "./config.js";
41
+ import { debugLog } from "./debug-log.js";
38
42
 
39
43
  // ──────────────────────────────────────────────────────────────────────────────
40
44
  // Message formatting
@@ -69,13 +73,16 @@ export type { ToolDefinition, CliToolResult } from "./tool-protocol.js";
69
73
  * - role "tool": formatted as [Tool Result: name]
70
74
  * - role "assistant" with tool_calls: formatted as [Assistant Tool Call: name(args)]
71
75
  */
72
- export function formatPrompt(messages: ChatMessage[]): string {
76
+ export function formatPrompt(messages: ChatMessage[], toolCount = 0): string {
73
77
  if (messages.length === 0) return "";
74
78
 
79
+ // Reduce history when tool schemas dominate the prompt
80
+ const maxMsgs = toolCount > TOOL_HEAVY_THRESHOLD ? MAX_MESSAGES_HEAVY_TOOLS : MAX_MESSAGES;
81
+
75
82
  // Keep system message (if any) + last N non-system messages
76
83
  const system = messages.find((m) => m.role === "system");
77
84
  const nonSystem = messages.filter((m) => m.role !== "system");
78
- const recent = nonSystem.slice(-MAX_MESSAGES);
85
+ const recent = nonSystem.slice(-maxMsgs);
79
86
  const truncated = system ? [system, ...recent] : recent;
80
87
 
81
88
  // Single short user message — send bare (no wrapping needed)
@@ -331,17 +338,20 @@ export function runCli(
331
338
  let timedOut = false;
332
339
  let killTimer: ReturnType<typeof setTimeout> | null = null;
333
340
  let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
341
+ let staleTimer: ReturnType<typeof setInterval> | null = null;
342
+ let lastOutputAt = Date.now();
334
343
 
335
344
  const clearTimers = () => {
336
345
  if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
337
346
  if (killTimer) { clearTimeout(killTimer); killTimer = null; }
347
+ if (staleTimer) { clearInterval(staleTimer); staleTimer = null; }
338
348
  };
339
349
 
340
- // ── Timeout sequence: SIGTERM grace → SIGKILL ──────────────────────
341
- timeoutTimer = setTimeout(() => {
350
+ const doKill = (reason: string) => {
351
+ if (timedOut) return; // already killing
342
352
  timedOut = true;
343
- const elapsed = Math.round(timeoutMs / 1000);
344
- log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
353
+ log(`[cli-bridge] ${reason} for ${cmd}, sending SIGTERM`);
354
+ debugLog("KILL", `${cmd} ${reason}`, { stdoutLen: stdout.length, stderrLen: stderr.length });
345
355
  proc.kill("SIGTERM");
346
356
 
347
357
  killTimer = setTimeout(() => {
@@ -350,14 +360,36 @@ export function runCli(
350
360
  proc.kill("SIGKILL");
351
361
  }
352
362
  }, TIMEOUT_GRACE_MS);
363
+ };
364
+
365
+ // ── Hard timeout: SIGTERM → grace → SIGKILL ──────────────────────────
366
+ timeoutTimer = setTimeout(() => {
367
+ doKill(`timeout after ${Math.round(timeoutMs / 1000)}s`);
353
368
  }, timeoutMs);
354
369
 
370
+ // ── Stale-output detection: kill if no output (stdout or stderr) for STALE_OUTPUT_TIMEOUT_MS
371
+ if (STALE_OUTPUT_TIMEOUT_MS > 0) {
372
+ const checkInterval = 15_000; // check every 15s
373
+ staleTimer = setInterval(() => {
374
+ const silent = Date.now() - lastOutputAt;
375
+ if (silent >= STALE_OUTPUT_TIMEOUT_MS) {
376
+ doKill(`stale output — no stdout for ${Math.round(silent / 1000)}s`);
377
+ }
378
+ }, checkInterval);
379
+ }
380
+
355
381
  proc.stdin.write(prompt, "utf8", () => {
356
382
  proc.stdin.end();
357
383
  });
358
384
 
359
- proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
360
- proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
385
+ proc.stdout.on("data", (d: Buffer) => {
386
+ stdout += d.toString();
387
+ lastOutputAt = Date.now();
388
+ });
389
+ proc.stderr.on("data", (d: Buffer) => {
390
+ stderr += d.toString();
391
+ lastOutputAt = Date.now(); // stderr also counts as activity
392
+ });
361
393
 
362
394
  proc.on("close", (code) => {
363
395
  clearTimers();
@@ -534,6 +566,7 @@ export async function runClaude(
534
566
  : prompt;
535
567
 
536
568
  const cwd = workdir ?? homedir();
569
+ debugLog("CLAUDE", `spawn ${model}`, { promptLen: effectivePrompt.length, promptKB: Math.round(effectivePrompt.length / 1024), cwd, timeoutMs: Math.round(timeoutMs / 1000) });
537
570
  const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
538
571
 
539
572
  // On 401: attempt one token refresh + retry before giving up.
@@ -770,8 +803,9 @@ export async function routeToCliRunner(
770
803
  timeoutMs: number,
771
804
  opts: RouteOptions = {}
772
805
  ): Promise<CliToolResult> {
773
- const prompt = formatPrompt(messages);
774
- const hasTools = !!(opts.tools?.length);
806
+ const toolCount = opts.tools?.length ?? 0;
807
+ const prompt = formatPrompt(messages, toolCount);
808
+ const hasTools = toolCount > 0;
775
809
 
776
810
  // Strip "vllm/" prefix if present — OpenClaw sends the full provider path
777
811
  // (e.g. "vllm/cli-claude/claude-sonnet-4-6") but the router only needs the
package/src/config.ts CHANGED
@@ -25,8 +25,13 @@ export const DEFAULT_PROXY_API_KEY = "cli-bridge";
25
25
  /** Default base timeout for CLI subprocess responses (ms). Scales dynamically. */
26
26
  export const DEFAULT_PROXY_TIMEOUT_MS = 300_000; // 5 min
27
27
 
28
- /** Maximum effective timeout after dynamic scaling (ms). */
29
- export const MAX_EFFECTIVE_TIMEOUT_MS = 900_000; // 15 min
28
+ /**
29
+ * Maximum effective timeout after dynamic scaling (ms).
30
+ * MUST be lower than the OpenClaw gateway's idleTimeoutSeconds (600s)
31
+ * so the bridge's own fallback fires BEFORE the gateway kills the request.
32
+ * 580s gives a 20s safety margin under the gateway's 600s hard limit.
33
+ */
34
+ export const MAX_EFFECTIVE_TIMEOUT_MS = 580_000; // 9m 40s — under gateway's 600s
30
35
 
31
36
  /** Extra timeout per message beyond 10 in the conversation (ms). */
32
37
  export const TIMEOUT_PER_EXTRA_MSG_MS = 2_000;
@@ -47,9 +52,32 @@ export const DEFAULT_CLI_TIMEOUT_MS = 120_000; // 2 min
47
52
  /** Grace period between SIGTERM and SIGKILL when a timeout fires (ms). */
48
53
  export const TIMEOUT_GRACE_MS = 5_000;
49
54
 
55
+ /**
56
+ * Stale output timeout — if a CLI subprocess produces no output (stdout or stderr) for this long,
57
+ * assume it's stuck and SIGTERM early. 0 = disabled.
58
+ * Prevents waiting the full timeout when Claude CLI hangs silently.
59
+ */
60
+ export const STALE_OUTPUT_TIMEOUT_MS = 60_000; // 1 min of silence → kill (Sonnet goes silent when rate-limited)
61
+
50
62
  /** Max messages to include in the prompt sent to CLI subprocesses. */
51
63
  export const MAX_MESSAGES = 20;
52
64
 
65
+ /**
66
+ * Reduced message limit when tools are heavy (> TOOL_HEAVY_THRESHOLD).
67
+ * Fewer history messages = smaller prompt = faster CLI response.
68
+ */
69
+ export const MAX_MESSAGES_HEAVY_TOOLS = 12;
70
+
71
+ /** Tool count threshold that triggers reduced message limit. */
72
+ export const TOOL_HEAVY_THRESHOLD = 10;
73
+
74
+ /**
75
+ * Tool count threshold that triggers smart routing to a faster model.
76
+ * When Sonnet receives a request with this many tools, route to Haiku instead.
77
+ * Haiku handles tool calls in ~11s vs Sonnet's 80-120s (and Sonnet hangs intermittently).
78
+ */
79
+ export const TOOL_ROUTING_THRESHOLD = 8;
80
+
53
81
  /** Max characters per message content before truncation. */
54
82
  export const MAX_MSG_CHARS = 4_000;
55
83
 
@@ -91,8 +119,8 @@ export const PROVIDER_SESSION_SWEEP_MS = 10 * 60 * 1_000; // 10 min
91
119
  * - Fast/lightweight (Haiku, Flash, Mini): 120s
92
120
  */
93
121
  export const DEFAULT_MODEL_TIMEOUTS: Record<string, number> = {
94
- "cli-claude/claude-opus-4-6": 420_000, // 7 min
95
- "cli-claude/claude-sonnet-4-6": 420_000, // 7 min — prevent timeout→Haiku fallback on large sessions
122
+ "cli-claude/claude-opus-4-6": 360_000, // 6 min — leaves room for dynamic scaling up to 580s cap
123
+ "cli-claude/claude-sonnet-4-6": 300_000, // 5 min — was 7 min, reduced so fallback fires before gateway's 600s
96
124
  "cli-claude/claude-haiku-4-5": 120_000, // 2 min
97
125
  "cli-gemini/gemini-2.5-pro": 300_000, // 5 min — image generation needs more time
98
126
  "cli-gemini/gemini-2.5-flash": 180_000, // 3 min
@@ -0,0 +1,55 @@
1
+ /**
2
+ * debug-log.ts
3
+ *
4
+ * File-based debug logger for the CLI bridge.
5
+ * Writes to ~/.openclaw/cli-bridge/debug.log with automatic rotation at 5 MB.
6
+ *
7
+ * Usage:
8
+ * tail -f ~/.openclaw/cli-bridge/debug.log
9
+ */
10
+
11
+ import { appendFileSync, statSync, renameSync, mkdirSync } from "node:fs";
12
+ import { join } from "node:path";
13
+ import { homedir } from "node:os";
14
+
15
+ const LOG_DIR = join(homedir(), ".openclaw", "cli-bridge");
16
+ const LOG_FILE = join(LOG_DIR, "debug.log");
17
+ const LOG_FILE_PREV = join(LOG_DIR, "debug.log.1");
18
+ const MAX_LOG_SIZE = 5 * 1024 * 1024; // 5 MB
19
+
20
+ let initialized = false;
21
+
22
+ function ensureDir(): void {
23
+ if (initialized) return;
24
+ try { mkdirSync(LOG_DIR, { recursive: true }); } catch { /* exists */ }
25
+ initialized = true;
26
+ }
27
+
28
+ function rotate(): void {
29
+ try {
30
+ const stat = statSync(LOG_FILE);
31
+ if (stat.size > MAX_LOG_SIZE) {
32
+ try { renameSync(LOG_FILE, LOG_FILE_PREV); } catch { /* best effort */ }
33
+ }
34
+ } catch { /* file doesn't exist yet */ }
35
+ }
36
+
37
+ function ts(): string {
38
+ return new Date().toISOString();
39
+ }
40
+
41
+ /**
42
+ * Append a debug line to the log file.
43
+ * Best-effort synchronous append; never throws — logging must not crash the bridge.
44
+ */
45
+ export function debugLog(category: string, message: string, data?: Record<string, unknown>): void {
46
+ try {
47
+ ensureDir();
48
+ rotate();
49
+ const extra = data ? ` ${JSON.stringify(data)}` : "";
50
+ appendFileSync(LOG_FILE, `${ts()} [${category}] ${message}${extra}\n`);
51
+ } catch { /* never crash on log failure */ }
52
+ }
53
+
54
+ /** Log path for display on status page / startup messages. */
55
+ export const DEBUG_LOG_PATH = LOG_FILE;
@@ -32,7 +32,9 @@ import {
32
32
  BITNET_MAX_MESSAGES,
33
33
  BITNET_SYSTEM_PROMPT,
34
34
  DEFAULT_MODEL_TIMEOUTS,
35
+ TOOL_ROUTING_THRESHOLD,
35
36
  } from "./config.js";
37
+ import { debugLog, DEBUG_LOG_PATH } from "./debug-log.js";
36
38
 
37
39
  // ── Active request tracking ─────────────────────────────────────────────────
38
40
 
@@ -214,6 +216,7 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
214
216
  opts.log(
215
217
  `[cli-bridge] proxy listening on :${opts.port}`
216
218
  );
219
+ debugLog("STARTUP", `proxy listening on :${opts.port}`, { debugLog: DEBUG_LOG_PATH });
217
220
  // unref() so the proxy server does not keep the Node.js event loop alive
218
221
  // when openclaw doctor or other short-lived CLI commands load plugins.
219
222
  // The gateway's own main loop keeps the process alive during normal operation.
@@ -389,6 +392,8 @@ async function handleRequest(
389
392
  const lastUserMsg = [...cleanMessages].reverse().find(m => m.role === "user");
390
393
  const promptPreview = typeof lastUserMsg?.content === "string" ? lastUserMsg.content.slice(0, 80) : "";
391
394
 
395
+ debugLog("REQ", `${model} start`, { msgs: cleanMessages.length, tools: tools?.length ?? 0, stream, media: mediaFiles.length, promptPreview: promptPreview.slice(0, 60) });
396
+
392
397
  // Track active request for dashboard
393
398
  activeRequests.set(id, { id, model, startedAt: Date.now(), messageCount: cleanMessages.length, toolCount: tools?.length ?? 0, promptPreview });
394
399
 
@@ -786,6 +791,18 @@ async function handleRequest(
786
791
  // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
787
792
  let result: CliToolResult;
788
793
  let usedModel = model;
794
+
795
+ // ── Smart tool routing: heavy tool requests → Haiku for speed ──────────
796
+ // Sonnet hangs intermittently on large tool prompts (20KB+, 21 tools).
797
+ // Haiku handles tool calls in ~11s vs Sonnet's 80-120s (when it works).
798
+ // Route tool-heavy requests directly to Haiku, keep Sonnet for reasoning.
799
+ if (hasTools && tools!.length > TOOL_ROUTING_THRESHOLD && model === "cli-claude/claude-sonnet-4-6") {
800
+ const toolModel = "cli-claude/claude-haiku-4-5";
801
+ opts.log(`[cli-bridge] tool-routing: ${model} → ${toolModel} (${tools!.length} tools)`);
802
+ debugLog("TOOL-ROUTE", `${model} → ${toolModel}`, { tools: tools!.length, threshold: TOOL_ROUTING_THRESHOLD });
803
+ usedModel = toolModel;
804
+ }
805
+
789
806
  const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
790
807
 
791
808
  // ── Provider session: ensure a persistent session for this model ────────
@@ -805,6 +822,7 @@ async function handleRequest(
805
822
  const toolExtra = (tools?.length ?? 0) * TIMEOUT_PER_TOOL_MS;
806
823
  const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, MAX_EFFECTIVE_TIMEOUT_MS);
807
824
  opts.log(`[cli-bridge] ${model} session=${session.id} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
825
+ debugLog("TIMEOUT", `${model} effective=${Math.round(effectiveTimeout / 1000)}s`, { base: Math.round(baseTimeout / 1000), msgExtra: Math.round(msgExtra / 1000), toolExtra: Math.round(toolExtra / 1000), cap: Math.round(MAX_EFFECTIVE_TIMEOUT_MS / 1000) });
808
826
 
809
827
  // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
810
828
  let sseHeadersSent = false;
@@ -821,17 +839,35 @@ async function handleRequest(
821
839
  keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, SSE_KEEPALIVE_INTERVAL_MS);
822
840
  }
823
841
 
842
+ // ── Progress notifications: send visible status updates to the webchat ──
843
+ // Users shouldn't stare at a blank screen for minutes without feedback.
844
+ let progressInterval: ReturnType<typeof setInterval> | null = null;
845
+ const PROGRESS_INTERVAL_MS = 30_000; // 30s between updates
846
+ if (stream && sseHeadersSent) {
847
+ const progressStart = Date.now();
848
+ progressInterval = setInterval(() => {
849
+ const elapsed = Math.round((Date.now() - progressStart) / 1000);
850
+ const timeoutSec = Math.round(effectiveTimeout / 1000);
851
+ // Send an SSE comment with progress info — visible in raw SSE but won't render as content
852
+ // Only an SSE comment is sent — no content delta is emitted, so the chat transcript stays clean
853
+ res.write(`: progress ${elapsed}s/${timeoutSec}s — ${model} processing\n\n`);
854
+ }, PROGRESS_INTERVAL_MS);
855
+ }
856
+
824
857
  const cliStart = Date.now();
825
858
  try {
826
- result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
859
+ result = await routeToCliRunner(usedModel, cleanMessages, effectiveTimeout, routeOpts);
860
+ const latencyMs = Date.now() - cliStart;
827
861
  const estCompletionTokens = estimateTokens(result.content ?? "");
828
- metrics.recordRequest(model, Date.now() - cliStart, true, estPromptTokens, estCompletionTokens, promptPreview);
862
+ metrics.recordRequest(usedModel, latencyMs, true, estPromptTokens, estCompletionTokens, promptPreview);
829
863
  providerSessions.recordRun(session.id, false);
864
+ debugLog("OK", `${usedModel} completed in ${(latencyMs / 1000).toFixed(1)}s`, { toolCalls: result.tool_calls?.length ?? 0, contentLen: result.content?.length ?? 0 });
830
865
  } catch (err) {
831
866
  const primaryDuration = Date.now() - cliStart;
832
867
  const msg = (err as Error).message;
833
868
  // ── Model fallback: retry once with a lighter model if configured ────
834
869
  const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
870
+ debugLog("FAIL", `${model} failed after ${(primaryDuration / 1000).toFixed(1)}s`, { isTimeout, error: msg.slice(0, 200) });
835
871
  // Record the run (with timeout flag) — session is preserved, not deleted
836
872
  providerSessions.recordRun(session.id, isTimeout);
837
873
  const fallbackModel = opts.modelFallbacks?.[model];
@@ -839,6 +875,11 @@ async function handleRequest(
839
875
  metrics.recordRequest(model, primaryDuration, false, estPromptTokens, undefined, promptPreview);
840
876
  const reason = isTimeout ? `timeout by supervisor, session=${session.id} preserved` : msg;
841
877
  opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
878
+ debugLog("FALLBACK", `${model} → ${fallbackModel}`, { reason: isTimeout ? "timeout" : "error", primaryDuration: Math.round(primaryDuration / 1000) });
879
+ // Notify the user via SSE that we're retrying with a different model
880
+ if (sseHeadersSent) {
881
+ res.write(`: fallback — ${model} ${isTimeout ? "timed out" : "failed"} after ${Math.round(primaryDuration / 1000)}s, retrying with ${fallbackModel}\n\n`);
882
+ }
842
883
  const fallbackStart = Date.now();
843
884
  try {
844
885
  result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
@@ -877,6 +918,7 @@ async function handleRequest(
877
918
  }
878
919
  } finally {
879
920
  if (keepaliveInterval) clearInterval(keepaliveInterval);
921
+ if (progressInterval) clearInterval(progressInterval);
880
922
  cleanupMediaFiles(mediaFiles);
881
923
  activeRequests.delete(id);
882
924
  }
@@ -10,6 +10,7 @@
10
10
  */
11
11
 
12
12
  import { randomBytes } from "node:crypto";
13
+ import { debugLog } from "./debug-log.js";
13
14
 
14
15
  // ──────────────────────────────────────────────────────────────────────────────
15
16
  // Types
@@ -46,13 +47,34 @@ export interface CliToolResult {
46
47
  * Build a text block describing available tools and response format instructions.
47
48
  * This block is prepended to the system message (or added as a new system message).
48
49
  */
50
+ /** Threshold: when tool count exceeds this, use compact schema to reduce prompt size. */
51
+ const COMPACT_TOOL_THRESHOLD = 8;
52
+
53
+ /**
54
+ * Build a compact tool description: name + required param names only.
55
+ * Cuts prompt size by ~60-70% for large tool sets.
56
+ */
57
+ function compactToolDescription(t: ToolDefinition): string {
58
+ const fn = t.function;
59
+ const params = fn.parameters as { properties?: Record<string, unknown>; required?: string[] };
60
+ const required = params?.required ?? Object.keys(params?.properties ?? {});
61
+ const paramList = required.length > 0 ? `(${required.join(", ")})` : "()";
62
+ return `- ${fn.name}${paramList}: ${fn.description}`;
63
+ }
64
+
65
+ /**
66
+ * Build a full tool description: name, description, and full JSON schema.
67
+ */
68
+ function fullToolDescription(t: ToolDefinition): string {
69
+ const fn = t.function;
70
+ const params = JSON.stringify(fn.parameters);
71
+ return `- name: ${fn.name}\n description: ${fn.description}\n parameters: ${params}`;
72
+ }
73
+
49
74
  export function buildToolPromptBlock(tools: ToolDefinition[]): string {
75
+ const useCompact = tools.length > COMPACT_TOOL_THRESHOLD;
50
76
  const toolDescriptions = tools
51
- .map((t) => {
52
- const fn = t.function;
53
- const params = JSON.stringify(fn.parameters);
54
- return `- name: ${fn.name}\n description: ${fn.description}\n parameters: ${params}`;
55
- })
77
+ .map(useCompact ? compactToolDescription : fullToolDescription)
56
78
  .join("\n");
57
79
 
58
80
  return [
@@ -67,6 +89,7 @@ export function buildToolPromptBlock(tools: ToolDefinition[]): string {
67
89
  '{"content":"<your text response>"}',
68
90
  "",
69
91
  "Do NOT include any text outside the JSON. Do NOT wrap in markdown code blocks.",
92
+ useCompact ? "Call ONE tool at a time. Do NOT batch multiple tool calls." : "",
70
93
  "",
71
94
  "Available tools:",
72
95
  toolDescriptions,
@@ -117,6 +140,7 @@ export function buildToolCallJsonSchema(): object {
117
140
  */
118
141
  export function parseToolCallResponse(text: string): CliToolResult {
119
142
  const trimmed = text.trim();
143
+ const preview = trimmed.slice(0, 120);
120
144
 
121
145
  // Check for Claude's --output-format json wrapper FIRST.
122
146
  // Claude returns: { "type": "result", "result": "..." }
@@ -124,30 +148,48 @@ export function parseToolCallResponse(text: string): CliToolResult {
124
148
  const claudeResult = tryExtractClaudeJsonResult(trimmed);
125
149
  if (claudeResult) {
126
150
  const inner = tryParseJson(claudeResult);
127
- if (inner) return normalizeResult(inner);
151
+ if (inner) {
152
+ const result = normalizeResult(inner);
153
+ debugLog("PARSE", `claude-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
154
+ return result;
155
+ }
128
156
  // Claude result is plain text
157
+ debugLog("PARSE", "claude-json → plain text", { len: claudeResult.length });
129
158
  return { content: claudeResult };
130
159
  }
131
160
 
132
161
  // Try direct JSON parse (for non-Claude outputs)
133
162
  const parsed = tryParseJson(trimmed);
134
- if (parsed) return normalizeResult(parsed);
163
+ if (parsed) {
164
+ const result = normalizeResult(parsed);
165
+ debugLog("PARSE", `direct-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
166
+ return result;
167
+ }
135
168
 
136
169
  // Try extracting JSON from markdown code blocks: ```json ... ```
137
170
  const codeBlock = tryExtractCodeBlock(trimmed);
138
171
  if (codeBlock) {
139
172
  const inner = tryParseJson(codeBlock);
140
- if (inner) return normalizeResult(inner);
173
+ if (inner) {
174
+ const result = normalizeResult(inner);
175
+ debugLog("PARSE", `code-block → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
176
+ return result;
177
+ }
141
178
  }
142
179
 
143
180
  // Try finding a JSON object anywhere in the text
144
181
  const embedded = tryExtractEmbeddedJson(trimmed);
145
182
  if (embedded) {
146
183
  const inner = tryParseJson(embedded);
147
- if (inner) return normalizeResult(inner);
184
+ if (inner) {
185
+ const result = normalizeResult(inner);
186
+ debugLog("PARSE", `embedded-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
187
+ return result;
188
+ }
148
189
  }
149
190
 
150
191
  // Fallback: treat entire text as content
192
+ debugLog("PARSE", "no JSON found → raw content", { len: trimmed.length, preview });
151
193
  return { content: trimmed || null };
152
194
  }
153
195
 
@@ -167,11 +209,17 @@ function normalizeResult(obj: Record<string, unknown>): CliToolResult {
167
209
  : JSON.stringify(tc.arguments ?? {}),
168
210
  },
169
211
  }));
170
- return { content: null, tool_calls: toolCalls };
212
+ // If the model also returned a content string alongside tool_calls, include it
213
+ const content = typeof obj.content === "string" ? obj.content : null;
214
+ return { content, tool_calls: toolCalls };
171
215
  }
172
216
 
173
- // Check for content field
217
+ // Check for content field — but rescue embedded tool_calls JSON from inside content strings.
218
+ // Models sometimes wrap tool calls inside a content string:
219
+ // {"content":"I'll write that file.\n{\"tool_calls\":[...]}"}
174
220
  if (typeof obj.content === "string") {
221
+ const rescued = tryRescueToolCallsFromContent(obj.content);
222
+ if (rescued) return rescued;
175
223
  return { content: obj.content };
176
224
  }
177
225
 
@@ -179,6 +227,41 @@ function normalizeResult(obj: Record<string, unknown>): CliToolResult {
179
227
  return { content: JSON.stringify(obj) };
180
228
  }
181
229
 
230
+ /**
231
+ * Rescue tool_calls embedded inside a content string.
232
+ * Handles cases where the model wraps tool calls in a content field:
233
+ * {"content":"Some text\n{\"tool_calls\":[...]}"}
234
+ * {"content":"{\"tool_calls\":[{\"name\":\"write\",...}]}"}
235
+ */
236
+ function tryRescueToolCallsFromContent(content: string): CliToolResult | null {
237
+ // Only attempt rescue if content contains the tool_calls signature
238
+ if (!content.includes('"tool_calls"') && !content.includes("tool_calls")) return null;
239
+
240
+ // Try to find embedded JSON with tool_calls
241
+ const embedded = tryExtractEmbeddedJson(content);
242
+ if (!embedded) return null;
243
+
244
+ const parsed = tryParseJson(embedded);
245
+ if (!parsed || !Array.isArray(parsed.tool_calls) || parsed.tool_calls.length === 0) return null;
246
+
247
+ // Extract the text content before the JSON (if any)
248
+ const jsonStart = content.indexOf(embedded);
249
+ const textBefore = jsonStart > 0 ? content.slice(0, jsonStart).trim() : null;
250
+
251
+ const toolCalls: ToolCall[] = parsed.tool_calls.map((tc: Record<string, unknown>) => ({
252
+ id: generateCallId(),
253
+ type: "function" as const,
254
+ function: {
255
+ name: String(tc.name ?? ""),
256
+ arguments: typeof tc.arguments === "string"
257
+ ? tc.arguments
258
+ : JSON.stringify(tc.arguments ?? {}),
259
+ },
260
+ }));
261
+
262
+ return { content: textBefore || null, tool_calls: toolCalls };
263
+ }
264
+
182
265
  function tryParseJson(text: string): Record<string, unknown> | null {
183
266
  try {
184
267
  const obj = JSON.parse(text);
@@ -38,7 +38,7 @@ describe("config.ts exports", () => {
38
38
  expect(DEFAULT_PROXY_TIMEOUT_MS).toBe(300_000);
39
39
  expect(DEFAULT_CLI_TIMEOUT_MS).toBe(120_000);
40
40
  expect(TIMEOUT_GRACE_MS).toBe(5_000);
41
- expect(MAX_EFFECTIVE_TIMEOUT_MS).toBe(900_000);
41
+ expect(MAX_EFFECTIVE_TIMEOUT_MS).toBe(580_000); // under gateway's 600s
42
42
  expect(SESSION_TTL_MS).toBe(30 * 60 * 1000);
43
43
  expect(CLEANUP_INTERVAL_MS).toBe(5 * 60 * 1000);
44
44
  expect(SESSION_KILL_GRACE_MS).toBe(5_000);
@@ -61,8 +61,8 @@ describe("config.ts exports", () => {
61
61
  });
62
62
 
63
63
  it("exports per-model timeouts for all major models", () => {
64
- expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-opus-4-6"]).toBe(420_000);
65
- expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-sonnet-4-6"]).toBe(420_000);
64
+ expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-opus-4-6"]).toBe(360_000);
65
+ expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-sonnet-4-6"]).toBe(300_000);
66
66
  expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-haiku-4-5"]).toBe(120_000);
67
67
  expect(DEFAULT_MODEL_TIMEOUTS["cli-gemini/gemini-2.5-pro"]).toBe(300_000);
68
68
  expect(DEFAULT_MODEL_TIMEOUTS["cli-gemini/gemini-2.5-flash"]).toBe(180_000);
@@ -63,8 +63,10 @@ vi.mock("../src/workdir.js", () => ({
63
63
  }));
64
64
 
65
65
  // Mock config module — provide all constants needed by session-manager.ts and cli-runner.ts
66
- vi.mock("../src/config.js", async () => {
66
+ vi.mock("../src/config.js", async (importOriginal) => {
67
+ const actual = await importOriginal<typeof import("../src/config.js")>();
67
68
  return {
69
+ ...actual,
68
70
  SESSION_TTL_MS: 30 * 60 * 1000,
69
71
  CLEANUP_INTERVAL_MS: 5 * 60 * 1000,
70
72
  SESSION_KILL_GRACE_MS: 5_000,