@elvatis_com/openclaw-cli-bridge-elvatis 2.9.0 → 2.10.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
4
4
 
5
- **Current version:** `2.9.0`
5
+ **Current version:** `2.10.1`
6
6
 
7
7
  ---
8
8
 
@@ -406,6 +406,23 @@ npm run ci # lint + typecheck + test
406
406
 
407
407
  ## Changelog
408
408
 
409
+ ### v2.10.1
410
+ - **feat:** smart tool-routing — tool-heavy requests (>8 tools) auto-route to Haiku instead of Sonnet. Haiku handles tool calls in ~11s vs Sonnet's 80-120s (with intermittent hangs). Sonnet is preserved for reasoning/text responses.
411
+ - **fix:** reduce stale-output timeout 120s→60s — faster fallback when Sonnet goes silent
412
+ - **feat:** per-model spawn logging with prompt size for debugging
413
+
414
+ ### v2.10.0
415
+ - **fix:** cap effective timeout at 580s (under gateway's 600s `idleTimeoutSeconds`) so bridge fallback fires BEFORE gateway kills the request — eliminates the race condition where both compete to handle the timeout
416
+ - **fix:** reduce Sonnet base timeout 420s→300s, Opus 420s→360s — ensures fallback triggers faster for stuck CLI sessions
417
+ - **feat:** compact tool schema mode — when >8 tools, compress definitions to name+params only, cutting prompt size ~60%
418
+ - **feat:** stale-output detection — if CLI produces no stdout for 120s, SIGTERM early instead of waiting full timeout
419
+ - **feat:** adaptive message limits — reduce history from 20→12 messages when >10 tools to keep prompts smaller
420
+ - **feat:** file-based debug log at `~/.openclaw/cli-bridge/debug.log` — `tail -f` for real-time request lifecycle visibility
421
+ - **feat:** SSE progress comments every 30s so the webchat connection stays informed during long CLI runs
422
+ - **feat:** SSE fallback notification — visible comment when a model times out and the bridge retries with fallback
423
+ - **fix:** rescue tool_calls embedded inside content strings — handles models that wrap `{"tool_calls":[...]}` inside a `{"content":"..."}` wrapper
424
+ - **fix:** parse robustness — debug logging on all parse paths to diagnose raw-JSON-instead-of-tool-calls issues
425
+
409
426
  ### v2.9.0
410
427
  - **feat:** enhanced `/status` dashboard with 5 new panels:
411
428
  - **Active Requests**: live in-flight requests with model, elapsed time, message/tool count, prompt preview
package/SKILL.md CHANGED
@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
68
68
 
69
69
  See `README.md` for full configuration reference and architecture diagram.
70
70
 
71
- **Version:** 2.9.0
71
+ **Version:** 2.10.1
@@ -2,7 +2,7 @@
2
2
  "id": "openclaw-cli-bridge-elvatis",
3
3
  "slug": "openclaw-cli-bridge-elvatis",
4
4
  "name": "OpenClaw CLI Bridge",
5
- "version": "2.9.0",
5
+ "version": "2.10.1",
6
6
  "license": "MIT",
7
7
  "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
8
8
  "providers": [
@@ -43,8 +43,8 @@
43
43
  "type": "number"
44
44
  },
45
45
  "default": {
46
- "cli-claude/claude-opus-4-6": 420000,
47
- "cli-claude/claude-sonnet-4-6": 420000,
46
+ "cli-claude/claude-opus-4-6": 360000,
47
+ "cli-claude/claude-sonnet-4-6": 300000,
48
48
  "cli-claude/claude-haiku-4-5": 120000,
49
49
  "cli-gemini/gemini-2.5-pro": 300000,
50
50
  "cli-gemini/gemini-2.5-flash": 180000,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
3
- "version": "2.9.0",
3
+ "version": "2.10.1",
4
4
  "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
5
5
  "type": "module",
6
6
  "openclaw": {
package/src/cli-runner.ts CHANGED
@@ -30,11 +30,15 @@ import {
30
30
  } from "./tool-protocol.js";
31
31
  import {
32
32
  MAX_MESSAGES,
33
+ MAX_MESSAGES_HEAVY_TOOLS,
34
+ TOOL_HEAVY_THRESHOLD,
33
35
  MAX_MSG_CHARS,
34
36
  DEFAULT_CLI_TIMEOUT_MS,
35
37
  TIMEOUT_GRACE_MS,
36
38
  MEDIA_TMP_DIR,
39
+ STALE_OUTPUT_TIMEOUT_MS,
37
40
  } from "./config.js";
41
+ import { debugLog } from "./debug-log.js";
38
42
 
39
43
  // ──────────────────────────────────────────────────────────────────────────────
40
44
  // Message formatting
@@ -69,13 +73,16 @@ export type { ToolDefinition, CliToolResult } from "./tool-protocol.js";
69
73
  * - role "tool": formatted as [Tool Result: name]
70
74
  * - role "assistant" with tool_calls: formatted as [Assistant Tool Call: name(args)]
71
75
  */
72
- export function formatPrompt(messages: ChatMessage[]): string {
76
+ export function formatPrompt(messages: ChatMessage[], toolCount = 0): string {
73
77
  if (messages.length === 0) return "";
74
78
 
79
+ // Reduce history when tool schemas dominate the prompt
80
+ const maxMsgs = toolCount > TOOL_HEAVY_THRESHOLD ? MAX_MESSAGES_HEAVY_TOOLS : MAX_MESSAGES;
81
+
75
82
  // Keep system message (if any) + last N non-system messages
76
83
  const system = messages.find((m) => m.role === "system");
77
84
  const nonSystem = messages.filter((m) => m.role !== "system");
78
- const recent = nonSystem.slice(-MAX_MESSAGES);
85
+ const recent = nonSystem.slice(-maxMsgs);
79
86
  const truncated = system ? [system, ...recent] : recent;
80
87
 
81
88
  // Single short user message — send bare (no wrapping needed)
@@ -331,17 +338,20 @@ export function runCli(
331
338
  let timedOut = false;
332
339
  let killTimer: ReturnType<typeof setTimeout> | null = null;
333
340
  let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
341
+ let staleTimer: ReturnType<typeof setInterval> | null = null;
342
+ let lastOutputAt = Date.now();
334
343
 
335
344
  const clearTimers = () => {
336
345
  if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
337
346
  if (killTimer) { clearTimeout(killTimer); killTimer = null; }
347
+ if (staleTimer) { clearInterval(staleTimer); staleTimer = null; }
338
348
  };
339
349
 
340
- // ── Timeout sequence: SIGTERM grace → SIGKILL ──────────────────────
341
- timeoutTimer = setTimeout(() => {
350
+ const doKill = (reason: string) => {
351
+ if (timedOut) return; // already killing
342
352
  timedOut = true;
343
- const elapsed = Math.round(timeoutMs / 1000);
344
- log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
353
+ log(`[cli-bridge] ${reason} for ${cmd}, sending SIGTERM`);
354
+ debugLog("KILL", `${cmd} ${reason}`, { stdoutLen: stdout.length, stderrLen: stderr.length });
345
355
  proc.kill("SIGTERM");
346
356
 
347
357
  killTimer = setTimeout(() => {
@@ -350,14 +360,36 @@ export function runCli(
350
360
  proc.kill("SIGKILL");
351
361
  }
352
362
  }, TIMEOUT_GRACE_MS);
363
+ };
364
+
365
+ // ── Hard timeout: SIGTERM → grace → SIGKILL ──────────────────────────
366
+ timeoutTimer = setTimeout(() => {
367
+ doKill(`timeout after ${Math.round(timeoutMs / 1000)}s`);
353
368
  }, timeoutMs);
354
369
 
370
+ // ── Stale-output detection: kill if no output (stdout or stderr) for STALE_OUTPUT_TIMEOUT_MS
371
+ if (STALE_OUTPUT_TIMEOUT_MS > 0) {
372
+ const checkInterval = 15_000; // check every 15s
373
+ staleTimer = setInterval(() => {
374
+ const silent = Date.now() - lastOutputAt;
375
+ if (silent >= STALE_OUTPUT_TIMEOUT_MS) {
376
+ doKill(`stale output — no stdout for ${Math.round(silent / 1000)}s`);
377
+ }
378
+ }, checkInterval);
379
+ }
380
+
355
381
  proc.stdin.write(prompt, "utf8", () => {
356
382
  proc.stdin.end();
357
383
  });
358
384
 
359
- proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
360
- proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
385
+ proc.stdout.on("data", (d: Buffer) => {
386
+ stdout += d.toString();
387
+ lastOutputAt = Date.now();
388
+ });
389
+ proc.stderr.on("data", (d: Buffer) => {
390
+ stderr += d.toString();
391
+ lastOutputAt = Date.now(); // stderr also counts as activity
392
+ });
361
393
 
362
394
  proc.on("close", (code) => {
363
395
  clearTimers();
@@ -534,6 +566,7 @@ export async function runClaude(
534
566
  : prompt;
535
567
 
536
568
  const cwd = workdir ?? homedir();
569
+ debugLog("CLAUDE", `spawn ${model}`, { promptLen: effectivePrompt.length, promptKB: Math.round(effectivePrompt.length / 1024), cwd, timeoutMs: Math.round(timeoutMs / 1000) });
537
570
  const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
538
571
 
539
572
  // On 401: attempt one token refresh + retry before giving up.
@@ -770,8 +803,9 @@ export async function routeToCliRunner(
770
803
  timeoutMs: number,
771
804
  opts: RouteOptions = {}
772
805
  ): Promise<CliToolResult> {
773
- const prompt = formatPrompt(messages);
774
- const hasTools = !!(opts.tools?.length);
806
+ const toolCount = opts.tools?.length ?? 0;
807
+ const prompt = formatPrompt(messages, toolCount);
808
+ const hasTools = toolCount > 0;
775
809
 
776
810
  // Strip "vllm/" prefix if present — OpenClaw sends the full provider path
777
811
  // (e.g. "vllm/cli-claude/claude-sonnet-4-6") but the router only needs the
package/src/config.ts CHANGED
@@ -25,8 +25,13 @@ export const DEFAULT_PROXY_API_KEY = "cli-bridge";
25
25
  /** Default base timeout for CLI subprocess responses (ms). Scales dynamically. */
26
26
  export const DEFAULT_PROXY_TIMEOUT_MS = 300_000; // 5 min
27
27
 
28
- /** Maximum effective timeout after dynamic scaling (ms). */
29
- export const MAX_EFFECTIVE_TIMEOUT_MS = 900_000; // 15 min
28
+ /**
29
+ * Maximum effective timeout after dynamic scaling (ms).
30
+ * MUST be lower than the OpenClaw gateway's idleTimeoutSeconds (600s)
31
+ * so the bridge's own fallback fires BEFORE the gateway kills the request.
32
+ * 580s gives a 20s safety margin under the gateway's 600s hard limit.
33
+ */
34
+ export const MAX_EFFECTIVE_TIMEOUT_MS = 580_000; // 9m 40s — under gateway's 600s
30
35
 
31
36
  /** Extra timeout per message beyond 10 in the conversation (ms). */
32
37
  export const TIMEOUT_PER_EXTRA_MSG_MS = 2_000;
@@ -47,9 +52,32 @@ export const DEFAULT_CLI_TIMEOUT_MS = 120_000; // 2 min
47
52
  /** Grace period between SIGTERM and SIGKILL when a timeout fires (ms). */
48
53
  export const TIMEOUT_GRACE_MS = 5_000;
49
54
 
55
+ /**
56
+ * Stale output timeout — if a CLI subprocess produces no output (stdout or stderr) for this long,
57
+ * assume it's stuck and SIGTERM early. 0 = disabled.
58
+ * Prevents waiting the full timeout when Claude CLI hangs silently.
59
+ */
60
+ export const STALE_OUTPUT_TIMEOUT_MS = 60_000; // 1 min of silence → kill (Sonnet goes silent when rate-limited)
61
+
50
62
  /** Max messages to include in the prompt sent to CLI subprocesses. */
51
63
  export const MAX_MESSAGES = 20;
52
64
 
65
+ /**
66
+ * Reduced message limit when tools are heavy (> TOOL_HEAVY_THRESHOLD).
67
+ * Fewer history messages = smaller prompt = faster CLI response.
68
+ */
69
+ export const MAX_MESSAGES_HEAVY_TOOLS = 12;
70
+
71
+ /** Tool count threshold that triggers reduced message limit. */
72
+ export const TOOL_HEAVY_THRESHOLD = 10;
73
+
74
+ /**
75
+ * Tool count threshold that triggers smart routing to a faster model.
76
+ * When Sonnet receives a request with this many tools, route to Haiku instead.
77
+ * Haiku handles tool calls in ~11s vs Sonnet's 80-120s (and Sonnet hangs intermittently).
78
+ */
79
+ export const TOOL_ROUTING_THRESHOLD = 8;
80
+
53
81
  /** Max characters per message content before truncation. */
54
82
  export const MAX_MSG_CHARS = 4_000;
55
83
 
@@ -91,8 +119,8 @@ export const PROVIDER_SESSION_SWEEP_MS = 10 * 60 * 1_000; // 10 min
91
119
  * - Fast/lightweight (Haiku, Flash, Mini): 120s
92
120
  */
93
121
  export const DEFAULT_MODEL_TIMEOUTS: Record<string, number> = {
94
- "cli-claude/claude-opus-4-6": 420_000, // 7 min
95
- "cli-claude/claude-sonnet-4-6": 420_000, // 7 min — prevent timeout→Haiku fallback on large sessions
122
+ "cli-claude/claude-opus-4-6": 360_000, // 6 min — leaves room for dynamic scaling up to 580s cap
123
+ "cli-claude/claude-sonnet-4-6": 300_000, // 5 min — was 7 min, reduced so fallback fires before gateway's 600s
96
124
  "cli-claude/claude-haiku-4-5": 120_000, // 2 min
97
125
  "cli-gemini/gemini-2.5-pro": 300_000, // 5 min — image generation needs more time
98
126
  "cli-gemini/gemini-2.5-flash": 180_000, // 3 min
@@ -0,0 +1,55 @@
1
+ /**
2
+ * debug-log.ts
3
+ *
4
+ * File-based debug logger for the CLI bridge.
5
+ * Writes to ~/.openclaw/cli-bridge/debug.log with automatic rotation at 5 MB.
6
+ *
7
+ * Usage:
8
+ * tail -f ~/.openclaw/cli-bridge/debug.log
9
+ */
10
+
11
+ import { appendFileSync, statSync, renameSync, mkdirSync } from "node:fs";
12
+ import { join } from "node:path";
13
+ import { homedir } from "node:os";
14
+
15
+ const LOG_DIR = join(homedir(), ".openclaw", "cli-bridge");
16
+ const LOG_FILE = join(LOG_DIR, "debug.log");
17
+ const LOG_FILE_PREV = join(LOG_DIR, "debug.log.1");
18
+ const MAX_LOG_SIZE = 5 * 1024 * 1024; // 5 MB
19
+
20
+ let initialized = false;
21
+
22
+ function ensureDir(): void {
23
+ if (initialized) return;
24
+ try { mkdirSync(LOG_DIR, { recursive: true }); } catch { /* exists */ }
25
+ initialized = true;
26
+ }
27
+
28
+ function rotate(): void {
29
+ try {
30
+ const stat = statSync(LOG_FILE);
31
+ if (stat.size > MAX_LOG_SIZE) {
32
+ try { renameSync(LOG_FILE, LOG_FILE_PREV); } catch { /* best effort */ }
33
+ }
34
+ } catch { /* file doesn't exist yet */ }
35
+ }
36
+
37
+ function ts(): string {
38
+ return new Date().toISOString();
39
+ }
40
+
41
+ /**
42
+ * Append a debug line to the log file.
43
+ * Best-effort synchronous append; never throws — logging must not crash the bridge.
44
+ */
45
+ export function debugLog(category: string, message: string, data?: Record<string, unknown>): void {
46
+ try {
47
+ ensureDir();
48
+ rotate();
49
+ const extra = data ? ` ${JSON.stringify(data)}` : "";
50
+ appendFileSync(LOG_FILE, `${ts()} [${category}] ${message}${extra}\n`);
51
+ } catch { /* never crash on log failure */ }
52
+ }
53
+
54
+ /** Log path for display on status page / startup messages. */
55
+ export const DEBUG_LOG_PATH = LOG_FILE;
@@ -32,7 +32,9 @@ import {
32
32
  BITNET_MAX_MESSAGES,
33
33
  BITNET_SYSTEM_PROMPT,
34
34
  DEFAULT_MODEL_TIMEOUTS,
35
+ TOOL_ROUTING_THRESHOLD,
35
36
  } from "./config.js";
37
+ import { debugLog, DEBUG_LOG_PATH } from "./debug-log.js";
36
38
 
37
39
  // ── Active request tracking ─────────────────────────────────────────────────
38
40
 
@@ -214,6 +216,7 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
214
216
  opts.log(
215
217
  `[cli-bridge] proxy listening on :${opts.port}`
216
218
  );
219
+ debugLog("STARTUP", `proxy listening on :${opts.port}`, { debugLog: DEBUG_LOG_PATH });
217
220
  // unref() so the proxy server does not keep the Node.js event loop alive
218
221
  // when openclaw doctor or other short-lived CLI commands load plugins.
219
222
  // The gateway's own main loop keeps the process alive during normal operation.
@@ -389,6 +392,8 @@ async function handleRequest(
389
392
  const lastUserMsg = [...cleanMessages].reverse().find(m => m.role === "user");
390
393
  const promptPreview = typeof lastUserMsg?.content === "string" ? lastUserMsg.content.slice(0, 80) : "";
391
394
 
395
+ debugLog("REQ", `${model} start`, { msgs: cleanMessages.length, tools: tools?.length ?? 0, stream, media: mediaFiles.length, promptPreview: promptPreview.slice(0, 60) });
396
+
392
397
  // Track active request for dashboard
393
398
  activeRequests.set(id, { id, model, startedAt: Date.now(), messageCount: cleanMessages.length, toolCount: tools?.length ?? 0, promptPreview });
394
399
 
@@ -786,6 +791,18 @@ async function handleRequest(
786
791
  // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
787
792
  let result: CliToolResult;
788
793
  let usedModel = model;
794
+
795
+ // ── Smart tool routing: heavy tool requests → Haiku for speed ──────────
796
+ // Sonnet hangs intermittently on large tool prompts (20KB+, 21 tools).
797
+ // Haiku handles tool calls in ~11s vs Sonnet's 80-120s (when it works).
798
+ // Route tool-heavy requests directly to Haiku, keep Sonnet for reasoning.
799
+ if (hasTools && tools!.length > TOOL_ROUTING_THRESHOLD && model === "cli-claude/claude-sonnet-4-6") {
800
+ const toolModel = "cli-claude/claude-haiku-4-5";
801
+ opts.log(`[cli-bridge] tool-routing: ${model} → ${toolModel} (${tools!.length} tools)`);
802
+ debugLog("TOOL-ROUTE", `${model} → ${toolModel}`, { tools: tools!.length, threshold: TOOL_ROUTING_THRESHOLD });
803
+ usedModel = toolModel;
804
+ }
805
+
789
806
  const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
790
807
 
791
808
  // ── Provider session: ensure a persistent session for this model ────────
@@ -805,6 +822,7 @@ async function handleRequest(
805
822
  const toolExtra = (tools?.length ?? 0) * TIMEOUT_PER_TOOL_MS;
806
823
  const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, MAX_EFFECTIVE_TIMEOUT_MS);
807
824
  opts.log(`[cli-bridge] ${model} session=${session.id} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
825
+ debugLog("TIMEOUT", `${model} effective=${Math.round(effectiveTimeout / 1000)}s`, { base: Math.round(baseTimeout / 1000), msgExtra: Math.round(msgExtra / 1000), toolExtra: Math.round(toolExtra / 1000), cap: Math.round(MAX_EFFECTIVE_TIMEOUT_MS / 1000) });
808
826
 
809
827
  // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
810
828
  let sseHeadersSent = false;
@@ -821,17 +839,35 @@ async function handleRequest(
821
839
  keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, SSE_KEEPALIVE_INTERVAL_MS);
822
840
  }
823
841
 
842
+ // ── Progress notifications: send visible status updates to the webchat ──
843
+ // Users shouldn't stare at a blank screen for minutes without feedback.
844
+ let progressInterval: ReturnType<typeof setInterval> | null = null;
845
+ const PROGRESS_INTERVAL_MS = 30_000; // 30s between updates
846
+ if (stream && sseHeadersSent) {
847
+ const progressStart = Date.now();
848
+ progressInterval = setInterval(() => {
849
+ const elapsed = Math.round((Date.now() - progressStart) / 1000);
850
+ const timeoutSec = Math.round(effectiveTimeout / 1000);
851
+ // Send an SSE comment with progress info — visible in raw SSE but won't render as content
852
+ // Only an SSE comment is sent — no content delta is emitted, so the chat transcript stays clean
853
+ res.write(`: progress ${elapsed}s/${timeoutSec}s — ${model} processing\n\n`);
854
+ }, PROGRESS_INTERVAL_MS);
855
+ }
856
+
824
857
  const cliStart = Date.now();
825
858
  try {
826
- result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
859
+ result = await routeToCliRunner(usedModel, cleanMessages, effectiveTimeout, routeOpts);
860
+ const latencyMs = Date.now() - cliStart;
827
861
  const estCompletionTokens = estimateTokens(result.content ?? "");
828
- metrics.recordRequest(model, Date.now() - cliStart, true, estPromptTokens, estCompletionTokens, promptPreview);
862
+ metrics.recordRequest(usedModel, latencyMs, true, estPromptTokens, estCompletionTokens, promptPreview);
829
863
  providerSessions.recordRun(session.id, false);
864
+ debugLog("OK", `${usedModel} completed in ${(latencyMs / 1000).toFixed(1)}s`, { toolCalls: result.tool_calls?.length ?? 0, contentLen: result.content?.length ?? 0 });
830
865
  } catch (err) {
831
866
  const primaryDuration = Date.now() - cliStart;
832
867
  const msg = (err as Error).message;
833
868
  // ── Model fallback: retry once with a lighter model if configured ────
834
869
  const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
870
+ debugLog("FAIL", `${model} failed after ${(primaryDuration / 1000).toFixed(1)}s`, { isTimeout, error: msg.slice(0, 200) });
835
871
  // Record the run (with timeout flag) — session is preserved, not deleted
836
872
  providerSessions.recordRun(session.id, isTimeout);
837
873
  const fallbackModel = opts.modelFallbacks?.[model];
@@ -839,6 +875,11 @@ async function handleRequest(
839
875
  metrics.recordRequest(model, primaryDuration, false, estPromptTokens, undefined, promptPreview);
840
876
  const reason = isTimeout ? `timeout by supervisor, session=${session.id} preserved` : msg;
841
877
  opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
878
+ debugLog("FALLBACK", `${model} → ${fallbackModel}`, { reason: isTimeout ? "timeout" : "error", primaryDuration: Math.round(primaryDuration / 1000) });
879
+ // Notify the user via SSE that we're retrying with a different model
880
+ if (sseHeadersSent) {
881
+ res.write(`: fallback — ${model} ${isTimeout ? "timed out" : "failed"} after ${Math.round(primaryDuration / 1000)}s, retrying with ${fallbackModel}\n\n`);
882
+ }
842
883
  const fallbackStart = Date.now();
843
884
  try {
844
885
  result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
@@ -877,6 +918,7 @@ async function handleRequest(
877
918
  }
878
919
  } finally {
879
920
  if (keepaliveInterval) clearInterval(keepaliveInterval);
921
+ if (progressInterval) clearInterval(progressInterval);
880
922
  cleanupMediaFiles(mediaFiles);
881
923
  activeRequests.delete(id);
882
924
  }
@@ -10,6 +10,7 @@
10
10
  */
11
11
 
12
12
  import { randomBytes } from "node:crypto";
13
+ import { debugLog } from "./debug-log.js";
13
14
 
14
15
  // ──────────────────────────────────────────────────────────────────────────────
15
16
  // Types
@@ -46,13 +47,34 @@ export interface CliToolResult {
46
47
  * Build a text block describing available tools and response format instructions.
47
48
  * This block is prepended to the system message (or added as a new system message).
48
49
  */
50
+ /** Threshold: when tool count exceeds this, use compact schema to reduce prompt size. */
51
+ const COMPACT_TOOL_THRESHOLD = 8;
52
+
53
+ /**
54
+ * Build a compact tool description: name + required param names only.
55
+ * Cuts prompt size by ~60-70% for large tool sets.
56
+ */
57
+ function compactToolDescription(t: ToolDefinition): string {
58
+ const fn = t.function;
59
+ const params = fn.parameters as { properties?: Record<string, unknown>; required?: string[] };
60
+ const required = params?.required ?? Object.keys(params?.properties ?? {});
61
+ const paramList = required.length > 0 ? `(${required.join(", ")})` : "()";
62
+ return `- ${fn.name}${paramList}: ${fn.description}`;
63
+ }
64
+
65
+ /**
66
+ * Build a full tool description: name, description, and full JSON schema.
67
+ */
68
+ function fullToolDescription(t: ToolDefinition): string {
69
+ const fn = t.function;
70
+ const params = JSON.stringify(fn.parameters);
71
+ return `- name: ${fn.name}\n description: ${fn.description}\n parameters: ${params}`;
72
+ }
73
+
49
74
  export function buildToolPromptBlock(tools: ToolDefinition[]): string {
75
+ const useCompact = tools.length > COMPACT_TOOL_THRESHOLD;
50
76
  const toolDescriptions = tools
51
- .map((t) => {
52
- const fn = t.function;
53
- const params = JSON.stringify(fn.parameters);
54
- return `- name: ${fn.name}\n description: ${fn.description}\n parameters: ${params}`;
55
- })
77
+ .map(useCompact ? compactToolDescription : fullToolDescription)
56
78
  .join("\n");
57
79
 
58
80
  return [
@@ -67,6 +89,7 @@ export function buildToolPromptBlock(tools: ToolDefinition[]): string {
67
89
  '{"content":"<your text response>"}',
68
90
  "",
69
91
  "Do NOT include any text outside the JSON. Do NOT wrap in markdown code blocks.",
92
+ useCompact ? "Call ONE tool at a time. Do NOT batch multiple tool calls." : "",
70
93
  "",
71
94
  "Available tools:",
72
95
  toolDescriptions,
@@ -117,6 +140,7 @@ export function buildToolCallJsonSchema(): object {
117
140
  */
118
141
  export function parseToolCallResponse(text: string): CliToolResult {
119
142
  const trimmed = text.trim();
143
+ const preview = trimmed.slice(0, 120);
120
144
 
121
145
  // Check for Claude's --output-format json wrapper FIRST.
122
146
  // Claude returns: { "type": "result", "result": "..." }
@@ -124,30 +148,48 @@ export function parseToolCallResponse(text: string): CliToolResult {
124
148
  const claudeResult = tryExtractClaudeJsonResult(trimmed);
125
149
  if (claudeResult) {
126
150
  const inner = tryParseJson(claudeResult);
127
- if (inner) return normalizeResult(inner);
151
+ if (inner) {
152
+ const result = normalizeResult(inner);
153
+ debugLog("PARSE", `claude-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
154
+ return result;
155
+ }
128
156
  // Claude result is plain text
157
+ debugLog("PARSE", "claude-json → plain text", { len: claudeResult.length });
129
158
  return { content: claudeResult };
130
159
  }
131
160
 
132
161
  // Try direct JSON parse (for non-Claude outputs)
133
162
  const parsed = tryParseJson(trimmed);
134
- if (parsed) return normalizeResult(parsed);
163
+ if (parsed) {
164
+ const result = normalizeResult(parsed);
165
+ debugLog("PARSE", `direct-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
166
+ return result;
167
+ }
135
168
 
136
169
  // Try extracting JSON from markdown code blocks: ```json ... ```
137
170
  const codeBlock = tryExtractCodeBlock(trimmed);
138
171
  if (codeBlock) {
139
172
  const inner = tryParseJson(codeBlock);
140
- if (inner) return normalizeResult(inner);
173
+ if (inner) {
174
+ const result = normalizeResult(inner);
175
+ debugLog("PARSE", `code-block → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
176
+ return result;
177
+ }
141
178
  }
142
179
 
143
180
  // Try finding a JSON object anywhere in the text
144
181
  const embedded = tryExtractEmbeddedJson(trimmed);
145
182
  if (embedded) {
146
183
  const inner = tryParseJson(embedded);
147
- if (inner) return normalizeResult(inner);
184
+ if (inner) {
185
+ const result = normalizeResult(inner);
186
+ debugLog("PARSE", `embedded-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
187
+ return result;
188
+ }
148
189
  }
149
190
 
150
191
  // Fallback: treat entire text as content
192
+ debugLog("PARSE", "no JSON found → raw content", { len: trimmed.length, preview });
151
193
  return { content: trimmed || null };
152
194
  }
153
195
 
@@ -167,11 +209,17 @@ function normalizeResult(obj: Record<string, unknown>): CliToolResult {
167
209
  : JSON.stringify(tc.arguments ?? {}),
168
210
  },
169
211
  }));
170
- return { content: null, tool_calls: toolCalls };
212
+ // If the model also returned a content string alongside tool_calls, include it
213
+ const content = typeof obj.content === "string" ? obj.content : null;
214
+ return { content, tool_calls: toolCalls };
171
215
  }
172
216
 
173
- // Check for content field
217
+ // Check for content field — but rescue embedded tool_calls JSON from inside content strings.
218
+ // Models sometimes wrap tool calls inside a content string:
219
+ // {"content":"I'll write that file.\n{\"tool_calls\":[...]}"}
174
220
  if (typeof obj.content === "string") {
221
+ const rescued = tryRescueToolCallsFromContent(obj.content);
222
+ if (rescued) return rescued;
175
223
  return { content: obj.content };
176
224
  }
177
225
 
@@ -179,6 +227,41 @@ function normalizeResult(obj: Record<string, unknown>): CliToolResult {
179
227
  return { content: JSON.stringify(obj) };
180
228
  }
181
229
 
230
+ /**
231
+ * Rescue tool_calls embedded inside a content string.
232
+ * Handles cases where the model wraps tool calls in a content field:
233
+ * {"content":"Some text\n{\"tool_calls\":[...]}"}
234
+ * {"content":"{\"tool_calls\":[{\"name\":\"write\",...}]}"}
235
+ */
236
+ function tryRescueToolCallsFromContent(content: string): CliToolResult | null {
237
+ // Only attempt rescue if content contains the tool_calls signature
238
+ if (!content.includes('"tool_calls"') && !content.includes("tool_calls")) return null;
239
+
240
+ // Try to find embedded JSON with tool_calls
241
+ const embedded = tryExtractEmbeddedJson(content);
242
+ if (!embedded) return null;
243
+
244
+ const parsed = tryParseJson(embedded);
245
+ if (!parsed || !Array.isArray(parsed.tool_calls) || parsed.tool_calls.length === 0) return null;
246
+
247
+ // Extract the text content before the JSON (if any)
248
+ const jsonStart = content.indexOf(embedded);
249
+ const textBefore = jsonStart > 0 ? content.slice(0, jsonStart).trim() : null;
250
+
251
+ const toolCalls: ToolCall[] = parsed.tool_calls.map((tc: Record<string, unknown>) => ({
252
+ id: generateCallId(),
253
+ type: "function" as const,
254
+ function: {
255
+ name: String(tc.name ?? ""),
256
+ arguments: typeof tc.arguments === "string"
257
+ ? tc.arguments
258
+ : JSON.stringify(tc.arguments ?? {}),
259
+ },
260
+ }));
261
+
262
+ return { content: textBefore || null, tool_calls: toolCalls };
263
+ }
264
+
182
265
  function tryParseJson(text: string): Record<string, unknown> | null {
183
266
  try {
184
267
  const obj = JSON.parse(text);
@@ -38,7 +38,7 @@ describe("config.ts exports", () => {
38
38
  expect(DEFAULT_PROXY_TIMEOUT_MS).toBe(300_000);
39
39
  expect(DEFAULT_CLI_TIMEOUT_MS).toBe(120_000);
40
40
  expect(TIMEOUT_GRACE_MS).toBe(5_000);
41
- expect(MAX_EFFECTIVE_TIMEOUT_MS).toBe(900_000);
41
+ expect(MAX_EFFECTIVE_TIMEOUT_MS).toBe(580_000); // under gateway's 600s
42
42
  expect(SESSION_TTL_MS).toBe(30 * 60 * 1000);
43
43
  expect(CLEANUP_INTERVAL_MS).toBe(5 * 60 * 1000);
44
44
  expect(SESSION_KILL_GRACE_MS).toBe(5_000);
@@ -61,8 +61,8 @@ describe("config.ts exports", () => {
61
61
  });
62
62
 
63
63
  it("exports per-model timeouts for all major models", () => {
64
- expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-opus-4-6"]).toBe(420_000);
65
- expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-sonnet-4-6"]).toBe(420_000);
64
+ expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-opus-4-6"]).toBe(360_000);
65
+ expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-sonnet-4-6"]).toBe(300_000);
66
66
  expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-haiku-4-5"]).toBe(120_000);
67
67
  expect(DEFAULT_MODEL_TIMEOUTS["cli-gemini/gemini-2.5-pro"]).toBe(300_000);
68
68
  expect(DEFAULT_MODEL_TIMEOUTS["cli-gemini/gemini-2.5-flash"]).toBe(180_000);
@@ -63,8 +63,10 @@ vi.mock("../src/workdir.js", () => ({
63
63
  }));
64
64
 
65
65
  // Mock config module — provide all constants needed by session-manager.ts and cli-runner.ts
66
- vi.mock("../src/config.js", async () => {
66
+ vi.mock("../src/config.js", async (importOriginal) => {
67
+ const actual = await importOriginal<typeof import("../src/config.js")>();
67
68
  return {
69
+ ...actual,
68
70
  SESSION_TTL_MS: 30 * 60 * 1000,
69
71
  CLEANUP_INTERVAL_MS: 5 * 60 * 1000,
70
72
  SESSION_KILL_GRACE_MS: 5_000,