@elvatis_com/openclaw-cli-bridge-elvatis 2.9.0 → 2.10.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
4
4
 
5
- **Current version:** `2.9.0`
5
+ **Current version:** `2.10.0`
6
6
 
7
7
  ---
8
8
 
@@ -406,6 +406,18 @@ npm run ci # lint + typecheck + test
406
406
 
407
407
  ## Changelog
408
408
 
409
+ ### v2.10.0
410
+ - **fix:** cap effective timeout at 580s (under gateway's 600s `idleTimeoutSeconds`) so bridge fallback fires BEFORE gateway kills the request — eliminates the race condition where both compete to handle the timeout
411
+ - **fix:** reduce Sonnet base timeout 420s→300s, Opus 420s→360s — ensures fallback triggers faster for stuck CLI sessions
412
+ - **feat:** compact tool schema mode — when >8 tools, compress definitions to name+params only, cutting prompt size ~60%
413
+ - **feat:** stale-output detection — if CLI produces no stdout/stderr for 120s, SIGTERM early instead of waiting full timeout
414
+ - **feat:** adaptive message limits — reduce history from 20→12 messages when >10 tools to keep prompts smaller
415
+ - **feat:** file-based debug log at `~/.openclaw/cli-bridge/debug.log` — `tail -f` for real-time request lifecycle visibility
416
+ - **feat:** SSE progress comments every 30s so the webchat connection stays informed during long CLI runs
417
+ - **feat:** SSE fallback notification — visible comment when a model times out and the bridge retries with fallback
418
+ - **fix:** rescue tool_calls embedded inside content strings — handles models that wrap `{"tool_calls":[...]}` inside a `{"content":"..."}` wrapper
419
+ - **fix:** parse robustness — debug logging on all parse paths to diagnose raw-JSON-instead-of-tool-calls issues
420
+
409
421
  ### v2.9.0
410
422
  - **feat:** enhanced `/status` dashboard with 5 new panels:
411
423
  - **Active Requests**: live in-flight requests with model, elapsed time, message/tool count, prompt preview
package/SKILL.md CHANGED
@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
68
68
 
69
69
  See `README.md` for full configuration reference and architecture diagram.
70
70
 
71
- **Version:** 2.9.0
71
+ **Version:** 2.10.0
@@ -2,7 +2,7 @@
2
2
  "id": "openclaw-cli-bridge-elvatis",
3
3
  "slug": "openclaw-cli-bridge-elvatis",
4
4
  "name": "OpenClaw CLI Bridge",
5
- "version": "2.9.0",
5
+ "version": "2.10.0",
6
6
  "license": "MIT",
7
7
  "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
8
8
  "providers": [
@@ -43,8 +43,8 @@
43
43
  "type": "number"
44
44
  },
45
45
  "default": {
46
- "cli-claude/claude-opus-4-6": 420000,
47
- "cli-claude/claude-sonnet-4-6": 420000,
46
+ "cli-claude/claude-opus-4-6": 360000,
47
+ "cli-claude/claude-sonnet-4-6": 300000,
48
48
  "cli-claude/claude-haiku-4-5": 120000,
49
49
  "cli-gemini/gemini-2.5-pro": 300000,
50
50
  "cli-gemini/gemini-2.5-flash": 180000,
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
3
- "version": "2.9.0",
3
+ "version": "2.10.0",
4
4
  "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
5
5
  "type": "module",
6
6
  "openclaw": {
package/src/cli-runner.ts CHANGED
@@ -30,11 +30,15 @@ import {
30
30
  } from "./tool-protocol.js";
31
31
  import {
32
32
  MAX_MESSAGES,
33
+ MAX_MESSAGES_HEAVY_TOOLS,
34
+ TOOL_HEAVY_THRESHOLD,
33
35
  MAX_MSG_CHARS,
34
36
  DEFAULT_CLI_TIMEOUT_MS,
35
37
  TIMEOUT_GRACE_MS,
36
38
  MEDIA_TMP_DIR,
39
+ STALE_OUTPUT_TIMEOUT_MS,
37
40
  } from "./config.js";
41
+ import { debugLog } from "./debug-log.js";
38
42
 
39
43
  // ──────────────────────────────────────────────────────────────────────────────
40
44
  // Message formatting
@@ -69,13 +73,16 @@ export type { ToolDefinition, CliToolResult } from "./tool-protocol.js";
69
73
  * - role "tool": formatted as [Tool Result: name]
70
74
  * - role "assistant" with tool_calls: formatted as [Assistant Tool Call: name(args)]
71
75
  */
72
- export function formatPrompt(messages: ChatMessage[]): string {
76
+ export function formatPrompt(messages: ChatMessage[], toolCount = 0): string {
73
77
  if (messages.length === 0) return "";
74
78
 
79
+ // Reduce history when tool schemas dominate the prompt
80
+ const maxMsgs = toolCount > TOOL_HEAVY_THRESHOLD ? MAX_MESSAGES_HEAVY_TOOLS : MAX_MESSAGES;
81
+
75
82
  // Keep system message (if any) + last N non-system messages
76
83
  const system = messages.find((m) => m.role === "system");
77
84
  const nonSystem = messages.filter((m) => m.role !== "system");
78
- const recent = nonSystem.slice(-MAX_MESSAGES);
85
+ const recent = nonSystem.slice(-maxMsgs);
79
86
  const truncated = system ? [system, ...recent] : recent;
80
87
 
81
88
  // Single short user message — send bare (no wrapping needed)
@@ -331,17 +338,20 @@ export function runCli(
331
338
  let timedOut = false;
332
339
  let killTimer: ReturnType<typeof setTimeout> | null = null;
333
340
  let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
341
+ let staleTimer: ReturnType<typeof setInterval> | null = null;
342
+ let lastOutputAt = Date.now();
334
343
 
335
344
  const clearTimers = () => {
336
345
  if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
337
346
  if (killTimer) { clearTimeout(killTimer); killTimer = null; }
347
+ if (staleTimer) { clearInterval(staleTimer); staleTimer = null; }
338
348
  };
339
349
 
340
- // ── Timeout sequence: SIGTERM grace → SIGKILL ──────────────────────
341
- timeoutTimer = setTimeout(() => {
350
+ const doKill = (reason: string) => {
351
+ if (timedOut) return; // already killing
342
352
  timedOut = true;
343
- const elapsed = Math.round(timeoutMs / 1000);
344
- log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
353
+ log(`[cli-bridge] ${reason} for ${cmd}, sending SIGTERM`);
354
+ debugLog("KILL", `${cmd} ${reason}`, { stdoutLen: stdout.length, stderrLen: stderr.length });
345
355
  proc.kill("SIGTERM");
346
356
 
347
357
  killTimer = setTimeout(() => {
@@ -350,14 +360,36 @@ export function runCli(
350
360
  proc.kill("SIGKILL");
351
361
  }
352
362
  }, TIMEOUT_GRACE_MS);
363
+ };
364
+
365
+ // ── Hard timeout: SIGTERM → grace → SIGKILL ──────────────────────────
366
+ timeoutTimer = setTimeout(() => {
367
+ doKill(`timeout after ${Math.round(timeoutMs / 1000)}s`);
353
368
  }, timeoutMs);
354
369
 
370
+ // ── Stale-output detection: kill if no stdout for STALE_OUTPUT_TIMEOUT_MS
371
+ if (STALE_OUTPUT_TIMEOUT_MS > 0) {
372
+ const checkInterval = 15_000; // check every 15s
373
+ staleTimer = setInterval(() => {
374
+ const silent = Date.now() - lastOutputAt;
375
+ if (silent >= STALE_OUTPUT_TIMEOUT_MS) {
376
+ doKill(`stale output — no stdout for ${Math.round(silent / 1000)}s`);
377
+ }
378
+ }, checkInterval);
379
+ }
380
+
355
381
  proc.stdin.write(prompt, "utf8", () => {
356
382
  proc.stdin.end();
357
383
  });
358
384
 
359
- proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
360
- proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
385
+ proc.stdout.on("data", (d: Buffer) => {
386
+ stdout += d.toString();
387
+ lastOutputAt = Date.now();
388
+ });
389
+ proc.stderr.on("data", (d: Buffer) => {
390
+ stderr += d.toString();
391
+ lastOutputAt = Date.now(); // stderr also counts as activity
392
+ });
361
393
 
362
394
  proc.on("close", (code) => {
363
395
  clearTimers();
@@ -770,8 +802,9 @@ export async function routeToCliRunner(
770
802
  timeoutMs: number,
771
803
  opts: RouteOptions = {}
772
804
  ): Promise<CliToolResult> {
773
- const prompt = formatPrompt(messages);
774
- const hasTools = !!(opts.tools?.length);
805
+ const toolCount = opts.tools?.length ?? 0;
806
+ const prompt = formatPrompt(messages, toolCount);
807
+ const hasTools = toolCount > 0;
775
808
 
776
809
  // Strip "vllm/" prefix if present — OpenClaw sends the full provider path
777
810
  // (e.g. "vllm/cli-claude/claude-sonnet-4-6") but the router only needs the
package/src/config.ts CHANGED
@@ -25,8 +25,13 @@ export const DEFAULT_PROXY_API_KEY = "cli-bridge";
25
25
  /** Default base timeout for CLI subprocess responses (ms). Scales dynamically. */
26
26
  export const DEFAULT_PROXY_TIMEOUT_MS = 300_000; // 5 min
27
27
 
28
- /** Maximum effective timeout after dynamic scaling (ms). */
29
- export const MAX_EFFECTIVE_TIMEOUT_MS = 900_000; // 15 min
28
+ /**
29
+ * Maximum effective timeout after dynamic scaling (ms).
30
+ * MUST be lower than the OpenClaw gateway's idleTimeoutSeconds (600s)
31
+ * so the bridge's own fallback fires BEFORE the gateway kills the request.
32
+ * 580s gives a 20s safety margin under the gateway's 600s hard limit.
33
+ */
34
+ export const MAX_EFFECTIVE_TIMEOUT_MS = 580_000; // 9m 40s — under gateway's 600s
30
35
 
31
36
  /** Extra timeout per message beyond 10 in the conversation (ms). */
32
37
  export const TIMEOUT_PER_EXTRA_MSG_MS = 2_000;
@@ -47,9 +52,25 @@ export const DEFAULT_CLI_TIMEOUT_MS = 120_000; // 2 min
47
52
  /** Grace period between SIGTERM and SIGKILL when a timeout fires (ms). */
48
53
  export const TIMEOUT_GRACE_MS = 5_000;
49
54
 
55
+ /**
56
+ * Stale output timeout — if a CLI subprocess produces no stdout or stderr for this long,
57
+ * assume it's stuck and SIGTERM early. 0 = disabled.
58
+ * Prevents waiting the full timeout when Claude CLI hangs silently.
59
+ */
60
+ export const STALE_OUTPUT_TIMEOUT_MS = 120_000; // 2 min of silence → kill
61
+
50
62
  /** Max messages to include in the prompt sent to CLI subprocesses. */
51
63
  export const MAX_MESSAGES = 20;
52
64
 
65
+ /**
66
+ * Reduced message limit when tools are heavy (> TOOL_HEAVY_THRESHOLD).
67
+ * Fewer history messages = smaller prompt = faster CLI response.
68
+ */
69
+ export const MAX_MESSAGES_HEAVY_TOOLS = 12;
70
+
71
+ /** Tool count threshold that triggers reduced message limit. */
72
+ export const TOOL_HEAVY_THRESHOLD = 10;
73
+
53
74
  /** Max characters per message content before truncation. */
54
75
  export const MAX_MSG_CHARS = 4_000;
55
76
 
@@ -91,8 +112,8 @@ export const PROVIDER_SESSION_SWEEP_MS = 10 * 60 * 1_000; // 10 min
91
112
  * - Fast/lightweight (Haiku, Flash, Mini): 120s
92
113
  */
93
114
  export const DEFAULT_MODEL_TIMEOUTS: Record<string, number> = {
94
- "cli-claude/claude-opus-4-6": 420_000, // 7 min
95
- "cli-claude/claude-sonnet-4-6": 420_000, // 7 min — prevent timeout→Haiku fallback on large sessions
115
+ "cli-claude/claude-opus-4-6": 360_000, // 6 min — leaves room for dynamic scaling up to 580s cap
116
+ "cli-claude/claude-sonnet-4-6": 300_000, // 5 min — was 7 min, reduced so fallback fires before gateway's 600s
96
117
  "cli-claude/claude-haiku-4-5": 120_000, // 2 min
97
118
  "cli-gemini/gemini-2.5-pro": 300_000, // 5 min — image generation needs more time
98
119
  "cli-gemini/gemini-2.5-flash": 180_000, // 3 min
@@ -0,0 +1,55 @@
1
+ /**
2
+ * debug-log.ts
3
+ *
4
+ * File-based debug logger for the CLI bridge.
5
+ * Writes to ~/.openclaw/cli-bridge/debug.log with automatic rotation at 5 MB.
6
+ *
7
+ * Usage:
8
+ * tail -f ~/.openclaw/cli-bridge/debug.log
9
+ */
10
+
11
+ import { appendFileSync, statSync, renameSync, mkdirSync } from "node:fs";
12
+ import { join } from "node:path";
13
+ import { homedir } from "node:os";
14
+
15
+ const LOG_DIR = join(homedir(), ".openclaw", "cli-bridge");
16
+ const LOG_FILE = join(LOG_DIR, "debug.log");
17
+ const LOG_FILE_PREV = join(LOG_DIR, "debug.log.1");
18
+ const MAX_LOG_SIZE = 5 * 1024 * 1024; // 5 MB
19
+
20
+ let initialized = false;
21
+
22
+ function ensureDir(): void {
23
+ if (initialized) return;
24
+ try { mkdirSync(LOG_DIR, { recursive: true }); } catch { /* exists */ }
25
+ initialized = true;
26
+ }
27
+
28
+ function rotate(): void {
29
+ try {
30
+ const stat = statSync(LOG_FILE);
31
+ if (stat.size > MAX_LOG_SIZE) {
32
+ try { renameSync(LOG_FILE, LOG_FILE_PREV); } catch { /* best effort */ }
33
+ }
34
+ } catch { /* file doesn't exist yet */ }
35
+ }
36
+
37
+ function ts(): string {
38
+ return new Date().toISOString();
39
+ }
40
+
41
+ /**
42
+ * Append a debug line to the log file.
43
+ * Non-blocking, never throws — logging must not crash the bridge.
44
+ */
45
+ export function debugLog(category: string, message: string, data?: Record<string, unknown>): void {
46
+ try {
47
+ ensureDir();
48
+ rotate();
49
+ const extra = data ? ` ${JSON.stringify(data)}` : "";
50
+ appendFileSync(LOG_FILE, `${ts()} [${category}] ${message}${extra}\n`);
51
+ } catch { /* never crash on log failure */ }
52
+ }
53
+
54
+ /** Log path for display on status page / startup messages. */
55
+ export const DEBUG_LOG_PATH = LOG_FILE;
@@ -33,6 +33,7 @@ import {
33
33
  BITNET_SYSTEM_PROMPT,
34
34
  DEFAULT_MODEL_TIMEOUTS,
35
35
  } from "./config.js";
36
+ import { debugLog, DEBUG_LOG_PATH } from "./debug-log.js";
36
37
 
37
38
  // ── Active request tracking ─────────────────────────────────────────────────
38
39
 
@@ -214,6 +215,7 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
214
215
  opts.log(
215
216
  `[cli-bridge] proxy listening on :${opts.port}`
216
217
  );
218
+ debugLog("STARTUP", `proxy listening on :${opts.port}`, { debugLog: DEBUG_LOG_PATH });
217
219
  // unref() so the proxy server does not keep the Node.js event loop alive
218
220
  // when openclaw doctor or other short-lived CLI commands load plugins.
219
221
  // The gateway's own main loop keeps the process alive during normal operation.
@@ -389,6 +391,8 @@ async function handleRequest(
389
391
  const lastUserMsg = [...cleanMessages].reverse().find(m => m.role === "user");
390
392
  const promptPreview = typeof lastUserMsg?.content === "string" ? lastUserMsg.content.slice(0, 80) : "";
391
393
 
394
+ debugLog("REQ", `${model} start`, { msgs: cleanMessages.length, tools: tools?.length ?? 0, stream, media: mediaFiles.length, promptPreview: promptPreview.slice(0, 60) });
395
+
392
396
  // Track active request for dashboard
393
397
  activeRequests.set(id, { id, model, startedAt: Date.now(), messageCount: cleanMessages.length, toolCount: tools?.length ?? 0, promptPreview });
394
398
 
@@ -805,6 +809,7 @@ async function handleRequest(
805
809
  const toolExtra = (tools?.length ?? 0) * TIMEOUT_PER_TOOL_MS;
806
810
  const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, MAX_EFFECTIVE_TIMEOUT_MS);
807
811
  opts.log(`[cli-bridge] ${model} session=${session.id} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
812
+ debugLog("TIMEOUT", `${model} effective=${Math.round(effectiveTimeout / 1000)}s`, { base: Math.round(baseTimeout / 1000), msgExtra: Math.round(msgExtra / 1000), toolExtra: Math.round(toolExtra / 1000), cap: Math.round(MAX_EFFECTIVE_TIMEOUT_MS / 1000) });
808
813
 
809
814
  // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
810
815
  let sseHeadersSent = false;
@@ -821,17 +826,35 @@ async function handleRequest(
821
826
  keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, SSE_KEEPALIVE_INTERVAL_MS);
822
827
  }
823
828
 
829
+ // ── Progress notifications: send visible status updates to the webchat ──
830
+ // Users shouldn't stare at a blank screen for minutes without feedback.
831
+ let progressInterval: ReturnType<typeof setInterval> | null = null;
832
+ const PROGRESS_INTERVAL_MS = 30_000; // 30s between updates
833
+ if (stream && sseHeadersSent) {
834
+ const progressStart = Date.now();
835
+ progressInterval = setInterval(() => {
836
+ const elapsed = Math.round((Date.now() - progressStart) / 1000);
837
+ const timeoutSec = Math.round(effectiveTimeout / 1000);
838
+ // Send an SSE comment with progress info — visible in raw SSE but won't render as content
839
+ // Also send a small content delta that OpenClaw can show as typing indicator
840
+ res.write(`: progress ${elapsed}s/${timeoutSec}s — ${model} processing\n\n`);
841
+ }, PROGRESS_INTERVAL_MS);
842
+ }
843
+
824
844
  const cliStart = Date.now();
825
845
  try {
826
846
  result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
847
+ const latencyMs = Date.now() - cliStart;
827
848
  const estCompletionTokens = estimateTokens(result.content ?? "");
828
- metrics.recordRequest(model, Date.now() - cliStart, true, estPromptTokens, estCompletionTokens, promptPreview);
849
+ metrics.recordRequest(model, latencyMs, true, estPromptTokens, estCompletionTokens, promptPreview);
829
850
  providerSessions.recordRun(session.id, false);
851
+ debugLog("OK", `${model} completed in ${(latencyMs / 1000).toFixed(1)}s`, { toolCalls: result.tool_calls?.length ?? 0, contentLen: result.content?.length ?? 0 });
830
852
  } catch (err) {
831
853
  const primaryDuration = Date.now() - cliStart;
832
854
  const msg = (err as Error).message;
833
855
  // ── Model fallback: retry once with a lighter model if configured ────
834
856
  const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
857
+ debugLog("FAIL", `${model} failed after ${(primaryDuration / 1000).toFixed(1)}s`, { isTimeout, error: msg.slice(0, 200) });
835
858
  // Record the run (with timeout flag) — session is preserved, not deleted
836
859
  providerSessions.recordRun(session.id, isTimeout);
837
860
  const fallbackModel = opts.modelFallbacks?.[model];
@@ -839,6 +862,11 @@ async function handleRequest(
839
862
  metrics.recordRequest(model, primaryDuration, false, estPromptTokens, undefined, promptPreview);
840
863
  const reason = isTimeout ? `timeout by supervisor, session=${session.id} preserved` : msg;
841
864
  opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
865
+ debugLog("FALLBACK", `${model} → ${fallbackModel}`, { reason: isTimeout ? "timeout" : "error", primaryDuration: Math.round(primaryDuration / 1000) });
866
+ // Notify the user via SSE that we're retrying with a different model
867
+ if (sseHeadersSent) {
868
+ res.write(`: fallback — ${model} ${isTimeout ? "timed out" : "failed"} after ${Math.round(primaryDuration / 1000)}s, retrying with ${fallbackModel}\n\n`);
869
+ }
842
870
  const fallbackStart = Date.now();
843
871
  try {
844
872
  result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
@@ -877,6 +905,7 @@ async function handleRequest(
877
905
  }
878
906
  } finally {
879
907
  if (keepaliveInterval) clearInterval(keepaliveInterval);
908
+ if (progressInterval) clearInterval(progressInterval);
880
909
  cleanupMediaFiles(mediaFiles);
881
910
  activeRequests.delete(id);
882
911
  }
@@ -10,6 +10,7 @@
10
10
  */
11
11
 
12
12
  import { randomBytes } from "node:crypto";
13
+ import { debugLog } from "./debug-log.js";
13
14
 
14
15
  // ──────────────────────────────────────────────────────────────────────────────
15
16
  // Types
@@ -46,13 +47,34 @@ export interface CliToolResult {
46
47
  * Build a text block describing available tools and response format instructions.
47
48
  * This block is prepended to the system message (or added as a new system message).
48
49
  */
50
+ /** Threshold: when tool count exceeds this, use compact schema to reduce prompt size. */
51
+ const COMPACT_TOOL_THRESHOLD = 8;
52
+
53
+ /**
54
+ * Build a compact tool description: name + required param names only.
55
+ * Cuts prompt size by ~60-70% for large tool sets.
56
+ */
57
+ function compactToolDescription(t: ToolDefinition): string {
58
+ const fn = t.function;
59
+ const params = fn.parameters as { properties?: Record<string, unknown>; required?: string[] };
60
+ const required = params?.required ?? Object.keys(params?.properties ?? {});
61
+ const paramList = required.length > 0 ? `(${required.join(", ")})` : "()";
62
+ return `- ${fn.name}${paramList}: ${fn.description}`;
63
+ }
64
+
65
+ /**
66
+ * Build a full tool description: name, description, and full JSON schema.
67
+ */
68
+ function fullToolDescription(t: ToolDefinition): string {
69
+ const fn = t.function;
70
+ const params = JSON.stringify(fn.parameters);
71
+ return `- name: ${fn.name}\n description: ${fn.description}\n parameters: ${params}`;
72
+ }
73
+
49
74
  export function buildToolPromptBlock(tools: ToolDefinition[]): string {
75
+ const useCompact = tools.length > COMPACT_TOOL_THRESHOLD;
50
76
  const toolDescriptions = tools
51
- .map((t) => {
52
- const fn = t.function;
53
- const params = JSON.stringify(fn.parameters);
54
- return `- name: ${fn.name}\n description: ${fn.description}\n parameters: ${params}`;
55
- })
77
+ .map(useCompact ? compactToolDescription : fullToolDescription)
56
78
  .join("\n");
57
79
 
58
80
  return [
@@ -67,6 +89,7 @@ export function buildToolPromptBlock(tools: ToolDefinition[]): string {
67
89
  '{"content":"<your text response>"}',
68
90
  "",
69
91
  "Do NOT include any text outside the JSON. Do NOT wrap in markdown code blocks.",
92
+ useCompact ? "Call ONE tool at a time. Do NOT batch multiple tool calls." : "",
70
93
  "",
71
94
  "Available tools:",
72
95
  toolDescriptions,
@@ -117,6 +140,7 @@ export function buildToolCallJsonSchema(): object {
117
140
  */
118
141
  export function parseToolCallResponse(text: string): CliToolResult {
119
142
  const trimmed = text.trim();
143
+ const preview = trimmed.slice(0, 120);
120
144
 
121
145
  // Check for Claude's --output-format json wrapper FIRST.
122
146
  // Claude returns: { "type": "result", "result": "..." }
@@ -124,30 +148,48 @@ export function parseToolCallResponse(text: string): CliToolResult {
124
148
  const claudeResult = tryExtractClaudeJsonResult(trimmed);
125
149
  if (claudeResult) {
126
150
  const inner = tryParseJson(claudeResult);
127
- if (inner) return normalizeResult(inner);
151
+ if (inner) {
152
+ const result = normalizeResult(inner);
153
+ debugLog("PARSE", `claude-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
154
+ return result;
155
+ }
128
156
  // Claude result is plain text
157
+ debugLog("PARSE", "claude-json → plain text", { len: claudeResult.length });
129
158
  return { content: claudeResult };
130
159
  }
131
160
 
132
161
  // Try direct JSON parse (for non-Claude outputs)
133
162
  const parsed = tryParseJson(trimmed);
134
- if (parsed) return normalizeResult(parsed);
163
+ if (parsed) {
164
+ const result = normalizeResult(parsed);
165
+ debugLog("PARSE", `direct-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
166
+ return result;
167
+ }
135
168
 
136
169
  // Try extracting JSON from markdown code blocks: ```json ... ```
137
170
  const codeBlock = tryExtractCodeBlock(trimmed);
138
171
  if (codeBlock) {
139
172
  const inner = tryParseJson(codeBlock);
140
- if (inner) return normalizeResult(inner);
173
+ if (inner) {
174
+ const result = normalizeResult(inner);
175
+ debugLog("PARSE", `code-block → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
176
+ return result;
177
+ }
141
178
  }
142
179
 
143
180
  // Try finding a JSON object anywhere in the text
144
181
  const embedded = tryExtractEmbeddedJson(trimmed);
145
182
  if (embedded) {
146
183
  const inner = tryParseJson(embedded);
147
- if (inner) return normalizeResult(inner);
184
+ if (inner) {
185
+ const result = normalizeResult(inner);
186
+ debugLog("PARSE", `embedded-json → ${result.tool_calls ? "tool_calls" : "content"}`, { toolCalls: result.tool_calls?.length ?? 0 });
187
+ return result;
188
+ }
148
189
  }
149
190
 
150
191
  // Fallback: treat entire text as content
192
+ debugLog("PARSE", "no JSON found → raw content", { len: trimmed.length, preview });
151
193
  return { content: trimmed || null };
152
194
  }
153
195
 
@@ -167,11 +209,17 @@ function normalizeResult(obj: Record<string, unknown>): CliToolResult {
167
209
  : JSON.stringify(tc.arguments ?? {}),
168
210
  },
169
211
  }));
170
- return { content: null, tool_calls: toolCalls };
212
+ // If the model also returned a content string alongside tool_calls, include it
213
+ const content = typeof obj.content === "string" ? obj.content : null;
214
+ return { content, tool_calls: toolCalls };
171
215
  }
172
216
 
173
- // Check for content field
217
+ // Check for content field — but rescue embedded tool_calls JSON from inside content strings.
218
+ // Models sometimes wrap tool calls inside a content string:
219
+ // {"content":"I'll write that file.\n{\"tool_calls\":[...]}"}
174
220
  if (typeof obj.content === "string") {
221
+ const rescued = tryRescueToolCallsFromContent(obj.content);
222
+ if (rescued) return rescued;
175
223
  return { content: obj.content };
176
224
  }
177
225
 
@@ -179,6 +227,41 @@ function normalizeResult(obj: Record<string, unknown>): CliToolResult {
179
227
  return { content: JSON.stringify(obj) };
180
228
  }
181
229
 
230
+ /**
231
+ * Rescue tool_calls embedded inside a content string.
232
+ * Handles cases where the model wraps tool calls in a content field:
233
+ * {"content":"Some text\n{\"tool_calls\":[...]}"}
234
+ * {"content":"{\"tool_calls\":[{\"name\":\"write\",...}]}"}
235
+ */
236
+ function tryRescueToolCallsFromContent(content: string): CliToolResult | null {
237
+ // Only attempt rescue if content contains the tool_calls signature
238
+ if (!content.includes('"tool_calls"') && !content.includes("tool_calls")) return null;
239
+
240
+ // Try to find embedded JSON with tool_calls
241
+ const embedded = tryExtractEmbeddedJson(content);
242
+ if (!embedded) return null;
243
+
244
+ const parsed = tryParseJson(embedded);
245
+ if (!parsed || !Array.isArray(parsed.tool_calls) || parsed.tool_calls.length === 0) return null;
246
+
247
+ // Extract the text content before the JSON (if any)
248
+ const jsonStart = content.indexOf(embedded);
249
+ const textBefore = jsonStart > 0 ? content.slice(0, jsonStart).trim() : null;
250
+
251
+ const toolCalls: ToolCall[] = parsed.tool_calls.map((tc: Record<string, unknown>) => ({
252
+ id: generateCallId(),
253
+ type: "function" as const,
254
+ function: {
255
+ name: String(tc.name ?? ""),
256
+ arguments: typeof tc.arguments === "string"
257
+ ? tc.arguments
258
+ : JSON.stringify(tc.arguments ?? {}),
259
+ },
260
+ }));
261
+
262
+ return { content: textBefore || null, tool_calls: toolCalls };
263
+ }
264
+
182
265
  function tryParseJson(text: string): Record<string, unknown> | null {
183
266
  try {
184
267
  const obj = JSON.parse(text);
@@ -38,7 +38,7 @@ describe("config.ts exports", () => {
38
38
  expect(DEFAULT_PROXY_TIMEOUT_MS).toBe(300_000);
39
39
  expect(DEFAULT_CLI_TIMEOUT_MS).toBe(120_000);
40
40
  expect(TIMEOUT_GRACE_MS).toBe(5_000);
41
- expect(MAX_EFFECTIVE_TIMEOUT_MS).toBe(900_000);
41
+ expect(MAX_EFFECTIVE_TIMEOUT_MS).toBe(580_000); // under gateway's 600s
42
42
  expect(SESSION_TTL_MS).toBe(30 * 60 * 1000);
43
43
  expect(CLEANUP_INTERVAL_MS).toBe(5 * 60 * 1000);
44
44
  expect(SESSION_KILL_GRACE_MS).toBe(5_000);
@@ -61,8 +61,8 @@ describe("config.ts exports", () => {
61
61
  });
62
62
 
63
63
  it("exports per-model timeouts for all major models", () => {
64
- expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-opus-4-6"]).toBe(420_000);
65
- expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-sonnet-4-6"]).toBe(420_000);
64
+ expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-opus-4-6"]).toBe(360_000);
65
+ expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-sonnet-4-6"]).toBe(300_000);
66
66
  expect(DEFAULT_MODEL_TIMEOUTS["cli-claude/claude-haiku-4-5"]).toBe(120_000);
67
67
  expect(DEFAULT_MODEL_TIMEOUTS["cli-gemini/gemini-2.5-pro"]).toBe(300_000);
68
68
  expect(DEFAULT_MODEL_TIMEOUTS["cli-gemini/gemini-2.5-flash"]).toBe(180_000);
@@ -63,8 +63,10 @@ vi.mock("../src/workdir.js", () => ({
63
63
  }));
64
64
 
65
65
  // Mock config module — provide all constants needed by session-manager.ts and cli-runner.ts
66
- vi.mock("../src/config.js", async () => {
66
+ vi.mock("../src/config.js", async (importOriginal) => {
67
+ const actual = await importOriginal<typeof import("../src/config.js")>();
67
68
  return {
69
+ ...actual,
68
70
  SESSION_TTL_MS: 30 * 60 * 1000,
69
71
  CLEANUP_INTERVAL_MS: 5 * 60 * 1000,
70
72
  SESSION_KILL_GRACE_MS: 5_000,