@elvatis_com/openclaw-cli-bridge-elvatis 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ _Last updated: 2026-04-10_
7
7
 
8
8
  | Component | Version | Build | Tests | Status |
9
9
  |-----------|---------|-------|-------|--------|
10
- | openclaw-cli-bridge-elvatis | 2.3.0 | ✅ | ✅ | ✅ Stable |
10
+ | openclaw-cli-bridge-elvatis | 2.4.0 | ✅ | ✅ | ✅ Stable |
11
11
  <!-- /SECTION: plugin_status -->
12
12
 
13
13
  <!-- SECTION: release_state -->
@@ -15,9 +15,9 @@ _Last updated: 2026-04-10_
15
15
 
16
16
  | Platform | Published Version | Status |
17
17
  |----------|------------------|--------|
18
- | GitHub | v2.3.0 | ✅ Pushed to main |
19
- | npm | 2.3.0 | Published (via CI) |
20
- | ClawHub | 2.3.0 | Published (via CI) |
18
+ | GitHub | v2.4.0 | ✅ Pushed to main |
19
+ | npm | 2.4.0 | Pending (via CI) |
20
+ | ClawHub | 2.4.0 | Pending (via CI) |
21
21
  <!-- /SECTION: release_state -->
22
22
 
23
23
  <!-- SECTION: open_tasks -->
@@ -31,6 +31,7 @@ _No open tasks._
31
31
 
32
32
  | Task | Title | Version |
33
33
  |------|-------|---------|
34
+ | T-020 | Metrics & health dashboard: request volume, latency, errors, token usage | 2.4.0 |
34
35
  | T-019 | Full-featured CLI bridge: tool calls + multimodal + autonomous execution | 2.3.0 |
35
36
  | T-018 | Fix vllm apiKey corruption (401) + harden config-patcher | 2.2.1 |
36
37
  | T-017 | Fix log spam, restart loops, CLI blocking | 2.2.0 |
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
4
4
 
5
- **Current version:** `2.3.0`
5
+ **Current version:** `2.5.0`
6
6
 
7
7
  ---
8
8
 
@@ -282,7 +282,17 @@ In `~/.openclaw/openclaw.json` → `plugins.entries.openclaw-cli-bridge-elvatis.
282
282
  "enableProxy": true, // start local CLI proxy server (default: true)
283
283
  "proxyPort": 31337, // proxy port (default: 31337)
284
284
  "proxyApiKey": "cli-bridge", // key between OpenClaw vllm provider and proxy (default: "cli-bridge")
285
- "proxyTimeoutMs": 120000 // CLI subprocess timeout in ms (default: 120s)
285
+ "proxyTimeoutMs": 300000, // base CLI subprocess timeout in ms (default: 300s, scales dynamically)
286
+ "modelTimeouts": { // per-model timeout overrides in ms (optional)
287
+ "cli-claude/claude-opus-4-6": 300000, // 5 min — heavy/agentic tasks
288
+ "cli-claude/claude-sonnet-4-6": 180000, // 3 min — interactive chat
289
+ "cli-claude/claude-haiku-4-5": 90000, // 90s — fast responses
290
+ "cli-gemini/gemini-2.5-pro": 180000,
291
+ "cli-gemini/gemini-2.5-flash": 90000,
292
+ "openai-codex/gpt-5.4": 300000,
293
+ "openai-codex/gpt-5.3-codex": 180000,
294
+ "openai-codex/gpt-5.1-codex-mini": 90000
295
+ }
286
296
  }
287
297
  ```
288
298
 
@@ -368,7 +378,7 @@ Model fallback (v1.9.0):
368
378
  ```bash
369
379
  npm run lint # eslint (TypeScript-aware)
370
380
  npm run typecheck # tsc --noEmit
371
- npm test # vitest run (121 tests)
381
+ npm test # vitest run (217 tests)
372
382
  npm run ci # lint + typecheck + test
373
383
  ```
374
384
 
@@ -376,6 +386,17 @@ npm run ci # lint + typecheck + test
376
386
 
377
387
  ## Changelog
378
388
 
389
+ ### v2.5.0
390
+ - **feat:** Graceful timeout handling — replaces Node's `spawn({ timeout })` with a manual SIGTERM→SIGKILL sequence (5s grace period). Exit 143 is now clearly annotated as "timeout by supervisor" in logs, not a cryptic model error.
391
+ - **feat:** Per-model timeout profiles — new `modelTimeouts` config option sets sensible defaults per model: Opus 5 min, Sonnet 3 min, Haiku 90s, Flash models 90s. Scales dynamically with conversation size (+2s/msg beyond 10, +5s/tool).
392
+ - **feat:** Timeout logging — every timeout event logs the model, elapsed time, and SIGTERM/SIGKILL steps. Fallback messages now show "timeout by supervisor" instead of raw exit codes.
393
+ - **fix:** Base timeout raised from 120s to 300s (was causing frequent Exit 143 on normal Sonnet conversations)
394
+ - **fix:** Session manager `kill()`, `cleanup()`, and `stop()` now use graceful SIGTERM→SIGKILL instead of immediate SIGTERM
395
+ - **test:** 7 new tests for timeout handling and exit code annotation (217 total)
396
+
397
+ ### v2.4.0
398
+ - **feat:** Metrics & health dashboard — request volume, latency, errors, token usage
399
+
379
400
  ### v2.3.0
380
401
  - **feat:** OpenAI tool calling protocol support for all CLI models — tool definitions are injected into the prompt, structured `tool_calls` responses are parsed and returned in OpenAI format
381
402
  - **feat:** Multimodal content support — images and audio from webchat are extracted to temp files and passed to CLIs (Codex uses native `-i` flag, Claude/Gemini reference file paths in prompt)
package/SKILL.md CHANGED
@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
68
68
 
69
69
  See `README.md` for full configuration reference and architecture diagram.
70
70
 
71
- **Version:** 2.1.3
71
+ **Version:** 2.5.0
package/index.ts CHANGED
@@ -98,6 +98,7 @@ interface CliPluginConfig {
98
98
  proxyPort?: number;
99
99
  proxyApiKey?: string;
100
100
  proxyTimeoutMs?: number;
101
+ modelTimeouts?: Record<string, number>;
101
102
  grokSessionPath?: string;
102
103
  }
103
104
 
@@ -987,7 +988,22 @@ const plugin = {
987
988
  const enableProxy = cfg.enableProxy ?? true;
988
989
  const port = cfg.proxyPort ?? DEFAULT_PROXY_PORT;
989
990
  const apiKey = cfg.proxyApiKey ?? DEFAULT_PROXY_API_KEY;
990
- const timeoutMs = cfg.proxyTimeoutMs ?? 120_000;
991
+ const timeoutMs = cfg.proxyTimeoutMs ?? 300_000;
992
+ // Per-model timeout overrides — fall back to sensible defaults if not configured.
993
+ // Interactive/fast models get shorter timeouts, heavy models get more time.
994
+ const defaultModelTimeouts: Record<string, number> = {
995
+ "cli-claude/claude-opus-4-6": 300_000, // 5 min — heavy, agentic tasks
996
+ "cli-claude/claude-sonnet-4-6": 180_000, // 3 min — standard interactive chat
997
+ "cli-claude/claude-haiku-4-5": 90_000, // 90s — fast responses
998
+ "cli-gemini/gemini-2.5-pro": 180_000,
999
+ "cli-gemini/gemini-2.5-flash": 90_000,
1000
+ "cli-gemini/gemini-3-pro-preview": 180_000,
1001
+ "cli-gemini/gemini-3-flash-preview": 90_000,
1002
+ "openai-codex/gpt-5.4": 300_000,
1003
+ "openai-codex/gpt-5.3-codex": 180_000,
1004
+ "openai-codex/gpt-5.1-codex-mini": 90_000,
1005
+ };
1006
+ const modelTimeouts = { ...defaultModelTimeouts, ...cfg.modelTimeouts };
991
1007
  const codexAuthPath = cfg.codexAuthPath ?? DEFAULT_CODEX_AUTH_PATH;
992
1008
  const grokSessionPath = cfg.grokSessionPath ?? DEFAULT_SESSION_PATH;
993
1009
 
@@ -1379,6 +1395,7 @@ const plugin = {
1379
1395
  version: plugin.version,
1380
1396
  modelCommands,
1381
1397
  modelFallbacks,
1398
+ modelTimeouts,
1382
1399
  getExpiryInfo: () => ({
1383
1400
  grok: (() => { const e = loadGrokExpiry(); return e ? formatExpiryInfo(e) : null; })(),
1384
1401
  gemini: (() => { const e = loadGeminiExpiry(); return e ? formatGeminiExpiry(e) : null; })(),
@@ -1415,7 +1432,7 @@ const plugin = {
1415
1432
  // One final attempt
1416
1433
  try {
1417
1434
  const server = await startProxyServer({
1418
- port, apiKey, timeoutMs, modelCommands, modelFallbacks,
1435
+ port, apiKey, timeoutMs, modelCommands, modelFallbacks, modelTimeouts,
1419
1436
  log: (msg) => api.logger.info(msg),
1420
1437
  warn: (msg) => api.logger.warn(msg),
1421
1438
  getGrokContext: () => grokContext,
@@ -2,7 +2,7 @@
2
2
  "id": "openclaw-cli-bridge-elvatis",
3
3
  "slug": "openclaw-cli-bridge-elvatis",
4
4
  "name": "OpenClaw CLI Bridge",
5
- "version": "2.2.2",
5
+ "version": "2.5.0",
6
6
  "license": "MIT",
7
7
  "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
8
8
  "providers": [
@@ -34,7 +34,26 @@
34
34
  },
35
35
  "proxyTimeoutMs": {
36
36
  "type": "number",
37
- "description": "Max time to wait for a CLI response in ms (default: 120000)"
37
+ "description": "Base timeout for CLI responses in ms (default: 300000). Scales dynamically with conversation size."
38
+ },
39
+ "modelTimeouts": {
40
+ "type": "object",
41
+ "description": "Per-model timeout overrides in ms. Keys are model IDs (e.g. 'cli-claude/claude-sonnet-4-6'). Use this to give heavy models more time or limit fast models. Entries here override the built-in per-model defaults; models with no entry and no built-in default fall back to proxyTimeoutMs.",
42
+ "additionalProperties": {
43
+ "type": "number"
44
+ },
45
+ "default": {
46
+ "cli-claude/claude-opus-4-6": 300000,
47
+ "cli-claude/claude-sonnet-4-6": 180000,
48
+ "cli-claude/claude-haiku-4-5": 90000,
49
+ "cli-gemini/gemini-2.5-pro": 180000,
50
+ "cli-gemini/gemini-2.5-flash": 90000,
51
+ "cli-gemini/gemini-3-pro-preview": 180000,
52
+ "cli-gemini/gemini-3-flash-preview": 90000,
53
+ "openai-codex/gpt-5.4": 300000,
54
+ "openai-codex/gpt-5.3-codex": 180000,
55
+ "openai-codex/gpt-5.1-codex-mini": 90000
56
+ }
38
57
  }
39
58
  }
40
59
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
3
- "version": "2.3.0",
3
+ "version": "2.5.0",
4
4
  "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
5
5
  "type": "module",
6
6
  "openclaw": {
package/src/cli-runner.ts CHANGED
@@ -278,6 +278,8 @@ export interface CliRunResult {
278
278
  stdout: string;
279
279
  stderr: string;
280
280
  exitCode: number;
281
+ /** True when the process was killed due to a timeout (exit 143 = SIGTERM). */
282
+ timedOut: boolean;
281
283
  }
282
284
 
283
285
  export interface RunCliOptions {
@@ -287,11 +289,25 @@ export interface RunCliOptions {
287
289
  */
288
290
  cwd?: string;
289
291
  timeoutMs?: number;
292
+ /** Optional logger for timeout events. */
293
+ log?: (msg: string) => void;
290
294
  }
291
295
 
296
+ /**
297
+ * Grace period between SIGTERM and SIGKILL when a timeout fires.
298
+ * Gives the CLI process 5 seconds to flush output and exit cleanly.
299
+ */
300
+ const TIMEOUT_GRACE_MS = 5_000;
301
+
292
302
  /**
293
303
  * Spawn a CLI and deliver the prompt via stdin.
294
304
  *
305
+ * Timeout handling (replaces Node's spawn({ timeout }) for better control):
306
+ * 1. After `timeoutMs`, send SIGTERM and log a clear message.
307
+ * 2. If the process doesn't exit within TIMEOUT_GRACE_MS (5s), send SIGKILL.
308
+ * 3. The result's `timedOut` flag is set so callers can distinguish
309
+ * supervisor timeouts from real CLI errors.
310
+ *
295
311
  * cwd defaults to homedir() so CLIs that scan the working directory for
296
312
  * project context (like Gemini) don't accidentally enter agentic mode.
297
313
  */
@@ -303,16 +319,40 @@ export function runCli(
303
319
  opts: RunCliOptions = {}
304
320
  ): Promise<CliRunResult> {
305
321
  const cwd = opts.cwd ?? homedir();
322
+ const log = opts.log ?? (() => {});
306
323
 
307
324
  return new Promise((resolve, reject) => {
325
+ // Do NOT pass timeout to spawn() — we manage it ourselves for graceful shutdown.
308
326
  const proc = spawn(cmd, args, {
309
- timeout: timeoutMs,
310
327
  env: buildMinimalEnv(),
311
328
  cwd,
312
329
  });
313
330
 
314
331
  let stdout = "";
315
332
  let stderr = "";
333
+ let timedOut = false;
334
+ let killTimer: ReturnType<typeof setTimeout> | null = null;
335
+ let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
336
+
337
+ const clearTimers = () => {
338
+ if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
339
+ if (killTimer) { clearTimeout(killTimer); killTimer = null; }
340
+ };
341
+
342
+ // ── Timeout sequence: SIGTERM → grace → SIGKILL ──────────────────────
343
+ timeoutTimer = setTimeout(() => {
344
+ timedOut = true;
345
+ const elapsed = Math.round(timeoutMs / 1000);
346
+ log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
347
+ proc.kill("SIGTERM");
348
+
349
+ killTimer = setTimeout(() => {
350
+ if (!proc.killed) {
351
+ log(`[cli-bridge] ${cmd} still running after ${TIMEOUT_GRACE_MS / 1000}s grace, sending SIGKILL`);
352
+ proc.kill("SIGKILL");
353
+ }
354
+ }, TIMEOUT_GRACE_MS);
355
+ }, timeoutMs);
316
356
 
317
357
  proc.stdin.write(prompt, "utf8", () => {
318
358
  proc.stdin.end();
@@ -322,10 +362,12 @@ export function runCli(
322
362
  proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
323
363
 
324
364
  proc.on("close", (code) => {
325
- resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0 });
365
+ clearTimers();
366
+ resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0, timedOut });
326
367
  });
327
368
 
328
369
  proc.on("error", (err) => {
370
+ clearTimers();
329
371
  reject(new Error(`Failed to spawn '${cmd}': ${err.message}`));
330
372
  });
331
373
  });
@@ -334,6 +376,7 @@ export function runCli(
334
376
  /**
335
377
  * Spawn a CLI with the prompt delivered as a CLI argument (not stdin).
336
378
  * Used by OpenCode which expects `opencode run "prompt"`.
379
+ * Uses the same graceful SIGTERM→SIGKILL timeout sequence as runCli.
337
380
  */
338
381
  export function runCliWithArg(
339
382
  cmd: string,
@@ -342,30 +385,66 @@ export function runCliWithArg(
342
385
  opts: RunCliOptions = {}
343
386
  ): Promise<CliRunResult> {
344
387
  const cwd = opts.cwd ?? homedir();
388
+ const log = opts.log ?? (() => {});
345
389
 
346
390
  return new Promise((resolve, reject) => {
347
391
  const proc = spawn(cmd, args, {
348
- timeout: timeoutMs,
349
392
  env: buildMinimalEnv(),
350
393
  cwd,
351
394
  });
352
395
 
353
396
  let stdout = "";
354
397
  let stderr = "";
398
+ let timedOut = false;
399
+ let killTimer: ReturnType<typeof setTimeout> | null = null;
400
+ let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
401
+
402
+ const clearTimers = () => {
403
+ if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
404
+ if (killTimer) { clearTimeout(killTimer); killTimer = null; }
405
+ };
406
+
407
+ timeoutTimer = setTimeout(() => {
408
+ timedOut = true;
409
+ const elapsed = Math.round(timeoutMs / 1000);
410
+ log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
411
+ proc.kill("SIGTERM");
412
+
413
+ killTimer = setTimeout(() => {
414
+ if (!proc.killed) {
415
+ log(`[cli-bridge] ${cmd} still running after ${TIMEOUT_GRACE_MS / 1000}s grace, sending SIGKILL`);
416
+ proc.kill("SIGKILL");
417
+ }
418
+ }, TIMEOUT_GRACE_MS);
419
+ }, timeoutMs);
355
420
 
356
421
  proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
357
422
  proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
358
423
 
359
424
  proc.on("close", (code) => {
360
- resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0 });
425
+ clearTimers();
426
+ resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0, timedOut });
361
427
  });
362
428
 
363
429
  proc.on("error", (err) => {
430
+ clearTimers();
364
431
  reject(new Error(`Failed to spawn '${cmd}': ${err.message}`));
365
432
  });
366
433
  });
367
434
  }
368
435
 
436
+ /**
437
+ * Annotate an error message when exit code 143 (SIGTERM) is detected.
438
+ * Makes it clear in logs that this was a supervisor timeout, not a model error.
439
+ */
440
+ export function annotateExitError(exitCode: number, stderr: string, timedOut: boolean, model: string): string {
441
+ const base = stderr || "(no output)";
442
+ if (timedOut || exitCode === 143) {
443
+ return `timeout: ${model} killed by supervisor (exit ${exitCode}, likely timeout) — ${base}`;
444
+ }
445
+ return base;
446
+ }
447
+
369
448
  // ──────────────────────────────────────────────────────────────────────────────
370
449
  // Gemini CLI
371
450
  // ──────────────────────────────────────────────────────────────────────────────
@@ -391,7 +470,7 @@ export async function runGemini(
391
470
  modelId: string,
392
471
  timeoutMs: number,
393
472
  workdir?: string,
394
- opts?: { tools?: ToolDefinition[] }
473
+ opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
395
474
  ): Promise<string> {
396
475
  const model = stripPrefix(modelId);
397
476
  // -p "" = headless mode trigger; actual prompt arrives via stdin
@@ -404,7 +483,7 @@ export async function runGemini(
404
483
  ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt
405
484
  : prompt;
406
485
 
407
- const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd });
486
+ const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
408
487
 
409
488
  // Filter out [WARN] lines from stderr (Gemini emits noisy permission warnings)
410
489
  const cleanStderr = result.stderr
@@ -414,7 +493,7 @@ export async function runGemini(
414
493
  .trim();
415
494
 
416
495
  if (result.exitCode !== 0 && result.stdout.length === 0) {
417
- throw new Error(`gemini exited ${result.exitCode}: ${cleanStderr || "(no output)"}`);
496
+ throw new Error(`gemini exited ${result.exitCode}: ${annotateExitError(result.exitCode, cleanStderr, result.timedOut, modelId)}`);
418
497
  }
419
498
 
420
499
  return result.stdout || cleanStderr;
@@ -434,7 +513,7 @@ export async function runClaude(
434
513
  modelId: string,
435
514
  timeoutMs: number,
436
515
  workdir?: string,
437
- opts?: { tools?: ToolDefinition[] }
516
+ opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
438
517
  ): Promise<string> {
439
518
  // Proactively refresh OAuth token if it's about to expire (< 5 min remaining).
440
519
  // No-op for API-key users.
@@ -457,15 +536,19 @@ export async function runClaude(
457
536
  : prompt;
458
537
 
459
538
  const cwd = workdir ?? homedir();
460
- const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd });
539
+ const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
461
540
 
462
541
  // On 401: attempt one token refresh + retry before giving up.
463
542
  if (result.exitCode !== 0 && result.stdout.length === 0) {
543
+ // If this was a timeout, don't bother with auth retry — it's a supervisor kill, not a 401.
544
+ if (result.timedOut) {
545
+ throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, true, modelId)}`);
546
+ }
464
547
  const stderr = result.stderr || "(no output)";
465
548
  if (stderr.includes("401") || stderr.includes("Invalid authentication credentials") || stderr.includes("authentication_error")) {
466
549
  // Refresh and retry once
467
550
  await refreshClaudeToken();
468
- const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd });
551
+ const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
469
552
  if (retry.exitCode !== 0 && retry.stdout.length === 0) {
470
553
  const retryStderr = retry.stderr || "(no output)";
471
554
  if (retryStderr.includes("401") || retryStderr.includes("authentication_error") || retryStderr.includes("Invalid authentication credentials")) {
@@ -478,7 +561,7 @@ export async function runClaude(
478
561
  }
479
562
  return retry.stdout;
480
563
  }
481
- throw new Error(`claude exited ${result.exitCode}: ${stderr}`);
564
+ throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, stderr, false, modelId)}`);
482
565
  }
483
566
 
484
567
  return result.stdout;
@@ -508,7 +591,7 @@ export async function runCodex(
508
591
  modelId: string,
509
592
  timeoutMs: number,
510
593
  workdir?: string,
511
- opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[] }
594
+ opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[]; log?: (msg: string) => void }
512
595
  ): Promise<string> {
513
596
  const model = stripPrefix(modelId);
514
597
  const args = ["--model", model, "--quiet", "--full-auto"];
@@ -532,10 +615,10 @@ export async function runCodex(
532
615
  ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt
533
616
  : prompt;
534
617
 
535
- const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd });
618
+ const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
536
619
 
537
620
  if (result.exitCode !== 0 && result.stdout.length === 0) {
538
- throw new Error(`codex exited ${result.exitCode}: ${result.stderr || "(no output)"}`);
621
+ throw new Error(`codex exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, modelId)}`);
539
622
  }
540
623
 
541
624
  return result.stdout || result.stderr;
@@ -553,14 +636,15 @@ export async function runOpenCode(
553
636
  prompt: string,
554
637
  _modelId: string,
555
638
  timeoutMs: number,
556
- workdir?: string
639
+ workdir?: string,
640
+ opts?: { log?: (msg: string) => void }
557
641
  ): Promise<string> {
558
642
  const args = ["run", prompt];
559
643
  const cwd = workdir ?? homedir();
560
- const result = await runCliWithArg("opencode", args, timeoutMs, { cwd });
644
+ const result = await runCliWithArg("opencode", args, timeoutMs, { cwd, log: opts?.log });
561
645
 
562
646
  if (result.exitCode !== 0 && result.stdout.length === 0) {
563
- throw new Error(`opencode exited ${result.exitCode}: ${result.stderr || "(no output)"}`);
647
+ throw new Error(`opencode exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, "opencode")}`);
564
648
  }
565
649
 
566
650
  return result.stdout || result.stderr;
@@ -578,14 +662,15 @@ export async function runPi(
578
662
  prompt: string,
579
663
  _modelId: string,
580
664
  timeoutMs: number,
581
- workdir?: string
665
+ workdir?: string,
666
+ opts?: { log?: (msg: string) => void }
582
667
  ): Promise<string> {
583
668
  const args = ["-p", prompt];
584
669
  const cwd = workdir ?? homedir();
585
- const result = await runCliWithArg("pi", args, timeoutMs, { cwd });
670
+ const result = await runCliWithArg("pi", args, timeoutMs, { cwd, log: opts?.log });
586
671
 
587
672
  if (result.exitCode !== 0 && result.stdout.length === 0) {
588
- throw new Error(`pi exited ${result.exitCode}: ${result.stderr || "(no output)"}`);
673
+ throw new Error(`pi exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, "pi")}`);
589
674
  }
590
675
 
591
676
  return result.stdout || result.stderr;
@@ -663,6 +748,8 @@ export interface RouteOptions {
663
748
  * Passed to CLIs that support native media input (e.g. codex -i).
664
749
  */
665
750
  mediaFiles?: MediaFile[];
751
+ /** Logger for timeout and lifecycle events. */
752
+ log?: (msg: string) => void;
666
753
  }
667
754
 
668
755
  /**
@@ -708,12 +795,13 @@ export async function routeToCliRunner(
708
795
  // Resolve aliases (e.g. gemini-3-pro → gemini-3-pro-preview) after allowlist check
709
796
  const resolved = normalizeModelAlias(normalized);
710
797
 
798
+ const log = opts.log;
711
799
  let rawText: string;
712
- if (resolved.startsWith("cli-gemini/")) rawText = await runGemini(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools });
713
- else if (resolved.startsWith("cli-claude/")) rawText = await runClaude(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools });
714
- else if (resolved.startsWith("openai-codex/")) rawText = await runCodex(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, mediaFiles: opts.mediaFiles });
715
- else if (resolved.startsWith("opencode/")) rawText = await runOpenCode(prompt, resolved, timeoutMs, opts.workdir);
716
- else if (resolved.startsWith("pi/")) rawText = await runPi(prompt, resolved, timeoutMs, opts.workdir);
800
+ if (resolved.startsWith("cli-gemini/")) rawText = await runGemini(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, log });
801
+ else if (resolved.startsWith("cli-claude/")) rawText = await runClaude(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, log });
802
+ else if (resolved.startsWith("openai-codex/")) rawText = await runCodex(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, mediaFiles: opts.mediaFiles, log });
803
+ else if (resolved.startsWith("opencode/")) rawText = await runOpenCode(prompt, resolved, timeoutMs, opts.workdir, { log });
804
+ else if (resolved.startsWith("pi/")) rawText = await runPi(prompt, resolved, timeoutMs, opts.workdir, { log });
717
805
  else throw new Error(
718
806
  `Unknown CLI bridge model: "${model}". Use "vllm/cli-gemini/<model>", "vllm/cli-claude/<model>", "openai-codex/<model>", "opencode/<model>", or "pi/<model>".`
719
807
  );
package/src/metrics.ts ADDED
@@ -0,0 +1,85 @@
1
+ /**
2
+ * metrics.ts
3
+ *
4
+ * In-memory metrics collector for the CLI bridge proxy.
5
+ * Tracks request counts, errors, latency, and token usage per model.
6
+ * All operations are O(1) — cannot block the event loop.
7
+ */
8
+
9
+ export interface ModelMetrics {
10
+ model: string;
11
+ requests: number;
12
+ errors: number;
13
+ totalLatencyMs: number;
14
+ promptTokens: number;
15
+ completionTokens: number;
16
+ lastRequestAt: number | null;
17
+ }
18
+
19
+ export interface MetricsSnapshot {
20
+ startedAt: number;
21
+ totalRequests: number;
22
+ totalErrors: number;
23
+ models: ModelMetrics[]; // sorted by requests desc
24
+ }
25
+
26
+ class MetricsCollector {
27
+ private startedAt = Date.now();
28
+ private data = new Map<string, ModelMetrics>();
29
+
30
+ recordRequest(
31
+ model: string,
32
+ durationMs: number,
33
+ success: boolean,
34
+ promptTokens?: number,
35
+ completionTokens?: number,
36
+ ): void {
37
+ let entry = this.data.get(model);
38
+ if (!entry) {
39
+ entry = {
40
+ model,
41
+ requests: 0,
42
+ errors: 0,
43
+ totalLatencyMs: 0,
44
+ promptTokens: 0,
45
+ completionTokens: 0,
46
+ lastRequestAt: null,
47
+ };
48
+ this.data.set(model, entry);
49
+ }
50
+ entry.requests++;
51
+ if (!success) entry.errors++;
52
+ entry.totalLatencyMs += durationMs;
53
+ if (promptTokens) entry.promptTokens += promptTokens;
54
+ if (completionTokens) entry.completionTokens += completionTokens;
55
+ entry.lastRequestAt = Date.now();
56
+ }
57
+
58
+ getMetrics(): MetricsSnapshot {
59
+ let totalRequests = 0;
60
+ let totalErrors = 0;
61
+ const models: ModelMetrics[] = [];
62
+
63
+ for (const entry of this.data.values()) {
64
+ totalRequests += entry.requests;
65
+ totalErrors += entry.errors;
66
+ models.push({ ...entry });
67
+ }
68
+
69
+ models.sort((a, b) => b.requests - a.requests);
70
+
71
+ return {
72
+ startedAt: this.startedAt,
73
+ totalRequests,
74
+ totalErrors,
75
+ models,
76
+ };
77
+ }
78
+
79
+ reset(): void {
80
+ this.startedAt = Date.now();
81
+ this.data.clear();
82
+ }
83
+ }
84
+
85
+ export const metrics = new MetricsCollector();
@@ -19,6 +19,7 @@ import { chatgptComplete, chatgptCompleteStream, type ChatMessage as ChatGPTBrow
19
19
  import type { BrowserContext } from "playwright";
20
20
  import { renderStatusPage, type StatusProvider } from "./status-template.js";
21
21
  import { sessionManager } from "./session-manager.js";
22
+ import { metrics } from "./metrics.js";
22
23
 
23
24
  export type GrokCompleteOptions = Parameters<typeof grokComplete>[1];
24
25
  export type GrokCompleteStreamOptions = Parameters<typeof grokCompleteStream>[1];
@@ -81,6 +82,20 @@ export interface ProxyServerOptions {
81
82
  * with the fallback model. Example: "cli-gemini/gemini-2.5-pro" → "cli-gemini/gemini-2.5-flash"
82
83
  */
83
84
  modelFallbacks?: Record<string, string>;
85
+ /**
86
+ * Per-model timeout overrides (ms). Keys are model IDs (without "vllm/" prefix).
87
+ * Use this to give heavy models more time or limit fast models.
88
+ *
89
+ * Example:
90
+ * {
91
+ * "cli-claude/claude-sonnet-4-6": 180_000, // 3 min for interactive chat
92
+ * "cli-claude/claude-opus-4-6": 300_000, // 5 min for heavy tasks
93
+ * "cli-claude/claude-haiku-4-5": 90_000, // 90s for fast responses
94
+ * }
95
+ *
96
+ * When not set for a model, falls back to proxyTimeoutMs (default 300s base).
97
+ */
98
+ modelTimeouts?: Record<string, number>;
84
99
  }
85
100
 
86
101
  /** Available CLI bridge models for GET /v1/models */
@@ -222,6 +237,7 @@ async function handleRequest(
222
237
  chatgpt: sessionStatus("chatgpt", opts.getChatGPTContext, expiry.chatgpt),
223
238
  },
224
239
  models: CLI_MODELS.length,
240
+ metrics: metrics.getMetrics(),
225
241
  };
226
242
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
227
243
  res.end(JSON.stringify(health, null, 2));
@@ -240,7 +256,7 @@ async function handleRequest(
240
256
  { name: "ChatGPT", icon: "◉", expiry: expiry.chatgpt, loginCmd: "/chatgpt-login", ctx: opts.getChatGPTContext?.() ?? null },
241
257
  ];
242
258
 
243
- const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands });
259
+ const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands, metrics: metrics.getMetrics() });
244
260
  res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
245
261
  res.end(html);
246
262
  return;
@@ -331,6 +347,7 @@ async function handleRequest(
331
347
  const grokMessages = messages as GrokChatMessage[];
332
348
  const doGrokComplete = opts._grokComplete ?? grokComplete;
333
349
  const doGrokCompleteStream = opts._grokCompleteStream ?? grokCompleteStream;
350
+ const grokStart = Date.now();
334
351
  try {
335
352
  if (stream) {
336
353
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -341,11 +358,13 @@ async function handleRequest(
341
358
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
342
359
  opts.log
343
360
  );
361
+ metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
344
362
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
345
363
  res.write("data: [DONE]\n\n");
346
364
  res.end();
347
365
  } else {
348
366
  const result = await doGrokComplete(grokCtx, { messages: grokMessages, model: grokModel, timeoutMs }, opts.log);
367
+ metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
349
368
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
350
369
  res.end(JSON.stringify({
351
370
  id, object: "chat.completion", created, model,
@@ -354,6 +373,7 @@ async function handleRequest(
354
373
  }));
355
374
  }
356
375
  } catch (err) {
376
+ metrics.recordRequest(model, Date.now() - grokStart, false);
357
377
  const msg = (err as Error).message;
358
378
  opts.warn(`[cli-bridge] Grok error for ${model}: ${msg}`);
359
379
  if (!res.headersSent) {
@@ -380,6 +400,7 @@ async function handleRequest(
380
400
  const geminiMessages = messages as GeminiBrowserChatMessage[];
381
401
  const doGeminiComplete = opts._geminiComplete ?? geminiComplete;
382
402
  const doGeminiCompleteStream = opts._geminiCompleteStream ?? geminiCompleteStream;
403
+ const geminiStart = Date.now();
383
404
  try {
384
405
  if (stream) {
385
406
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -390,11 +411,13 @@ async function handleRequest(
390
411
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
391
412
  opts.log
392
413
  );
414
+ metrics.recordRequest(model, Date.now() - geminiStart, true);
393
415
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
394
416
  res.write("data: [DONE]\n\n");
395
417
  res.end();
396
418
  } else {
397
419
  const result = await doGeminiComplete(geminiCtx, { messages: geminiMessages, model, timeoutMs }, opts.log);
420
+ metrics.recordRequest(model, Date.now() - geminiStart, true);
398
421
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
399
422
  res.end(JSON.stringify({
400
423
  id, object: "chat.completion", created, model,
@@ -403,6 +426,7 @@ async function handleRequest(
403
426
  }));
404
427
  }
405
428
  } catch (err) {
429
+ metrics.recordRequest(model, Date.now() - geminiStart, false);
406
430
  const msg = (err as Error).message;
407
431
  opts.warn(`[cli-bridge] Gemini browser error for ${model}: ${msg}`);
408
432
  if (!res.headersSent) {
@@ -429,6 +453,7 @@ async function handleRequest(
429
453
  const claudeMessages = messages as ClaudeBrowserChatMessage[];
430
454
  const doClaudeComplete = opts._claudeComplete ?? claudeComplete;
431
455
  const doClaudeCompleteStream = opts._claudeCompleteStream ?? claudeCompleteStream;
456
+ const claudeStart = Date.now();
432
457
  try {
433
458
  if (stream) {
434
459
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -439,11 +464,13 @@ async function handleRequest(
439
464
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
440
465
  opts.log
441
466
  );
467
+ metrics.recordRequest(model, Date.now() - claudeStart, true);
442
468
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
443
469
  res.write("data: [DONE]\n\n");
444
470
  res.end();
445
471
  } else {
446
472
  const result = await doClaudeComplete(claudeCtx, { messages: claudeMessages, model, timeoutMs }, opts.log);
473
+ metrics.recordRequest(model, Date.now() - claudeStart, true);
447
474
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
448
475
  res.end(JSON.stringify({
449
476
  id, object: "chat.completion", created, model,
@@ -452,6 +479,7 @@ async function handleRequest(
452
479
  }));
453
480
  }
454
481
  } catch (err) {
482
+ metrics.recordRequest(model, Date.now() - claudeStart, false);
455
483
  const msg = (err as Error).message;
456
484
  opts.warn(`[cli-bridge] Claude browser error for ${model}: ${msg}`);
457
485
  if (!res.headersSent) {
@@ -479,6 +507,7 @@ async function handleRequest(
479
507
  const chatgptMessages = messages as ChatGPTBrowserChatMessage[];
480
508
  const doChatGPTComplete = opts._chatgptComplete ?? chatgptComplete;
481
509
  const doChatGPTCompleteStream = opts._chatgptCompleteStream ?? chatgptCompleteStream;
510
+ const chatgptStart = Date.now();
482
511
  try {
483
512
  if (stream) {
484
513
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -489,11 +518,13 @@ async function handleRequest(
489
518
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
490
519
  opts.log
491
520
  );
521
+ metrics.recordRequest(model, Date.now() - chatgptStart, true);
492
522
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
493
523
  res.write("data: [DONE]\n\n");
494
524
  res.end();
495
525
  } else {
496
526
  const result = await doChatGPTComplete(chatgptCtx, { messages: chatgptMessages, model: chatgptModel, timeoutMs }, opts.log);
527
+ metrics.recordRequest(model, Date.now() - chatgptStart, true);
497
528
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
498
529
  res.end(JSON.stringify({
499
530
  id, object: "chat.completion", created, model,
@@ -502,6 +533,7 @@ async function handleRequest(
502
533
  }));
503
534
  }
504
535
  } catch (err) {
536
+ metrics.recordRequest(model, Date.now() - chatgptStart, false);
505
537
  const msg = (err as Error).message;
506
538
  opts.warn(`[cli-bridge] ChatGPT browser error for ${model}: ${msg}`);
507
539
  if (!res.headersSent) {
@@ -546,6 +578,7 @@ async function handleRequest(
546
578
  const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM }, ...truncated];
547
579
  const requestBody = JSON.stringify({ ...parsed, messages: bitnetMessages, tools: undefined });
548
580
 
581
+ const bitnetStart = Date.now();
549
582
  try {
550
583
  const targetUrl = new URL("/v1/chat/completions", bitnetUrl);
551
584
  const proxyRes = await new Promise<http.IncomingMessage>((resolve, reject) => {
@@ -566,6 +599,7 @@ async function handleRequest(
566
599
  proxyReq.end();
567
600
  });
568
601
 
602
+ metrics.recordRequest(model, Date.now() - bitnetStart, true);
569
603
  // Forward status + headers
570
604
  const fwdHeaders: Record<string, string> = { ...corsHeaders() };
571
605
  const ct = proxyRes.headers["content-type"];
@@ -577,6 +611,7 @@ async function handleRequest(
577
611
  res.writeHead(proxyRes.statusCode ?? 200, fwdHeaders);
578
612
  proxyRes.pipe(res);
579
613
  } catch (err) {
614
+ metrics.recordRequest(model, Date.now() - bitnetStart, false);
580
615
  const msg = (err as Error).message;
581
616
  if (msg.includes("ECONNREFUSED") || msg.includes("ECONNRESET") || msg.includes("ENOTFOUND")) {
582
617
  res.writeHead(503, { "Content-Type": "application/json", ...corsHeaders() });
@@ -602,34 +637,81 @@ async function handleRequest(
602
637
  // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
603
638
  let result: CliToolResult;
604
639
  let usedModel = model;
605
- const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined };
640
+ const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
641
+
642
+ // ── Dynamic timeout: scale with conversation size ────────────────────────
643
+ // Per-model timeout takes precedence, then global proxyTimeoutMs, then 300s default.
644
+ const perModelTimeout = opts.modelTimeouts?.[model];
645
+ const baseTimeout = perModelTimeout ?? opts.timeoutMs ?? 300_000;
646
+ const msgExtra = Math.max(0, cleanMessages.length - 10) * 2_000;
647
+ const toolExtra = (tools?.length ?? 0) * 5_000;
648
+ const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
649
+ opts.log(`[cli-bridge] ${model} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
650
+
651
+ // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
652
+ let sseHeadersSent = false;
653
+ let keepaliveInterval: ReturnType<typeof setInterval> | null = null;
654
+ if (stream) {
655
+ res.writeHead(200, {
656
+ "Content-Type": "text/event-stream",
657
+ "Cache-Control": "no-cache",
658
+ Connection: "keep-alive",
659
+ ...corsHeaders(),
660
+ });
661
+ sseHeadersSent = true;
662
+ res.write(": keepalive\n\n");
663
+ keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, 15_000);
664
+ }
665
+
666
+ const cliStart = Date.now();
606
667
  try {
607
- result = await routeToCliRunner(model, cleanMessages, opts.timeoutMs ?? 120_000, routeOpts);
668
+ result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
669
+ metrics.recordRequest(model, Date.now() - cliStart, true);
608
670
  } catch (err) {
671
+ const primaryDuration = Date.now() - cliStart;
609
672
  const msg = (err as Error).message;
610
673
  // ── Model fallback: retry once with a lighter model if configured ────
674
+ const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
611
675
  const fallbackModel = opts.modelFallbacks?.[model];
612
676
  if (fallbackModel) {
613
- opts.warn(`[cli-bridge] ${model} failed (${msg}), falling back to ${fallbackModel}`);
677
+ metrics.recordRequest(model, primaryDuration, false);
678
+ const reason = isTimeout ? "timeout by supervisor" : msg;
679
+ opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
680
+ const fallbackStart = Date.now();
614
681
  try {
615
- result = await routeToCliRunner(fallbackModel, cleanMessages, opts.timeoutMs ?? 120_000, routeOpts);
682
+ result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
683
+ metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, true);
616
684
  usedModel = fallbackModel;
617
685
  opts.log(`[cli-bridge] fallback to ${fallbackModel} succeeded`);
618
686
  } catch (fallbackErr) {
687
+ metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, false);
619
688
  const fallbackMsg = (fallbackErr as Error).message;
620
689
  opts.warn(`[cli-bridge] fallback ${fallbackModel} also failed: ${fallbackMsg}`);
621
- res.writeHead(500, { "Content-Type": "application/json" });
622
- res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
690
+ if (sseHeadersSent) {
691
+ res.write(`data: ${JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } })}\n\n`);
692
+ res.write("data: [DONE]\n\n");
693
+ res.end();
694
+ } else {
695
+ res.writeHead(500, { "Content-Type": "application/json" });
696
+ res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
697
+ }
623
698
  return;
624
699
  }
625
700
  } else {
701
+ metrics.recordRequest(model, primaryDuration, false);
626
702
  opts.warn(`[cli-bridge] CLI error for ${model}: ${msg}`);
627
- res.writeHead(500, { "Content-Type": "application/json" });
628
- res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
703
+ if (sseHeadersSent) {
704
+ res.write(`data: ${JSON.stringify({ error: { message: msg, type: "cli_error" } })}\n\n`);
705
+ res.write("data: [DONE]\n\n");
706
+ res.end();
707
+ } else {
708
+ res.writeHead(500, { "Content-Type": "application/json" });
709
+ res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
710
+ }
629
711
  return;
630
712
  }
631
713
  } finally {
632
- // Clean up temp media files after response
714
+ if (keepaliveInterval) clearInterval(keepaliveInterval);
633
715
  cleanupMediaFiles(mediaFiles);
634
716
  }
635
717
 
@@ -637,12 +719,7 @@ async function handleRequest(
637
719
  const finishReason = hasToolCalls ? "tool_calls" : "stop";
638
720
 
639
721
  if (stream) {
640
- res.writeHead(200, {
641
- "Content-Type": "text/event-stream",
642
- "Cache-Control": "no-cache",
643
- Connection: "keep-alive",
644
- ...corsHeaders(),
645
- });
722
+ // SSE headers already sent above — stream response chunks directly
646
723
 
647
724
  if (hasToolCalls) {
648
725
  // Stream tool_calls in OpenAI SSE format
@@ -95,6 +95,8 @@ function buildMinimalEnv(): Record<string, string> {
95
95
  /** Auto-cleanup interval: 30 minutes. */
96
96
  const SESSION_TTL_MS = 30 * 60 * 1000;
97
97
  const CLEANUP_INTERVAL_MS = 5 * 60 * 1000;
98
+ /** Grace period between SIGTERM and SIGKILL for session termination. */
99
+ const KILL_GRACE_MS = 5_000;
98
100
 
99
101
  export class SessionManager {
100
102
  private sessions = new Map<string, SessionEntry>();
@@ -213,12 +215,19 @@ export class SessionManager {
213
215
  }
214
216
  }
215
217
 
216
- /** Send SIGTERM to the session process. */
218
+ /**
219
+ * Gracefully terminate a session: SIGTERM first, then SIGKILL after grace period.
220
+ * This prevents the ambiguous "exit 143 (no output)" pattern.
221
+ */
217
222
  kill(sessionId: string): boolean {
218
223
  const entry = this.sessions.get(sessionId);
219
224
  if (!entry || entry.status !== "running") return false;
220
225
  entry.status = "killed";
221
226
  entry.proc.kill("SIGTERM");
227
+ // If the process doesn't exit within the grace period, force-kill it
228
+ setTimeout(() => {
229
+ try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
230
+ }, KILL_GRACE_MS);
222
231
  return true;
223
232
  }
224
233
 
@@ -238,7 +247,7 @@ export class SessionManager {
238
247
  return result;
239
248
  }
240
249
 
241
- /** Remove sessions older than SESSION_TTL_MS. Kill running ones first. Clean up isolated workdirs. */
250
+ /** Remove sessions older than SESSION_TTL_MS. Kill running ones with graceful SIGTERM→SIGKILL. */
242
251
  cleanup(): void {
243
252
  const now = Date.now();
244
253
  for (const [sessionId, entry] of this.sessions) {
@@ -246,6 +255,10 @@ export class SessionManager {
246
255
  if (entry.status === "running") {
247
256
  entry.proc.kill("SIGTERM");
248
257
  entry.status = "killed";
258
+ // Escalate to SIGKILL after grace period
259
+ setTimeout(() => {
260
+ try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
261
+ }, KILL_GRACE_MS);
249
262
  }
250
263
  // Clean up isolated workdir if it wasn't cleaned on exit
251
264
  if (entry.isolatedWorkdir) {
@@ -258,17 +271,20 @@ export class SessionManager {
258
271
  sweepOrphanedWorkdirs();
259
272
  }
260
273
 
261
- /** Stop the cleanup timer (for graceful shutdown). */
274
+ /** Stop the cleanup timer (for graceful shutdown). SIGTERM all sessions, SIGKILL after grace. */
262
275
  stop(): void {
263
276
  if (this.cleanupTimer) {
264
277
  clearInterval(this.cleanupTimer);
265
278
  this.cleanupTimer = null;
266
279
  }
267
- // Kill all running sessions and clean up their workdirs
280
+ // Kill all running sessions with graceful SIGTERM→SIGKILL escalation
268
281
  for (const [, entry] of this.sessions) {
269
282
  if (entry.status === "running") {
270
283
  entry.proc.kill("SIGTERM");
271
284
  entry.status = "killed";
285
+ setTimeout(() => {
286
+ try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
287
+ }, KILL_GRACE_MS);
272
288
  }
273
289
  if (entry.isolatedWorkdir) {
274
290
  cleanupWorkdir(entry.isolatedWorkdir);
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { BrowserContext } from "playwright";
9
+ import type { MetricsSnapshot } from "./metrics.js";
9
10
 
10
11
  export interface StatusProvider {
11
12
  name: string;
@@ -22,6 +23,8 @@ export interface StatusTemplateOptions {
22
23
  models: Array<{ id: string; name: string; contextWindow: number; maxTokens: number }>;
23
24
  /** Maps model ID → slash command name (e.g. "openai-codex/gpt-5.3-codex" → "/cli-codex") */
24
25
  modelCommands?: Record<string, string>;
26
+ /** In-memory metrics snapshot — optional for backward compat */
27
+ metrics?: MetricsSnapshot;
25
28
  }
26
29
 
27
30
  function statusBadge(p: StatusProvider): { label: string; color: string; dot: string } {
@@ -32,6 +35,114 @@ function statusBadge(p: StatusProvider): { label: string; color: string; dot: st
32
35
  return { label: "Logged in", color: "#3b82f6", dot: "🔵" };
33
36
  }
34
37
 
38
+ // ── Formatting helpers ──────────────────────────────────────────────────────
39
+
40
+ function formatDuration(ms: number): string {
41
+ if (ms < 1000) return `${Math.round(ms)}ms`;
42
+ if (ms < 60_000) return `${(ms / 1000).toFixed(1)}s`;
43
+ return `${(ms / 60_000).toFixed(1)}m`;
44
+ }
45
+
46
+ function formatTokens(n: number): string {
47
+ if (n === 0) return "—";
48
+ if (n < 1000) return String(n);
49
+ if (n < 1_000_000) return `${(n / 1000).toFixed(1)}k`;
50
+ return `${(n / 1_000_000).toFixed(2)}M`;
51
+ }
52
+
53
+ function timeAgo(epochMs: number | null): string {
54
+ if (!epochMs) return "—";
55
+ const diff = Date.now() - epochMs;
56
+ if (diff < 60_000) return "just now";
57
+ if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m ago`;
58
+ if (diff < 86_400_000) return `${Math.floor(diff / 3_600_000)}h ago`;
59
+ return `${Math.floor(diff / 86_400_000)}d ago`;
60
+ }
61
+
62
+ function formatUptime(startedAt: number): string {
63
+ const diff = Date.now() - startedAt;
64
+ const s = Math.floor(diff / 1000);
65
+ if (s < 60) return `${s}s`;
66
+ const m = Math.floor(s / 60);
67
+ if (m < 60) return `${m}m ${s % 60}s`;
68
+ const h = Math.floor(m / 60);
69
+ if (h < 24) return `${h}h ${m % 60}m`;
70
+ const d = Math.floor(h / 24);
71
+ return `${d}d ${h % 24}h`;
72
+ }
73
+
74
+ function escapeHtml(s: string): string {
75
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
76
+ }
77
+
78
+ // ── Metrics sections ────────────────────────────────────────────────────────
79
+
80
+ function renderMetricsSection(m: MetricsSnapshot): string {
81
+ const errorRate = m.totalRequests > 0 ? ((m.totalErrors / m.totalRequests) * 100).toFixed(1) : "0.0";
82
+ const totalTokens = m.models.reduce((sum, mod) => sum + mod.promptTokens + mod.completionTokens, 0);
83
+
84
+ // Summary cards
85
+ const summaryCards = `
86
+ <div class="summary-grid">
87
+ <div class="summary-card">
88
+ <div class="summary-value">${m.totalRequests}</div>
89
+ <div class="summary-label">Total Requests</div>
90
+ </div>
91
+ <div class="summary-card">
92
+ <div class="summary-value" style="color:${m.totalErrors > 0 ? '#ef4444' : '#22c55e'}">${errorRate}%</div>
93
+ <div class="summary-label">Error Rate</div>
94
+ </div>
95
+ <div class="summary-card">
96
+ <div class="summary-value">${formatTokens(totalTokens)}</div>
97
+ <div class="summary-label">Total Tokens</div>
98
+ </div>
99
+ <div class="summary-card">
100
+ <div class="summary-value">${formatUptime(m.startedAt)}</div>
101
+ <div class="summary-label">Uptime</div>
102
+ </div>
103
+ </div>`;
104
+
105
+ // Per-model stats table
106
+ let modelRows: string;
107
+ if (m.models.length === 0) {
108
+ modelRows = `<tr><td colspan="6" style="padding:16px;color:#6b7280;text-align:center;font-style:italic">No requests recorded yet.</td></tr>`;
109
+ } else {
110
+ modelRows = m.models.map(mod => {
111
+ const avgLatency = mod.requests > 0 ? mod.totalLatencyMs / mod.requests : 0;
112
+ const modErrorRate = mod.requests > 0 ? ((mod.errors / mod.requests) * 100).toFixed(1) : "0.0";
113
+ return `
114
+ <tr>
115
+ <td class="metrics-cell"><code style="color:#93c5fd">${escapeHtml(mod.model)}</code></td>
116
+ <td class="metrics-cell" style="text-align:right">${mod.requests}</td>
117
+ <td class="metrics-cell" style="text-align:right;color:${mod.errors > 0 ? '#ef4444' : '#6b7280'}">${mod.errors} <span style="color:#6b7280;font-size:11px">(${modErrorRate}%)</span></td>
118
+ <td class="metrics-cell" style="text-align:right">${formatDuration(avgLatency)}</td>
119
+ <td class="metrics-cell" style="text-align:right">${formatTokens(mod.promptTokens)} / ${formatTokens(mod.completionTokens)}</td>
120
+ <td class="metrics-cell" style="text-align:right;color:#9ca3af">${timeAgo(mod.lastRequestAt)}</td>
121
+ </tr>`;
122
+ }).join("");
123
+ }
124
+
125
+ const modelTable = `
126
+ <div class="card">
127
+ <div class="card-header">Per-Model Stats</div>
128
+ <table class="metrics-table">
129
+ <thead>
130
+ <tr style="background:#13151f">
131
+ <th class="metrics-th" style="text-align:left">Model</th>
132
+ <th class="metrics-th" style="text-align:right">Requests</th>
133
+ <th class="metrics-th" style="text-align:right">Errors</th>
134
+ <th class="metrics-th" style="text-align:right">Avg Latency</th>
135
+ <th class="metrics-th" style="text-align:right">Tokens (in/out)</th>
136
+ <th class="metrics-th" style="text-align:right">Last Request</th>
137
+ </tr>
138
+ </thead>
139
+ <tbody>${modelRows}</tbody>
140
+ </table>
141
+ </div>`;
142
+
143
+ return summaryCards + modelTable;
144
+ }
145
+
35
146
  export function renderStatusPage(opts: StatusTemplateOptions): string {
36
147
  const { version, port, providers, models } = opts;
37
148
 
@@ -66,6 +177,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
66
177
  return `<li style="margin:2px 0;font-size:13px;color:#d1d5db"><code style="color:#93c5fd">${m.id}</code>${cmdBadge}</li>`;
67
178
  }).join("");
68
179
 
180
+ const metricsHtml = opts.metrics ? renderMetricsSection(opts.metrics) : "";
181
+
69
182
  return `<!DOCTYPE html>
70
183
  <html lang="en">
71
184
  <head>
@@ -86,6 +199,13 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
86
199
  ul { list-style: none; padding: 12px 16px; }
87
200
  .footer { color: #374151; font-size: 12px; text-align: center; margin-top: 16px; }
88
201
  code { background: #1e2130; padding: 1px 5px; border-radius: 4px; }
202
+ .summary-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; margin-bottom: 24px; }
203
+ .summary-card { background: #1a1d27; border: 1px solid #2d3148; border-radius: 12px; padding: 20px 16px; text-align: center; }
204
+ .summary-value { font-size: 28px; font-weight: 700; color: #f9fafb; margin-bottom: 4px; }
205
+ .summary-label { font-size: 12px; color: #6b7280; text-transform: uppercase; letter-spacing: 0.05em; }
206
+ .metrics-table { width: 100%; border-collapse: collapse; }
207
+ .metrics-th { padding: 10px 16px; font-size: 12px; color: #4b5563; font-weight: 600; }
208
+ .metrics-cell { padding: 10px 16px; font-size: 13px; }
89
209
  </style>
90
210
  </head>
91
211
  <body>
@@ -107,6 +227,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
107
227
  </table>
108
228
  </div>
109
229
 
230
+ ${metricsHtml}
231
+
110
232
  <div class="models">
111
233
  <div class="card">
112
234
  <div class="card-header">CLI Models (${cliModels.length})</div>
@@ -265,3 +265,75 @@ describe("Codex auto-git-init via routeToCliRunner", () => {
265
265
  expect(mockExecSync).toHaveBeenCalledWith("git init", expect.objectContaining({ cwd: "/no-git-dir" }));
266
266
  });
267
267
  });
268
+
269
+ // ──────────────────────────────────────────────────────────────────────────────
270
+ // Timeout handling: graceful SIGTERM → SIGKILL and exit 143 annotation
271
+ // ──────────────────────────────────────────────────────────────────────────────
272
+
273
+ import { runCli, annotateExitError } from "../src/cli-runner.js";
274
+
275
+ describe("runCli() timeout handling", () => {
276
+ it("does NOT pass timeout to spawn options (manual timer instead)", async () => {
277
+ mockSpawn.mockImplementation(() => makeFakeProc("ok", 0));
278
+ await runCli("echo", [], "hello", 60_000);
279
+ const spawnOpts = mockSpawn.mock.calls[0][2];
280
+ expect(spawnOpts.timeout).toBeUndefined();
281
+ });
282
+
283
+ it("sends SIGTERM after timeout fires", async () => {
284
+ vi.useFakeTimers();
285
+ const proc = new EventEmitter() as any;
286
+ proc.stdout = new EventEmitter();
287
+ proc.stderr = new EventEmitter();
288
+ proc.stdin = { write: vi.fn((_d: string, _e: string, cb?: () => void) => { cb?.(); }), end: vi.fn() };
289
+ proc.kill = vi.fn(() => { proc.emit("close", 143); });
290
+ proc.killed = false;
291
+ mockSpawn.mockImplementation(() => proc);
292
+
293
+ const logMessages: string[] = [];
294
+ const promise = runCli("claude", [], "prompt", 100, { log: (m) => logMessages.push(m) });
295
+
296
+ // Advance past the timeout
297
+ vi.advanceTimersByTime(101);
298
+
299
+ const result = await promise;
300
+ expect(proc.kill).toHaveBeenCalledWith("SIGTERM");
301
+ expect(result.timedOut).toBe(true);
302
+ expect(result.exitCode).toBe(143);
303
+ expect(logMessages.some(m => m.includes("timeout") && m.includes("SIGTERM"))).toBe(true);
304
+ vi.useRealTimers();
305
+ });
306
+
307
+ it("sets timedOut=false for normal exits", async () => {
308
+ mockSpawn.mockImplementation(() => makeFakeProc("output", 0));
309
+ const result = await runCli("echo", [], "hello", 60_000);
310
+ expect(result.timedOut).toBe(false);
311
+ expect(result.exitCode).toBe(0);
312
+ });
313
+ });
314
+
315
+ describe("annotateExitError()", () => {
316
+ it("annotates exit 143 as timeout", () => {
317
+ const msg = annotateExitError(143, "(no output)", false, "cli-claude/claude-sonnet-4-6");
318
+ expect(msg).toContain("timeout");
319
+ expect(msg).toContain("supervisor");
320
+ expect(msg).toContain("cli-claude/claude-sonnet-4-6");
321
+ });
322
+
323
+ it("annotates when timedOut is true regardless of exit code", () => {
324
+ const msg = annotateExitError(1, "some error", true, "cli-claude/claude-sonnet-4-6");
325
+ expect(msg).toContain("timeout");
326
+ expect(msg).toContain("supervisor");
327
+ });
328
+
329
+ it("returns plain error when not a timeout", () => {
330
+ const msg = annotateExitError(1, "auth error", false, "cli-claude/claude-sonnet-4-6");
331
+ expect(msg).toBe("auth error");
332
+ expect(msg).not.toContain("timeout");
333
+ });
334
+
335
+ it("returns (no output) placeholder when stderr is empty and not a timeout", () => {
336
+ const msg = annotateExitError(1, "", false, "cli-claude/claude-sonnet-4-6");
337
+ expect(msg).toBe("(no output)");
338
+ });
339
+ });