@elvatis_com/openclaw-cli-bridge-elvatis 2.3.0 → 2.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -7,7 +7,7 @@ _Last updated: 2026-04-10_
7
7
 
8
8
  | Component | Version | Build | Tests | Status |
9
9
  |-----------|---------|-------|-------|--------|
10
- | openclaw-cli-bridge-elvatis | 2.3.0 | ✅ | ✅ | ✅ Stable |
10
+ | openclaw-cli-bridge-elvatis | 2.4.0 | ✅ | ✅ | ✅ Stable |
11
11
  <!-- /SECTION: plugin_status -->
12
12
 
13
13
  <!-- SECTION: release_state -->
@@ -15,9 +15,9 @@ _Last updated: 2026-04-10_
15
15
 
16
16
  | Platform | Published Version | Status |
17
17
  |----------|------------------|--------|
18
- | GitHub | v2.3.0 | ✅ Pushed to main |
19
- | npm | 2.3.0 | Published (via CI) |
20
- | ClawHub | 2.3.0 | Published (via CI) |
18
+ | GitHub | v2.4.0 | ✅ Pushed to main |
19
+ | npm | 2.4.0 | Pending (via CI) |
20
+ | ClawHub | 2.4.0 | Pending (via CI) |
21
21
  <!-- /SECTION: release_state -->
22
22
 
23
23
  <!-- SECTION: open_tasks -->
@@ -31,6 +31,7 @@ _No open tasks._
31
31
 
32
32
  | Task | Title | Version |
33
33
  |------|-------|---------|
34
+ | T-020 | Metrics & health dashboard: request volume, latency, errors, token usage | 2.4.0 |
34
35
  | T-019 | Full-featured CLI bridge: tool calls + multimodal + autonomous execution | 2.3.0 |
35
36
  | T-018 | Fix vllm apiKey corruption (401) + harden config-patcher | 2.2.1 |
36
37
  | T-017 | Fix log spam, restart loops, CLI blocking | 2.2.0 |
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
4
4
 
5
- **Current version:** `2.3.0`
5
+ **Current version:** `2.5.0`
6
6
 
7
7
  ---
8
8
 
@@ -282,7 +282,17 @@ In `~/.openclaw/openclaw.json` → `plugins.entries.openclaw-cli-bridge-elvatis.
282
282
  "enableProxy": true, // start local CLI proxy server (default: true)
283
283
  "proxyPort": 31337, // proxy port (default: 31337)
284
284
  "proxyApiKey": "cli-bridge", // key between OpenClaw vllm provider and proxy (default: "cli-bridge")
285
- "proxyTimeoutMs": 120000 // CLI subprocess timeout in ms (default: 120s)
285
+ "proxyTimeoutMs": 300000, // base CLI subprocess timeout in ms (default: 300s, scales dynamically)
286
+ "modelTimeouts": { // per-model timeout overrides in ms (optional)
287
+ "cli-claude/claude-opus-4-6": 300000, // 5 min — heavy/agentic tasks
288
+ "cli-claude/claude-sonnet-4-6": 180000, // 3 min — interactive chat
289
+ "cli-claude/claude-haiku-4-5": 90000, // 90s — fast responses
290
+ "cli-gemini/gemini-2.5-pro": 180000,
291
+ "cli-gemini/gemini-2.5-flash": 90000,
292
+ "openai-codex/gpt-5.4": 300000,
293
+ "openai-codex/gpt-5.3-codex": 180000,
294
+ "openai-codex/gpt-5.1-codex-mini": 90000
295
+ }
286
296
  }
287
297
  ```
288
298
 
@@ -368,7 +378,7 @@ Model fallback (v1.9.0):
368
378
  ```bash
369
379
  npm run lint # eslint (TypeScript-aware)
370
380
  npm run typecheck # tsc --noEmit
371
- npm test # vitest run (121 tests)
381
+ npm test # vitest run (217 tests)
372
382
  npm run ci # lint + typecheck + test
373
383
  ```
374
384
 
@@ -376,6 +386,17 @@ npm run ci # lint + typecheck + test
376
386
 
377
387
  ## Changelog
378
388
 
389
+ ### v2.5.0
390
+ - **feat:** Graceful timeout handling — replaces Node's `spawn({ timeout })` with a manual SIGTERM→SIGKILL sequence (5s grace period). Exit 143 is now clearly annotated as "timeout by supervisor" in logs, not a cryptic model error.
391
+ - **feat:** Per-model timeout profiles — new `modelTimeouts` config option sets sensible defaults per model: Opus 5 min, Sonnet 3 min, Haiku 90s, Flash models 90s. Scales dynamically with conversation size (+2s/msg beyond 10, +5s/tool).
392
+ - **feat:** Timeout logging — every timeout event logs the model, elapsed time, and SIGTERM/SIGKILL steps. Fallback messages now show "timeout by supervisor" instead of raw exit codes.
393
+ - **fix:** Base timeout raised from 120s to 300s (was causing frequent Exit 143 on normal Sonnet conversations)
394
+ - **fix:** Session manager `kill()`, `cleanup()`, and `stop()` now use graceful SIGTERM→SIGKILL instead of immediate SIGTERM
395
+ - **test:** 7 new tests for timeout handling and exit code annotation (217 total)
396
+
397
+ ### v2.4.0
398
+ - **feat:** Metrics & health dashboard — request volume, latency, errors, token usage
399
+
379
400
  ### v2.3.0
380
401
  - **feat:** OpenAI tool calling protocol support for all CLI models — tool definitions are injected into the prompt, structured `tool_calls` responses are parsed and returned in OpenAI format
381
402
  - **feat:** Multimodal content support — images and audio from webchat are extracted to temp files and passed to CLIs (Codex uses native `-i` flag, Claude/Gemini reference file paths in prompt)
package/SKILL.md CHANGED
@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
68
68
 
69
69
  See `README.md` for full configuration reference and architecture diagram.
70
70
 
71
- **Version:** 2.1.3
71
+ **Version:** 2.5.0
package/index.ts CHANGED
@@ -98,6 +98,7 @@ interface CliPluginConfig {
98
98
  proxyPort?: number;
99
99
  proxyApiKey?: string;
100
100
  proxyTimeoutMs?: number;
101
+ modelTimeouts?: Record<string, number>;
101
102
  grokSessionPath?: string;
102
103
  }
103
104
 
@@ -987,7 +988,22 @@ const plugin = {
987
988
  const enableProxy = cfg.enableProxy ?? true;
988
989
  const port = cfg.proxyPort ?? DEFAULT_PROXY_PORT;
989
990
  const apiKey = cfg.proxyApiKey ?? DEFAULT_PROXY_API_KEY;
990
- const timeoutMs = cfg.proxyTimeoutMs ?? 120_000;
991
+ const timeoutMs = cfg.proxyTimeoutMs ?? 300_000;
992
+ // Per-model timeout overrides — fall back to sensible defaults if not configured.
993
+ // Interactive/fast models get shorter timeouts, heavy models get more time.
994
+ const defaultModelTimeouts: Record<string, number> = {
995
+ "cli-claude/claude-opus-4-6": 300_000, // 5 min — heavy, agentic tasks
996
+ "cli-claude/claude-sonnet-4-6": 180_000, // 3 min — standard interactive chat
997
+ "cli-claude/claude-haiku-4-5": 90_000, // 90s — fast responses
998
+ "cli-gemini/gemini-2.5-pro": 180_000,
999
+ "cli-gemini/gemini-2.5-flash": 90_000,
1000
+ "cli-gemini/gemini-3-pro-preview": 180_000,
1001
+ "cli-gemini/gemini-3-flash-preview": 90_000,
1002
+ "openai-codex/gpt-5.4": 300_000,
1003
+ "openai-codex/gpt-5.3-codex": 180_000,
1004
+ "openai-codex/gpt-5.1-codex-mini": 90_000,
1005
+ };
1006
+ const modelTimeouts = { ...defaultModelTimeouts, ...cfg.modelTimeouts };
991
1007
  const codexAuthPath = cfg.codexAuthPath ?? DEFAULT_CODEX_AUTH_PATH;
992
1008
  const grokSessionPath = cfg.grokSessionPath ?? DEFAULT_SESSION_PATH;
993
1009
 
@@ -1379,6 +1395,7 @@ const plugin = {
1379
1395
  version: plugin.version,
1380
1396
  modelCommands,
1381
1397
  modelFallbacks,
1398
+ modelTimeouts,
1382
1399
  getExpiryInfo: () => ({
1383
1400
  grok: (() => { const e = loadGrokExpiry(); return e ? formatExpiryInfo(e) : null; })(),
1384
1401
  gemini: (() => { const e = loadGeminiExpiry(); return e ? formatGeminiExpiry(e) : null; })(),
@@ -1415,7 +1432,7 @@ const plugin = {
1415
1432
  // One final attempt
1416
1433
  try {
1417
1434
  const server = await startProxyServer({
1418
- port, apiKey, timeoutMs, modelCommands, modelFallbacks,
1435
+ port, apiKey, timeoutMs, modelCommands, modelFallbacks, modelTimeouts,
1419
1436
  log: (msg) => api.logger.info(msg),
1420
1437
  warn: (msg) => api.logger.warn(msg),
1421
1438
  getGrokContext: () => grokContext,
@@ -2,7 +2,7 @@
2
2
  "id": "openclaw-cli-bridge-elvatis",
3
3
  "slug": "openclaw-cli-bridge-elvatis",
4
4
  "name": "OpenClaw CLI Bridge",
5
- "version": "2.2.2",
5
+ "version": "2.5.0",
6
6
  "license": "MIT",
7
7
  "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
8
8
  "providers": [
@@ -34,7 +34,26 @@
34
34
  },
35
35
  "proxyTimeoutMs": {
36
36
  "type": "number",
37
- "description": "Max time to wait for a CLI response in ms (default: 120000)"
37
+ "description": "Base timeout for CLI responses in ms (default: 300000). Scales dynamically with conversation size."
38
+ },
39
+ "modelTimeouts": {
40
+ "type": "object",
41
+ "description": "Per-model timeout overrides in ms. Keys are model IDs (e.g. 'cli-claude/claude-sonnet-4-6'). Use this to give heavy models more time or limit fast models. Entries here override the built-in per-model defaults; models with no entry and no built-in default fall back to proxyTimeoutMs.",
42
+ "additionalProperties": {
43
+ "type": "number"
44
+ },
45
+ "default": {
46
+ "cli-claude/claude-opus-4-6": 300000,
47
+ "cli-claude/claude-sonnet-4-6": 180000,
48
+ "cli-claude/claude-haiku-4-5": 90000,
49
+ "cli-gemini/gemini-2.5-pro": 180000,
50
+ "cli-gemini/gemini-2.5-flash": 90000,
51
+ "cli-gemini/gemini-3-pro-preview": 180000,
52
+ "cli-gemini/gemini-3-flash-preview": 90000,
53
+ "openai-codex/gpt-5.4": 300000,
54
+ "openai-codex/gpt-5.3-codex": 180000,
55
+ "openai-codex/gpt-5.1-codex-mini": 90000
56
+ }
38
57
  }
39
58
  }
40
59
  }
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
3
- "version": "2.3.0",
3
+ "version": "2.5.0",
4
4
  "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
5
5
  "type": "module",
6
6
  "openclaw": {
package/src/cli-runner.ts CHANGED
@@ -278,6 +278,8 @@ export interface CliRunResult {
278
278
  stdout: string;
279
279
  stderr: string;
280
280
  exitCode: number;
281
+ /** True when the process was killed due to a timeout (exit 143 = SIGTERM). */
282
+ timedOut: boolean;
281
283
  }
282
284
 
283
285
  export interface RunCliOptions {
@@ -287,11 +289,25 @@ export interface RunCliOptions {
287
289
  */
288
290
  cwd?: string;
289
291
  timeoutMs?: number;
292
+ /** Optional logger for timeout events. */
293
+ log?: (msg: string) => void;
290
294
  }
291
295
 
296
+ /**
297
+ * Grace period between SIGTERM and SIGKILL when a timeout fires.
298
+ * Gives the CLI process 5 seconds to flush output and exit cleanly.
299
+ */
300
+ const TIMEOUT_GRACE_MS = 5_000;
301
+
292
302
  /**
293
303
  * Spawn a CLI and deliver the prompt via stdin.
294
304
  *
305
+ * Timeout handling (replaces Node's spawn({ timeout }) for better control):
306
+ * 1. After `timeoutMs`, send SIGTERM and log a clear message.
307
+ * 2. If the process doesn't exit within TIMEOUT_GRACE_MS (5s), send SIGKILL.
308
+ * 3. The result's `timedOut` flag is set so callers can distinguish
309
+ * supervisor timeouts from real CLI errors.
310
+ *
295
311
  * cwd defaults to homedir() so CLIs that scan the working directory for
296
312
  * project context (like Gemini) don't accidentally enter agentic mode.
297
313
  */
@@ -303,16 +319,40 @@ export function runCli(
303
319
  opts: RunCliOptions = {}
304
320
  ): Promise<CliRunResult> {
305
321
  const cwd = opts.cwd ?? homedir();
322
+ const log = opts.log ?? (() => {});
306
323
 
307
324
  return new Promise((resolve, reject) => {
325
+ // Do NOT pass timeout to spawn() — we manage it ourselves for graceful shutdown.
308
326
  const proc = spawn(cmd, args, {
309
- timeout: timeoutMs,
310
327
  env: buildMinimalEnv(),
311
328
  cwd,
312
329
  });
313
330
 
314
331
  let stdout = "";
315
332
  let stderr = "";
333
+ let timedOut = false;
334
+ let killTimer: ReturnType<typeof setTimeout> | null = null;
335
+ let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
336
+
337
+ const clearTimers = () => {
338
+ if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
339
+ if (killTimer) { clearTimeout(killTimer); killTimer = null; }
340
+ };
341
+
342
+ // ── Timeout sequence: SIGTERM → grace → SIGKILL ──────────────────────
343
+ timeoutTimer = setTimeout(() => {
344
+ timedOut = true;
345
+ const elapsed = Math.round(timeoutMs / 1000);
346
+ log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
347
+ proc.kill("SIGTERM");
348
+
349
+ killTimer = setTimeout(() => {
350
+ if (!proc.killed) {
351
+ log(`[cli-bridge] ${cmd} still running after ${TIMEOUT_GRACE_MS / 1000}s grace, sending SIGKILL`);
352
+ proc.kill("SIGKILL");
353
+ }
354
+ }, TIMEOUT_GRACE_MS);
355
+ }, timeoutMs);
316
356
 
317
357
  proc.stdin.write(prompt, "utf8", () => {
318
358
  proc.stdin.end();
@@ -322,10 +362,12 @@ export function runCli(
322
362
  proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
323
363
 
324
364
  proc.on("close", (code) => {
325
- resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0 });
365
+ clearTimers();
366
+ resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0, timedOut });
326
367
  });
327
368
 
328
369
  proc.on("error", (err) => {
370
+ clearTimers();
329
371
  reject(new Error(`Failed to spawn '${cmd}': ${err.message}`));
330
372
  });
331
373
  });
@@ -334,6 +376,7 @@ export function runCli(
334
376
  /**
335
377
  * Spawn a CLI with the prompt delivered as a CLI argument (not stdin).
336
378
  * Used by OpenCode which expects `opencode run "prompt"`.
379
+ * Uses the same graceful SIGTERM→SIGKILL timeout sequence as runCli.
337
380
  */
338
381
  export function runCliWithArg(
339
382
  cmd: string,
@@ -342,30 +385,66 @@ export function runCliWithArg(
342
385
  opts: RunCliOptions = {}
343
386
  ): Promise<CliRunResult> {
344
387
  const cwd = opts.cwd ?? homedir();
388
+ const log = opts.log ?? (() => {});
345
389
 
346
390
  return new Promise((resolve, reject) => {
347
391
  const proc = spawn(cmd, args, {
348
- timeout: timeoutMs,
349
392
  env: buildMinimalEnv(),
350
393
  cwd,
351
394
  });
352
395
 
353
396
  let stdout = "";
354
397
  let stderr = "";
398
+ let timedOut = false;
399
+ let killTimer: ReturnType<typeof setTimeout> | null = null;
400
+ let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
401
+
402
+ const clearTimers = () => {
403
+ if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
404
+ if (killTimer) { clearTimeout(killTimer); killTimer = null; }
405
+ };
406
+
407
+ timeoutTimer = setTimeout(() => {
408
+ timedOut = true;
409
+ const elapsed = Math.round(timeoutMs / 1000);
410
+ log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
411
+ proc.kill("SIGTERM");
412
+
413
+ killTimer = setTimeout(() => {
414
+ if (!proc.killed) {
415
+ log(`[cli-bridge] ${cmd} still running after ${TIMEOUT_GRACE_MS / 1000}s grace, sending SIGKILL`);
416
+ proc.kill("SIGKILL");
417
+ }
418
+ }, TIMEOUT_GRACE_MS);
419
+ }, timeoutMs);
355
420
 
356
421
  proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
357
422
  proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
358
423
 
359
424
  proc.on("close", (code) => {
360
- resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0 });
425
+ clearTimers();
426
+ resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0, timedOut });
361
427
  });
362
428
 
363
429
  proc.on("error", (err) => {
430
+ clearTimers();
364
431
  reject(new Error(`Failed to spawn '${cmd}': ${err.message}`));
365
432
  });
366
433
  });
367
434
  }
368
435
 
436
+ /**
437
+ * Annotate an error message when exit code 143 (SIGTERM) is detected.
438
+ * Makes it clear in logs that this was a supervisor timeout, not a model error.
439
+ */
440
+ export function annotateExitError(exitCode: number, stderr: string, timedOut: boolean, model: string): string {
441
+ const base = stderr || "(no output)";
442
+ if (timedOut || exitCode === 143) {
443
+ return `timeout: ${model} killed by supervisor (exit ${exitCode}, likely timeout) — ${base}`;
444
+ }
445
+ return base;
446
+ }
447
+
369
448
  // ──────────────────────────────────────────────────────────────────────────────
370
449
  // Gemini CLI
371
450
  // ──────────────────────────────────────────────────────────────────────────────
@@ -391,7 +470,7 @@ export async function runGemini(
391
470
  modelId: string,
392
471
  timeoutMs: number,
393
472
  workdir?: string,
394
- opts?: { tools?: ToolDefinition[] }
473
+ opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
395
474
  ): Promise<string> {
396
475
  const model = stripPrefix(modelId);
397
476
  // -p "" = headless mode trigger; actual prompt arrives via stdin
@@ -404,7 +483,7 @@ export async function runGemini(
404
483
  ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt
405
484
  : prompt;
406
485
 
407
- const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd });
486
+ const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
408
487
 
409
488
  // Filter out [WARN] lines from stderr (Gemini emits noisy permission warnings)
410
489
  const cleanStderr = result.stderr
@@ -414,7 +493,7 @@ export async function runGemini(
414
493
  .trim();
415
494
 
416
495
  if (result.exitCode !== 0 && result.stdout.length === 0) {
417
- throw new Error(`gemini exited ${result.exitCode}: ${cleanStderr || "(no output)"}`);
496
+ throw new Error(`gemini exited ${result.exitCode}: ${annotateExitError(result.exitCode, cleanStderr, result.timedOut, modelId)}`);
418
497
  }
419
498
 
420
499
  return result.stdout || cleanStderr;
@@ -434,7 +513,7 @@ export async function runClaude(
434
513
  modelId: string,
435
514
  timeoutMs: number,
436
515
  workdir?: string,
437
- opts?: { tools?: ToolDefinition[] }
516
+ opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
438
517
  ): Promise<string> {
439
518
  // Proactively refresh OAuth token if it's about to expire (< 5 min remaining).
440
519
  // No-op for API-key users.
@@ -457,15 +536,19 @@ export async function runClaude(
457
536
  : prompt;
458
537
 
459
538
  const cwd = workdir ?? homedir();
460
- const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd });
539
+ const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
461
540
 
462
541
  // On 401: attempt one token refresh + retry before giving up.
463
542
  if (result.exitCode !== 0 && result.stdout.length === 0) {
543
+ // If this was a timeout, don't bother with auth retry — it's a supervisor kill, not a 401.
544
+ if (result.timedOut) {
545
+ throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, true, modelId)}`);
546
+ }
464
547
  const stderr = result.stderr || "(no output)";
465
548
  if (stderr.includes("401") || stderr.includes("Invalid authentication credentials") || stderr.includes("authentication_error")) {
466
549
  // Refresh and retry once
467
550
  await refreshClaudeToken();
468
- const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd });
551
+ const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
469
552
  if (retry.exitCode !== 0 && retry.stdout.length === 0) {
470
553
  const retryStderr = retry.stderr || "(no output)";
471
554
  if (retryStderr.includes("401") || retryStderr.includes("authentication_error") || retryStderr.includes("Invalid authentication credentials")) {
@@ -478,7 +561,7 @@ export async function runClaude(
478
561
  }
479
562
  return retry.stdout;
480
563
  }
481
- throw new Error(`claude exited ${result.exitCode}: ${stderr}`);
564
+ throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, stderr, false, modelId)}`);
482
565
  }
483
566
 
484
567
  return result.stdout;
@@ -508,7 +591,7 @@ export async function runCodex(
508
591
  modelId: string,
509
592
  timeoutMs: number,
510
593
  workdir?: string,
511
- opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[] }
594
+ opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[]; log?: (msg: string) => void }
512
595
  ): Promise<string> {
513
596
  const model = stripPrefix(modelId);
514
597
  const args = ["--model", model, "--quiet", "--full-auto"];
@@ -532,10 +615,10 @@ export async function runCodex(
532
615
  ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt
533
616
  : prompt;
534
617
 
535
- const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd });
618
+ const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
536
619
 
537
620
  if (result.exitCode !== 0 && result.stdout.length === 0) {
538
- throw new Error(`codex exited ${result.exitCode}: ${result.stderr || "(no output)"}`);
621
+ throw new Error(`codex exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, modelId)}`);
539
622
  }
540
623
 
541
624
  return result.stdout || result.stderr;
@@ -553,14 +636,15 @@ export async function runOpenCode(
553
636
  prompt: string,
554
637
  _modelId: string,
555
638
  timeoutMs: number,
556
- workdir?: string
639
+ workdir?: string,
640
+ opts?: { log?: (msg: string) => void }
557
641
  ): Promise<string> {
558
642
  const args = ["run", prompt];
559
643
  const cwd = workdir ?? homedir();
560
- const result = await runCliWithArg("opencode", args, timeoutMs, { cwd });
644
+ const result = await runCliWithArg("opencode", args, timeoutMs, { cwd, log: opts?.log });
561
645
 
562
646
  if (result.exitCode !== 0 && result.stdout.length === 0) {
563
- throw new Error(`opencode exited ${result.exitCode}: ${result.stderr || "(no output)"}`);
647
+ throw new Error(`opencode exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, "opencode")}`);
564
648
  }
565
649
 
566
650
  return result.stdout || result.stderr;
@@ -578,14 +662,15 @@ export async function runPi(
578
662
  prompt: string,
579
663
  _modelId: string,
580
664
  timeoutMs: number,
581
- workdir?: string
665
+ workdir?: string,
666
+ opts?: { log?: (msg: string) => void }
582
667
  ): Promise<string> {
583
668
  const args = ["-p", prompt];
584
669
  const cwd = workdir ?? homedir();
585
- const result = await runCliWithArg("pi", args, timeoutMs, { cwd });
670
+ const result = await runCliWithArg("pi", args, timeoutMs, { cwd, log: opts?.log });
586
671
 
587
672
  if (result.exitCode !== 0 && result.stdout.length === 0) {
588
- throw new Error(`pi exited ${result.exitCode}: ${result.stderr || "(no output)"}`);
673
+ throw new Error(`pi exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, "pi")}`);
589
674
  }
590
675
 
591
676
  return result.stdout || result.stderr;
@@ -663,6 +748,8 @@ export interface RouteOptions {
663
748
  * Passed to CLIs that support native media input (e.g. codex -i).
664
749
  */
665
750
  mediaFiles?: MediaFile[];
751
+ /** Logger for timeout and lifecycle events. */
752
+ log?: (msg: string) => void;
666
753
  }
667
754
 
668
755
  /**
@@ -708,12 +795,13 @@ export async function routeToCliRunner(
708
795
  // Resolve aliases (e.g. gemini-3-pro → gemini-3-pro-preview) after allowlist check
709
796
  const resolved = normalizeModelAlias(normalized);
710
797
 
798
+ const log = opts.log;
711
799
  let rawText: string;
712
- if (resolved.startsWith("cli-gemini/")) rawText = await runGemini(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools });
713
- else if (resolved.startsWith("cli-claude/")) rawText = await runClaude(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools });
714
- else if (resolved.startsWith("openai-codex/")) rawText = await runCodex(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, mediaFiles: opts.mediaFiles });
715
- else if (resolved.startsWith("opencode/")) rawText = await runOpenCode(prompt, resolved, timeoutMs, opts.workdir);
716
- else if (resolved.startsWith("pi/")) rawText = await runPi(prompt, resolved, timeoutMs, opts.workdir);
800
+ if (resolved.startsWith("cli-gemini/")) rawText = await runGemini(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, log });
801
+ else if (resolved.startsWith("cli-claude/")) rawText = await runClaude(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, log });
802
+ else if (resolved.startsWith("openai-codex/")) rawText = await runCodex(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, mediaFiles: opts.mediaFiles, log });
803
+ else if (resolved.startsWith("opencode/")) rawText = await runOpenCode(prompt, resolved, timeoutMs, opts.workdir, { log });
804
+ else if (resolved.startsWith("pi/")) rawText = await runPi(prompt, resolved, timeoutMs, opts.workdir, { log });
717
805
  else throw new Error(
718
806
  `Unknown CLI bridge model: "${model}". Use "vllm/cli-gemini/<model>", "vllm/cli-claude/<model>", "openai-codex/<model>", "opencode/<model>", or "pi/<model>".`
719
807
  );
package/src/metrics.ts ADDED
@@ -0,0 +1,85 @@
1
+ /**
2
+ * metrics.ts
3
+ *
4
+ * In-memory metrics collector for the CLI bridge proxy.
5
+ * Tracks request counts, errors, latency, and token usage per model.
6
+ * All operations are O(1) — cannot block the event loop.
7
+ */
8
+
9
+ export interface ModelMetrics {
10
+ model: string;
11
+ requests: number;
12
+ errors: number;
13
+ totalLatencyMs: number;
14
+ promptTokens: number;
15
+ completionTokens: number;
16
+ lastRequestAt: number | null;
17
+ }
18
+
19
+ export interface MetricsSnapshot {
20
+ startedAt: number;
21
+ totalRequests: number;
22
+ totalErrors: number;
23
+ models: ModelMetrics[]; // sorted by requests desc
24
+ }
25
+
26
+ class MetricsCollector {
27
+ private startedAt = Date.now();
28
+ private data = new Map<string, ModelMetrics>();
29
+
30
+ recordRequest(
31
+ model: string,
32
+ durationMs: number,
33
+ success: boolean,
34
+ promptTokens?: number,
35
+ completionTokens?: number,
36
+ ): void {
37
+ let entry = this.data.get(model);
38
+ if (!entry) {
39
+ entry = {
40
+ model,
41
+ requests: 0,
42
+ errors: 0,
43
+ totalLatencyMs: 0,
44
+ promptTokens: 0,
45
+ completionTokens: 0,
46
+ lastRequestAt: null,
47
+ };
48
+ this.data.set(model, entry);
49
+ }
50
+ entry.requests++;
51
+ if (!success) entry.errors++;
52
+ entry.totalLatencyMs += durationMs;
53
+ if (promptTokens) entry.promptTokens += promptTokens;
54
+ if (completionTokens) entry.completionTokens += completionTokens;
55
+ entry.lastRequestAt = Date.now();
56
+ }
57
+
58
+ getMetrics(): MetricsSnapshot {
59
+ let totalRequests = 0;
60
+ let totalErrors = 0;
61
+ const models: ModelMetrics[] = [];
62
+
63
+ for (const entry of this.data.values()) {
64
+ totalRequests += entry.requests;
65
+ totalErrors += entry.errors;
66
+ models.push({ ...entry });
67
+ }
68
+
69
+ models.sort((a, b) => b.requests - a.requests);
70
+
71
+ return {
72
+ startedAt: this.startedAt,
73
+ totalRequests,
74
+ totalErrors,
75
+ models,
76
+ };
77
+ }
78
+
79
+ reset(): void {
80
+ this.startedAt = Date.now();
81
+ this.data.clear();
82
+ }
83
+ }
84
+
85
+ export const metrics = new MetricsCollector();
@@ -19,6 +19,7 @@ import { chatgptComplete, chatgptCompleteStream, type ChatMessage as ChatGPTBrow
19
19
  import type { BrowserContext } from "playwright";
20
20
  import { renderStatusPage, type StatusProvider } from "./status-template.js";
21
21
  import { sessionManager } from "./session-manager.js";
22
+ import { metrics } from "./metrics.js";
22
23
 
23
24
  export type GrokCompleteOptions = Parameters<typeof grokComplete>[1];
24
25
  export type GrokCompleteStreamOptions = Parameters<typeof grokCompleteStream>[1];
@@ -81,6 +82,20 @@ export interface ProxyServerOptions {
81
82
  * with the fallback model. Example: "cli-gemini/gemini-2.5-pro" → "cli-gemini/gemini-2.5-flash"
82
83
  */
83
84
  modelFallbacks?: Record<string, string>;
85
+ /**
86
+ * Per-model timeout overrides (ms). Keys are model IDs (without "vllm/" prefix).
87
+ * Use this to give heavy models more time or limit fast models.
88
+ *
89
+ * Example:
90
+ * {
91
+ * "cli-claude/claude-sonnet-4-6": 180_000, // 3 min for interactive chat
92
+ * "cli-claude/claude-opus-4-6": 300_000, // 5 min for heavy tasks
93
+ * "cli-claude/claude-haiku-4-5": 90_000, // 90s for fast responses
94
+ * }
95
+ *
96
+ * When not set for a model, falls back to proxyTimeoutMs (default 300s base).
97
+ */
98
+ modelTimeouts?: Record<string, number>;
84
99
  }
85
100
 
86
101
  /** Available CLI bridge models for GET /v1/models */
@@ -222,6 +237,7 @@ async function handleRequest(
222
237
  chatgpt: sessionStatus("chatgpt", opts.getChatGPTContext, expiry.chatgpt),
223
238
  },
224
239
  models: CLI_MODELS.length,
240
+ metrics: metrics.getMetrics(),
225
241
  };
226
242
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
227
243
  res.end(JSON.stringify(health, null, 2));
@@ -240,7 +256,7 @@ async function handleRequest(
240
256
  { name: "ChatGPT", icon: "◉", expiry: expiry.chatgpt, loginCmd: "/chatgpt-login", ctx: opts.getChatGPTContext?.() ?? null },
241
257
  ];
242
258
 
243
- const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands });
259
+ const html = renderStatusPage({ version, port: opts.port, providers, models: CLI_MODELS, modelCommands: opts.modelCommands, metrics: metrics.getMetrics() });
244
260
  res.writeHead(200, { "Content-Type": "text/html; charset=utf-8" });
245
261
  res.end(html);
246
262
  return;
@@ -331,6 +347,7 @@ async function handleRequest(
331
347
  const grokMessages = messages as GrokChatMessage[];
332
348
  const doGrokComplete = opts._grokComplete ?? grokComplete;
333
349
  const doGrokCompleteStream = opts._grokCompleteStream ?? grokCompleteStream;
350
+ const grokStart = Date.now();
334
351
  try {
335
352
  if (stream) {
336
353
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -341,11 +358,13 @@ async function handleRequest(
341
358
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
342
359
  opts.log
343
360
  );
361
+ metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
344
362
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
345
363
  res.write("data: [DONE]\n\n");
346
364
  res.end();
347
365
  } else {
348
366
  const result = await doGrokComplete(grokCtx, { messages: grokMessages, model: grokModel, timeoutMs }, opts.log);
367
+ metrics.recordRequest(model, Date.now() - grokStart, true, result.promptTokens, result.completionTokens);
349
368
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
350
369
  res.end(JSON.stringify({
351
370
  id, object: "chat.completion", created, model,
@@ -354,6 +373,7 @@ async function handleRequest(
354
373
  }));
355
374
  }
356
375
  } catch (err) {
376
+ metrics.recordRequest(model, Date.now() - grokStart, false);
357
377
  const msg = (err as Error).message;
358
378
  opts.warn(`[cli-bridge] Grok error for ${model}: ${msg}`);
359
379
  if (!res.headersSent) {
@@ -380,6 +400,7 @@ async function handleRequest(
380
400
  const geminiMessages = messages as GeminiBrowserChatMessage[];
381
401
  const doGeminiComplete = opts._geminiComplete ?? geminiComplete;
382
402
  const doGeminiCompleteStream = opts._geminiCompleteStream ?? geminiCompleteStream;
403
+ const geminiStart = Date.now();
383
404
  try {
384
405
  if (stream) {
385
406
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -390,11 +411,13 @@ async function handleRequest(
390
411
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
391
412
  opts.log
392
413
  );
414
+ metrics.recordRequest(model, Date.now() - geminiStart, true);
393
415
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
394
416
  res.write("data: [DONE]\n\n");
395
417
  res.end();
396
418
  } else {
397
419
  const result = await doGeminiComplete(geminiCtx, { messages: geminiMessages, model, timeoutMs }, opts.log);
420
+ metrics.recordRequest(model, Date.now() - geminiStart, true);
398
421
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
399
422
  res.end(JSON.stringify({
400
423
  id, object: "chat.completion", created, model,
@@ -403,6 +426,7 @@ async function handleRequest(
403
426
  }));
404
427
  }
405
428
  } catch (err) {
429
+ metrics.recordRequest(model, Date.now() - geminiStart, false);
406
430
  const msg = (err as Error).message;
407
431
  opts.warn(`[cli-bridge] Gemini browser error for ${model}: ${msg}`);
408
432
  if (!res.headersSent) {
@@ -429,6 +453,7 @@ async function handleRequest(
429
453
  const claudeMessages = messages as ClaudeBrowserChatMessage[];
430
454
  const doClaudeComplete = opts._claudeComplete ?? claudeComplete;
431
455
  const doClaudeCompleteStream = opts._claudeCompleteStream ?? claudeCompleteStream;
456
+ const claudeStart = Date.now();
432
457
  try {
433
458
  if (stream) {
434
459
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -439,11 +464,13 @@ async function handleRequest(
439
464
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
440
465
  opts.log
441
466
  );
467
+ metrics.recordRequest(model, Date.now() - claudeStart, true);
442
468
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
443
469
  res.write("data: [DONE]\n\n");
444
470
  res.end();
445
471
  } else {
446
472
  const result = await doClaudeComplete(claudeCtx, { messages: claudeMessages, model, timeoutMs }, opts.log);
473
+ metrics.recordRequest(model, Date.now() - claudeStart, true);
447
474
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
448
475
  res.end(JSON.stringify({
449
476
  id, object: "chat.completion", created, model,
@@ -452,6 +479,7 @@ async function handleRequest(
452
479
  }));
453
480
  }
454
481
  } catch (err) {
482
+ metrics.recordRequest(model, Date.now() - claudeStart, false);
455
483
  const msg = (err as Error).message;
456
484
  opts.warn(`[cli-bridge] Claude browser error for ${model}: ${msg}`);
457
485
  if (!res.headersSent) {
@@ -479,6 +507,7 @@ async function handleRequest(
479
507
  const chatgptMessages = messages as ChatGPTBrowserChatMessage[];
480
508
  const doChatGPTComplete = opts._chatgptComplete ?? chatgptComplete;
481
509
  const doChatGPTCompleteStream = opts._chatgptCompleteStream ?? chatgptCompleteStream;
510
+ const chatgptStart = Date.now();
482
511
  try {
483
512
  if (stream) {
484
513
  res.writeHead(200, { "Content-Type": "text/event-stream", "Cache-Control": "no-cache", Connection: "keep-alive", ...corsHeaders() });
@@ -489,11 +518,13 @@ async function handleRequest(
489
518
  (token) => sendSseChunk(res, { id, created, model, delta: { content: token }, finish_reason: null }),
490
519
  opts.log
491
520
  );
521
+ metrics.recordRequest(model, Date.now() - chatgptStart, true);
492
522
  sendSseChunk(res, { id, created, model, delta: {}, finish_reason: result.finishReason });
493
523
  res.write("data: [DONE]\n\n");
494
524
  res.end();
495
525
  } else {
496
526
  const result = await doChatGPTComplete(chatgptCtx, { messages: chatgptMessages, model: chatgptModel, timeoutMs }, opts.log);
527
+ metrics.recordRequest(model, Date.now() - chatgptStart, true);
497
528
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
498
529
  res.end(JSON.stringify({
499
530
  id, object: "chat.completion", created, model,
@@ -502,6 +533,7 @@ async function handleRequest(
502
533
  }));
503
534
  }
504
535
  } catch (err) {
536
+ metrics.recordRequest(model, Date.now() - chatgptStart, false);
505
537
  const msg = (err as Error).message;
506
538
  opts.warn(`[cli-bridge] ChatGPT browser error for ${model}: ${msg}`);
507
539
  if (!res.headersSent) {
@@ -546,6 +578,7 @@ async function handleRequest(
546
578
  const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM }, ...truncated];
547
579
  const requestBody = JSON.stringify({ ...parsed, messages: bitnetMessages, tools: undefined });
548
580
 
581
+ const bitnetStart = Date.now();
549
582
  try {
550
583
  const targetUrl = new URL("/v1/chat/completions", bitnetUrl);
551
584
  const proxyRes = await new Promise<http.IncomingMessage>((resolve, reject) => {
@@ -566,6 +599,7 @@ async function handleRequest(
566
599
  proxyReq.end();
567
600
  });
568
601
 
602
+ metrics.recordRequest(model, Date.now() - bitnetStart, true);
569
603
  // Forward status + headers
570
604
  const fwdHeaders: Record<string, string> = { ...corsHeaders() };
571
605
  const ct = proxyRes.headers["content-type"];
@@ -577,6 +611,7 @@ async function handleRequest(
577
611
  res.writeHead(proxyRes.statusCode ?? 200, fwdHeaders);
578
612
  proxyRes.pipe(res);
579
613
  } catch (err) {
614
+ metrics.recordRequest(model, Date.now() - bitnetStart, false);
580
615
  const msg = (err as Error).message;
581
616
  if (msg.includes("ECONNREFUSED") || msg.includes("ECONNRESET") || msg.includes("ENOTFOUND")) {
582
617
  res.writeHead(503, { "Content-Type": "application/json", ...corsHeaders() });
@@ -602,34 +637,81 @@ async function handleRequest(
602
637
  // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
603
638
  let result: CliToolResult;
604
639
  let usedModel = model;
605
- const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined };
640
+ const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
641
+
642
+ // ── Dynamic timeout: scale with conversation size ────────────────────────
643
+ // Per-model timeout takes precedence, then global proxyTimeoutMs, then 300s default.
644
+ const perModelTimeout = opts.modelTimeouts?.[model];
645
+ const baseTimeout = perModelTimeout ?? opts.timeoutMs ?? 300_000;
646
+ const msgExtra = Math.max(0, cleanMessages.length - 10) * 2_000;
647
+ const toolExtra = (tools?.length ?? 0) * 5_000;
648
+ const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
649
+ opts.log(`[cli-bridge] ${model} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
650
+
651
+ // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
652
+ let sseHeadersSent = false;
653
+ let keepaliveInterval: ReturnType<typeof setInterval> | null = null;
654
+ if (stream) {
655
+ res.writeHead(200, {
656
+ "Content-Type": "text/event-stream",
657
+ "Cache-Control": "no-cache",
658
+ Connection: "keep-alive",
659
+ ...corsHeaders(),
660
+ });
661
+ sseHeadersSent = true;
662
+ res.write(": keepalive\n\n");
663
+ keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, 15_000);
664
+ }
665
+
666
+ const cliStart = Date.now();
606
667
  try {
607
- result = await routeToCliRunner(model, cleanMessages, opts.timeoutMs ?? 120_000, routeOpts);
668
+ result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
669
+ metrics.recordRequest(model, Date.now() - cliStart, true);
608
670
  } catch (err) {
671
+ const primaryDuration = Date.now() - cliStart;
609
672
  const msg = (err as Error).message;
610
673
  // ── Model fallback: retry once with a lighter model if configured ────
674
+ const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
611
675
  const fallbackModel = opts.modelFallbacks?.[model];
612
676
  if (fallbackModel) {
613
- opts.warn(`[cli-bridge] ${model} failed (${msg}), falling back to ${fallbackModel}`);
677
+ metrics.recordRequest(model, primaryDuration, false);
678
+ const reason = isTimeout ? "timeout by supervisor" : msg;
679
+ opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
680
+ const fallbackStart = Date.now();
614
681
  try {
615
- result = await routeToCliRunner(fallbackModel, cleanMessages, opts.timeoutMs ?? 120_000, routeOpts);
682
+ result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
683
+ metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, true);
616
684
  usedModel = fallbackModel;
617
685
  opts.log(`[cli-bridge] fallback to ${fallbackModel} succeeded`);
618
686
  } catch (fallbackErr) {
687
+ metrics.recordRequest(fallbackModel, Date.now() - fallbackStart, false);
619
688
  const fallbackMsg = (fallbackErr as Error).message;
620
689
  opts.warn(`[cli-bridge] fallback ${fallbackModel} also failed: ${fallbackMsg}`);
621
- res.writeHead(500, { "Content-Type": "application/json" });
622
- res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
690
+ if (sseHeadersSent) {
691
+ res.write(`data: ${JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } })}\n\n`);
692
+ res.write("data: [DONE]\n\n");
693
+ res.end();
694
+ } else {
695
+ res.writeHead(500, { "Content-Type": "application/json" });
696
+ res.end(JSON.stringify({ error: { message: `${model}: ${msg} | fallback ${fallbackModel}: ${fallbackMsg}`, type: "cli_error" } }));
697
+ }
623
698
  return;
624
699
  }
625
700
  } else {
701
+ metrics.recordRequest(model, primaryDuration, false);
626
702
  opts.warn(`[cli-bridge] CLI error for ${model}: ${msg}`);
627
- res.writeHead(500, { "Content-Type": "application/json" });
628
- res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
703
+ if (sseHeadersSent) {
704
+ res.write(`data: ${JSON.stringify({ error: { message: msg, type: "cli_error" } })}\n\n`);
705
+ res.write("data: [DONE]\n\n");
706
+ res.end();
707
+ } else {
708
+ res.writeHead(500, { "Content-Type": "application/json" });
709
+ res.end(JSON.stringify({ error: { message: msg, type: "cli_error" } }));
710
+ }
629
711
  return;
630
712
  }
631
713
  } finally {
632
- // Clean up temp media files after response
714
+ if (keepaliveInterval) clearInterval(keepaliveInterval);
633
715
  cleanupMediaFiles(mediaFiles);
634
716
  }
635
717
 
@@ -637,12 +719,7 @@ async function handleRequest(
637
719
  const finishReason = hasToolCalls ? "tool_calls" : "stop";
638
720
 
639
721
  if (stream) {
640
- res.writeHead(200, {
641
- "Content-Type": "text/event-stream",
642
- "Cache-Control": "no-cache",
643
- Connection: "keep-alive",
644
- ...corsHeaders(),
645
- });
722
+ // SSE headers already sent above — stream response chunks directly
646
723
 
647
724
  if (hasToolCalls) {
648
725
  // Stream tool_calls in OpenAI SSE format
@@ -95,6 +95,8 @@ function buildMinimalEnv(): Record<string, string> {
95
95
  /** Auto-cleanup interval: 30 minutes. */
96
96
  const SESSION_TTL_MS = 30 * 60 * 1000;
97
97
  const CLEANUP_INTERVAL_MS = 5 * 60 * 1000;
98
+ /** Grace period between SIGTERM and SIGKILL for session termination. */
99
+ const KILL_GRACE_MS = 5_000;
98
100
 
99
101
  export class SessionManager {
100
102
  private sessions = new Map<string, SessionEntry>();
@@ -213,12 +215,19 @@ export class SessionManager {
213
215
  }
214
216
  }
215
217
 
216
- /** Send SIGTERM to the session process. */
218
+ /**
219
+ * Gracefully terminate a session: SIGTERM first, then SIGKILL after grace period.
220
+ * This prevents the ambiguous "exit 143 (no output)" pattern.
221
+ */
217
222
  kill(sessionId: string): boolean {
218
223
  const entry = this.sessions.get(sessionId);
219
224
  if (!entry || entry.status !== "running") return false;
220
225
  entry.status = "killed";
221
226
  entry.proc.kill("SIGTERM");
227
+ // If the process doesn't exit within the grace period, force-kill it
228
+ setTimeout(() => {
229
+ try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
230
+ }, KILL_GRACE_MS);
222
231
  return true;
223
232
  }
224
233
 
@@ -238,7 +247,7 @@ export class SessionManager {
238
247
  return result;
239
248
  }
240
249
 
241
- /** Remove sessions older than SESSION_TTL_MS. Kill running ones first. Clean up isolated workdirs. */
250
+ /** Remove sessions older than SESSION_TTL_MS. Kill running ones with graceful SIGTERM→SIGKILL. */
242
251
  cleanup(): void {
243
252
  const now = Date.now();
244
253
  for (const [sessionId, entry] of this.sessions) {
@@ -246,6 +255,10 @@ export class SessionManager {
246
255
  if (entry.status === "running") {
247
256
  entry.proc.kill("SIGTERM");
248
257
  entry.status = "killed";
258
+ // Escalate to SIGKILL after grace period
259
+ setTimeout(() => {
260
+ try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
261
+ }, KILL_GRACE_MS);
249
262
  }
250
263
  // Clean up isolated workdir if it wasn't cleaned on exit
251
264
  if (entry.isolatedWorkdir) {
@@ -258,17 +271,20 @@ export class SessionManager {
258
271
  sweepOrphanedWorkdirs();
259
272
  }
260
273
 
261
- /** Stop the cleanup timer (for graceful shutdown). */
274
+ /** Stop the cleanup timer (for graceful shutdown). SIGTERM all sessions, SIGKILL after grace. */
262
275
  stop(): void {
263
276
  if (this.cleanupTimer) {
264
277
  clearInterval(this.cleanupTimer);
265
278
  this.cleanupTimer = null;
266
279
  }
267
- // Kill all running sessions and clean up their workdirs
280
+ // Kill all running sessions with graceful SIGTERM→SIGKILL escalation
268
281
  for (const [, entry] of this.sessions) {
269
282
  if (entry.status === "running") {
270
283
  entry.proc.kill("SIGTERM");
271
284
  entry.status = "killed";
285
+ setTimeout(() => {
286
+ try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
287
+ }, KILL_GRACE_MS);
272
288
  }
273
289
  if (entry.isolatedWorkdir) {
274
290
  cleanupWorkdir(entry.isolatedWorkdir);
@@ -6,6 +6,7 @@
6
6
  */
7
7
 
8
8
  import type { BrowserContext } from "playwright";
9
+ import type { MetricsSnapshot } from "./metrics.js";
9
10
 
10
11
  export interface StatusProvider {
11
12
  name: string;
@@ -22,6 +23,8 @@ export interface StatusTemplateOptions {
22
23
  models: Array<{ id: string; name: string; contextWindow: number; maxTokens: number }>;
23
24
  /** Maps model ID → slash command name (e.g. "openai-codex/gpt-5.3-codex" → "/cli-codex") */
24
25
  modelCommands?: Record<string, string>;
26
+ /** In-memory metrics snapshot — optional for backward compat */
27
+ metrics?: MetricsSnapshot;
25
28
  }
26
29
 
27
30
  function statusBadge(p: StatusProvider): { label: string; color: string; dot: string } {
@@ -32,6 +35,114 @@ function statusBadge(p: StatusProvider): { label: string; color: string; dot: st
32
35
  return { label: "Logged in", color: "#3b82f6", dot: "🔵" };
33
36
  }
34
37
 
38
+ // ── Formatting helpers ──────────────────────────────────────────────────────
39
+
40
+ function formatDuration(ms: number): string {
41
+ if (ms < 1000) return `${Math.round(ms)}ms`;
42
+ if (ms < 60_000) return `${(ms / 1000).toFixed(1)}s`;
43
+ return `${(ms / 60_000).toFixed(1)}m`;
44
+ }
45
+
46
+ function formatTokens(n: number): string {
47
+ if (n === 0) return "—";
48
+ if (n < 1000) return String(n);
49
+ if (n < 1_000_000) return `${(n / 1000).toFixed(1)}k`;
50
+ return `${(n / 1_000_000).toFixed(2)}M`;
51
+ }
52
+
53
+ function timeAgo(epochMs: number | null): string {
54
+ if (!epochMs) return "—";
55
+ const diff = Date.now() - epochMs;
56
+ if (diff < 60_000) return "just now";
57
+ if (diff < 3_600_000) return `${Math.floor(diff / 60_000)}m ago`;
58
+ if (diff < 86_400_000) return `${Math.floor(diff / 3_600_000)}h ago`;
59
+ return `${Math.floor(diff / 86_400_000)}d ago`;
60
+ }
61
+
62
+ function formatUptime(startedAt: number): string {
63
+ const diff = Date.now() - startedAt;
64
+ const s = Math.floor(diff / 1000);
65
+ if (s < 60) return `${s}s`;
66
+ const m = Math.floor(s / 60);
67
+ if (m < 60) return `${m}m ${s % 60}s`;
68
+ const h = Math.floor(m / 60);
69
+ if (h < 24) return `${h}h ${m % 60}m`;
70
+ const d = Math.floor(h / 24);
71
+ return `${d}d ${h % 24}h`;
72
+ }
73
+
74
+ function escapeHtml(s: string): string {
75
+ return s.replace(/&/g, "&amp;").replace(/</g, "&lt;").replace(/>/g, "&gt;");
76
+ }
77
+
78
+ // ── Metrics sections ────────────────────────────────────────────────────────
79
+
80
+ function renderMetricsSection(m: MetricsSnapshot): string {
81
+ const errorRate = m.totalRequests > 0 ? ((m.totalErrors / m.totalRequests) * 100).toFixed(1) : "0.0";
82
+ const totalTokens = m.models.reduce((sum, mod) => sum + mod.promptTokens + mod.completionTokens, 0);
83
+
84
+ // Summary cards
85
+ const summaryCards = `
86
+ <div class="summary-grid">
87
+ <div class="summary-card">
88
+ <div class="summary-value">${m.totalRequests}</div>
89
+ <div class="summary-label">Total Requests</div>
90
+ </div>
91
+ <div class="summary-card">
92
+ <div class="summary-value" style="color:${m.totalErrors > 0 ? '#ef4444' : '#22c55e'}">${errorRate}%</div>
93
+ <div class="summary-label">Error Rate</div>
94
+ </div>
95
+ <div class="summary-card">
96
+ <div class="summary-value">${formatTokens(totalTokens)}</div>
97
+ <div class="summary-label">Total Tokens</div>
98
+ </div>
99
+ <div class="summary-card">
100
+ <div class="summary-value">${formatUptime(m.startedAt)}</div>
101
+ <div class="summary-label">Uptime</div>
102
+ </div>
103
+ </div>`;
104
+
105
+ // Per-model stats table
106
+ let modelRows: string;
107
+ if (m.models.length === 0) {
108
+ modelRows = `<tr><td colspan="6" style="padding:16px;color:#6b7280;text-align:center;font-style:italic">No requests recorded yet.</td></tr>`;
109
+ } else {
110
+ modelRows = m.models.map(mod => {
111
+ const avgLatency = mod.requests > 0 ? mod.totalLatencyMs / mod.requests : 0;
112
+ const modErrorRate = mod.requests > 0 ? ((mod.errors / mod.requests) * 100).toFixed(1) : "0.0";
113
+ return `
114
+ <tr>
115
+ <td class="metrics-cell"><code style="color:#93c5fd">${escapeHtml(mod.model)}</code></td>
116
+ <td class="metrics-cell" style="text-align:right">${mod.requests}</td>
117
+ <td class="metrics-cell" style="text-align:right;color:${mod.errors > 0 ? '#ef4444' : '#6b7280'}">${mod.errors} <span style="color:#6b7280;font-size:11px">(${modErrorRate}%)</span></td>
118
+ <td class="metrics-cell" style="text-align:right">${formatDuration(avgLatency)}</td>
119
+ <td class="metrics-cell" style="text-align:right">${formatTokens(mod.promptTokens)} / ${formatTokens(mod.completionTokens)}</td>
120
+ <td class="metrics-cell" style="text-align:right;color:#9ca3af">${timeAgo(mod.lastRequestAt)}</td>
121
+ </tr>`;
122
+ }).join("");
123
+ }
124
+
125
+ const modelTable = `
126
+ <div class="card">
127
+ <div class="card-header">Per-Model Stats</div>
128
+ <table class="metrics-table">
129
+ <thead>
130
+ <tr style="background:#13151f">
131
+ <th class="metrics-th" style="text-align:left">Model</th>
132
+ <th class="metrics-th" style="text-align:right">Requests</th>
133
+ <th class="metrics-th" style="text-align:right">Errors</th>
134
+ <th class="metrics-th" style="text-align:right">Avg Latency</th>
135
+ <th class="metrics-th" style="text-align:right">Tokens (in/out)</th>
136
+ <th class="metrics-th" style="text-align:right">Last Request</th>
137
+ </tr>
138
+ </thead>
139
+ <tbody>${modelRows}</tbody>
140
+ </table>
141
+ </div>`;
142
+
143
+ return summaryCards + modelTable;
144
+ }
145
+
35
146
  export function renderStatusPage(opts: StatusTemplateOptions): string {
36
147
  const { version, port, providers, models } = opts;
37
148
 
@@ -66,6 +177,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
66
177
  return `<li style="margin:2px 0;font-size:13px;color:#d1d5db"><code style="color:#93c5fd">${m.id}</code>${cmdBadge}</li>`;
67
178
  }).join("");
68
179
 
180
+ const metricsHtml = opts.metrics ? renderMetricsSection(opts.metrics) : "";
181
+
69
182
  return `<!DOCTYPE html>
70
183
  <html lang="en">
71
184
  <head>
@@ -86,6 +199,13 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
86
199
  ul { list-style: none; padding: 12px 16px; }
87
200
  .footer { color: #374151; font-size: 12px; text-align: center; margin-top: 16px; }
88
201
  code { background: #1e2130; padding: 1px 5px; border-radius: 4px; }
202
+ .summary-grid { display: grid; grid-template-columns: repeat(4, 1fr); gap: 16px; margin-bottom: 24px; }
203
+ .summary-card { background: #1a1d27; border: 1px solid #2d3148; border-radius: 12px; padding: 20px 16px; text-align: center; }
204
+ .summary-value { font-size: 28px; font-weight: 700; color: #f9fafb; margin-bottom: 4px; }
205
+ .summary-label { font-size: 12px; color: #6b7280; text-transform: uppercase; letter-spacing: 0.05em; }
206
+ .metrics-table { width: 100%; border-collapse: collapse; }
207
+ .metrics-th { padding: 10px 16px; font-size: 12px; color: #4b5563; font-weight: 600; }
208
+ .metrics-cell { padding: 10px 16px; font-size: 13px; }
89
209
  </style>
90
210
  </head>
91
211
  <body>
@@ -107,6 +227,8 @@ export function renderStatusPage(opts: StatusTemplateOptions): string {
107
227
  </table>
108
228
  </div>
109
229
 
230
+ ${metricsHtml}
231
+
110
232
  <div class="models">
111
233
  <div class="card">
112
234
  <div class="card-header">CLI Models (${cliModels.length})</div>
@@ -265,3 +265,75 @@ describe("Codex auto-git-init via routeToCliRunner", () => {
265
265
  expect(mockExecSync).toHaveBeenCalledWith("git init", expect.objectContaining({ cwd: "/no-git-dir" }));
266
266
  });
267
267
  });
268
+
269
+ // ──────────────────────────────────────────────────────────────────────────────
270
+ // Timeout handling: graceful SIGTERM → SIGKILL and exit 143 annotation
271
+ // ──────────────────────────────────────────────────────────────────────────────
272
+
273
+ import { runCli, annotateExitError } from "../src/cli-runner.js";
274
+
275
+ describe("runCli() timeout handling", () => {
276
+ it("does NOT pass timeout to spawn options (manual timer instead)", async () => {
277
+ mockSpawn.mockImplementation(() => makeFakeProc("ok", 0));
278
+ await runCli("echo", [], "hello", 60_000);
279
+ const spawnOpts = mockSpawn.mock.calls[0][2];
280
+ expect(spawnOpts.timeout).toBeUndefined();
281
+ });
282
+
283
+ it("sends SIGTERM after timeout fires", async () => {
284
+ vi.useFakeTimers();
285
+ const proc = new EventEmitter() as any;
286
+ proc.stdout = new EventEmitter();
287
+ proc.stderr = new EventEmitter();
288
+ proc.stdin = { write: vi.fn((_d: string, _e: string, cb?: () => void) => { cb?.(); }), end: vi.fn() };
289
+ proc.kill = vi.fn(() => { proc.emit("close", 143); });
290
+ proc.killed = false;
291
+ mockSpawn.mockImplementation(() => proc);
292
+
293
+ const logMessages: string[] = [];
294
+ const promise = runCli("claude", [], "prompt", 100, { log: (m) => logMessages.push(m) });
295
+
296
+ // Advance past the timeout
297
+ vi.advanceTimersByTime(101);
298
+
299
+ const result = await promise;
300
+ expect(proc.kill).toHaveBeenCalledWith("SIGTERM");
301
+ expect(result.timedOut).toBe(true);
302
+ expect(result.exitCode).toBe(143);
303
+ expect(logMessages.some(m => m.includes("timeout") && m.includes("SIGTERM"))).toBe(true);
304
+ vi.useRealTimers();
305
+ });
306
+
307
+ it("sets timedOut=false for normal exits", async () => {
308
+ mockSpawn.mockImplementation(() => makeFakeProc("output", 0));
309
+ const result = await runCli("echo", [], "hello", 60_000);
310
+ expect(result.timedOut).toBe(false);
311
+ expect(result.exitCode).toBe(0);
312
+ });
313
+ });
314
+
315
+ describe("annotateExitError()", () => {
316
+ it("annotates exit 143 as timeout", () => {
317
+ const msg = annotateExitError(143, "(no output)", false, "cli-claude/claude-sonnet-4-6");
318
+ expect(msg).toContain("timeout");
319
+ expect(msg).toContain("supervisor");
320
+ expect(msg).toContain("cli-claude/claude-sonnet-4-6");
321
+ });
322
+
323
+ it("annotates when timedOut is true regardless of exit code", () => {
324
+ const msg = annotateExitError(1, "some error", true, "cli-claude/claude-sonnet-4-6");
325
+ expect(msg).toContain("timeout");
326
+ expect(msg).toContain("supervisor");
327
+ });
328
+
329
+ it("returns plain error when not a timeout", () => {
330
+ const msg = annotateExitError(1, "auth error", false, "cli-claude/claude-sonnet-4-6");
331
+ expect(msg).toBe("auth error");
332
+ expect(msg).not.toContain("timeout");
333
+ });
334
+
335
+ it("returns (no output) placeholder when stderr is empty and not a timeout", () => {
336
+ const msg = annotateExitError(1, "", false, "cli-claude/claude-sonnet-4-6");
337
+ expect(msg).toBe("(no output)");
338
+ });
339
+ });