@elvatis_com/openclaw-cli-bridge-elvatis 2.4.0 → 2.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +24 -3
- package/SKILL.md +1 -1
- package/index.ts +19 -2
- package/openclaw.plugin.json +21 -2
- package/package.json +1 -1
- package/src/cli-runner.ts +113 -25
- package/src/proxy-server.ts +22 -3
- package/src/session-manager.ts +20 -4
- package/test/cli-runner-extended.test.ts +72 -0
package/README.md
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
> OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
|
|
4
4
|
|
|
5
|
-
**Current version:** `2.
|
|
5
|
+
**Current version:** `2.5.0`
|
|
6
6
|
|
|
7
7
|
---
|
|
8
8
|
|
|
@@ -282,7 +282,17 @@ In `~/.openclaw/openclaw.json` → `plugins.entries.openclaw-cli-bridge-elvatis.
|
|
|
282
282
|
"enableProxy": true, // start local CLI proxy server (default: true)
|
|
283
283
|
"proxyPort": 31337, // proxy port (default: 31337)
|
|
284
284
|
"proxyApiKey": "cli-bridge", // key between OpenClaw vllm provider and proxy (default: "cli-bridge")
|
|
285
|
-
"proxyTimeoutMs":
|
|
285
|
+
"proxyTimeoutMs": 300000, // base CLI subprocess timeout in ms (default: 300s, scales dynamically)
|
|
286
|
+
"modelTimeouts": { // per-model timeout overrides in ms (optional)
|
|
287
|
+
"cli-claude/claude-opus-4-6": 300000, // 5 min — heavy/agentic tasks
|
|
288
|
+
"cli-claude/claude-sonnet-4-6": 180000, // 3 min — interactive chat
|
|
289
|
+
"cli-claude/claude-haiku-4-5": 90000, // 90s — fast responses
|
|
290
|
+
"cli-gemini/gemini-2.5-pro": 180000,
|
|
291
|
+
"cli-gemini/gemini-2.5-flash": 90000,
|
|
292
|
+
"openai-codex/gpt-5.4": 300000,
|
|
293
|
+
"openai-codex/gpt-5.3-codex": 180000,
|
|
294
|
+
"openai-codex/gpt-5.1-codex-mini": 90000
|
|
295
|
+
}
|
|
286
296
|
}
|
|
287
297
|
```
|
|
288
298
|
|
|
@@ -368,7 +378,7 @@ Model fallback (v1.9.0):
|
|
|
368
378
|
```bash
|
|
369
379
|
npm run lint # eslint (TypeScript-aware)
|
|
370
380
|
npm run typecheck # tsc --noEmit
|
|
371
|
-
npm test # vitest run (
|
|
381
|
+
npm test # vitest run (217 tests)
|
|
372
382
|
npm run ci # lint + typecheck + test
|
|
373
383
|
```
|
|
374
384
|
|
|
@@ -376,6 +386,17 @@ npm run ci # lint + typecheck + test
|
|
|
376
386
|
|
|
377
387
|
## Changelog
|
|
378
388
|
|
|
389
|
+
### v2.5.0
|
|
390
|
+
- **feat:** Graceful timeout handling — replaces Node's `spawn({ timeout })` with manual SIGTERM→SIGKILL sequence (5s grace period). Exit 143 is now clearly annotated as "timeout by supervisor" in logs, not a cryptic model error.
|
|
391
|
+
- **feat:** Per-model timeout profiles — new `modelTimeouts` config option sets sensible defaults per model: Opus 5 min, Sonnet 3 min, Haiku 90s, Flash models 90s. Scales dynamically with conversation size (+2s/msg beyond 10, +5s/tool), capped at 10 min total.
|
|
392
|
+
- **feat:** Timeout logging — every timeout event logs model, elapsed time, SIGTERM/SIGKILL steps. Fallback messages now show "timeout by supervisor" instead of raw exit codes.
|
|
393
|
+
- **fix:** Base timeout raised from 120s to 300s (the previous 120s limit was causing frequent Exit 143 on normal Sonnet conversations)
|
|
394
|
+
- **fix:** Session manager `kill()`, `cleanup()`, and `stop()` now escalate from SIGTERM to SIGKILL after a 5s grace period instead of sending SIGTERM only
|
|
395
|
+
- **test:** 7 new tests for timeout handling and exit code annotation (217 total)
|
|
396
|
+
|
|
397
|
+
### v2.4.0
|
|
398
|
+
- **feat:** Metrics & health dashboard — request volume, latency, errors, token usage
|
|
399
|
+
|
|
379
400
|
### v2.3.0
|
|
380
401
|
- **feat:** OpenAI tool calling protocol support for all CLI models — tool definitions are injected into the prompt, structured `tool_calls` responses are parsed and returned in OpenAI format
|
|
381
402
|
- **feat:** Multimodal content support — images and audio from webchat are extracted to temp files and passed to CLIs (Codex uses native `-i` flag, Claude/Gemini reference file paths in prompt)
|
package/SKILL.md
CHANGED
package/index.ts
CHANGED
|
@@ -98,6 +98,7 @@ interface CliPluginConfig {
|
|
|
98
98
|
proxyPort?: number;
|
|
99
99
|
proxyApiKey?: string;
|
|
100
100
|
proxyTimeoutMs?: number;
|
|
101
|
+
modelTimeouts?: Record<string, number>;
|
|
101
102
|
grokSessionPath?: string;
|
|
102
103
|
}
|
|
103
104
|
|
|
@@ -987,7 +988,22 @@ const plugin = {
|
|
|
987
988
|
const enableProxy = cfg.enableProxy ?? true;
|
|
988
989
|
const port = cfg.proxyPort ?? DEFAULT_PROXY_PORT;
|
|
989
990
|
const apiKey = cfg.proxyApiKey ?? DEFAULT_PROXY_API_KEY;
|
|
990
|
-
const timeoutMs = cfg.proxyTimeoutMs ??
|
|
991
|
+
const timeoutMs = cfg.proxyTimeoutMs ?? 300_000;
|
|
992
|
+
// Per-model timeout overrides — fall back to sensible defaults if not configured.
|
|
993
|
+
// Interactive/fast models get shorter timeouts, heavy models get more time.
|
|
994
|
+
const defaultModelTimeouts: Record<string, number> = {
|
|
995
|
+
"cli-claude/claude-opus-4-6": 300_000, // 5 min — heavy, agentic tasks
|
|
996
|
+
"cli-claude/claude-sonnet-4-6": 180_000, // 3 min — standard interactive chat
|
|
997
|
+
"cli-claude/claude-haiku-4-5": 90_000, // 90s — fast responses
|
|
998
|
+
"cli-gemini/gemini-2.5-pro": 180_000,
|
|
999
|
+
"cli-gemini/gemini-2.5-flash": 90_000,
|
|
1000
|
+
"cli-gemini/gemini-3-pro-preview": 180_000,
|
|
1001
|
+
"cli-gemini/gemini-3-flash-preview": 90_000,
|
|
1002
|
+
"openai-codex/gpt-5.4": 300_000,
|
|
1003
|
+
"openai-codex/gpt-5.3-codex": 180_000,
|
|
1004
|
+
"openai-codex/gpt-5.1-codex-mini": 90_000,
|
|
1005
|
+
};
|
|
1006
|
+
const modelTimeouts = { ...defaultModelTimeouts, ...cfg.modelTimeouts };
|
|
991
1007
|
const codexAuthPath = cfg.codexAuthPath ?? DEFAULT_CODEX_AUTH_PATH;
|
|
992
1008
|
const grokSessionPath = cfg.grokSessionPath ?? DEFAULT_SESSION_PATH;
|
|
993
1009
|
|
|
@@ -1379,6 +1395,7 @@ const plugin = {
|
|
|
1379
1395
|
version: plugin.version,
|
|
1380
1396
|
modelCommands,
|
|
1381
1397
|
modelFallbacks,
|
|
1398
|
+
modelTimeouts,
|
|
1382
1399
|
getExpiryInfo: () => ({
|
|
1383
1400
|
grok: (() => { const e = loadGrokExpiry(); return e ? formatExpiryInfo(e) : null; })(),
|
|
1384
1401
|
gemini: (() => { const e = loadGeminiExpiry(); return e ? formatGeminiExpiry(e) : null; })(),
|
|
@@ -1415,7 +1432,7 @@ const plugin = {
|
|
|
1415
1432
|
// One final attempt
|
|
1416
1433
|
try {
|
|
1417
1434
|
const server = await startProxyServer({
|
|
1418
|
-
port, apiKey, timeoutMs, modelCommands, modelFallbacks,
|
|
1435
|
+
port, apiKey, timeoutMs, modelCommands, modelFallbacks, modelTimeouts,
|
|
1419
1436
|
log: (msg) => api.logger.info(msg),
|
|
1420
1437
|
warn: (msg) => api.logger.warn(msg),
|
|
1421
1438
|
getGrokContext: () => grokContext,
|
package/openclaw.plugin.json
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
"id": "openclaw-cli-bridge-elvatis",
|
|
3
3
|
"slug": "openclaw-cli-bridge-elvatis",
|
|
4
4
|
"name": "OpenClaw CLI Bridge",
|
|
5
|
-
"version": "2.
|
|
5
|
+
"version": "2.5.0",
|
|
6
6
|
"license": "MIT",
|
|
7
7
|
"description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
|
|
8
8
|
"providers": [
|
|
@@ -34,7 +34,26 @@
|
|
|
34
34
|
},
|
|
35
35
|
"proxyTimeoutMs": {
|
|
36
36
|
"type": "number",
|
|
37
|
-
"description": "
|
|
37
|
+
"description": "Base timeout for CLI responses in ms (default: 300000). Scales dynamically with conversation size."
|
|
38
|
+
},
|
|
39
|
+
"modelTimeouts": {
|
|
40
|
+
"type": "object",
|
|
41
|
+
"description": "Per-model timeout overrides in ms. Keys are model IDs (e.g. 'cli-claude/claude-sonnet-4-6'). Use this to give heavy models more time or limit fast models. When not set, falls back to proxyTimeoutMs.",
|
|
42
|
+
"additionalProperties": {
|
|
43
|
+
"type": "number"
|
|
44
|
+
},
|
|
45
|
+
"default": {
|
|
46
|
+
"cli-claude/claude-opus-4-6": 300000,
|
|
47
|
+
"cli-claude/claude-sonnet-4-6": 180000,
|
|
48
|
+
"cli-claude/claude-haiku-4-5": 90000,
|
|
49
|
+
"cli-gemini/gemini-2.5-pro": 180000,
|
|
50
|
+
"cli-gemini/gemini-2.5-flash": 90000,
|
|
51
|
+
"cli-gemini/gemini-3-pro-preview": 180000,
|
|
52
|
+
"cli-gemini/gemini-3-flash-preview": 90000,
|
|
53
|
+
"openai-codex/gpt-5.4": 300000,
|
|
54
|
+
"openai-codex/gpt-5.3-codex": 180000,
|
|
55
|
+
"openai-codex/gpt-5.1-codex-mini": 90000
|
|
56
|
+
}
|
|
38
57
|
}
|
|
39
58
|
}
|
|
40
59
|
}
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "@elvatis_com/openclaw-cli-bridge-elvatis",
|
|
3
|
-
"version": "2.
|
|
3
|
+
"version": "2.5.0",
|
|
4
4
|
"description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"openclaw": {
|
package/src/cli-runner.ts
CHANGED
|
@@ -278,6 +278,8 @@ export interface CliRunResult {
|
|
|
278
278
|
stdout: string;
|
|
279
279
|
stderr: string;
|
|
280
280
|
exitCode: number;
|
|
281
|
+
/** True when the supervisor timeout fired and the process was signalled (SIGTERM, then SIGKILL if it outlived the grace period). */
|
|
282
|
+
timedOut: boolean;
|
|
281
283
|
}
|
|
282
284
|
|
|
283
285
|
export interface RunCliOptions {
|
|
@@ -287,11 +289,25 @@ export interface RunCliOptions {
|
|
|
287
289
|
*/
|
|
288
290
|
cwd?: string;
|
|
289
291
|
timeoutMs?: number;
|
|
292
|
+
/** Optional logger for timeout events. */
|
|
293
|
+
log?: (msg: string) => void;
|
|
290
294
|
}
|
|
291
295
|
|
|
296
|
+
/**
|
|
297
|
+
* Grace period between SIGTERM and SIGKILL when a timeout fires.
|
|
298
|
+
* Gives the CLI process 5 seconds to flush output and exit cleanly.
|
|
299
|
+
*/
|
|
300
|
+
const TIMEOUT_GRACE_MS = 5_000;
|
|
301
|
+
|
|
292
302
|
/**
|
|
293
303
|
* Spawn a CLI and deliver the prompt via stdin.
|
|
294
304
|
*
|
|
305
|
+
* Timeout handling (replaces Node's spawn({ timeout }) for better control):
|
|
306
|
+
* 1. After `timeoutMs`, send SIGTERM and log a clear message.
|
|
307
|
+
* 2. If the process doesn't exit within TIMEOUT_GRACE_MS (5s), send SIGKILL.
|
|
308
|
+
* 3. The result's `timedOut` flag is set so callers can distinguish
|
|
309
|
+
* supervisor timeouts from real CLI errors.
|
|
310
|
+
*
|
|
295
311
|
* cwd defaults to homedir() so CLIs that scan the working directory for
|
|
296
312
|
* project context (like Gemini) don't accidentally enter agentic mode.
|
|
297
313
|
*/
|
|
@@ -303,16 +319,40 @@ export function runCli(
|
|
|
303
319
|
opts: RunCliOptions = {}
|
|
304
320
|
): Promise<CliRunResult> {
|
|
305
321
|
const cwd = opts.cwd ?? homedir();
|
|
322
|
+
const log = opts.log ?? (() => {});
|
|
306
323
|
|
|
307
324
|
return new Promise((resolve, reject) => {
|
|
325
|
+
// Do NOT pass timeout to spawn() — we manage it ourselves for graceful shutdown.
|
|
308
326
|
const proc = spawn(cmd, args, {
|
|
309
|
-
timeout: timeoutMs,
|
|
310
327
|
env: buildMinimalEnv(),
|
|
311
328
|
cwd,
|
|
312
329
|
});
|
|
313
330
|
|
|
314
331
|
let stdout = "";
|
|
315
332
|
let stderr = "";
|
|
333
|
+
let timedOut = false;
|
|
334
|
+
let killTimer: ReturnType<typeof setTimeout> | null = null;
|
|
335
|
+
let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
|
|
336
|
+
|
|
337
|
+
const clearTimers = () => {
|
|
338
|
+
if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
|
|
339
|
+
if (killTimer) { clearTimeout(killTimer); killTimer = null; }
|
|
340
|
+
};
|
|
341
|
+
|
|
342
|
+
// ── Timeout sequence: SIGTERM → grace → SIGKILL ──────────────────────
|
|
343
|
+
timeoutTimer = setTimeout(() => {
|
|
344
|
+
timedOut = true;
|
|
345
|
+
const elapsed = Math.round(timeoutMs / 1000);
|
|
346
|
+
log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
|
|
347
|
+
proc.kill("SIGTERM");
|
|
348
|
+
|
|
349
|
+
killTimer = setTimeout(() => {
|
|
350
|
+
if (!proc.killed) {
|
|
351
|
+
log(`[cli-bridge] ${cmd} still running after ${TIMEOUT_GRACE_MS / 1000}s grace, sending SIGKILL`);
|
|
352
|
+
proc.kill("SIGKILL");
|
|
353
|
+
}
|
|
354
|
+
}, TIMEOUT_GRACE_MS);
|
|
355
|
+
}, timeoutMs);
|
|
316
356
|
|
|
317
357
|
proc.stdin.write(prompt, "utf8", () => {
|
|
318
358
|
proc.stdin.end();
|
|
@@ -322,10 +362,12 @@ export function runCli(
|
|
|
322
362
|
proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
|
|
323
363
|
|
|
324
364
|
proc.on("close", (code) => {
|
|
325
|
-
|
|
365
|
+
clearTimers();
|
|
366
|
+
resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0, timedOut });
|
|
326
367
|
});
|
|
327
368
|
|
|
328
369
|
proc.on("error", (err) => {
|
|
370
|
+
clearTimers();
|
|
329
371
|
reject(new Error(`Failed to spawn '${cmd}': ${err.message}`));
|
|
330
372
|
});
|
|
331
373
|
});
|
|
@@ -334,6 +376,7 @@ export function runCli(
|
|
|
334
376
|
/**
|
|
335
377
|
* Spawn a CLI with the prompt delivered as a CLI argument (not stdin).
|
|
336
378
|
* Used by OpenCode which expects `opencode run "prompt"`.
|
|
379
|
+
* Uses the same graceful SIGTERM→SIGKILL timeout sequence as runCli.
|
|
337
380
|
*/
|
|
338
381
|
export function runCliWithArg(
|
|
339
382
|
cmd: string,
|
|
@@ -342,30 +385,66 @@ export function runCliWithArg(
|
|
|
342
385
|
opts: RunCliOptions = {}
|
|
343
386
|
): Promise<CliRunResult> {
|
|
344
387
|
const cwd = opts.cwd ?? homedir();
|
|
388
|
+
const log = opts.log ?? (() => {});
|
|
345
389
|
|
|
346
390
|
return new Promise((resolve, reject) => {
|
|
347
391
|
const proc = spawn(cmd, args, {
|
|
348
|
-
timeout: timeoutMs,
|
|
349
392
|
env: buildMinimalEnv(),
|
|
350
393
|
cwd,
|
|
351
394
|
});
|
|
352
395
|
|
|
353
396
|
let stdout = "";
|
|
354
397
|
let stderr = "";
|
|
398
|
+
let timedOut = false;
|
|
399
|
+
let killTimer: ReturnType<typeof setTimeout> | null = null;
|
|
400
|
+
let timeoutTimer: ReturnType<typeof setTimeout> | null = null;
|
|
401
|
+
|
|
402
|
+
const clearTimers = () => {
|
|
403
|
+
if (timeoutTimer) { clearTimeout(timeoutTimer); timeoutTimer = null; }
|
|
404
|
+
if (killTimer) { clearTimeout(killTimer); killTimer = null; }
|
|
405
|
+
};
|
|
406
|
+
|
|
407
|
+
timeoutTimer = setTimeout(() => {
|
|
408
|
+
timedOut = true;
|
|
409
|
+
const elapsed = Math.round(timeoutMs / 1000);
|
|
410
|
+
log(`[cli-bridge] timeout after ${elapsed}s for ${cmd}, sending SIGTERM`);
|
|
411
|
+
proc.kill("SIGTERM");
|
|
412
|
+
|
|
413
|
+
killTimer = setTimeout(() => {
|
|
414
|
+
if (!proc.killed) {
|
|
415
|
+
log(`[cli-bridge] ${cmd} still running after ${TIMEOUT_GRACE_MS / 1000}s grace, sending SIGKILL`);
|
|
416
|
+
proc.kill("SIGKILL");
|
|
417
|
+
}
|
|
418
|
+
}, TIMEOUT_GRACE_MS);
|
|
419
|
+
}, timeoutMs);
|
|
355
420
|
|
|
356
421
|
proc.stdout.on("data", (d: Buffer) => { stdout += d.toString(); });
|
|
357
422
|
proc.stderr.on("data", (d: Buffer) => { stderr += d.toString(); });
|
|
358
423
|
|
|
359
424
|
proc.on("close", (code) => {
|
|
360
|
-
|
|
425
|
+
clearTimers();
|
|
426
|
+
resolve({ stdout: stdout.trim(), stderr: stderr.trim(), exitCode: code ?? 0, timedOut });
|
|
361
427
|
});
|
|
362
428
|
|
|
363
429
|
proc.on("error", (err) => {
|
|
430
|
+
clearTimers();
|
|
364
431
|
reject(new Error(`Failed to spawn '${cmd}': ${err.message}`));
|
|
365
432
|
});
|
|
366
433
|
});
|
|
367
434
|
}
|
|
368
435
|
|
|
436
|
+
/**
|
|
437
|
+
* Annotate an error message when the run timed out or exit code 143 (SIGTERM) is detected.
|
|
438
|
+
* Makes it clear in logs that this was a supervisor timeout, not a model error.
|
|
439
|
+
*/
|
|
440
|
+
export function annotateExitError(exitCode: number, stderr: string, timedOut: boolean, model: string): string {
|
|
441
|
+
const base = stderr || "(no output)";
|
|
442
|
+
if (timedOut || exitCode === 143) {
|
|
443
|
+
return `timeout: ${model} killed by supervisor (exit ${exitCode}, likely timeout) — ${base}`;
|
|
444
|
+
}
|
|
445
|
+
return base;
|
|
446
|
+
}
|
|
447
|
+
|
|
369
448
|
// ──────────────────────────────────────────────────────────────────────────────
|
|
370
449
|
// Gemini CLI
|
|
371
450
|
// ──────────────────────────────────────────────────────────────────────────────
|
|
@@ -391,7 +470,7 @@ export async function runGemini(
|
|
|
391
470
|
modelId: string,
|
|
392
471
|
timeoutMs: number,
|
|
393
472
|
workdir?: string,
|
|
394
|
-
opts?: { tools?: ToolDefinition[] }
|
|
473
|
+
opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
|
|
395
474
|
): Promise<string> {
|
|
396
475
|
const model = stripPrefix(modelId);
|
|
397
476
|
// -p "" = headless mode trigger; actual prompt arrives via stdin
|
|
@@ -404,7 +483,7 @@ export async function runGemini(
|
|
|
404
483
|
? buildToolPromptBlock(opts.tools) + "\n\n" + prompt
|
|
405
484
|
: prompt;
|
|
406
485
|
|
|
407
|
-
const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd });
|
|
486
|
+
const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
|
|
408
487
|
|
|
409
488
|
// Filter out [WARN] lines from stderr (Gemini emits noisy permission warnings)
|
|
410
489
|
const cleanStderr = result.stderr
|
|
@@ -414,7 +493,7 @@ export async function runGemini(
|
|
|
414
493
|
.trim();
|
|
415
494
|
|
|
416
495
|
if (result.exitCode !== 0 && result.stdout.length === 0) {
|
|
417
|
-
throw new Error(`gemini exited ${result.exitCode}: ${cleanStderr
|
|
496
|
+
throw new Error(`gemini exited ${result.exitCode}: ${annotateExitError(result.exitCode, cleanStderr, result.timedOut, modelId)}`);
|
|
418
497
|
}
|
|
419
498
|
|
|
420
499
|
return result.stdout || cleanStderr;
|
|
@@ -434,7 +513,7 @@ export async function runClaude(
|
|
|
434
513
|
modelId: string,
|
|
435
514
|
timeoutMs: number,
|
|
436
515
|
workdir?: string,
|
|
437
|
-
opts?: { tools?: ToolDefinition[] }
|
|
516
|
+
opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
|
|
438
517
|
): Promise<string> {
|
|
439
518
|
// Proactively refresh OAuth token if it's about to expire (< 5 min remaining).
|
|
440
519
|
// No-op for API-key users.
|
|
@@ -457,15 +536,19 @@ export async function runClaude(
|
|
|
457
536
|
: prompt;
|
|
458
537
|
|
|
459
538
|
const cwd = workdir ?? homedir();
|
|
460
|
-
const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd });
|
|
539
|
+
const result = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
|
|
461
540
|
|
|
462
541
|
// On 401: attempt one token refresh + retry before giving up.
|
|
463
542
|
if (result.exitCode !== 0 && result.stdout.length === 0) {
|
|
543
|
+
// If this was a timeout, don't bother with auth retry — it's a supervisor kill, not a 401.
|
|
544
|
+
if (result.timedOut) {
|
|
545
|
+
throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, true, modelId)}`);
|
|
546
|
+
}
|
|
464
547
|
const stderr = result.stderr || "(no output)";
|
|
465
548
|
if (stderr.includes("401") || stderr.includes("Invalid authentication credentials") || stderr.includes("authentication_error")) {
|
|
466
549
|
// Refresh and retry once
|
|
467
550
|
await refreshClaudeToken();
|
|
468
|
-
const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd });
|
|
551
|
+
const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
|
|
469
552
|
if (retry.exitCode !== 0 && retry.stdout.length === 0) {
|
|
470
553
|
const retryStderr = retry.stderr || "(no output)";
|
|
471
554
|
if (retryStderr.includes("401") || retryStderr.includes("authentication_error") || retryStderr.includes("Invalid authentication credentials")) {
|
|
@@ -478,7 +561,7 @@ export async function runClaude(
|
|
|
478
561
|
}
|
|
479
562
|
return retry.stdout;
|
|
480
563
|
}
|
|
481
|
-
throw new Error(`claude exited ${result.exitCode}: ${stderr}`);
|
|
564
|
+
throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, stderr, false, modelId)}`);
|
|
482
565
|
}
|
|
483
566
|
|
|
484
567
|
return result.stdout;
|
|
@@ -508,7 +591,7 @@ export async function runCodex(
|
|
|
508
591
|
modelId: string,
|
|
509
592
|
timeoutMs: number,
|
|
510
593
|
workdir?: string,
|
|
511
|
-
opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[] }
|
|
594
|
+
opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[]; log?: (msg: string) => void }
|
|
512
595
|
): Promise<string> {
|
|
513
596
|
const model = stripPrefix(modelId);
|
|
514
597
|
const args = ["--model", model, "--quiet", "--full-auto"];
|
|
@@ -532,10 +615,10 @@ export async function runCodex(
|
|
|
532
615
|
? buildToolPromptBlock(opts.tools) + "\n\n" + prompt
|
|
533
616
|
: prompt;
|
|
534
617
|
|
|
535
|
-
const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd });
|
|
618
|
+
const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
|
|
536
619
|
|
|
537
620
|
if (result.exitCode !== 0 && result.stdout.length === 0) {
|
|
538
|
-
throw new Error(`codex exited ${result.exitCode}: ${result.stderr
|
|
621
|
+
throw new Error(`codex exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, modelId)}`);
|
|
539
622
|
}
|
|
540
623
|
|
|
541
624
|
return result.stdout || result.stderr;
|
|
@@ -553,14 +636,15 @@ export async function runOpenCode(
|
|
|
553
636
|
prompt: string,
|
|
554
637
|
_modelId: string,
|
|
555
638
|
timeoutMs: number,
|
|
556
|
-
workdir?: string
|
|
639
|
+
workdir?: string,
|
|
640
|
+
opts?: { log?: (msg: string) => void }
|
|
557
641
|
): Promise<string> {
|
|
558
642
|
const args = ["run", prompt];
|
|
559
643
|
const cwd = workdir ?? homedir();
|
|
560
|
-
const result = await runCliWithArg("opencode", args, timeoutMs, { cwd });
|
|
644
|
+
const result = await runCliWithArg("opencode", args, timeoutMs, { cwd, log: opts?.log });
|
|
561
645
|
|
|
562
646
|
if (result.exitCode !== 0 && result.stdout.length === 0) {
|
|
563
|
-
throw new Error(`opencode exited ${result.exitCode}: ${result.stderr
|
|
647
|
+
throw new Error(`opencode exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, "opencode")}`);
|
|
564
648
|
}
|
|
565
649
|
|
|
566
650
|
return result.stdout || result.stderr;
|
|
@@ -578,14 +662,15 @@ export async function runPi(
|
|
|
578
662
|
prompt: string,
|
|
579
663
|
_modelId: string,
|
|
580
664
|
timeoutMs: number,
|
|
581
|
-
workdir?: string
|
|
665
|
+
workdir?: string,
|
|
666
|
+
opts?: { log?: (msg: string) => void }
|
|
582
667
|
): Promise<string> {
|
|
583
668
|
const args = ["-p", prompt];
|
|
584
669
|
const cwd = workdir ?? homedir();
|
|
585
|
-
const result = await runCliWithArg("pi", args, timeoutMs, { cwd });
|
|
670
|
+
const result = await runCliWithArg("pi", args, timeoutMs, { cwd, log: opts?.log });
|
|
586
671
|
|
|
587
672
|
if (result.exitCode !== 0 && result.stdout.length === 0) {
|
|
588
|
-
throw new Error(`pi exited ${result.exitCode}: ${result.stderr
|
|
673
|
+
throw new Error(`pi exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, "pi")}`);
|
|
589
674
|
}
|
|
590
675
|
|
|
591
676
|
return result.stdout || result.stderr;
|
|
@@ -663,6 +748,8 @@ export interface RouteOptions {
|
|
|
663
748
|
* Passed to CLIs that support native media input (e.g. codex -i).
|
|
664
749
|
*/
|
|
665
750
|
mediaFiles?: MediaFile[];
|
|
751
|
+
/** Logger for timeout and lifecycle events. */
|
|
752
|
+
log?: (msg: string) => void;
|
|
666
753
|
}
|
|
667
754
|
|
|
668
755
|
/**
|
|
@@ -708,12 +795,13 @@ export async function routeToCliRunner(
|
|
|
708
795
|
// Resolve aliases (e.g. gemini-3-pro → gemini-3-pro-preview) after allowlist check
|
|
709
796
|
const resolved = normalizeModelAlias(normalized);
|
|
710
797
|
|
|
798
|
+
const log = opts.log;
|
|
711
799
|
let rawText: string;
|
|
712
|
-
if (resolved.startsWith("cli-gemini/")) rawText = await runGemini(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools });
|
|
713
|
-
else if (resolved.startsWith("cli-claude/")) rawText = await runClaude(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools });
|
|
714
|
-
else if (resolved.startsWith("openai-codex/")) rawText = await runCodex(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, mediaFiles: opts.mediaFiles });
|
|
715
|
-
else if (resolved.startsWith("opencode/")) rawText = await runOpenCode(prompt, resolved, timeoutMs, opts.workdir);
|
|
716
|
-
else if (resolved.startsWith("pi/")) rawText = await runPi(prompt, resolved, timeoutMs, opts.workdir);
|
|
800
|
+
if (resolved.startsWith("cli-gemini/")) rawText = await runGemini(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, log });
|
|
801
|
+
else if (resolved.startsWith("cli-claude/")) rawText = await runClaude(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, log });
|
|
802
|
+
else if (resolved.startsWith("openai-codex/")) rawText = await runCodex(prompt, resolved, timeoutMs, opts.workdir, { tools: opts.tools, mediaFiles: opts.mediaFiles, log });
|
|
803
|
+
else if (resolved.startsWith("opencode/")) rawText = await runOpenCode(prompt, resolved, timeoutMs, opts.workdir, { log });
|
|
804
|
+
else if (resolved.startsWith("pi/")) rawText = await runPi(prompt, resolved, timeoutMs, opts.workdir, { log });
|
|
717
805
|
else throw new Error(
|
|
718
806
|
`Unknown CLI bridge model: "${model}". Use "vllm/cli-gemini/<model>", "vllm/cli-claude/<model>", "openai-codex/<model>", "opencode/<model>", or "pi/<model>".`
|
|
719
807
|
);
|
package/src/proxy-server.ts
CHANGED
|
@@ -82,6 +82,20 @@ export interface ProxyServerOptions {
|
|
|
82
82
|
* with the fallback model. Example: "cli-gemini/gemini-2.5-pro" → "cli-gemini/gemini-2.5-flash"
|
|
83
83
|
*/
|
|
84
84
|
modelFallbacks?: Record<string, string>;
|
|
85
|
+
/**
|
|
86
|
+
* Per-model timeout overrides (ms). Keys are model IDs (without "vllm/" prefix).
|
|
87
|
+
* Use this to give heavy models more time or limit fast models.
|
|
88
|
+
*
|
|
89
|
+
* Example:
|
|
90
|
+
* {
|
|
91
|
+
* "cli-claude/claude-sonnet-4-6": 180_000, // 3 min for interactive chat
|
|
92
|
+
* "cli-claude/claude-opus-4-6": 300_000, // 5 min for heavy tasks
|
|
93
|
+
* "cli-claude/claude-haiku-4-5": 90_000, // 90s for fast responses
|
|
94
|
+
* }
|
|
95
|
+
*
|
|
96
|
+
* When not set for a model, falls back to proxyTimeoutMs (default 300s base).
|
|
97
|
+
*/
|
|
98
|
+
modelTimeouts?: Record<string, number>;
|
|
85
99
|
}
|
|
86
100
|
|
|
87
101
|
/** Available CLI bridge models for GET /v1/models */
|
|
@@ -623,13 +637,16 @@ async function handleRequest(
|
|
|
623
637
|
// ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
|
|
624
638
|
let result: CliToolResult;
|
|
625
639
|
let usedModel = model;
|
|
626
|
-
const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined };
|
|
640
|
+
const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
|
|
627
641
|
|
|
628
642
|
// ── Dynamic timeout: scale with conversation size ────────────────────────
|
|
629
|
-
|
|
643
|
+
// Per-model timeout takes precedence, then global proxyTimeoutMs, then 300s default.
|
|
644
|
+
const perModelTimeout = opts.modelTimeouts?.[model];
|
|
645
|
+
const baseTimeout = perModelTimeout ?? opts.timeoutMs ?? 300_000;
|
|
630
646
|
const msgExtra = Math.max(0, cleanMessages.length - 10) * 2_000;
|
|
631
647
|
const toolExtra = (tools?.length ?? 0) * 5_000;
|
|
632
648
|
const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
|
|
649
|
+
opts.log(`[cli-bridge] ${model} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
|
|
633
650
|
|
|
634
651
|
// ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
|
|
635
652
|
let sseHeadersSent = false;
|
|
@@ -654,10 +671,12 @@ async function handleRequest(
|
|
|
654
671
|
const primaryDuration = Date.now() - cliStart;
|
|
655
672
|
const msg = (err as Error).message;
|
|
656
673
|
// ── Model fallback: retry once with a lighter model if configured ────
|
|
674
|
+
const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
|
|
657
675
|
const fallbackModel = opts.modelFallbacks?.[model];
|
|
658
676
|
if (fallbackModel) {
|
|
659
677
|
metrics.recordRequest(model, primaryDuration, false);
|
|
660
|
-
|
|
678
|
+
const reason = isTimeout ? "timeout by supervisor" : msg;
|
|
679
|
+
opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
|
|
661
680
|
const fallbackStart = Date.now();
|
|
662
681
|
try {
|
|
663
682
|
result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
|
package/src/session-manager.ts
CHANGED
|
@@ -95,6 +95,8 @@ function buildMinimalEnv(): Record<string, string> {
|
|
|
95
95
|
/** Auto-cleanup interval: 30 minutes. */
|
|
96
96
|
const SESSION_TTL_MS = 30 * 60 * 1000;
|
|
97
97
|
const CLEANUP_INTERVAL_MS = 5 * 60 * 1000;
|
|
98
|
+
/** Grace period between SIGTERM and SIGKILL for session termination. */
|
|
99
|
+
const KILL_GRACE_MS = 5_000;
|
|
98
100
|
|
|
99
101
|
export class SessionManager {
|
|
100
102
|
private sessions = new Map<string, SessionEntry>();
|
|
@@ -213,12 +215,19 @@ export class SessionManager {
|
|
|
213
215
|
}
|
|
214
216
|
}
|
|
215
217
|
|
|
216
|
-
/**
|
|
218
|
+
/**
|
|
219
|
+
* Gracefully terminate a session: SIGTERM first, then SIGKILL after grace period.
|
|
220
|
+
* This prevents the ambiguous "exit 143 (no output)" pattern.
|
|
221
|
+
*/
|
|
217
222
|
kill(sessionId: string): boolean {
|
|
218
223
|
const entry = this.sessions.get(sessionId);
|
|
219
224
|
if (!entry || entry.status !== "running") return false;
|
|
220
225
|
entry.status = "killed";
|
|
221
226
|
entry.proc.kill("SIGTERM");
|
|
227
|
+
// If the process doesn't exit within the grace period, force-kill it
|
|
228
|
+
setTimeout(() => {
|
|
229
|
+
try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
|
|
230
|
+
}, KILL_GRACE_MS);
|
|
222
231
|
return true;
|
|
223
232
|
}
|
|
224
233
|
|
|
@@ -238,7 +247,7 @@ export class SessionManager {
|
|
|
238
247
|
return result;
|
|
239
248
|
}
|
|
240
249
|
|
|
241
|
-
/** Remove sessions older than SESSION_TTL_MS. Kill running ones
|
|
250
|
+
/** Remove sessions older than SESSION_TTL_MS. Kill running ones with graceful SIGTERM→SIGKILL. */
|
|
242
251
|
cleanup(): void {
|
|
243
252
|
const now = Date.now();
|
|
244
253
|
for (const [sessionId, entry] of this.sessions) {
|
|
@@ -246,6 +255,10 @@ export class SessionManager {
|
|
|
246
255
|
if (entry.status === "running") {
|
|
247
256
|
entry.proc.kill("SIGTERM");
|
|
248
257
|
entry.status = "killed";
|
|
258
|
+
// Escalate to SIGKILL after grace period
|
|
259
|
+
setTimeout(() => {
|
|
260
|
+
try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
|
|
261
|
+
}, KILL_GRACE_MS);
|
|
249
262
|
}
|
|
250
263
|
// Clean up isolated workdir if it wasn't cleaned on exit
|
|
251
264
|
if (entry.isolatedWorkdir) {
|
|
@@ -258,17 +271,20 @@ export class SessionManager {
|
|
|
258
271
|
sweepOrphanedWorkdirs();
|
|
259
272
|
}
|
|
260
273
|
|
|
261
|
-
/** Stop the cleanup timer (for graceful shutdown). */
|
|
274
|
+
/** Stop the cleanup timer (for graceful shutdown). SIGTERM all sessions, SIGKILL after grace. */
|
|
262
275
|
stop(): void {
|
|
263
276
|
if (this.cleanupTimer) {
|
|
264
277
|
clearInterval(this.cleanupTimer);
|
|
265
278
|
this.cleanupTimer = null;
|
|
266
279
|
}
|
|
267
|
-
// Kill all running sessions
|
|
280
|
+
// Kill all running sessions with graceful SIGTERM → SIGKILL escalation
|
|
268
281
|
for (const [, entry] of this.sessions) {
|
|
269
282
|
if (entry.status === "running") {
|
|
270
283
|
entry.proc.kill("SIGTERM");
|
|
271
284
|
entry.status = "killed";
|
|
285
|
+
setTimeout(() => {
|
|
286
|
+
try { if (!entry.proc.killed) entry.proc.kill("SIGKILL"); } catch { /* already dead */ }
|
|
287
|
+
}, KILL_GRACE_MS);
|
|
272
288
|
}
|
|
273
289
|
if (entry.isolatedWorkdir) {
|
|
274
290
|
cleanupWorkdir(entry.isolatedWorkdir);
|
|
@@ -265,3 +265,75 @@ describe("Codex auto-git-init via routeToCliRunner", () => {
|
|
|
265
265
|
expect(mockExecSync).toHaveBeenCalledWith("git init", expect.objectContaining({ cwd: "/no-git-dir" }));
|
|
266
266
|
});
|
|
267
267
|
});
|
|
268
|
+
|
|
269
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
270
|
+
// Timeout handling: graceful SIGTERM → SIGKILL and exit 143 annotation
|
|
271
|
+
// ──────────────────────────────────────────────────────────────────────────────
|
|
272
|
+
|
|
273
|
+
import { runCli, annotateExitError } from "../src/cli-runner.js";
|
|
274
|
+
|
|
275
|
+
describe("runCli() timeout handling", () => {
  // runCli is expected to run its own timer instead of delegating to the
  // `timeout` spawn option, so the option must be absent.
  it("does NOT pass timeout to spawn options (manual timer instead)", async () => {
    mockSpawn.mockImplementation(() => makeFakeProc("ok", 0));
    await runCli("echo", [], "hello", 60_000);
    const spawnOpts = mockSpawn.mock.calls[0][2];
    expect(spawnOpts.timeout).toBeUndefined();
  });

  it("sends SIGTERM after timeout fires", async () => {
    vi.useFakeTimers();
    // try/finally guarantees real timers are restored even when an assertion
    // below throws; otherwise fake timers would leak into later tests.
    try {
      // Minimal child-process stand-in: kill() immediately reports a close
      // with exit code 143 (128 + SIGTERM).
      const proc = new EventEmitter() as any;
      proc.stdout = new EventEmitter();
      proc.stderr = new EventEmitter();
      proc.stdin = { write: vi.fn((_d: string, _e: string, cb?: () => void) => { cb?.(); }), end: vi.fn() };
      proc.kill = vi.fn(() => { proc.emit("close", 143); });
      proc.killed = false;
      mockSpawn.mockImplementation(() => proc);

      const logMessages: string[] = [];
      const promise = runCli("claude", [], "prompt", 100, { log: (m) => logMessages.push(m) });

      // Advance past the timeout
      vi.advanceTimersByTime(101);

      const result = await promise;
      expect(proc.kill).toHaveBeenCalledWith("SIGTERM");
      expect(result.timedOut).toBe(true);
      expect(result.exitCode).toBe(143);
      expect(logMessages.some(m => m.includes("timeout") && m.includes("SIGTERM"))).toBe(true);
    } finally {
      vi.useRealTimers();
    }
  });

  it("sets timedOut=false for normal exits", async () => {
    mockSpawn.mockImplementation(() => makeFakeProc("output", 0));
    const result = await runCli("echo", [], "hello", 60_000);
    expect(result.timedOut).toBe(false);
    expect(result.exitCode).toBe(0);
  });
});
|
|
314
|
+
|
|
315
|
+
describe("annotateExitError()", () => {
  // Model id passed to every call; only the first case asserts it is echoed back.
  const MODEL_ID = "cli-claude/claude-sonnet-4-6";

  it("annotates exit 143 as timeout", () => {
    const annotated = annotateExitError(143, "(no output)", false, MODEL_ID);
    for (const fragment of ["timeout", "supervisor", "cli-claude/claude-sonnet-4-6"]) {
      expect(annotated).toContain(fragment);
    }
  });

  it("annotates when timedOut is true regardless of exit code", () => {
    const annotated = annotateExitError(1, "some error", true, MODEL_ID);
    for (const fragment of ["timeout", "supervisor"]) {
      expect(annotated).toContain(fragment);
    }
  });

  it("returns plain error when not a timeout", () => {
    const plain = annotateExitError(1, "auth error", false, MODEL_ID);
    expect(plain).toBe("auth error");
    expect(plain).not.toContain("timeout");
  });

  it("returns (no output) placeholder when stderr is empty and not a timeout", () => {
    const placeholder = annotateExitError(1, "", false, MODEL_ID);
    expect(placeholder).toBe("(no output)");
  });
});
|