@elvatis_com/openclaw-cli-bridge-elvatis 3.2.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,187 @@
1
+ # Handover: CLI Session Resume Pattern
2
+
3
+ ## Problem Solved
4
+
5
+ Spawning fresh CLI processes (`claude -p`, `gemini -p`, `codex exec`) for every request forces the model to re-process the entire conversation history (20KB+) from scratch. This causes:
6
+ - **Silent hangs** — Sonnet goes completely silent (zero stdout) ~50% of the time on large prompts
7
+ - **Slow responses** — 80-120s per request instead of 5-10s
8
+ - **Wasted tokens** — the full history is re-tokenized on every call
9
+
10
+ ## Solution: Session Resume
11
+
12
+ Instead of one-shot processes, maintain one persistent session per model. The first request creates the session; subsequent requests resume it, so the CLI keeps the full conversation context and only the new message has to be sent.
13
+
14
+ ## Implementation by CLI Tool
15
+
16
+ ### Claude Code (`claude`)
17
+
18
+ ```bash
19
+ # First request — create session
20
+ echo "user prompt" | claude -p \
21
+ --session-id "550e8400-e29b-41d4-a716-446655440000" \
22
+ --model claude-sonnet-4-6 \
23
+ --output-format text \
24
+ --permission-mode bypassPermissions \
25
+ --dangerously-skip-permissions
26
+
27
+ # Subsequent requests — resume (Claude has full context, only new message needed)
28
+ echo "follow-up prompt" | claude -p \
29
+ --resume "550e8400-e29b-41d4-a716-446655440000" \
30
+ --model claude-sonnet-4-6 \
31
+ --output-format text \
32
+ --permission-mode bypassPermissions \
33
+ --dangerously-skip-permissions
34
+ ```
35
+
36
+ **Key flags:**
37
+ - `--session-id <uuid>` — creates a new session with this ID (first request)
38
+ - `--resume <uuid>` — resumes an existing session (subsequent requests)
39
+ - Both work with `-p` (print/headless mode)
40
+ - Session files are stored internally by the Claude CLI (under `~/.claude/projects/`)
41
+
42
+ ### Gemini CLI (`gemini`)
43
+
44
+ ```bash
45
+ # First request — auto-creates session
46
+ echo "user prompt" | gemini -m gemini-2.5-flash -p "" --approval-mode yolo
47
+
48
+ # Subsequent requests — resume by UUID
49
+ echo "follow-up" | gemini -m gemini-2.5-flash -p "" --resume "ad79893c-4e3d-40e6-83e7-400e49dba0d6" --approval-mode yolo
50
+ ```
51
+
52
+ **Key flags:**
53
+ - `--resume <uuid>` — resume by session UUID
54
+ - `--list-sessions` — list available sessions
55
+ - Session UUID is visible in `--list-sessions` output
56
+
57
+ **Note:** Gemini has no `--session-id` flag for creating a session with a specific UUID. A session is auto-created on the first request, and its UUID can be read from `--list-sessions` or from the output. The bridge instead generates its own UUID and passes it as `--resume` from the second request onward (the first request runs without `--resume`), relying on Gemini to create a new session when that UUID doesn't exist.
58
+
59
+ ### OpenAI Codex (`codex`)
60
+
61
+ ```bash
62
+ # First request — auto-creates session
63
+ echo "user prompt" | codex exec --model gpt-5.3-codex --full-auto
64
+
65
+ # Subsequent requests — resume subcommand
66
+ echo "follow-up" | codex exec resume "550e8400-xxxx" --model gpt-5.3-codex --full-auto
67
+ ```
68
+
69
+ **Key flags:**
70
+ - `codex exec resume <session-id>` — resume subcommand (not a flag)
71
+ - `--ephemeral` — skip session persistence (opposite of what we want)
72
+ - Session ID is a UUID
73
+
74
+ ## Session Registry Pattern (TypeScript)
75
+
76
+ ```typescript
77
+ interface CliSessionEntry {
78
+ sessionId: string; // UUID
79
+ provider: string; // "claude" | "gemini" | "codex"
80
+ model: string; // e.g. "claude-sonnet-4-6"
81
+ createdAt: number; // epoch ms
82
+ lastUsedAt: number; // epoch ms
83
+ requestCount: number; // total requests in this session
84
+ }
85
+
86
+ // Persist to JSON file (resolve "~" with os.homedir() first — Node's fs APIs do not expand it)
87
+ const SESSIONS_FILE = "~/.openclaw/cli-bridge/cli-sessions.json";
88
+
89
+ // Session lifecycle
90
+ function getOrCreateSession(provider: string, model: string): CliSessionEntry {
91
+ const existing = sessions.get(model);
92
+
93
+ // Reuse if fresh enough
94
+ const TTL = 2 * 60 * 60 * 1000; // 2 hours
95
+ const MAX_REQUESTS = 50; // context rotation
96
+ if (existing &&
97
+ (Date.now() - existing.lastUsedAt) < TTL &&
98
+ existing.requestCount < MAX_REQUESTS) {
99
+ return existing;
100
+ }
101
+
102
+ // Create fresh session
103
+ return { sessionId: randomUUID(), provider, model, ... };
104
+ }
105
+
106
+ // After successful response
107
+ function recordSuccess(model: string): void {
108
+ session.requestCount++;
109
+ session.lastUsedAt = Date.now();
110
+ saveToDisk();
111
+ }
112
+
113
+ // On session error (corrupted, expired, not found)
114
+ function invalidate(model: string): void {
115
+ sessions.delete(model);
116
+ saveToDisk();
117
+ // Next request will auto-create a fresh session
118
+ }
119
+ ```
120
+
121
+ ## Session Expiry Strategy
122
+
123
+ | Condition | Action | Why |
124
+ |-----------|--------|-----|
125
+ | `now - lastUsedAt >= 2 hours` | Create new session | Context may be stale |
126
+ | `requestCount >= 50` | Create new session | Prevent context bloat |
127
+ | CLI returns "session not found" | Invalidate + retry | Session file was cleaned up |
128
+ | CLI returns auth error | Refresh token + retry | OAuth token expired |
129
+ | CLI timeout (exit 143) | Keep session alive | Session is valid, API was slow |
130
+
131
+ ## Performance Impact (measured on openclaw-cli-bridge)
132
+
133
+ | Metric | Before (one-shot) | After (session resume) |
134
+ |--------|-------------------|----------------------|
135
+ | Prompt size per request | 18-25 KB | < 1 KB (new message only) |
136
+ | Sonnet response time | 80-120s (50% hang rate) | 5-10s |
137
+ | Haiku response time | 5-15s | 3-5s |
138
+ | Silent hang rate | ~50% | Near 0% |
139
+
140
+ ## Stream-JSON Mode (Future Enhancement)
141
+
142
+ Claude CLI supports bidirectional streaming via `--input-format stream-json --output-format stream-json --verbose`. This enables:
143
+ - **Persistent process** — don't spawn/kill per request, keep one running
144
+ - **Real-time streaming** — token-by-token output via SSE
145
+ - **Native tool calls** — Claude's own tools (Bash, Read, Write, Edit, Grep)
146
+ - **Rate limit visibility** — `rate_limit_event` messages show quota state
147
+ - **Cost tracking** — per-request cost in USD
148
+
149
+ ```bash
150
+ # Bidirectional streaming session
151
+ echo '{"type":"user","message":{"role":"user","content":"hello"}}' | \
152
+ claude -p \
153
+ --model claude-sonnet-4-6 \
154
+ --input-format stream-json \
155
+ --output-format stream-json \
156
+ --verbose \
157
+ --permission-mode bypassPermissions \
158
+ --dangerously-skip-permissions
159
+ ```
160
+
161
+ Response includes `session_id`, tool list, model info, thinking blocks, and full usage metrics. This is the path to a fully persistent agent process.
162
+
163
+ ## Files Reference (openclaw-cli-bridge-elvatis)
164
+
165
+ | File | What it does |
166
+ |------|-------------|
167
+ | `src/cli-runner.ts` | Session registry + `runClaude()`, `runGemini()`, `runCodex()` with resume |
168
+ | `src/config.ts` | `STALE_OUTPUT_TIMEOUT_MS = 30_000` (kill silent processes fast) |
169
+ | `src/tool-protocol.ts` | Tool schema injection + JSON response parsing |
170
+ | `src/proxy-server.ts` | Cross-provider fallback chains, empty-response detection |
171
+ | `src/debug-log.ts` | File-based debug log + SSE streaming |
172
+ | `~/.openclaw/cli-bridge/cli-sessions.json` | Persisted session registry |
173
+ | `~/.openclaw/cli-bridge/debug.log` | Real-time request lifecycle log |
174
+
175
+ ## Key Learnings
176
+
177
+ 1. **Claude Sonnet hangs silently** on large prompts (~50% of the time). NOT RAM (28GB free). Likely API-side rate limiting. Session resume fixes it by keeping prompts small.
178
+
179
+ 2. **Exit code 143 = SIGTERM**, not OOM. Our stale-output detector sends it when the CLI produces zero stdout for 30 seconds.
180
+
181
+ 3. **Haiku ignores JSON tool format** in long conversations — returns conversational text instead of `{"tool_calls":[...]}`. Fix: JSON reminder at the END of the prompt + reject text responses during tool loops.
182
+
183
+ 4. **Empty responses (0 bytes) must trigger fallback**, not be treated as success. The model exits 0 but produces nothing useful.
184
+
185
+ 5. **Cross-provider fallback chains** are essential: `Sonnet → Haiku → Gemini Flash → Codex`. Each provider has different failure modes.
186
+
187
+ 6. **The gateway loads plugins from `~/.openclaw/extensions/`**, NOT from the workspace. Must rsync + `openclaw gateway restart` after every change.
package/README.md CHANGED
@@ -2,7 +2,7 @@
2
2
 
3
3
  > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
4
4
 
5
- **Current version:** `3.2.0`
5
+ **Current version:** `3.3.1`
6
6
 
7
7
  ---
8
8
 
@@ -406,6 +406,15 @@ npm run ci # lint + typecheck + test
406
406
 
407
407
  ## Changelog
408
408
 
409
+ ### v3.3.1
410
+ - **fix:** test requests no longer pollute `debug.log` — test instances (port 0) now skip file logging
411
+ - **fix:** Codex test updated for session resume args
412
+
413
+ ### v3.3.0
414
+ - **feat:** session resume for ALL CLI providers — Claude, Gemini, and Codex all now use persistent sessions (Claude and Gemini via `--resume`, Codex via the `exec resume` subcommand). Unified session registry at `~/.openclaw/cli-bridge/cli-sessions.json`.
415
+ - **feat:** auto-rotation: sessions expire after 2 hours of inactivity OR 50 requests (whichever comes first) to prevent context bloat
416
+ - **feat:** per-provider debug logging: `[GEMINI]`, `[CODEX]` categories with session state
417
+
409
418
  ### v3.2.0
410
419
  - **feat:** Claude session resume — persistent sessions eliminate the 20KB prompt replay that caused Sonnet to hang. First request creates a session (`--session-id`), subsequent requests resume it (`--resume`). Claude keeps the conversation context; the bridge only sends the new message.
411
420
  - **feat:** session registry persisted to `~/.openclaw/cli-bridge/claude-sessions.json` — survives gateway restarts, auto-expires after 2 hours of inactivity
package/SKILL.md CHANGED
@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
68
68
 
69
69
  See `README.md` for full configuration reference and architecture diagram.
70
70
 
71
- **Version:** 3.2.0
71
+ **Version:** 3.3.1
@@ -2,7 +2,7 @@
2
2
  "id": "openclaw-cli-bridge-elvatis",
3
3
  "slug": "openclaw-cli-bridge-elvatis",
4
4
  "name": "OpenClaw CLI Bridge",
5
- "version": "3.2.0",
5
+ "version": "3.3.1",
6
6
  "license": "MIT",
7
7
  "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
8
8
  "providers": [
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
3
- "version": "3.2.0",
3
+ "version": "3.3.1",
4
4
  "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
5
5
  "type": "module",
6
6
  "openclaw": {
package/src/cli-runner.ts CHANGED
@@ -503,18 +503,26 @@ export async function runGemini(
503
503
  opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
504
504
  ): Promise<string> {
505
505
  const model = stripPrefix(modelId);
506
+ const session = getOrCreateSession("gemini", model);
507
+ const isResume = session.requestCount > 0;
508
+
506
509
  // -p "" = headless mode trigger; actual prompt arrives via stdin
507
510
  // --approval-mode yolo: auto-approve all tool executions, never ask questions
508
511
  const args = ["-m", model, "-p", "", "--approval-mode", "yolo"];
512
+ if (isResume) {
513
+ args.push("--resume", session.sessionId);
514
+ }
509
515
  const cwd = workdir ?? tmpdir();
510
516
 
511
517
  // When tools are present, sandwich the conversation between tool instructions.
512
- // The reminder at the end ensures models (especially Haiku) remember the JSON format
513
- // after processing a long conversation history.
514
518
  const effectivePrompt = opts?.tools?.length
515
519
  ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt + "\n\nREMINDER: You MUST respond with ONLY valid JSON — either {\"tool_calls\":[...]} or {\"content\":\"...\"}. Nothing else."
516
520
  : prompt;
517
521
 
522
+ debugLog("GEMINI", `${isResume ? "resume" : "new"} ${model} session=${session.sessionId.slice(0, 8)}`, {
523
+ promptLen: effectivePrompt.length, requestCount: session.requestCount,
524
+ });
525
+
518
526
  const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
519
527
 
520
528
  // Filter out [WARN] lines from stderr (Gemini emits noisy permission warnings)
@@ -525,9 +533,14 @@ export async function runGemini(
525
533
  .trim();
526
534
 
527
535
  if (result.exitCode !== 0 && result.stdout.length === 0) {
536
+ // Session might be invalid — invalidate and let next request create a fresh one
537
+ if (cleanStderr.includes("session") || cleanStderr.includes("resume") || cleanStderr.includes("not found")) {
538
+ invalidateSession(model);
539
+ }
528
540
  throw new Error(`gemini exited ${result.exitCode}: ${annotateExitError(result.exitCode, cleanStderr, result.timedOut, modelId)}`);
529
541
  }
530
542
 
543
+ recordSessionSuccess(model);
531
544
  return result.stdout || cleanStderr;
532
545
  }
533
546
 
@@ -539,59 +552,77 @@ export async function runGemini(
539
552
  // Persistent sessions avoid re-sending the full 20KB prompt on every request.
540
553
  // First call creates a session; subsequent calls resume it with just the new message.
541
554
 
542
- const CLAUDE_SESSIONS_FILE = join(homedir(), ".openclaw", "cli-bridge", "claude-sessions.json");
555
+ // ── Generic CLI session registry ────────────────────────────────────────────
556
+ // Shared by Claude, Gemini, and Codex — persistent sessions avoid replaying
557
+ // the full conversation on every request.
558
+
559
+ const CLI_SESSIONS_FILE = join(homedir(), ".openclaw", "cli-bridge", "cli-sessions.json");
560
+ const SESSION_TTL = 2 * 60 * 60 * 1000; // 2 hours
561
+ const SESSION_MAX_REQUESTS = 50;
543
562
 
544
- interface ClaudeSessionEntry {
563
+ interface CliSessionEntry {
545
564
  sessionId: string;
565
+ provider: string; // "claude" | "gemini" | "codex"
546
566
  model: string;
547
567
  createdAt: number;
548
568
  lastUsedAt: number;
549
569
  requestCount: number;
550
570
  }
551
571
 
552
- const claudeSessions = new Map<string, ClaudeSessionEntry>();
572
+ const cliSessions = new Map<string, CliSessionEntry>();
573
+ let sessionsLoaded = false;
553
574
 
554
- function loadClaudeSessions(): void {
575
+ function loadCliSessions(): void {
576
+ if (sessionsLoaded) return;
577
+ sessionsLoaded = true;
555
578
  try {
556
- const data = JSON.parse(readFileSync(CLAUDE_SESSIONS_FILE, "utf8"));
579
+ const data = JSON.parse(readFileSync(CLI_SESSIONS_FILE, "utf8"));
557
580
  if (Array.isArray(data.sessions)) {
558
- for (const s of data.sessions) claudeSessions.set(s.model, s);
581
+ for (const s of data.sessions) cliSessions.set(s.model, s);
559
582
  }
560
583
  } catch { /* no sessions file yet */ }
561
584
  }
562
585
 
563
- function saveClaudeSessions(): void {
586
+ function saveCliSessions(): void {
564
587
  try {
565
588
  mkdirSync(join(homedir(), ".openclaw", "cli-bridge"), { recursive: true });
566
- writeFileSync(CLAUDE_SESSIONS_FILE, JSON.stringify({
589
+ writeFileSync(CLI_SESSIONS_FILE, JSON.stringify({
567
590
  version: 1,
568
- sessions: [...claudeSessions.values()],
591
+ sessions: [...cliSessions.values()],
569
592
  }, null, 2));
570
593
  } catch { /* best effort */ }
571
594
  }
572
595
 
573
- function getOrCreateSession(model: string): ClaudeSessionEntry {
574
- if (claudeSessions.size === 0) loadClaudeSessions();
575
- const existing = claudeSessions.get(model);
576
- // Reuse session if it's less than 2 hours old
577
- if (existing && (Date.now() - existing.lastUsedAt) < 2 * 60 * 60 * 1000) {
596
+ function getOrCreateSession(provider: string, model: string): CliSessionEntry {
597
+ loadCliSessions();
598
+ const existing = cliSessions.get(model);
599
+ if (existing && (Date.now() - existing.lastUsedAt) < SESSION_TTL && existing.requestCount < SESSION_MAX_REQUESTS) {
578
600
  return existing;
579
601
  }
580
- const entry: ClaudeSessionEntry = {
602
+ if (existing) {
603
+ debugLog("SESSION", `${provider} session ${existing.sessionId.slice(0, 8)} expired`, { reason: existing.requestCount >= SESSION_MAX_REQUESTS ? "max_requests" : "ttl", requestCount: existing.requestCount });
604
+ }
605
+ const entry: CliSessionEntry = {
581
606
  sessionId: randomUUID(),
607
+ provider,
582
608
  model,
583
609
  createdAt: Date.now(),
584
610
  lastUsedAt: Date.now(),
585
611
  requestCount: 0,
586
612
  };
587
- claudeSessions.set(model, entry);
588
- saveClaudeSessions();
613
+ cliSessions.set(model, entry);
614
+ saveCliSessions();
589
615
  return entry;
590
616
  }
591
617
 
618
+ function recordSessionSuccess(model: string): void {
619
+ const s = cliSessions.get(model);
620
+ if (s) { s.requestCount++; s.lastUsedAt = Date.now(); saveCliSessions(); }
621
+ }
622
+
592
623
  function invalidateSession(model: string): void {
593
- claudeSessions.delete(model);
594
- saveClaudeSessions();
624
+ cliSessions.delete(model);
625
+ saveCliSessions();
595
626
  }
596
627
 
597
628
  /**
@@ -611,7 +642,7 @@ export async function runClaude(
611
642
  await ensureClaudeToken();
612
643
 
613
644
  const model = stripPrefix(modelId);
614
- const session = getOrCreateSession(model);
645
+ const session = getOrCreateSession("claude", model);
615
646
  const isResume = session.requestCount > 0;
616
647
 
617
648
  const args: string[] = [
@@ -645,17 +676,14 @@ export async function runClaude(
645
676
 
646
677
  // Session succeeded — update registry
647
678
  if (result.exitCode === 0 || result.stdout.length > 0) {
648
- session.requestCount++;
649
- session.lastUsedAt = Date.now();
650
- saveClaudeSessions();
679
+ recordSessionSuccess(model);
651
680
  return result.stdout;
652
681
  }
653
682
 
654
683
  // Session failed — check if it's a timeout or auth issue
655
684
  if (result.timedOut) {
656
685
  // Don't invalidate session on timeout — it's still valid, just slow
657
- session.lastUsedAt = Date.now();
658
- saveClaudeSessions();
686
+ recordSessionSuccess(model); // keep session alive
659
687
  throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, true, modelId)}`);
660
688
  }
661
689
 
@@ -666,7 +694,7 @@ export async function runClaude(
666
694
  debugLog("CLAUDE", `session ${session.sessionId.slice(0, 8)} invalid, creating fresh`, { error: stderr.slice(0, 100) });
667
695
  invalidateSession(model);
668
696
  // Retry once with a fresh session
669
- const freshSession = getOrCreateSession(model);
697
+ const freshSession = getOrCreateSession("claude", model);
670
698
  const freshArgs = [
671
699
  "-p", "--output-format", "text",
672
700
  "--permission-mode", "bypassPermissions", "--dangerously-skip-permissions",
@@ -674,9 +702,7 @@ export async function runClaude(
674
702
  ];
675
703
  const retry = await runCli("claude", freshArgs, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
676
704
  if (retry.exitCode === 0 || retry.stdout.length > 0) {
677
- freshSession.requestCount++;
678
- freshSession.lastUsedAt = Date.now();
679
- saveClaudeSessions();
705
+ recordSessionSuccess(model);
680
706
  return retry.stdout;
681
707
  }
682
708
  throw new Error(`claude exited ${retry.exitCode}: ${annotateExitError(retry.exitCode, retry.stderr || "(no output)", false, modelId)}`);
@@ -687,9 +713,7 @@ export async function runClaude(
687
713
  await refreshClaudeToken();
688
714
  const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
689
715
  if (retry.exitCode === 0 || retry.stdout.length > 0) {
690
- session.requestCount++;
691
- session.lastUsedAt = Date.now();
692
- saveClaudeSessions();
716
+ recordSessionSuccess(model);
693
717
  return retry.stdout;
694
718
  }
695
719
  const retryStderr = retry.stderr || "(no output)";
@@ -729,7 +753,13 @@ export async function runCodex(
729
753
  opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[]; log?: (msg: string) => void }
730
754
  ): Promise<string> {
731
755
  const model = stripPrefix(modelId);
732
- const args = ["exec", "--model", model, "--full-auto"];
756
+ const session = getOrCreateSession("codex", model);
757
+ const isResume = session.requestCount > 0;
758
+
759
+ // Codex uses "exec resume <session-id>" for resume, "exec" for new
760
+ const args = isResume
761
+ ? ["exec", "resume", session.sessionId, "--model", model, "--full-auto"]
762
+ : ["exec", "--model", model, "--full-auto"];
733
763
 
734
764
  // Codex supports native image input via -i flag
735
765
  if (opts?.mediaFiles?.length) {
@@ -741,23 +771,24 @@ export async function runCodex(
741
771
  }
742
772
 
743
773
  const cwd = workdir ?? homedir();
744
-
745
- // Codex requires a git repo in the working directory
746
774
  ensureGitRepo(cwd);
747
775
 
748
- // When tools are present, sandwich the conversation between tool instructions.
749
- // The reminder at the end ensures models (especially Haiku) remember the JSON format
750
- // after processing a long conversation history.
751
776
  const effectivePrompt = opts?.tools?.length
752
777
  ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt + "\n\nREMINDER: You MUST respond with ONLY valid JSON — either {\"tool_calls\":[...]} or {\"content\":\"...\"}. Nothing else."
753
778
  : prompt;
754
779
 
780
+ debugLog("CODEX", `${isResume ? "resume" : "new"} ${model} session=${session.sessionId.slice(0, 8)}`, {
781
+ promptLen: effectivePrompt.length, requestCount: session.requestCount,
782
+ });
783
+
755
784
  const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
756
785
 
757
786
  if (result.exitCode !== 0 && result.stdout.length === 0) {
787
+ if (isResume) invalidateSession(model); // session might be stale
758
788
  throw new Error(`codex exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, modelId)}`);
759
789
  }
760
790
 
791
+ recordSessionSuccess(model);
761
792
  return result.stdout || result.stderr;
762
793
  }
763
794
 
package/src/debug-log.ts CHANGED
@@ -38,11 +38,19 @@ function ts(): string {
38
38
  return new Date().toISOString();
39
39
  }
40
40
 
41
+ /**
42
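+ * Global on/off switch for debug file logging. Nothing is auto-detected (NODE_ENV is not inspected): callers must disable it explicitly, as the proxy server does for test instances (port 0).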
43
+ * Without this, every test run pollutes the production debug log with 43+ fake requests.
44
+ */
45
+ let _enabled = true;
46
+ export function setDebugLogEnabled(enabled: boolean): void { _enabled = enabled; }
47
+
41
48
  /**
42
49
  * Append a debug line to the log file.
43
50
  * Non-blocking, never throws — logging must not crash the bridge.
44
51
  */
45
52
  export function debugLog(category: string, message: string, data?: Record<string, unknown>): void {
53
+ if (!_enabled) return;
46
54
  try {
47
55
  ensureDir();
48
56
  rotate();
@@ -34,7 +34,7 @@ import {
34
34
  DEFAULT_MODEL_TIMEOUTS,
35
35
  TOOL_ROUTING_THRESHOLD,
36
36
  } from "./config.js";
37
- import { debugLog, DEBUG_LOG_PATH, getLogTail, watchLogFile } from "./debug-log.js";
37
+ import { debugLog, DEBUG_LOG_PATH, getLogTail, watchLogFile, setDebugLogEnabled } from "./debug-log.js";
38
38
 
39
39
  // ── Active request tracking ─────────────────────────────────────────────────
40
40
 
@@ -212,6 +212,9 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
212
212
  reject(err);
213
213
  }
214
214
  });
215
+ // Disable debug file logging for test instances (port 0) to avoid polluting production logs
216
+ if (opts.port === 0) setDebugLogEnabled(false);
217
+
215
218
  server.listen(opts.port, "127.0.0.1", () => {
216
219
  opts.log(
217
220
  `[cli-bridge] proxy listening on :${opts.port}`
@@ -87,7 +87,7 @@ describe("runCodex()", () => {
87
87
  expect(result).toBe("codex result");
88
88
  expect(mockSpawn).toHaveBeenCalledWith(
89
89
  "codex",
90
- ["exec", "--model", "gpt-5.3-codex", "--full-auto"],
90
+ expect.arrayContaining(["exec", "--model", "gpt-5.3-codex", "--full-auto"]),
91
91
  expect.any(Object)
92
92
  );
93
93
  });