npm - @elvatis_com/openclaw-cli-bridge-elvatis - Versions diffs - 3.2.0 → 3.3.1 - Mend

@elvatis_com/openclaw-cli-bridge-elvatis 3.2.0 → 3.3.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

package/.ai/handoff-session-resume.md +187 -0
package/README.md +10 -1
package/SKILL.md +1 -1
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/src/cli-runner.ts +71 -40
package/src/debug-log.ts +8 -0
package/src/proxy-server.ts +4 -1
package/test/cli-runner-extended.test.ts +1 -1

package/.ai/handoff-session-resume.md ADDED Viewed

@@ -0,0 +1,187 @@
+# Handover: CLI Session Resume Pattern
+## Problem Solved
+Spawning fresh CLI processes (`claude -p`, `gemini -p`, `codex exec`) for every request forces the model to re-process the entire conversation history (20KB+) from scratch. This causes:
+- **Silent hangs** — Sonnet goes completely silent (zero stdout) ~50% of the time on large prompts
+- **Slow responses** — 80-120s per request instead of 5-10s
+- **Wasted tokens** — the full history is re-tokenized on every call
+## Solution: Session Resume
+Instead of one-shot processes, maintain persistent sessions per model. The first request creates a session; subsequent requests resume it, so the CLI keeps the full conversation context.
+## Implementation by CLI Tool
+### Claude Code (`claude`)
+```bash
+# First request — create session
+echo "user prompt" | claude -p \
+  --session-id "550e8400-e29b-41d4-a716-446655440000" \
+  --model claude-sonnet-4-6 \
+  --output-format text \
+  --permission-mode bypassPermissions \
+  --dangerously-skip-permissions
+# Subsequent requests — resume (Claude has full context, only new message needed)
+echo "follow-up prompt" | claude -p \
+  --resume "550e8400-e29b-41d4-a716-446655440000" \
+  --model claude-sonnet-4-6 \
+  --output-format text \
+  --permission-mode bypassPermissions \
+  --dangerously-skip-permissions
+```
+**Key flags:**
+- `--session-id <uuid>` — creates a new session with this ID (first request)
+- `--resume <uuid>` — resumes an existing session (subsequent requests)
+- Both work with `-p` (print/headless mode)
+- Session files are stored internally by the Claude CLI (`~/.claude/projects/`)
+### Gemini CLI (`gemini`)
+```bash
+# First request — auto-creates session
+echo "user prompt" | gemini -m gemini-2.5-flash -p "" --approval-mode yolo
+# Subsequent requests — resume by UUID
+echo "follow-up" | gemini -m gemini-2.5-flash -p "" --resume "ad79893c-4e3d-40e6-83e7-400e49dba0d6" --approval-mode yolo
+```
+**Key flags:**
+- `--resume <uuid>` — resume by session UUID
+- `--list-sessions` — list available sessions
+- Session UUID is visible in `--list-sessions` output
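+**Note:** Gemini doesn't have a `--session-id` flag to create a session with a specific UUID. The session is auto-created, and its UUID can be extracted from `--list-sessions` or from the output. For the bridge, we generate a UUID ourselves and pass it as `--resume` on follow-up requests only (the first request is sent without it); this relies on Gemini creating a new session if the UUID doesn't exist — TODO: confirm against the Gemini CLI.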
+### OpenAI Codex (`codex`)
+```bash
+# First request — auto-creates session
+echo "user prompt" | codex exec --model gpt-5.3-codex --full-auto
+# Subsequent requests — resume subcommand
+echo "follow-up" | codex exec resume "550e8400-xxxx" --model gpt-5.3-codex --full-auto
+```
+**Key flags:**
+- `codex exec resume <session-id>` — resume subcommand (not a flag)
+- `--ephemeral` — skip session persistence (opposite of what we want)
+- Session ID is a UUID
+## Session Registry Pattern (TypeScript)
+```typescript
+interface CliSessionEntry {
+  sessionId: string;        // UUID
+  provider: string;         // "claude" | "gemini" | "codex"
+  model: string;            // e.g. "claude-sonnet-4-6"
+  createdAt: number;        // epoch ms
+  lastUsedAt: number;       // epoch ms
+  requestCount: number;     // total requests in this session
+}
+// Persist to JSON file
+const SESSIONS_FILE = "~/.openclaw/cli-bridge/cli-sessions.json";
+// Session lifecycle
+function getOrCreateSession(provider: string, model: string): CliSessionEntry {
+  const existing = sessions.get(model);
+  // Reuse if fresh enough
+  const TTL = 2 * 60 * 60 * 1000;     // 2 hours
+  const MAX_REQUESTS = 50;              // context rotation
+  if (existing &&
+      (Date.now() - existing.lastUsedAt) < TTL &&
+      existing.requestCount < MAX_REQUESTS) {
+    return existing;
+  }
+  // Create fresh session
+  return { sessionId: randomUUID(), provider, model, ... };
+}
+// After successful response
+function recordSuccess(model: string): void {
+  session.requestCount++;
+  session.lastUsedAt = Date.now();
+  saveToDisk();
+}
+// On session error (corrupted, expired, not found)
+function invalidate(model: string): void {
+  sessions.delete(model);
+  saveToDisk();
+  // Next request will auto-create a fresh session
+}
+```
+## Session Expiry Strategy
+| Condition | Action | Why |
+|-----------|--------|-----|
+| `Date.now() - lastUsedAt >= 2 hours` | Create new session | Context may be stale |
+| `requestCount >= 50` | Create new session | Prevent context bloat |
+| CLI returns "session not found" | Invalidate + retry | Session file was cleaned up |
+| CLI returns auth error | Refresh token + retry | OAuth token expired |
+| CLI timeout (exit 143) | Keep session alive | Session is valid, API was slow |
+## Performance Impact (measured on openclaw-cli-bridge)
+| Metric | Before (one-shot) | After (session resume) |
+|--------|-------------------|----------------------|
+| Prompt size per request | 18-25 KB | < 1 KB (new message only) |
+| Sonnet response time | 80-120s (50% hang rate) | 5-10s |
+| Haiku response time | 5-15s | 3-5s |
+| Silent hang rate | ~50% | Near 0% |
+## Stream-JSON Mode (Future Enhancement)
+Claude CLI supports bidirectional streaming via `--input-format stream-json --output-format stream-json --verbose`. This enables:
+- **Persistent process** — don't spawn/kill per request, keep one running
+- **Real-time streaming** — token-by-token output via SSE
+- **Native tool calls** — Claude's own tools (Bash, Read, Write, Edit, Grep)
+- **Rate limit visibility** — `rate_limit_event` messages show quota state
+- **Cost tracking** — per-request cost in USD
+```bash
+# Bidirectional streaming session
+echo '{"type":"user","message":{"role":"user","content":"hello"}}' | \
+  claude -p \
+  --model claude-sonnet-4-6 \
+  --input-format stream-json \
+  --output-format stream-json \
+  --verbose \
+  --permission-mode bypassPermissions \
+  --dangerously-skip-permissions
+```
+Response includes `session_id`, tool list, model info, thinking blocks, and full usage metrics. This is the path to a fully persistent agent process.
+## Files Reference (openclaw-cli-bridge-elvatis)
+| File | What it does |
+|------|-------------|
+| `src/cli-runner.ts` | Session registry + `runClaude()`, `runGemini()`, `runCodex()` with resume |
+| `src/config.ts` | `STALE_OUTPUT_TIMEOUT_MS = 30_000` (kill silent processes fast) |
+| `src/tool-protocol.ts` | Tool schema injection + JSON response parsing |
+| `src/proxy-server.ts` | Cross-provider fallback chains, empty-response detection |
+| `src/debug-log.ts` | File-based debug log + SSE streaming |
+| `~/.openclaw/cli-bridge/cli-sessions.json` | Persisted session registry |
+| `~/.openclaw/cli-bridge/debug.log` | Real-time request lifecycle log |
+## Key Learnings
+1. **Claude Sonnet hangs silently** on large prompts (~50% of the time). NOT RAM (28GB free). Likely API-side rate limiting. Session resume fixes it by keeping prompts small.
+2. **Exit code 143 = SIGTERM**, not OOM. Our stale-output detector sends it when the CLI produces zero stdout for 30 seconds.
+3. **Haiku ignores JSON tool format** in long conversations — returns conversational text instead of `{"tool_calls":[...]}`. Fix: JSON reminder at the END of the prompt + reject text responses during tool loops.
+4. **Empty responses (0 bytes) must trigger fallback**, not be treated as success. The model exits 0 but produces nothing useful.
+5. **Cross-provider fallback chains** are essential: `Sonnet → Haiku → Gemini Flash → Codex`. Each provider has different failure modes.
+6. **The gateway loads plugins from `~/.openclaw/extensions/`**, NOT from the workspace. Must rsync + `openclaw gateway restart` after every change.

package/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
-**Current version:** `3.2.0`
+**Current version:** `3.3.1`
 ---
@@ -406,6 +406,15 @@ npm run ci          # lint + typecheck + test
 ## Changelog
+### v3.3.1
+- **fix:** test requests no longer pollute `debug.log` — test instances (port 0) now skip file logging
+- **fix:** Codex test updated for session resume args
+### v3.3.0
+- **feat:** session resume for ALL CLI providers — Claude, Gemini, and Codex all now use persistent sessions (`--resume` for Claude and Gemini, the `exec resume <session-id>` subcommand for Codex). Unified session registry at `~/.openclaw/cli-bridge/cli-sessions.json`.
+- **feat:** auto-rotation: sessions expire after 2 hours OR 50 requests (whichever first) to prevent context bloat
+- **feat:** per-provider debug logging: `[GEMINI]`, `[CODEX]` categories with session state
 ### v3.2.0
 - **feat:** Claude session resume — persistent sessions eliminate the 20KB prompt replay that caused Sonnet to hang. First request creates a session (`--session-id`), subsequent requests resume it (`--resume`). Claude keeps the conversation context; the bridge only sends the new message.
 - **feat:** session registry persisted to `~/.openclaw/cli-bridge/claude-sessions.json` — survives gateway restarts, auto-expires after 2 hours of inactivity

package/SKILL.md CHANGED Viewed

@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
 See `README.md` for full configuration reference and architecture diagram.
-**Version:** 3.2.0
+**Version:** 3.3.1

package/openclaw.plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "openclaw-cli-bridge-elvatis",
   "slug": "openclaw-cli-bridge-elvatis",
   "name": "OpenClaw CLI Bridge",
-  "version": "3.2.0",
+  "version": "3.3.1",
   "license": "MIT",
   "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
   "providers": [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
-  "version": "3.2.0",
+  "version": "3.3.1",
   "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
   "type": "module",
   "openclaw": {

package/src/cli-runner.ts CHANGED Viewed

@@ -503,18 +503,26 @@ export async function runGemini(
   opts?: { tools?: ToolDefinition[]; log?: (msg: string) => void }
 ): Promise<string> {
   const model = stripPrefix(modelId);
+  const session = getOrCreateSession("gemini", model);
+  const isResume = session.requestCount > 0;
   // -p "" = headless mode trigger; actual prompt arrives via stdin
   // --approval-mode yolo: auto-approve all tool executions, never ask questions
   const args = ["-m", model, "-p", "", "--approval-mode", "yolo"];
+  if (isResume) {
+    args.push("--resume", session.sessionId);
+  }
   const cwd = workdir ?? tmpdir();
   // When tools are present, sandwich the conversation between tool instructions.
-  // The reminder at the end ensures models (especially Haiku) remember the JSON format
-  // after processing a long conversation history.
   const effectivePrompt = opts?.tools?.length
     ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt + "\n\nREMINDER: You MUST respond with ONLY valid JSON — either {\"tool_calls\":[...]} or {\"content\":\"...\"}. Nothing else."
     : prompt;
+  debugLog("GEMINI", `${isResume ? "resume" : "new"} ${model} session=${session.sessionId.slice(0, 8)}`, {
+    promptLen: effectivePrompt.length, requestCount: session.requestCount,
+  });
   const result = await runCli("gemini", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
   // Filter out [WARN] lines from stderr (Gemini emits noisy permission warnings)
@@ -525,9 +533,14 @@ export async function runGemini(
     .trim();
   if (result.exitCode !== 0 && result.stdout.length === 0) {
+    // Session might be invalid — invalidate and let next request create a fresh one
+    if (cleanStderr.includes("session") || cleanStderr.includes("resume") || cleanStderr.includes("not found")) {
+      invalidateSession(model);
+    }
     throw new Error(`gemini exited ${result.exitCode}: ${annotateExitError(result.exitCode, cleanStderr, result.timedOut, modelId)}`);
   }
+  recordSessionSuccess(model);
   return result.stdout || cleanStderr;
 }
@@ -539,59 +552,77 @@ export async function runGemini(
 // Persistent sessions avoid re-sending the full 20KB prompt on every request.
 // First call creates a session; subsequent calls resume it with just the new message.
-const CLAUDE_SESSIONS_FILE = join(homedir(), ".openclaw", "cli-bridge", "claude-sessions.json");
+// ── Generic CLI session registry ────────────────────────────────────────────
+// Shared by Claude, Gemini, and Codex — persistent sessions avoid replaying
+// the full conversation on every request.
+const CLI_SESSIONS_FILE = join(homedir(), ".openclaw", "cli-bridge", "cli-sessions.json");
+const SESSION_TTL = 2 * 60 * 60 * 1000; // 2 hours
+const SESSION_MAX_REQUESTS = 50;
-interface ClaudeSessionEntry {
+interface CliSessionEntry {
   sessionId: string;
+  provider: string; // "claude" | "gemini" | "codex"
   model: string;
   createdAt: number;
   lastUsedAt: number;
   requestCount: number;
 }
-const claudeSessions = new Map<string, ClaudeSessionEntry>();
+const cliSessions = new Map<string, CliSessionEntry>();
+let sessionsLoaded = false;
-function loadClaudeSessions(): void {
+function loadCliSessions(): void {
+  if (sessionsLoaded) return;
+  sessionsLoaded = true;
   try {
-    const data = JSON.parse(readFileSync(CLAUDE_SESSIONS_FILE, "utf8"));
+    const data = JSON.parse(readFileSync(CLI_SESSIONS_FILE, "utf8"));
     if (Array.isArray(data.sessions)) {
-      for (const s of data.sessions) claudeSessions.set(s.model, s);
+      for (const s of data.sessions) cliSessions.set(s.model, s);
     }
   } catch { /* no sessions file yet */ }
 }
-function saveClaudeSessions(): void {
+function saveCliSessions(): void {
   try {
     mkdirSync(join(homedir(), ".openclaw", "cli-bridge"), { recursive: true });
-    writeFileSync(CLAUDE_SESSIONS_FILE, JSON.stringify({
+    writeFileSync(CLI_SESSIONS_FILE, JSON.stringify({
       version: 1,
-      sessions: [...claudeSessions.values()],
+      sessions: [...cliSessions.values()],
     }, null, 2));
   } catch { /* best effort */ }
 }
-function getOrCreateSession(model: string): ClaudeSessionEntry {
-  if (claudeSessions.size === 0) loadClaudeSessions();
-  const existing = claudeSessions.get(model);
-  // Reuse session if it's less than 2 hours old
-  if (existing && (Date.now() - existing.lastUsedAt) < 2 * 60 * 60 * 1000) {
+function getOrCreateSession(provider: string, model: string): CliSessionEntry {
+  loadCliSessions();
+  const existing = cliSessions.get(model);
+  if (existing && (Date.now() - existing.lastUsedAt) < SESSION_TTL && existing.requestCount < SESSION_MAX_REQUESTS) {
     return existing;
   }
-  const entry: ClaudeSessionEntry = {
+  if (existing) {
+    debugLog("SESSION", `${provider} session ${existing.sessionId.slice(0, 8)} expired`, { reason: existing.requestCount >= SESSION_MAX_REQUESTS ? "max_requests" : "ttl", requestCount: existing.requestCount });
+  }
+  const entry: CliSessionEntry = {
     sessionId: randomUUID(),
+    provider,
     model,
     createdAt: Date.now(),
     lastUsedAt: Date.now(),
     requestCount: 0,
   };
-  claudeSessions.set(model, entry);
-  saveClaudeSessions();
+  cliSessions.set(model, entry);
+  saveCliSessions();
   return entry;
 }
+function recordSessionSuccess(model: string): void {
+  const s = cliSessions.get(model);
+  if (s) { s.requestCount++; s.lastUsedAt = Date.now(); saveCliSessions(); }
+}
 function invalidateSession(model: string): void {
-  claudeSessions.delete(model);
-  saveClaudeSessions();
+  cliSessions.delete(model);
+  saveCliSessions();
 }
 /**
@@ -611,7 +642,7 @@ export async function runClaude(
   await ensureClaudeToken();
   const model = stripPrefix(modelId);
-  const session = getOrCreateSession(model);
+  const session = getOrCreateSession("claude", model);
   const isResume = session.requestCount > 0;
   const args: string[] = [
@@ -645,17 +676,14 @@ export async function runClaude(
   // Session succeeded — update registry
   if (result.exitCode === 0 || result.stdout.length > 0) {
-    session.requestCount++;
-    session.lastUsedAt = Date.now();
-    saveClaudeSessions();
+    recordSessionSuccess(model);
     return result.stdout;
   }
   // Session failed — check if it's a timeout or auth issue
   if (result.timedOut) {
     // Don't invalidate session on timeout — it's still valid, just slow
-    session.lastUsedAt = Date.now();
-    saveClaudeSessions();
+    recordSessionSuccess(model); // keep session alive
     throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, true, modelId)}`);
   }
@@ -666,7 +694,7 @@ export async function runClaude(
     debugLog("CLAUDE", `session ${session.sessionId.slice(0, 8)} invalid, creating fresh`, { error: stderr.slice(0, 100) });
     invalidateSession(model);
     // Retry once with a fresh session
-    const freshSession = getOrCreateSession(model);
+    const freshSession = getOrCreateSession("claude", model);
     const freshArgs = [
       "-p", "--output-format", "text",
       "--permission-mode", "bypassPermissions", "--dangerously-skip-permissions",
@@ -674,9 +702,7 @@ export async function runClaude(
     ];
     const retry = await runCli("claude", freshArgs, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
     if (retry.exitCode === 0 || retry.stdout.length > 0) {
-      freshSession.requestCount++;
-      freshSession.lastUsedAt = Date.now();
-      saveClaudeSessions();
+      recordSessionSuccess(model);
       return retry.stdout;
     }
     throw new Error(`claude exited ${retry.exitCode}: ${annotateExitError(retry.exitCode, retry.stderr || "(no output)", false, modelId)}`);
@@ -687,9 +713,7 @@ export async function runClaude(
     await refreshClaudeToken();
     const retry = await runCli("claude", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
     if (retry.exitCode === 0 || retry.stdout.length > 0) {
-      session.requestCount++;
-      session.lastUsedAt = Date.now();
-      saveClaudeSessions();
+      recordSessionSuccess(model);
       return retry.stdout;
     }
     const retryStderr = retry.stderr || "(no output)";
@@ -729,7 +753,13 @@ export async function runCodex(
   opts?: { tools?: ToolDefinition[]; mediaFiles?: MediaFile[]; log?: (msg: string) => void }
 ): Promise<string> {
   const model = stripPrefix(modelId);
-  const args = ["exec", "--model", model, "--full-auto"];
+  const session = getOrCreateSession("codex", model);
+  const isResume = session.requestCount > 0;
+  // Codex uses "exec resume <session-id>" for resume, "exec" for new
+  const args = isResume
+    ? ["exec", "resume", session.sessionId, "--model", model, "--full-auto"]
+    : ["exec", "--model", model, "--full-auto"];
   // Codex supports native image input via -i flag
   if (opts?.mediaFiles?.length) {
@@ -741,23 +771,24 @@ export async function runCodex(
   }
   const cwd = workdir ?? homedir();
-  // Codex requires a git repo in the working directory
   ensureGitRepo(cwd);
-  // When tools are present, sandwich the conversation between tool instructions.
-  // The reminder at the end ensures models (especially Haiku) remember the JSON format
-  // after processing a long conversation history.
   const effectivePrompt = opts?.tools?.length
     ? buildToolPromptBlock(opts.tools) + "\n\n" + prompt + "\n\nREMINDER: You MUST respond with ONLY valid JSON — either {\"tool_calls\":[...]} or {\"content\":\"...\"}. Nothing else."
     : prompt;
+  debugLog("CODEX", `${isResume ? "resume" : "new"} ${model} session=${session.sessionId.slice(0, 8)}`, {
+    promptLen: effectivePrompt.length, requestCount: session.requestCount,
+  });
   const result = await runCli("codex", args, effectivePrompt, timeoutMs, { cwd, log: opts?.log });
   if (result.exitCode !== 0 && result.stdout.length === 0) {
+    if (isResume) invalidateSession(model); // session might be stale
     throw new Error(`codex exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, result.timedOut, modelId)}`);
   }
+  recordSessionSuccess(model);
   return result.stdout || result.stderr;
 }

package/src/debug-log.ts CHANGED Viewed

@@ -38,11 +38,19 @@ function ts(): string {
   return new Date().toISOString();
 }
+/**
+ * Global on/off switch for debug file logging. `startProxyServer` turns it off for test instances (port 0).
+ * Without this, every test run pollutes the production debug log with 43+ fake requests.
+ */
+let _enabled = true;
+export function setDebugLogEnabled(enabled: boolean): void { _enabled = enabled; }
 /**
  * Append a debug line to the log file.
  * Non-blocking, never throws — logging must not crash the bridge.
  */
 export function debugLog(category: string, message: string, data?: Record<string, unknown>): void {
+  if (!_enabled) return;
   try {
     ensureDir();
     rotate();

package/src/proxy-server.ts CHANGED Viewed

@@ -34,7 +34,7 @@ import {
   DEFAULT_MODEL_TIMEOUTS,
   TOOL_ROUTING_THRESHOLD,
 } from "./config.js";
-import { debugLog, DEBUG_LOG_PATH, getLogTail, watchLogFile } from "./debug-log.js";
+import { debugLog, DEBUG_LOG_PATH, getLogTail, watchLogFile, setDebugLogEnabled } from "./debug-log.js";
 // ── Active request tracking ─────────────────────────────────────────────────
@@ -212,6 +212,9 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
         reject(err);
       }
     });
+    // Disable debug file logging for test instances (port 0) to avoid polluting production logs
+    if (opts.port === 0) setDebugLogEnabled(false);
     server.listen(opts.port, "127.0.0.1", () => {
       opts.log(
         `[cli-bridge] proxy listening on :${opts.port}`

package/test/cli-runner-extended.test.ts CHANGED Viewed

@@ -87,7 +87,7 @@ describe("runCodex()", () => {
     expect(result).toBe("codex result");
     expect(mockSpawn).toHaveBeenCalledWith(
       "codex",
-      ["exec", "--model", "gpt-5.3-codex", "--full-auto"],
+      expect.arrayContaining(["exec", "--model", "gpt-5.3-codex", "--full-auto"]),
       expect.any(Object)
     );
   });