npm - @elvatis_com/openclaw-cli-bridge-elvatis - Versions diffs - 3.3.0 → 3.4.0 - Mend

@elvatis_com/openclaw-cli-bridge-elvatis 3.3.0 → 3.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (11) hide show

package/.ai/handoff-session-resume.md +187 -0
package/CLAUDE.md +4 -0
package/README.md +5 -1
package/SKILL.md +1 -1
package/openclaw.plugin.json +1 -1
package/package.json +1 -1
package/src/cli-runner.ts +91 -5
package/src/config.ts +3 -0
package/src/debug-log.ts +8 -0
package/src/proxy-server.ts +4 -1
package/test/cli-runner-extended.test.ts +1 -1

package/.ai/handoff-session-resume.md ADDED Viewed

@@ -0,0 +1,187 @@
+# Handover: CLI Session Resume Pattern
+## Problem Solved
+Spawning fresh CLI processes (`claude -p`, `gemini -p`, `codex exec`) for every request forces the model to re-process the entire conversation history (20KB+) from scratch. This causes:
+- **Silent hangs** — Sonnet goes completely silent (zero stdout) ~50% of the time on large prompts
+- **Slow responses** — 80-120s per request instead of 5-10s
+- **Wasted tokens** — the full history is re-tokenized on every call
+## Solution: Session Resume
+Instead of one-shot processes, maintain persistent sessions per model. First request creates a session, subsequent requests resume it — the CLI keeps the full conversation context.
+## Implementation by CLI Tool
+### Claude Code (`claude`)
+```bash
+# First request — create session
+echo "user prompt" | claude -p \
+  --session-id "550e8400-e29b-41d4-a716-446655440000" \
+  --model claude-sonnet-4-6 \
+  --output-format text \
+  --permission-mode bypassPermissions \
+  --dangerously-skip-permissions
+# Subsequent requests — resume (Claude has full context, only new message needed)
+echo "follow-up prompt" | claude -p \
+  --resume "550e8400-e29b-41d4-a716-446655440000" \
+  --model claude-sonnet-4-6 \
+  --output-format text \
+  --permission-mode bypassPermissions \
+  --dangerously-skip-permissions
+```
+**Key flags:**
+- `--session-id <uuid>` — creates a new session with this ID (first request)
+- `--resume <uuid>` — resumes an existing session (subsequent requests)
+- Both work with `-p` (print/headless mode)
+- Session files stored by Claude CLI internally (~/.claude/projects/)
+### Gemini CLI (`gemini`)
+```bash
+# First request — auto-creates session
+echo "user prompt" | gemini -m gemini-2.5-flash -p "" --approval-mode yolo
+# Subsequent requests — resume by UUID
+echo "follow-up" | gemini -m gemini-2.5-flash -p "" --resume "ad79893c-4e3d-40e6-83e7-400e49dba0d6" --approval-mode yolo
+```
+**Key flags:**
+- `--resume <uuid>` — resume by session UUID
+- `--list-sessions` — list available sessions
+- Session UUID is visible in `--list-sessions` output
+**Note:** Gemini doesn't have a `--session-id` flag to create a specific UUID. The session is auto-created and the UUID is extracted from `--list-sessions` or from the output. For the bridge, we generate a UUID and pass it as `--resume` — Gemini creates a new session if the UUID doesn't exist.
+### OpenAI Codex (`codex`)
+```bash
+# First request — auto-creates session
+echo "user prompt" | codex exec --model gpt-5.3-codex --full-auto
+# Subsequent requests — resume subcommand
+echo "follow-up" | codex exec resume "550e8400-xxxx" --model gpt-5.3-codex --full-auto
+```
+**Key flags:**
+- `codex exec resume <session-id>` — resume subcommand (not a flag)
+- `--ephemeral` — skip session persistence (opposite of what we want)
+- Session ID is a UUID
+## Session Registry Pattern (TypeScript)
+```typescript
+interface CliSessionEntry {
+  sessionId: string;        // UUID
+  provider: string;         // "claude" | "gemini" | "codex"
+  model: string;            // e.g. "claude-sonnet-4-6"
+  createdAt: number;        // epoch ms
+  lastUsedAt: number;       // epoch ms
+  requestCount: number;     // total requests in this session
+}
+// Persist to JSON file
+const SESSIONS_FILE = "~/.openclaw/cli-bridge/cli-sessions.json";
+// Session lifecycle
+function getOrCreateSession(provider: string, model: string): CliSessionEntry {
+  const existing = sessions.get(model);
+  // Reuse if fresh enough
+  const TTL = 2 * 60 * 60 * 1000;     // 2 hours
+  const MAX_REQUESTS = 50;              // context rotation
+  if (existing &&
+      (Date.now() - existing.lastUsedAt) < TTL &&
+      existing.requestCount < MAX_REQUESTS) {
+    return existing;
+  }
+  // Create fresh session
+  return { sessionId: randomUUID(), provider, model, ... };
+}
+// After successful response
+function recordSuccess(model: string): void {
+  session.requestCount++;
+  session.lastUsedAt = Date.now();
+  saveToDisk();
+}
+// On session error (corrupted, expired, not found)
+function invalidate(model: string): void {
+  sessions.delete(model);
+  saveToDisk();
+  // Next request will auto-create a fresh session
+}
+```
+## Session Expiry Strategy
+| Condition | Action | Why |
+|-----------|--------|-----|
+| Idle for 2 hours or more (`Date.now() - lastUsedAt >= TTL`) | Create new session | Context may be stale |
+| `requestCount >= 50` | Create new session | Prevent context bloat |
+| CLI returns "session not found" | Invalidate + retry | Session file was cleaned up |
+| CLI returns auth error | Refresh token + retry | OAuth token expired |
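+| CLI timeout (exit 143) | Keep session alive; expire it after 3 consecutive timeouts | A single timeout usually means the API was slow; repeated timeouts point to a poisoned session |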
+## Performance Impact (measured on openclaw-cli-bridge)
+| Metric | Before (one-shot) | After (session resume) |
+|--------|-------------------|----------------------|
+| Prompt size per request | 18-25 KB | < 1 KB (new message only) |
+| Sonnet response time | 80-120s (50% hang rate) | 5-10s |
+| Haiku response time | 5-15s | 3-5s |
+| Silent hang rate | ~50% | Near 0% |
+## Stream-JSON Mode (Future Enhancement)
+Claude CLI supports bidirectional streaming via `--input-format stream-json --output-format stream-json --verbose`. This enables:
+- **Persistent process** — don't spawn/kill per request, keep one running
+- **Real-time streaming** — token-by-token output via SSE
+- **Native tool calls** — Claude's own tools (Bash, Read, Write, Edit, Grep)
+- **Rate limit visibility** — `rate_limit_event` messages show quota state
+- **Cost tracking** — per-request cost in USD
+```bash
+# Bidirectional streaming session
+echo '{"type":"user","message":{"role":"user","content":"hello"}}' | \
+  claude -p \
+  --model claude-sonnet-4-6 \
+  --input-format stream-json \
+  --output-format stream-json \
+  --verbose \
+  --permission-mode bypassPermissions \
+  --dangerously-skip-permissions
+```
+Response includes `session_id`, tool list, model info, thinking blocks, and full usage metrics. This is the path to a fully persistent agent process.
+## Files Reference (openclaw-cli-bridge-elvatis)
+| File | What it does |
+|------|-------------|
+| `src/cli-runner.ts` | Session registry + `runClaude()`, `runGemini()`, `runCodex()` with resume |
+| `src/config.ts` | `STALE_OUTPUT_TIMEOUT_MS = 30_000` (kill silent processes fast) |
+| `src/tool-protocol.ts` | Tool schema injection + JSON response parsing |
+| `src/proxy-server.ts` | Cross-provider fallback chains, empty-response detection |
+| `src/debug-log.ts` | File-based debug log + SSE streaming |
+| `~/.openclaw/cli-bridge/cli-sessions.json` | Persisted session registry |
+| `~/.openclaw/cli-bridge/debug.log` | Real-time request lifecycle log |
+## Key Learnings
+1. **Claude Sonnet hangs silently** on large prompts (~50% of the time). NOT RAM (28GB free). Likely API-side rate limiting. Session resume fixes it by keeping prompts small.
+2. **Exit code 143 = SIGTERM**, not OOM. Our stale-output detector sends it when the CLI produces zero stdout for 30 seconds.
+3. **Haiku ignores JSON tool format** in long conversations — returns conversational text instead of `{"tool_calls":[...]}`. Fix: JSON reminder at the END of the prompt + reject text responses during tool loops.
+4. **Empty responses (0 bytes) must trigger fallback**, not be treated as success. The model exits 0 but produces nothing useful.
+5. **Cross-provider fallback chains** are essential: `Sonnet → Haiku → Gemini Flash → Codex`. Each provider has different failure modes.
+6. **The gateway loads plugins from `~/.openclaw/extensions/`**, NOT from the workspace. Must rsync + `openclaw gateway restart` after every change.

package/CLAUDE.md CHANGED Viewed

@@ -30,6 +30,8 @@ OpenClaw Gateway ──(HTTP)──> proxy-server.ts ──(spawn)──> claude
 - **Smart fallback** — Sonnet tries first (better tool selection), 30s stale timeout kills it fast, Haiku takes over (~10s, reliable but picks wrong tools sometimes)
 - **Compact tool schema** — when >8 tools, only send name+params (skip descriptions/full JSON schema), cuts prompt ~60%
 - **Exit 143 = our SIGTERM** — not OOM, not crash. The bridge's timeout/stale-output detector sends SIGTERM, Claude CLI exits 143
+- **Consecutive timeout rotation** — after 3 timeouts in a row on the same session, auto-expire it and create a fresh one. Prevents poisoned sessions from blocking all requests
+- **Workspace project auto-detection** — scans `~/.openclaw/workspace/` for project directories; when the prompt contains an exact match of a project name, auto-sets `workdir` and injects `[Context: Working directory is ...]` into the prompt
 ## Build & Test
@@ -69,6 +71,8 @@ All magic numbers live here. Key values:
 | `TOOL_HEAVY_THRESHOLD` | 10 | Reduce MAX_MESSAGES from 20 to 12 when tools exceed this |
 | `COMPACT_TOOL_THRESHOLD` | 8 | Switch to compact tool schema (name+params only) |
 | `TOOL_ROUTING_THRESHOLD` | 8 | (in proxy-server) Was used for Haiku routing, now Sonnet-first with fast fallback |
+| `CONSECUTIVE_TIMEOUT_LIMIT` | 3 | (in cli-runner) Auto-expire session after N consecutive timeouts |
+| `WORKSPACE_DIR` | `~/.openclaw/workspace` | Project directory scanned for auto-detection |
 ## Tool Protocol (src/tool-protocol.ts)

package/README.md CHANGED Viewed

@@ -2,7 +2,7 @@
 > OpenClaw plugin that bridges locally installed AI CLIs (Codex, Gemini, Claude Code, OpenCode, Pi) as model providers — with slash commands for instant model switching, restore, health testing, and model listing.
-**Current version:** `3.3.0`
+**Current version:** `3.4.0`
 ---
@@ -406,6 +406,10 @@ npm run ci          # lint + typecheck + test
 ## Changelog
+### v3.3.1
+- **fix:** test requests no longer pollute `debug.log` — test instances (port 0) now skip file logging
+- **fix:** Codex test updated for session resume args
 ### v3.3.0
 - **feat:** session resume for ALL CLI providers — Claude, Gemini, and Codex all now use persistent sessions with `--resume`. Unified session registry at `~/.openclaw/cli-bridge/cli-sessions.json`.
 - **feat:** auto-rotation: sessions expire after 2 hours OR 50 requests (whichever first) to prevent context bloat

package/SKILL.md CHANGED Viewed

@@ -68,4 +68,4 @@ On gateway restart, if any session has expired, a **WhatsApp alert** is sent aut
 See `README.md` for full configuration reference and architecture diagram.
-**Version:** 3.3.0
+**Version:** 3.4.0

package/openclaw.plugin.json CHANGED Viewed

@@ -2,7 +2,7 @@
   "id": "openclaw-cli-bridge-elvatis",
   "slug": "openclaw-cli-bridge-elvatis",
   "name": "OpenClaw CLI Bridge",
-  "version": "3.3.0",
+  "version": "3.4.0",
   "license": "MIT",
   "description": "Phase 1: openai-codex auth bridge. Phase 2: local HTTP proxy routing model calls through gemini/claude CLIs (vllm provider).",
   "providers": [

package/package.json CHANGED Viewed

@@ -1,6 +1,6 @@
 {
   "name": "@elvatis_com/openclaw-cli-bridge-elvatis",
-  "version": "3.3.0",
+  "version": "3.4.0",
   "description": "Bridges gemini, claude, and codex CLI tools as OpenClaw model providers. Reads existing CLI auth without re-login.",
   "type": "module",
   "openclaw": {

package/src/cli-runner.ts CHANGED Viewed

@@ -18,7 +18,7 @@
 import { spawn, execSync } from "node:child_process";
 import { tmpdir, homedir } from "node:os";
-import { existsSync, writeFileSync, unlinkSync, mkdirSync, readFileSync } from "node:fs";
+import { existsSync, writeFileSync, unlinkSync, mkdirSync, readFileSync, readdirSync, statSync } from "node:fs";
 import { join } from "node:path";
 import { randomBytes, randomUUID } from "node:crypto";
 import { ensureClaudeToken, refreshClaudeToken } from "./claude-auth.js";
@@ -37,6 +37,7 @@ import {
   TIMEOUT_GRACE_MS,
   MEDIA_TMP_DIR,
   STALE_OUTPUT_TIMEOUT_MS,
+  WORKSPACE_DIR,
 } from "./config.js";
 import { debugLog } from "./debug-log.js";
@@ -567,8 +568,11 @@ interface CliSessionEntry {
   createdAt: number;
   lastUsedAt: number;
   requestCount: number;
+  consecutiveTimeouts: number;
 }
+const CONSECUTIVE_TIMEOUT_LIMIT = 3;
 const cliSessions = new Map<string, CliSessionEntry>();
 let sessionsLoaded = false;
@@ -609,6 +613,7 @@ function getOrCreateSession(provider: string, model: string): CliSessionEntry {
     createdAt: Date.now(),
     lastUsedAt: Date.now(),
     requestCount: 0,
+    consecutiveTimeouts: 0,
   };
   cliSessions.set(model, entry);
   saveCliSessions();
@@ -617,7 +622,21 @@ function getOrCreateSession(provider: string, model: string): CliSessionEntry {
 function recordSessionSuccess(model: string): void {
   const s = cliSessions.get(model);
-  if (s) { s.requestCount++; s.lastUsedAt = Date.now(); saveCliSessions(); }
+  if (s) { s.requestCount++; s.lastUsedAt = Date.now(); s.consecutiveTimeouts = 0; saveCliSessions(); }
+}
+/**
+ * Record a timed-out request against the session registered for `model`.
+ *
+ * Increments the session's consecutive-timeout counter and refreshes
+ * `lastUsedAt`. Once the counter reaches CONSECUTIVE_TIMEOUT_LIMIT the session
+ * is logged as expired and removed from the registry. The registry is saved
+ * in either case. Does nothing when no session is registered for `model`.
+ */
+function recordSessionTimeout(model: string): void {
+  const s = cliSessions.get(model);
+  if (!s) return;
+  // NOTE(review): an entry restored from a registry file written before this
+  // counter existed may not carry the field (assumes the loader does not
+  // backfill it — confirm). `undefined++` is NaN and `NaN >= limit` is never
+  // true, so such a session would never rotate; treat a non-finite value as 0.
+  const previousTimeouts = Number.isFinite(s.consecutiveTimeouts) ? s.consecutiveTimeouts : 0;
+  s.consecutiveTimeouts = previousTimeouts + 1;
+  s.lastUsedAt = Date.now();
+  if (s.consecutiveTimeouts >= CONSECUTIVE_TIMEOUT_LIMIT) {
+    debugLog("SESSION", `${s.provider} session ${s.sessionId.slice(0, 8)} expired`, {
+      reason: "consecutive_timeouts", consecutiveTimeouts: s.consecutiveTimeouts, requestCount: s.requestCount,
+    });
+    cliSessions.delete(model);
+  }
+  saveCliSessions();
 }
 function invalidateSession(model: string): void {
@@ -682,8 +701,8 @@ export async function runClaude(
   // Session failed — check if it's a timeout or auth issue
   if (result.timedOut) {
-    // Don't invalidate session on timeout — it's still valid, just slow
-    recordSessionSuccess(model); // keep session alive
+    // Track consecutive timeouts — after 3 in a row, expire the session
+    recordSessionTimeout(model);
     throw new Error(`claude exited ${result.exitCode}: ${annotateExitError(result.exitCode, result.stderr, true, modelId)}`);
   }
@@ -920,6 +939,63 @@ export interface RouteOptions {
   log?: (msg: string) => void;
 }
+// ── Workspace project detection ──────────────────────────────────────────────
+// Scans WORKSPACE_DIR for project directories. When the user's prompt contains
+// an exact match of a project name, auto-sets workdir and injects context.
+let _workspaceProjects: string[] | null = null;
+let _workspaceProjectsRefreshedAt = 0;
+const WORKSPACE_CACHE_TTL = 60_000; // refresh project list every 60s
+/**
+ * Return the names of the directories found directly inside the workspace.
+ *
+ * The result is cached in module state and reused until it is older than
+ * WORKSPACE_CACHE_TTL. Only WORKSPACE_DIR is scanned; the root list is kept as
+ * an array, but project paths are resolved against WORKSPACE_DIR elsewhere.
+ * Entries that cannot be stat'ed are skipped. If the scan itself throws, an
+ * empty list is returned and the refresh timestamp is left untouched, so the
+ * next call scans again.
+ */
+function getWorkspaceProjects(): string[] {
+  const scanTime = Date.now();
+  const cacheAge = scanTime - _workspaceProjectsRefreshedAt;
+  if (_workspaceProjects && cacheAge < WORKSPACE_CACHE_TTL) {
+    return _workspaceProjects;
+  }
+  // An entry counts as a project only when stat() succeeds and reports a directory.
+  const isProjectDir = (root: string, entry: string): boolean => {
+    try {
+      return statSync(join(root, entry)).isDirectory();
+    } catch {
+      return false; // skip unreadable entries
+    }
+  };
+  let found: string[] = [];
+  try {
+    const roots = [WORKSPACE_DIR];
+    for (const root of roots) {
+      if (!existsSync(root)) continue;
+      found = found.concat(readdirSync(root).filter((entry) => isProjectDir(root, entry)));
+    }
+    _workspaceProjectsRefreshedAt = scanTime;
+  } catch {
+    found = [];
+  }
+  _workspaceProjects = found;
+  return found;
+}
+/**
+ * Find the workspace project mentioned in `prompt`, if any.
+ *
+ * Project names come from getWorkspaceProjects() and are tried longest first,
+ * so a longer name wins over a shorter name it contains. Matching is
+ * case-insensitive and requires the name to stand alone: it must not be
+ * preceded or followed by a word character or a hyphen.
+ *
+ * @param prompt Text to search for a project name.
+ * @returns The matched project's name and its path under WORKSPACE_DIR, or
+ *          null when no project is mentioned or the matched directory no
+ *          longer exists.
+ */
+function detectProjectFromPrompt(prompt: string): { name: string; path: string } | null {
+  const projects = getWorkspaceProjects();
+  if (!projects.length) return null;
+  // Sort by name length descending — match longest project name first
+  // (e.g. "openclaw-cli-bridge-elvatis" before "openclaw-cli-bridge")
+  const sorted = [...projects].sort((a, b) => b.length - a.length);
+  for (const name of sorted) {
+    const escaped = name.replace(/[.*+?^${}()|[\]\\]/g, "\\$&");
+    // `\b` is not used here: it treats "-" as a boundary, so "openclaw-cli-bridge"
+    // would match inside "openclaw-cli-bridge-foo", and it cannot anchor a name
+    // that starts or ends with a non-word character. Explicit lookarounds
+    // require the name to be a distinct token instead.
+    const regex = new RegExp(`(?<![\\w-])${escaped}(?![\\w-])`, "i");
+    if (!regex.test(prompt)) continue;
+    const projectPath = join(WORKSPACE_DIR, name);
+    // The project list is cached, so re-check that the directory still exists.
+    if (existsSync(projectPath)) {
+      return { name, path: projectPath };
+    }
+  }
+  return null;
+}
 /**
  * Route a chat completion to the correct CLI based on model prefix.
  *   cli-gemini/<id>      → gemini CLI
@@ -941,9 +1017,19 @@ export async function routeToCliRunner(
   opts: RouteOptions = {}
 ): Promise<CliToolResult> {
   const toolCount = opts.tools?.length ?? 0;
-  const prompt = formatPrompt(messages, toolCount);
+  let prompt = formatPrompt(messages, toolCount);
   const hasTools = toolCount > 0;
+  // Auto-detect project from prompt and set workdir + inject context
+  if (!opts.workdir) {
+    const detected = detectProjectFromPrompt(prompt);
+    if (detected) {
+      opts = { ...opts, workdir: detected.path };
+      prompt = `[Context: Working directory is ${detected.path}]\n\n${prompt}`;
+      debugLog("WORKSPACE", `auto-detected project "${detected.name}"`, { path: detected.path });
+    }
+  }
   // Strip "vllm/" prefix if present — OpenClaw sends the full provider path
   // (e.g. "vllm/cli-claude/claude-sonnet-4-6") but the router only needs the
   // "cli-<type>/<model>" portion.

package/src/config.ts CHANGED Viewed

@@ -162,6 +162,9 @@ export const DEFAULT_MODEL_FALLBACKS: Record<string, string[]> = {
 /** Base directory for all CLI bridge state files. */
 export const OPENCLAW_DIR = join(homedir(), ".openclaw");
+/** Workspace directory containing all projects. */
+export const WORKSPACE_DIR = join(OPENCLAW_DIR, "workspace");
 /** State file — persists the model active before the last /cli-* switch. */
 export const STATE_FILE = join(OPENCLAW_DIR, "cli-bridge-state.json");

package/src/debug-log.ts CHANGED Viewed

@@ -38,11 +38,19 @@ function ts(): string {
   return new Date().toISOString();
 }
+/**
+ * Process-wide switch for debug logging; `debugLog()` returns immediately while it is off.
+ * The switch is not automatic: a caller has to flip it via `setDebugLogEnabled()`
+ * (the proxy server turns it off when started on port 0, which is how test instances run).
+ * Without this, every test run pollutes the production debug log with 43+ fake requests.
+ */
+let _enabled = true;
+export function setDebugLogEnabled(enabled: boolean): void { _enabled = enabled; }
 /**
  * Append a debug line to the log file.
  * Non-blocking, never throws — logging must not crash the bridge.
  */
 export function debugLog(category: string, message: string, data?: Record<string, unknown>): void {
+  if (!_enabled) return;
   try {
     ensureDir();
     rotate();

package/src/proxy-server.ts CHANGED Viewed

@@ -34,7 +34,7 @@ import {
   DEFAULT_MODEL_TIMEOUTS,
   TOOL_ROUTING_THRESHOLD,
 } from "./config.js";
-import { debugLog, DEBUG_LOG_PATH, getLogTail, watchLogFile } from "./debug-log.js";
+import { debugLog, DEBUG_LOG_PATH, getLogTail, watchLogFile, setDebugLogEnabled } from "./debug-log.js";
 // ── Active request tracking ─────────────────────────────────────────────────
@@ -212,6 +212,9 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
         reject(err);
       }
     });
+    // Disable debug file logging for test instances (port 0) to avoid polluting production logs
+    if (opts.port === 0) setDebugLogEnabled(false);
     server.listen(opts.port, "127.0.0.1", () => {
       opts.log(
         `[cli-bridge] proxy listening on :${opts.port}`

package/test/cli-runner-extended.test.ts CHANGED Viewed

@@ -87,7 +87,7 @@ describe("runCodex()", () => {
     expect(result).toBe("codex result");
     expect(mockSpawn).toHaveBeenCalledWith(
       "codex",
-      ["exec", "--model", "gpt-5.3-codex", "--full-auto"],
+      expect.arrayContaining(["exec", "--model", "gpt-5.3-codex", "--full-auto"]),
       expect.any(Object)
     );
   });