npm - @elvatis_com/openclaw-cli-bridge-elvatis - Versions diffs - 2.4.0 → 2.6.0 - Mend

@elvatis_com/openclaw-cli-bridge-elvatis 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (14) hide show

package/README.md +33 -3
package/SKILL.md +1 -1
package/index.ts +38 -23
package/openclaw.plugin.json +21 -2
package/package.json +1 -1
package/src/cli-runner.ts +119 -33
package/src/config.ts +217 -0
package/src/provider-sessions.ts +264 -0
package/src/proxy-server.ts +76 -15
package/src/session-manager.ts +24 -7
package/test/cli-runner-extended.test.ts +72 -0
package/test/config.test.ts +102 -0
package/test/provider-sessions.test.ts +294 -0
package/test/session-manager.test.ts +14 -0

package/src/config.ts ADDED Viewed

@@ -0,0 +1,217 @@
+/**
+ * config.ts
+ *
+ * Central configuration defaults for the CLI bridge plugin.
+ * All magic numbers, timeouts, paths, and constants live here.
+ * Import from this module instead of scattering literals across the codebase.
+ *
+ * Values can be overridden at runtime via openclaw.plugin.json configSchema
+ * or via the CliPluginConfig interface in index.ts.
+ */
+import { homedir, tmpdir } from "node:os";
+import { join } from "node:path";
+// ──────────────────────────────────────────────────────────────────────────────
+// Proxy server
+// ──────────────────────────────────────────────────────────────────────────────
+/** Default port for the local OpenAI-compatible proxy server. */
+export const DEFAULT_PROXY_PORT = 31337;
+/**
+ * Default API key between OpenClaw vllm provider and the proxy.
+ * NOTE(review): fixed, publicly visible value — presumably a placeholder rather than a secret; confirm.
+ */
+export const DEFAULT_PROXY_API_KEY = "cli-bridge";
+/**
+ * Default base timeout for CLI subprocess responses (ms).
+ * The proxy uses it when neither a per-model timeout nor a global timeout is
+ * configured, then adds the per-message and per-tool extras defined below.
+ */
+export const DEFAULT_PROXY_TIMEOUT_MS = 300_000; // 5 min
+/** Upper bound on the effective timeout (base + message extra + tool extra), in ms. */
+export const MAX_EFFECTIVE_TIMEOUT_MS = 600_000; // 10 min
+/** Extra timeout added for each message beyond the first 10 in the conversation (ms). */
+export const TIMEOUT_PER_EXTRA_MSG_MS = 2_000;
+/** Extra timeout added for each tool definition in the request (ms). */
+export const TIMEOUT_PER_TOOL_MS = 5_000;
+/** Interval between SSE keepalive comments — prevents OpenClaw read-timeout during long CLI runs (ms). */
+export const SSE_KEEPALIVE_INTERVAL_MS = 15_000;
+// ──────────────────────────────────────────────────────────────────────────────
+// CLI subprocess
+// ──────────────────────────────────────────────────────────────────────────────
+/** Default timeout for individual CLI subprocess invocations (ms); a separate value from DEFAULT_PROXY_TIMEOUT_MS. */
+export const DEFAULT_CLI_TIMEOUT_MS = 120_000; // 2 min
+/** Grace period between SIGTERM and SIGKILL when a timeout fires (ms). */
+export const TIMEOUT_GRACE_MS = 5_000;
+/** Maximum number of messages included in the prompt sent to CLI subprocesses. */
+export const MAX_MESSAGES = 20;
+/** Maximum characters of a single message's content; longer content is truncated. */
+export const MAX_MSG_CHARS = 4_000;
+// ──────────────────────────────────────────────────────────────────────────────
+// Session manager (long-running sessions)
+// ──────────────────────────────────────────────────────────────────────────────
+/**
+ * Auto-cleanup threshold: sessions older than this are killed and removed (ms).
+ * Applies to the session manager only; provider sessions use PROVIDER_SESSION_TTL_MS.
+ */
+export const SESSION_TTL_MS = 30 * 60 * 1_000; // 30 min
+/** Interval between runs of the session cleanup sweep (ms). */
+export const CLEANUP_INTERVAL_MS = 5 * 60 * 1_000; // 5 min
+/** Grace period between SIGTERM and SIGKILL for session termination (ms). */
+export const SESSION_KILL_GRACE_MS = 5_000;
+// ──────────────────────────────────────────────────────────────────────────────
+// Provider sessions (persistent session registry)
+// ──────────────────────────────────────────────────────────────────────────────
+/**
+ * Default TTL for provider sessions before they're considered stale (ms).
+ * Measured from a session's last activity (`updatedAt`): the registry removes
+ * older sessions during its sweep and skips them when loading from disk.
+ */
+export const PROVIDER_SESSION_TTL_MS = 2 * 60 * 60 * 1_000; // 2 hours
+/** Interval of the registry's sweep timer for stale provider sessions (ms). */
+export const PROVIDER_SESSION_SWEEP_MS = 10 * 60 * 1_000; // 10 min
+// ──────────────────────────────────────────────────────────────────────────────
+// Per-model timeout defaults (ms)
+// ──────────────────────────────────────────────────────────────────────────────
+/**
+ * Default per-model timeout overrides, keyed by model ID.
+ * These are applied as the base timeout before dynamic scaling.
+ * Override via `modelTimeouts` in plugin config.
+ * NOTE(review): keys presumably must match the model ID exactly as the proxy
+ * receives it (without a "vllm/" prefix) — confirm where this map is wired in.
+ *
+ * Strategy:
+ *   - Heavy/agentic models (Opus, GPT-5.4): 5 min — need time for tool use
+ *   - Standard interactive (Sonnet, Pro, GPT-5.3): 3 min
+ *   - Fast/lightweight (Haiku, Flash, Mini): 90s
+ */
+export const DEFAULT_MODEL_TIMEOUTS: Record<string, number> = {
+  "cli-claude/claude-opus-4-6":        300_000,  // 5 min
+  "cli-claude/claude-sonnet-4-6":      180_000,  // 3 min
+  "cli-claude/claude-haiku-4-5":        90_000,  // 90s
+  "cli-gemini/gemini-2.5-pro":         180_000,  // 3 min
+  "cli-gemini/gemini-2.5-flash":        90_000,  // 90s
+  "cli-gemini/gemini-3-pro-preview":   180_000,  // 3 min
+  "cli-gemini/gemini-3-flash-preview":  90_000,  // 90s
+  "openai-codex/gpt-5.4":             300_000,  // 5 min
+  "openai-codex/gpt-5.3-codex":       180_000,  // 3 min
+  "openai-codex/gpt-5.1-codex-mini":   90_000,  // 90s
+};
+// ──────────────────────────────────────────────────────────────────────────────
+// Model fallback chain
+// ──────────────────────────────────────────────────────────────────────────────
+/**
+ * Default fallback chain: when a primary model fails (timeout, error),
+ * retry once with the lighter variant.
+ *
+ * Maps a primary model ID to the model ID to retry with. Only Gemini and
+ * Claude models have entries; a model without an entry has no default fallback.
+ */
+export const DEFAULT_MODEL_FALLBACKS: Record<string, string> = {
+  "cli-gemini/gemini-2.5-pro":        "cli-gemini/gemini-2.5-flash",
+  "cli-gemini/gemini-3-pro-preview":  "cli-gemini/gemini-3-flash-preview",
+  "cli-claude/claude-opus-4-6":       "cli-claude/claude-sonnet-4-6",
+  "cli-claude/claude-sonnet-4-6":     "cli-claude/claude-haiku-4-5",
+};
+// ──────────────────────────────────────────────────────────────────────────────
+// Paths
+// ──────────────────────────────────────────────────────────────────────────────
+/** Base directory for CLI bridge state files: `.openclaw` inside the user's home directory. */
+export const OPENCLAW_DIR = join(homedir(), ".openclaw");
+/** State file — persists the model active before the last /cli-* switch. */
+export const STATE_FILE = join(OPENCLAW_DIR, "cli-bridge-state.json");
+/** Pending switch file — stores a staged model switch not yet applied. */
+export const PENDING_FILE = join(OPENCLAW_DIR, "cli-bridge-pending.json");
+/**
+ * Provider session registry file.
+ * Unlike the two files above it lives in a `cli-bridge` subdirectory, which the
+ * registry creates on demand before writing.
+ */
+export const PROVIDER_SESSIONS_FILE = join(OPENCLAW_DIR, "cli-bridge", "sessions.json");
+/** Temporary directory for multimodal media files, located under the OS temp directory. */
+export const MEDIA_TMP_DIR = join(tmpdir(), "cli-bridge-media");
+/** Browser profile directories, one per browser-driven provider, all under OPENCLAW_DIR. */
+export const PROFILE_DIRS = {
+  grok:    join(OPENCLAW_DIR, "grok-profile"),
+  gemini:  join(OPENCLAW_DIR, "gemini-profile"),
+  claude:  join(OPENCLAW_DIR, "claude-profile"),
+  chatgpt: join(OPENCLAW_DIR, "chatgpt-profile"),
+} as const;
+// ──────────────────────────────────────────────────────────────────────────────
+// Browser automation
+// ──────────────────────────────────────────────────────────────────────────────
+/** Navigation timeout for Playwright page.goto (ms). */
+export const BROWSER_NAV_TIMEOUT_MS = 15_000;
+/** Delay after page load before interacting with the page (ms). */
+export const BROWSER_PAGE_LOAD_DELAY_MS = 2_000;
+/** Delay after typing into input fields (ms). */
+export const BROWSER_INPUT_DELAY_MS = 300;
+/** Default timeout for browser-based completions (ms). */
+export const BROWSER_COMPLETION_TIMEOUT_MS = 120_000; // 2 min
+/** Number of consecutive stable reads required to confirm a streaming response is done. */
+export const BROWSER_STABLE_CHECKS = 3;
+/** Interval between stability checks (ms). */
+export const BROWSER_STABLE_INTERVAL_MS = 500;
+/** Stability-check interval for Gemini (ms); longer than BROWSER_STABLE_INTERVAL_MS due to slower streaming. */
+export const GEMINI_STABLE_INTERVAL_MS = 600;
+// ──────────────────────────────────────────────────────────────────────────────
+// Claude auth
+// ──────────────────────────────────────────────────────────────────────────────
+/** Refresh the OAuth token this many ms before it expires. */
+export const CLAUDE_REFRESH_BEFORE_EXPIRY_MS = 30 * 60 * 1_000; // 30 min
+/**
+ * Sync window for token refresh (ms).
+ * NOTE(review): what exactly is synchronised within this window is not visible here — confirm against the auth module.
+ */
+export const CLAUDE_REFRESH_SYNC_WINDOW_MS = 5 * 60 * 1_000; // 5 min
+/** Max wait for a single token refresh attempt (ms). */
+export const CLAUDE_REFRESH_TIMEOUT_MS = 30_000;
+/** Polling interval for proactive token refresh (ms). */
+export const CLAUDE_REFRESH_POLL_INTERVAL_MS = 10 * 60 * 1_000; // 10 min
+// ──────────────────────────────────────────────────────────────────────────────
+// Workdir isolation
+// ──────────────────────────────────────────────────────────────────────────────
+/** Name prefix for temporary workdir directories. */
+export const WORKDIR_PREFIX = "cli-bridge-";
+/** Max age of an orphaned workdir before it is swept (ms). */
+export const WORKDIR_ORPHAN_MAX_AGE_MS = 60 * 60 * 1_000; // 1 hour
+// ──────────────────────────────────────────────────────────────────────────────
+// BitNet
+// ──────────────────────────────────────────────────────────────────────────────
+/** Default URL for the local BitNet llama-server; used when no server URL is supplied to the proxy. */
+export const DEFAULT_BITNET_SERVER_URL = "http://127.0.0.1:8082";
+/**
+ * Max messages to send to BitNet (4096 token context limit).
+ * The proxy keeps only the last N non-system messages of the conversation.
+ */
+export const BITNET_MAX_MESSAGES = 6;
+/**
+ * Minimal system prompt for BitNet to conserve tokens.
+ * The proxy drops incoming system messages and sends this one instead.
+ * NOTE(review): hard-codes an assistant name, a user name and a timezone —
+ * presumably deployment-specific; consider making it configurable.
+ */
+export const BITNET_SYSTEM_PROMPT =
+  "You are Akido, a concise AI assistant. Answer briefly and directly. Current user: Emre. Timezone: Europe/Berlin.";
+// ──────────────────────────────────────────────────────────────────────────────
+// Default model for /cli-test
+// ──────────────────────────────────────────────────────────────────────────────
+export const CLI_TEST_DEFAULT_MODEL = "cli-claude/claude-sonnet-4-6"; // model ID used by /cli-test when none is given — NOTE(review): confirm at the call site

package/src/provider-sessions.ts ADDED Viewed

@@ -0,0 +1,264 @@
+/**
+ * provider-sessions.ts
+ *
+ * Persistent session registry for CLI bridge provider sessions.
+ *
+ * A "provider session" represents a long-lived context with a CLI provider
+ * (Claude, Gemini, Codex, etc.). Sessions survive across individual runs:
+ * when a run times out, the session persists so that follow-up runs can
+ * resume in the same context.
+ *
+ * Session vs Run:
+ *   - Session: long-lived unit (provider context, profile, remote session ID)
+ *   - Run: single request within a session (messages, tools, timeout)
+ *
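+ * Storage: in-memory Map, written to ~/.openclaw/cli-bridge/sessions.json after a
+ * session is created, a run is recorded, a session is deleted or swept, and on
+ * stop(). A touch alone only marks the registry dirty until the next write.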
+ */
+import { randomBytes } from "node:crypto";
+import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
+import { dirname } from "node:path";
+import {
+  PROVIDER_SESSIONS_FILE,
+  PROVIDER_SESSION_TTL_MS,
+  PROVIDER_SESSION_SWEEP_MS,
+} from "./config.js";
+// ──────────────────────────────────────────────────────────────────────────────
+// Types
+// ──────────────────────────────────────────────────────────────────────────────
+export type ProviderAlias = "claude" | "gemini" | "grok" | "codex" | "opencode" | "pi" | "bitnet" | string; // the trailing `| string` makes this equivalent to `string`; the literals only document known providers
+export type SessionState = "active" | "idle" | "expired"; // lifecycle state stored on each ProviderSession
+export interface ProviderSession { // one record of the provider session registry; persisted as JSON
+  /** Unique session ID of the form "<provider>:session-<12 hex chars>", e.g. "claude:session-a1b2c3d4e5f6". */
+  id: string;
+  /** Provider type. The proxy passes the model's prefix before "/" here (e.g. "cli-claude"). */
+  provider: ProviderAlias;
+  /** Full model alias, e.g. "cli-claude/claude-sonnet-4-6". */
+  modelAlias: string;
+  /** Creation time in milliseconds since the Unix epoch (Date.now()). */
+  createdAt: number;
+  /** Time of the last activity (touch or recorded run) in milliseconds since the Unix epoch. */
+  updatedAt: number;
+  /** Current session state: "active" when created or touched, "idle" after a recorded run, "expired" when swept. */
+  state: SessionState;
+  /** Total runs recorded in this session, including runs that timed out. */
+  runCount: number;
+  /** Number of recorded runs that timed out. */
+  timeoutCount: number;
+  /** Provider-specific state (profile path, remote session ID, etc.). */
+  meta: Record<string, unknown>;
+}
+export interface CreateSessionOptions { // options applied only when a new session is actually created
+  /** Provider-specific metadata stored as the session's `meta`; defaults to an empty object when omitted. */
+  meta?: Record<string, unknown>;
+}
+// ──────────────────────────────────────────────────────────────────────────────
+// Registry
+// ──────────────────────────────────────────────────────────────────────────────
+/**
+ * Serialized format of the sessions file.
+ * `version` is fixed at 1; a file with any other version is ignored on load.
+ */
+interface SessionStore {
+  version: 1;
+  sessions: ProviderSession[];
+}
+/**
+ * In-memory registry of provider sessions, mirrored to PROVIDER_SESSIONS_FILE.
+ *
+ * Construction loads the persisted sessions and starts an unref'd sweep timer.
+ * Mutations mark the registry dirty; most of them also write the file
+ * synchronously. Disk errors are swallowed on purpose — the in-memory state
+ * remains usable when the file cannot be read or written.
+ */
+export class ProviderSessionRegistry {
+  private sessions = new Map<string, ProviderSession>();
+  private sweepTimer: ReturnType<typeof setInterval> | null = null;
+  private dirty = false;
+  constructor() {
+    this.load();
+    this.sweepTimer = setInterval(() => this.sweep(), PROVIDER_SESSION_SWEEP_MS);
+    // Do not let the sweep timer keep the process alive.
+    if (this.sweepTimer.unref) this.sweepTimer.unref();
+  }
+  // ── CRUD ─────────────────────────────────────────────────────────────────
+  /**
+   * Create a new provider session in state "active" and persist it.
+   *
+   * @param provider   Provider alias; becomes the prefix of the session ID.
+   * @param modelAlias Full model alias the session is bound to.
+   * @param opts       Optional metadata stored on the session.
+   * @returns The new session, with an ID of the form "<provider>:session-<12 hex chars>".
+   */
+  createSession(
+    provider: ProviderAlias,
+    modelAlias: string,
+    opts: CreateSessionOptions = {}
+  ): ProviderSession {
+    const now = Date.now();
+    const id = `${provider}:session-${randomBytes(6).toString("hex")}`;
+    const session: ProviderSession = {
+      id,
+      provider,
+      modelAlias,
+      createdAt: now,
+      updatedAt: now,
+      state: "active",
+      runCount: 0,
+      timeoutCount: 0,
+      meta: opts.meta ?? {},
+    };
+    this.sessions.set(id, session);
+    this.dirty = true;
+    this.flush();
+    return session;
+  }
+  /** Get a session by ID. Returns undefined if not found. */
+  getSession(id: string): ProviderSession | undefined {
+    return this.sessions.get(id);
+  }
+  /**
+   * Find an existing non-expired session (state "active" or "idle") for the
+   * given provider+model.
+   * Returns the most recently updated match, or undefined.
+   *
+   * Staleness is enforced only by sweep() and load(): a session past its TTL
+   * that has not been swept yet can still be returned here.
+   */
+  findSession(provider: ProviderAlias, modelAlias: string): ProviderSession | undefined {
+    let best: ProviderSession | undefined;
+    for (const s of this.sessions.values()) {
+      if (s.provider !== provider || s.modelAlias !== modelAlias) continue;
+      if (s.state === "expired") continue;
+      if (!best || s.updatedAt > best.updatedAt) best = s;
+    }
+    return best;
+  }
+  /**
+   * Get or create a session for the given provider+model.
+   * Reuses (and touches) an existing non-expired session if available;
+   * `opts` is only applied when a new session is created.
+   */
+  ensureSession(
+    provider: ProviderAlias,
+    modelAlias: string,
+    opts: CreateSessionOptions = {}
+  ): ProviderSession {
+    const existing = this.findSession(provider, modelAlias);
+    if (existing) {
+      this.touchSession(existing.id);
+      return existing;
+    }
+    return this.createSession(provider, modelAlias, opts);
+  }
+  /**
+   * Update the session's last-activity timestamp and promote an "idle"
+   * session back to "active". Call this at the start of every run.
+   *
+   * Only marks the registry dirty; the change reaches disk with the next flush.
+   *
+   * @returns false when no session with this ID exists, true otherwise.
+   */
+  touchSession(id: string): boolean {
+    const session = this.sessions.get(id);
+    if (!session) return false;
+    session.updatedAt = Date.now();
+    if (session.state === "idle") session.state = "active";
+    this.dirty = true;
+    return true;
+  }
+  /**
+   * Record that a run finished in this session and persist the change.
+   * Unknown IDs are ignored.
+   *
+   * @param id       Session ID.
+   * @param timedOut Whether the run ended in a timeout; also counted in timeoutCount.
+   */
+  recordRun(id: string, timedOut: boolean): void {
+    const session = this.sessions.get(id);
+    if (!session) return;
+    session.runCount++;
+    if (timedOut) session.timeoutCount++;
+    session.updatedAt = Date.now();
+    session.state = "idle"; // run finished, session stays alive
+    this.dirty = true;
+    this.flush();
+  }
+  /** Delete a session by ID. Returns whether a session was removed. */
+  deleteSession(id: string): boolean {
+    const deleted = this.sessions.delete(id);
+    if (deleted) {
+      this.dirty = true;
+      this.flush();
+    }
+    return deleted;
+  }
+  /** List all sessions as a new array (the session objects themselves are shared). */
+  listSessions(): ProviderSession[] {
+    return [...this.sessions.values()];
+  }
+  /** Get per-state counts for logging/status. */
+  stats(): { total: number; active: number; idle: number; expired: number } {
+    let active = 0, idle = 0, expired = 0;
+    for (const s of this.sessions.values()) {
+      if (s.state === "active") active++;
+      else if (s.state === "idle") idle++;
+      else expired++;
+    }
+    return { total: this.sessions.size, active, idle, expired };
+  }
+  // ── Lifecycle ────────────────────────────────────────────────────────────
+  /** Sweep stale sessions (older than PROVIDER_SESSION_TTL_MS without activity). */
+  sweep(): void {
+    const now = Date.now();
+    let changed = false;
+    for (const [id, session] of this.sessions) {
+      if (now - session.updatedAt > PROVIDER_SESSION_TTL_MS) {
+        // Mark the object first so holders of a reference can see it expired.
+        session.state = "expired";
+        this.sessions.delete(id);
+        changed = true;
+      }
+    }
+    if (changed) {
+      this.dirty = true;
+      this.flush();
+    }
+  }
+  /**
+   * Stop the sweep timer and flush pending changes (for graceful shutdown).
+   * The timer is not restarted afterwards; the registry itself stays usable.
+   */
+  stop(): void {
+    if (this.sweepTimer) {
+      clearInterval(this.sweepTimer);
+      this.sweepTimer = null;
+    }
+    this.flush();
+  }
+  // ── Persistence ──────────────────────────────────────────────────────────
+  /**
+   * Runtime check that a parsed JSON value has the shape of a ProviderSession.
+   * Guards load() against hand-edited or partially written files.
+   */
+  private static isStoredSession(value: unknown): value is ProviderSession {
+    if (typeof value !== "object" || value === null) return false;
+    const s = value as Record<string, unknown>;
+    return (
+      typeof s.id === "string" && s.id.length > 0 &&
+      typeof s.provider === "string" &&
+      typeof s.modelAlias === "string" &&
+      typeof s.createdAt === "number" && Number.isFinite(s.createdAt) &&
+      typeof s.updatedAt === "number" && Number.isFinite(s.updatedAt) &&
+      (s.state === "active" || s.state === "idle" || s.state === "expired") &&
+      typeof s.runCount === "number" && Number.isFinite(s.runCount) &&
+      typeof s.timeoutCount === "number" && Number.isFinite(s.timeoutCount) &&
+      typeof s.meta === "object" && s.meta !== null && !Array.isArray(s.meta)
+    );
+  }
+  /**
+   * Load sessions from disk.
+   *
+   * A missing, unreadable or unparsable file leaves the registry empty.
+   * Entries are validated one by one, so a single malformed entry is skipped
+   * instead of aborting the load, and an entry without a numeric `updatedAt`
+   * can no longer slip past the TTL checks.
+   */
+  private load(): void {
+    let parsed: unknown;
+    try {
+      parsed = JSON.parse(readFileSync(PROVIDER_SESSIONS_FILE, "utf-8"));
+    } catch {
+      // No file yet or corrupt — start fresh
+      return;
+    }
+    if (typeof parsed !== "object" || parsed === null) return;
+    const store = parsed as { version?: unknown; sessions?: unknown };
+    if (store.version !== 1 || !Array.isArray(store.sessions)) return;
+    const entries: unknown[] = store.sessions;
+    const now = Date.now();
+    for (const entry of entries) {
+      if (!ProviderSessionRegistry.isStoredSession(entry)) continue;
+      // Skip expired sessions on load
+      if (now - entry.updatedAt > PROVIDER_SESSION_TTL_MS) continue;
+      this.sessions.set(entry.id, entry);
+    }
+  }
+  /** Write all sessions to disk if anything changed since the last successful write. */
+  private flush(): void {
+    if (!this.dirty) return;
+    try {
+      mkdirSync(dirname(PROVIDER_SESSIONS_FILE), { recursive: true });
+      const store: SessionStore = {
+        version: 1,
+        sessions: [...this.sessions.values()],
+      };
+      writeFileSync(PROVIDER_SESSIONS_FILE, JSON.stringify(store, null, 2) + "\n", "utf-8");
+      this.dirty = false;
+    } catch {
+      // Non-fatal — sessions are still in memory; `dirty` stays set so the
+      // next flush retries the write.
+    }
+  }
+}
+/**
+ * Shared singleton instance.
+ * Created when this module is first imported: the constructor reads the
+ * sessions file synchronously and starts the unref'd sweep timer.
+ */
+export const providerSessions = new ProviderSessionRegistry();

package/src/proxy-server.ts CHANGED Viewed

@@ -20,6 +20,17 @@ import type { BrowserContext } from "playwright";
 import { renderStatusPage, type StatusProvider } from "./status-template.js";
 import { sessionManager } from "./session-manager.js";
 import { metrics } from "./metrics.js";
+import { providerSessions } from "./provider-sessions.js";
+import {
+  DEFAULT_PROXY_TIMEOUT_MS,
+  MAX_EFFECTIVE_TIMEOUT_MS,
+  TIMEOUT_PER_EXTRA_MSG_MS,
+  TIMEOUT_PER_TOOL_MS,
+  SSE_KEEPALIVE_INTERVAL_MS,
+  DEFAULT_BITNET_SERVER_URL,
+  BITNET_MAX_MESSAGES,
+  BITNET_SYSTEM_PROMPT,
+} from "./config.js";
 export type GrokCompleteOptions = Parameters<typeof grokComplete>[1];
 export type GrokCompleteStreamOptions = Parameters<typeof grokCompleteStream>[1];
@@ -82,6 +93,20 @@ export interface ProxyServerOptions {
    * with the fallback model. Example: "cli-gemini/gemini-2.5-pro" → "cli-gemini/gemini-2.5-flash"
    */
   modelFallbacks?: Record<string, string>;
+  /**
+   * Per-model timeout overrides (ms). Keys are model IDs (without "vllm/" prefix).
+   * Use this to give heavy models more time or limit fast models.
+   *
+   * Example:
+   *   {
+   *     "cli-claude/claude-sonnet-4-6": 180_000,   // 3 min for interactive chat
+   *     "cli-claude/claude-opus-4-6":   300_000,    // 5 min for heavy tasks
+   *     "cli-claude/claude-haiku-4-5":  90_000,     // 90s for fast responses
+   *   }
+   *
+   * When not set for a model, falls back to proxyTimeoutMs (default 300s base).
+   */
+  modelTimeouts?: Record<string, number>;
 }
 /** Available CLI bridge models for GET /v1/models */
@@ -139,10 +164,11 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
       });
     });
-    // Stop the token refresh interval and session manager when the server closes (timer-leak prevention)
+    // Stop timers and flush state when the server closes (timer-leak prevention)
     server.on("close", () => {
       stopTokenRefresh();
       sessionManager.stop();
+      providerSessions.stop();
     });
     server.on("error", (err: NodeJS.ErrnoException) => {
@@ -533,7 +559,7 @@ async function handleRequest(
     // ── BitNet local inference routing ────────────────────────────────────────
     if (model.startsWith("local-bitnet/")) {
-      const bitnetUrl = opts.getBitNetServerUrl?.() ?? "http://127.0.0.1:8082";
+      const bitnetUrl = opts.getBitNetServerUrl?.() ?? DEFAULT_BITNET_SERVER_URL;
       const timeoutMs = opts.timeoutMs ?? 120_000;
       // llama-server (BitNet build) crashes with std::runtime_error on multi-part
       // content arrays (ref: https://github.com/ggerganov/llama.cpp/issues/8367).
@@ -550,18 +576,14 @@ async function handleRequest(
       };
       // BitNet has a 4096 token context window. Long sessions blow it up and
       // cause a hard C++ crash (no graceful error). Truncate to system prompt +
-      // last 10 messages (~2k tokens max) to stay safely within the limit.
-      const BITNET_MAX_MESSAGES = 6;
-      // Replace the full system prompt (MEMORY.md etc, ~2k+ tokens) with a
-      // minimal one so BitNet's 4096-token context isn't blown by the system msg alone.
-      const BITNET_SYSTEM = "You are Akido, a concise AI assistant. Answer briefly and directly. Current user: Emre. Timezone: Europe/Berlin.";
+      // last N messages (~2k tokens max) to stay safely within the limit.
       const allFlat = parsed.messages.map((m) => ({
         role: m.role,
         content: flattenContent(m.content),
       }));
       const nonSystemMsgs = allFlat.filter((m) => m.role !== "system");
       const truncated = nonSystemMsgs.slice(-BITNET_MAX_MESSAGES);
-      const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM }, ...truncated];
+      const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM_PROMPT }, ...truncated];
       const requestBody = JSON.stringify({ ...parsed, messages: bitnetMessages, tools: undefined });
       const bitnetStart = Date.now();
@@ -623,13 +645,25 @@ async function handleRequest(
     // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
     let result: CliToolResult;
     let usedModel = model;
-    const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined };
+    const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
+    // ── Provider session: ensure a persistent session for this model ────────
+    // Extract provider prefix from model (e.g. "cli-claude" from "cli-claude/claude-sonnet-4-6")
+    const providerPrefix = model.split("/")[0];
+    const incomingSessionId = (parsed as { providerSessionId?: string }).providerSessionId;
+    const session = incomingSessionId
+      ? (providerSessions.getSession(incomingSessionId) ?? providerSessions.ensureSession(providerPrefix, model))
+      : providerSessions.ensureSession(providerPrefix, model);
+    providerSessions.touchSession(session.id);
     // ── Dynamic timeout: scale with conversation size ────────────────────────
-    const baseTimeout = opts.timeoutMs ?? 300_000; // 5 min default (was 120s)
-    const msgExtra = Math.max(0, cleanMessages.length - 10) * 2_000;
-    const toolExtra = (tools?.length ?? 0) * 5_000;
-    const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
+    // Per-model timeout takes precedence, then global proxyTimeoutMs, then 300s default.
+    const perModelTimeout = opts.modelTimeouts?.[model];
+    const baseTimeout = perModelTimeout ?? opts.timeoutMs ?? DEFAULT_PROXY_TIMEOUT_MS;
+    const msgExtra = Math.max(0, cleanMessages.length - 10) * TIMEOUT_PER_EXTRA_MSG_MS;
+    const toolExtra = (tools?.length ?? 0) * TIMEOUT_PER_TOOL_MS;
+    const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, MAX_EFFECTIVE_TIMEOUT_MS);
+    opts.log(`[cli-bridge] ${model} session=${session.id} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
     // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
     let sseHeadersSent = false;
@@ -643,21 +677,26 @@ async function handleRequest(
       });
       sseHeadersSent = true;
       res.write(": keepalive\n\n");
-      keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, 15_000);
+      keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, SSE_KEEPALIVE_INTERVAL_MS);
     }
     const cliStart = Date.now();
     try {
       result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
       metrics.recordRequest(model, Date.now() - cliStart, true);
+      providerSessions.recordRun(session.id, false);
     } catch (err) {
       const primaryDuration = Date.now() - cliStart;
       const msg = (err as Error).message;
       // ── Model fallback: retry once with a lighter model if configured ────
+      const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
+      // Record the run (with timeout flag) — session is preserved, not deleted
+      providerSessions.recordRun(session.id, isTimeout);
       const fallbackModel = opts.modelFallbacks?.[model];
       if (fallbackModel) {
         metrics.recordRequest(model, primaryDuration, false);
-        opts.warn(`[cli-bridge] ${model} failed (${msg}), falling back to ${fallbackModel}`);
+        const reason = isTimeout ? `timeout by supervisor, session=${session.id} preserved` : msg;
+        opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
         const fallbackStart = Date.now();
         try {
           result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
@@ -768,6 +807,8 @@ async function handleRequest(
           },
         ],
         usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
+        // Propagate session ID so callers can resume in the same session
+        provider_session_id: session.id,
       };
       res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
@@ -887,6 +928,26 @@ async function handleRequest(
     return;
   }
+  // ── Provider session endpoints ──────────────────────────────────────────────
+  // GET /v1/provider-sessions — list all provider sessions with stats
+  if (url === "/v1/provider-sessions" && req.method === "GET") {
+    const sessions = providerSessions.listSessions();
+    const stats = providerSessions.stats();
+    res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
+    res.end(JSON.stringify({ sessions, stats }));
+    return;
+  }
+  // DELETE /v1/provider-sessions/:id — delete a specific provider session
+  const provSessionMatch = url.match(/^\/v1\/provider-sessions\/([a-zA-Z0-9:_-]+)$/);
+  if (provSessionMatch && req.method === "DELETE") {
+    const ok = providerSessions.deleteSession(decodeURIComponent(provSessionMatch[1]));
+    res.writeHead(ok ? 200 : 404, { "Content-Type": "application/json", ...corsHeaders() });
+    res.end(JSON.stringify({ ok }));
+    return;
+  }
   // 404
   res.writeHead(404, { "Content-Type": "application/json" });
   res.end(JSON.stringify({ error: { message: `Not found: ${url}`, type: "not_found" } }));