@elvatis_com/openclaw-cli-bridge-elvatis 2.4.0 → 2.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/src/config.ts ADDED
@@ -0,0 +1,217 @@
1
+ /**
2
+ * config.ts
3
+ *
4
+ * Central configuration defaults for the CLI bridge plugin.
5
+ * All magic numbers, timeouts, paths, and constants live here.
6
+ * Import from this module instead of scattering literals across the codebase.
7
+ *
8
+ * Values can be overridden at runtime via openclaw.plugin.json configSchema
9
+ * or via the CliPluginConfig interface in index.ts.
10
+ */
11
+
12
+ import { homedir, tmpdir } from "node:os";
13
+ import { join } from "node:path";
14
+
15
+ // ──────────────────────────────────────────────────────────────────────────────
16
+ // Proxy server
17
+ // ──────────────────────────────────────────────────────────────────────────────
18
+
19
+ /** Default port for the local OpenAI-compatible proxy server. */
20
+ export const DEFAULT_PROXY_PORT = 31337;
21
+
22
+ /** Default API key between OpenClaw vllm provider and the proxy. */
23
+ export const DEFAULT_PROXY_API_KEY = "cli-bridge";
24
+
25
+ /** Default base timeout for CLI subprocess responses (ms). Used when neither a per-model timeout nor a global proxy timeout is configured; the proxy then adds TIMEOUT_PER_EXTRA_MSG_MS per message beyond 10 and TIMEOUT_PER_TOOL_MS per tool, capped at MAX_EFFECTIVE_TIMEOUT_MS. */
26
+ export const DEFAULT_PROXY_TIMEOUT_MS = 300_000; // 5 min
27
+
28
+ /** Maximum effective timeout after dynamic scaling (ms). The cap is applied to base + per-message + per-tool extras, so it also limits per-model or configured base timeouts that exceed it. */
29
+ export const MAX_EFFECTIVE_TIMEOUT_MS = 600_000; // 10 min
30
+
31
+ /** Extra timeout per message beyond 10 in the conversation (ms). */
32
+ export const TIMEOUT_PER_EXTRA_MSG_MS = 2_000;
33
+
34
+ /** Extra timeout per tool definition in the request (ms). */
35
+ export const TIMEOUT_PER_TOOL_MS = 5_000;
36
+
37
+ /** SSE keepalive interval — prevents OpenClaw read-timeout during long CLI runs (ms). */
38
+ export const SSE_KEEPALIVE_INTERVAL_MS = 15_000;
39
+
40
+ // ──────────────────────────────────────────────────────────────────────────────
41
+ // CLI subprocess
42
+ // ──────────────────────────────────────────────────────────────────────────────
43
+
44
+ /** Default timeout for individual CLI subprocess invocations (ms). */
45
+ export const DEFAULT_CLI_TIMEOUT_MS = 120_000; // 2 min
46
+
47
+ /** Grace period between SIGTERM and SIGKILL when a timeout fires (ms). */
48
+ export const TIMEOUT_GRACE_MS = 5_000;
49
+
50
+ /** Max messages to include in the prompt sent to CLI subprocesses. */
51
+ export const MAX_MESSAGES = 20;
52
+
53
+ /** Max characters per message content before truncation. */
54
+ export const MAX_MSG_CHARS = 4_000;
55
+
56
+ // ──────────────────────────────────────────────────────────────────────────────
57
+ // Session manager (long-running sessions)
58
+ // ──────────────────────────────────────────────────────────────────────────────
59
+
60
+ /** Auto-cleanup threshold: sessions older than this are killed and removed (ms). */
61
+ export const SESSION_TTL_MS = 30 * 60 * 1_000; // 30 min
62
+
63
+ /** Interval for the session cleanup sweep (ms). */
64
+ export const CLEANUP_INTERVAL_MS = 5 * 60 * 1_000; // 5 min
65
+
66
+ /** Grace period between SIGTERM and SIGKILL for session termination (ms). */
67
+ export const SESSION_KILL_GRACE_MS = 5_000;
68
+
69
+ // ──────────────────────────────────────────────────────────────────────────────
70
+ // Provider sessions (persistent session registry)
71
+ // ──────────────────────────────────────────────────────────────────────────────
72
+
73
+ /** Inactivity TTL for provider sessions (ms). Sessions whose last activity is older than this are removed by the registry sweep and skipped when the sessions file is loaded. */
74
+ export const PROVIDER_SESSION_TTL_MS = 2 * 60 * 60 * 1_000; // 2 hours
75
+
76
+ /** Sweep interval for stale provider sessions (ms). */
77
+ export const PROVIDER_SESSION_SWEEP_MS = 10 * 60 * 1_000; // 10 min
78
+
79
+ // ──────────────────────────────────────────────────────────────────────────────
80
+ // Per-model timeout defaults (ms)
81
+ // ──────────────────────────────────────────────────────────────────────────────
82
+
83
+ /**
84
+ * Default per-model timeout overrides.
85
+ * These are applied as the base timeout before dynamic scaling.
86
+ * Override via `modelTimeouts` in plugin config.
87
+ *
88
+ * Strategy:
89
+ * - Heavy/agentic models (Opus, GPT-5.4): 5 min — need time for tool use
90
+ * - Standard interactive (Sonnet, Pro, GPT-5.3): 3 min
91
+ * - Fast/lightweight (Haiku, Flash, Mini): 90s
92
+ */
93
+ export const DEFAULT_MODEL_TIMEOUTS: Record<string, number> = {
94
+ "cli-claude/claude-opus-4-6": 300_000, // 5 min
95
+ "cli-claude/claude-sonnet-4-6": 180_000, // 3 min
96
+ "cli-claude/claude-haiku-4-5": 90_000, // 90s
97
+ "cli-gemini/gemini-2.5-pro": 180_000, // 3 min
98
+ "cli-gemini/gemini-2.5-flash": 90_000, // 90s
99
+ "cli-gemini/gemini-3-pro-preview": 180_000, // 3 min
100
+ "cli-gemini/gemini-3-flash-preview": 90_000, // 90s
101
+ "openai-codex/gpt-5.4": 300_000, // 5 min
102
+ "openai-codex/gpt-5.3-codex": 180_000, // 3 min
103
+ "openai-codex/gpt-5.1-codex-mini": 90_000, // 90s
104
+ };
105
+
106
+ // ──────────────────────────────────────────────────────────────────────────────
107
+ // Model fallback chain
108
+ // ──────────────────────────────────────────────────────────────────────────────
109
+
110
+ /**
111
+ * Default fallback chain: when a primary model fails (timeout, error),
112
+ * retry once with the lighter variant.
113
+ */
114
+ export const DEFAULT_MODEL_FALLBACKS: Record<string, string> = {
115
+ "cli-gemini/gemini-2.5-pro": "cli-gemini/gemini-2.5-flash",
116
+ "cli-gemini/gemini-3-pro-preview": "cli-gemini/gemini-3-flash-preview",
117
+ "cli-claude/claude-opus-4-6": "cli-claude/claude-sonnet-4-6",
118
+ "cli-claude/claude-sonnet-4-6": "cli-claude/claude-haiku-4-5",
119
+ };
120
+
121
+ // ──────────────────────────────────────────────────────────────────────────────
122
+ // Paths
123
+ // ──────────────────────────────────────────────────────────────────────────────
124
+
125
+ /** Base directory for all CLI bridge state files. */
126
+ export const OPENCLAW_DIR = join(homedir(), ".openclaw");
127
+
128
+ /** State file — persists the model active before the last /cli-* switch. */
129
+ export const STATE_FILE = join(OPENCLAW_DIR, "cli-bridge-state.json");
130
+
131
+ /** Pending switch file — stores a staged model switch not yet applied. */
132
+ export const PENDING_FILE = join(OPENCLAW_DIR, "cli-bridge-pending.json");
133
+
134
+ /** Provider session registry file. */
135
+ export const PROVIDER_SESSIONS_FILE = join(OPENCLAW_DIR, "cli-bridge", "sessions.json");
136
+
137
+ /** Temporary directory for multimodal media files. */
138
+ export const MEDIA_TMP_DIR = join(tmpdir(), "cli-bridge-media");
139
+
140
+ /** Browser profile directories. */
141
+ export const PROFILE_DIRS = {
142
+ grok: join(OPENCLAW_DIR, "grok-profile"),
143
+ gemini: join(OPENCLAW_DIR, "gemini-profile"),
144
+ claude: join(OPENCLAW_DIR, "claude-profile"),
145
+ chatgpt: join(OPENCLAW_DIR, "chatgpt-profile"),
146
+ } as const;
147
+
148
+ // ──────────────────────────────────────────────────────────────────────────────
149
+ // Browser automation
150
+ // ──────────────────────────────────────────────────────────────────────────────
151
+
152
+ /** Navigation timeout for Playwright page.goto (ms). */
153
+ export const BROWSER_NAV_TIMEOUT_MS = 15_000;
154
+
155
+ /** Delay after page load before interacting (ms). */
156
+ export const BROWSER_PAGE_LOAD_DELAY_MS = 2_000;
157
+
158
+ /** Delay after typing into input fields (ms). */
159
+ export const BROWSER_INPUT_DELAY_MS = 300;
160
+
161
+ /** Default timeout for browser-based completions (ms). */
162
+ export const BROWSER_COMPLETION_TIMEOUT_MS = 120_000;
163
+
164
+ /** Consecutive stable reads to confirm a streaming response is done. */
165
+ export const BROWSER_STABLE_CHECKS = 3;
166
+
167
+ /** Interval between stability checks (ms). */
168
+ export const BROWSER_STABLE_INTERVAL_MS = 500;
169
+
170
+ /** Gemini uses a longer stability interval due to slower streaming. */
171
+ export const GEMINI_STABLE_INTERVAL_MS = 600;
172
+
173
+ // ──────────────────────────────────────────────────────────────────────────────
174
+ // Claude auth
175
+ // ──────────────────────────────────────────────────────────────────────────────
176
+
177
+ /** Refresh OAuth token this many ms before expiry. */
178
+ export const CLAUDE_REFRESH_BEFORE_EXPIRY_MS = 30 * 60 * 1_000; // 30 min
179
+
180
+ /** Sync window for token refresh (ms). */
181
+ export const CLAUDE_REFRESH_SYNC_WINDOW_MS = 5 * 60 * 1_000; // 5 min
182
+
183
+ /** Max wait for a single token refresh attempt (ms). */
184
+ export const CLAUDE_REFRESH_TIMEOUT_MS = 30_000;
185
+
186
+ /** Polling interval for proactive token refresh (ms). */
187
+ export const CLAUDE_REFRESH_POLL_INTERVAL_MS = 10 * 60 * 1_000; // 10 min
188
+
189
+ // ──────────────────────────────────────────────────────────────────────────────
190
+ // Workdir isolation
191
+ // ──────────────────────────────────────────────────────────────────────────────
192
+
193
+ /** Prefix for temporary workdir directories. */
194
+ export const WORKDIR_PREFIX = "cli-bridge-";
195
+
196
+ /** Max age for orphaned workdirs before they are swept (ms). */
197
+ export const WORKDIR_ORPHAN_MAX_AGE_MS = 60 * 60 * 1_000; // 1 hour
198
+
199
+ // ──────────────────────────────────────────────────────────────────────────────
200
+ // BitNet
201
+ // ──────────────────────────────────────────────────────────────────────────────
202
+
203
+ /** Default URL for the local BitNet llama-server. */
204
+ export const DEFAULT_BITNET_SERVER_URL = "http://127.0.0.1:8082";
205
+
206
+ /** Max number of most-recent non-system messages forwarded to BitNet (4096 token context limit); the system prompt is added on top of these. */
207
+ export const BITNET_MAX_MESSAGES = 6;
208
+
209
+ /** Minimal system prompt for BitNet to conserve tokens. The proxy drops the request's own system messages and sends this one instead. NOTE(review): the assistant name, user name and timezone are hard-coded here — confirm this is intended for all installations. */
210
+ export const BITNET_SYSTEM_PROMPT =
211
+ "You are Akido, a concise AI assistant. Answer briefly and directly. Current user: Emre. Timezone: Europe/Berlin.";
212
+
213
+ // ──────────────────────────────────────────────────────────────────────────────
214
+ // Default model for /cli-test
215
+ // ──────────────────────────────────────────────────────────────────────────────
216
+
217
+ export const CLI_TEST_DEFAULT_MODEL = "cli-claude/claude-sonnet-4-6";
@@ -0,0 +1,264 @@
1
+ /**
2
+ * provider-sessions.ts
3
+ *
4
+ * Persistent session registry for CLI bridge provider sessions.
5
+ *
6
+ * A "provider session" represents a long-lived context with a CLI provider
7
+ * (Claude, Gemini, Codex, etc.). Sessions survive across individual runs:
8
+ * when a run times out, the session persists so that follow-up runs can
9
+ * resume in the same context.
10
+ *
11
+ * Session vs Run:
12
+ * - Session: long-lived unit (provider context, profile, remote session ID)
13
+ * - Run: single request within a session (messages, tools, timeout)
14
+ *
15
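+ * Storage: in-memory Map, written synchronously to ~/.openclaw/cli-bridge/sessions.json whenever a session is created, a run is recorded, a session is deleted or swept, and on stop().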
16
+ */
17
+
18
+ import { randomBytes } from "node:crypto";
19
+ import { readFileSync, writeFileSync, mkdirSync } from "node:fs";
20
+ import { dirname } from "node:path";
21
+ import {
22
+ PROVIDER_SESSIONS_FILE,
23
+ PROVIDER_SESSION_TTL_MS,
24
+ PROVIDER_SESSION_SWEEP_MS,
25
+ } from "./config.js";
26
+
27
+ // ──────────────────────────────────────────────────────────────────────────────
28
+ // Types
29
+ // ──────────────────────────────────────────────────────────────────────────────
30
+
31
+ export type ProviderAlias = "claude" | "gemini" | "grok" | "codex" | "opencode" | "pi" | "bitnet" | string; // NOTE: the trailing "| string" widens this alias to plain string, so the literals document known providers but are not enforced by the compiler.
32
+
33
+ export type SessionState = "active" | "idle" | "expired"; // "active" on create and when an idle session is touched; "idle" after recordRun(); "expired" is set by sweep() right before the session is removed.
34
+
35
+ export interface ProviderSession {
36
+ /** Unique session ID of the form "<provider>:session-<12 hex chars>", e.g. "claude:session-a1b2c3d4e5f6". */
37
+ id: string;
38
+ /** Provider type. */
39
+ provider: ProviderAlias;
40
+ /** Full model alias, e.g. "cli-claude/claude-sonnet-4-6". */
41
+ modelAlias: string;
42
+ /** Creation time as returned by Date.now() (milliseconds since the Unix epoch). */
43
+ createdAt: number;
44
+ /** Time of the last activity (creation, touch, or recorded run), in milliseconds since the Unix epoch. */
45
+ updatedAt: number;
46
+ /** Current session state. */
47
+ state: SessionState;
48
+ /** Total runs recorded in this session, whether they succeeded or failed. */
49
+ runCount: number;
50
+ /** Number of recorded runs that were flagged as timed out. */
51
+ timeoutCount: number;
52
+ /** Provider-specific state (profile path, remote session ID, etc.). */
53
+ meta: Record<string, unknown>;
54
+ }
55
+
56
+ export interface CreateSessionOptions {
57
+ /** Provider-specific metadata stored as the new session's `meta`; defaults to an empty object when omitted. */
58
+ meta?: Record<string, unknown>;
59
+ }
60
+
61
+ // ──────────────────────────────────────────────────────────────────────────────
62
+ // Registry
63
+ // ──────────────────────────────────────────────────────────────────────────────
64
+
65
+ /** Serialized format of the sessions file. */
66
+ interface SessionStore {
67
+ version: 1;
68
+ sessions: ProviderSession[];
69
+ }
70
+
71
/**
 * Registry of provider sessions, held in memory and mirrored to
 * PROVIDER_SESSIONS_FILE.
 *
 * Constructing an instance reads the sessions file (if any) and starts an
 * unref'd interval that sweeps sessions idle for longer than
 * PROVIDER_SESSION_TTL_MS. Mutating operations (create, recordRun, delete,
 * sweep, stop) write the file synchronously; touchSession only marks the
 * registry dirty and is persisted by the next write.
 */
export class ProviderSessionRegistry {
  private sessions = new Map<string, ProviderSession>();
  private sweepTimer: ReturnType<typeof setInterval> | null = null;
  private dirty = false;

  constructor() {
    this.load();
    this.sweepTimer = setInterval(() => this.sweep(), PROVIDER_SESSION_SWEEP_MS);
    // Do not keep the process alive just for the sweep timer.
    if (this.sweepTimer.unref) this.sweepTimer.unref();
  }

  // ── CRUD ─────────────────────────────────────────────────────────────────

  /**
   * Create a new provider session and persist it.
   *
   * @param provider   Provider alias; becomes the prefix of the session ID.
   * @param modelAlias Full model alias the session is bound to.
   * @param opts       Optional metadata stored as the session's `meta`.
   * @returns The new session, with ID "<provider>:session-<12 hex chars>".
   */
  createSession(
    provider: ProviderAlias,
    modelAlias: string,
    opts: CreateSessionOptions = {}
  ): ProviderSession {
    const now = Date.now();
    const id = `${provider}:session-${randomBytes(6).toString("hex")}`;
    const session: ProviderSession = {
      id,
      provider,
      modelAlias,
      createdAt: now,
      updatedAt: now,
      state: "active",
      runCount: 0,
      timeoutCount: 0,
      meta: opts.meta ?? {},
    };
    this.sessions.set(id, session);
    this.dirty = true;
    this.flush();
    return session;
  }

  /** Get a session by ID. Returns undefined if not found. */
  getSession(id: string): ProviderSession | undefined {
    return this.sessions.get(id);
  }

  /**
   * Find an existing non-expired (active or idle) session for the given
   * provider+model.
   *
   * @returns The most recently updated match, or undefined when none exists.
   */
  findSession(provider: ProviderAlias, modelAlias: string): ProviderSession | undefined {
    let best: ProviderSession | undefined;
    for (const s of this.sessions.values()) {
      if (s.provider !== provider || s.modelAlias !== modelAlias) continue;
      if (s.state === "expired") continue;
      if (!best || s.updatedAt > best.updatedAt) best = s;
    }
    return best;
  }

  /**
   * Get or create a session for the given provider+model.
   * An existing non-expired session is touched and returned as-is (`opts` is
   * only used when a new session has to be created).
   */
  ensureSession(
    provider: ProviderAlias,
    modelAlias: string,
    opts: CreateSessionOptions = {}
  ): ProviderSession {
    const existing = this.findSession(provider, modelAlias);
    if (existing) {
      this.touchSession(existing.id);
      return existing;
    }
    return this.createSession(provider, modelAlias, opts);
  }

  /**
   * Update the session's last-activity timestamp and reactivate it if it was
   * idle. Call this at the start of every run.
   *
   * The change is only marked dirty here; it reaches disk with the next flush.
   *
   * @returns false when no session with this ID exists.
   */
  touchSession(id: string): boolean {
    const session = this.sessions.get(id);
    if (!session) return false;
    session.updatedAt = Date.now();
    if (session.state === "idle") session.state = "active";
    this.dirty = true;
    return true;
  }

  /**
   * Record that a run finished in this session and persist the change.
   * Unknown IDs are ignored.
   *
   * @param timedOut Whether the run ended in a timeout.
   */
  recordRun(id: string, timedOut: boolean): void {
    const session = this.sessions.get(id);
    if (!session) return;
    session.runCount++;
    if (timedOut) session.timeoutCount++;
    session.updatedAt = Date.now();
    session.state = "idle"; // run finished, session stays alive
    this.dirty = true;
    this.flush();
  }

  /** Delete a session by ID. Returns true when a session was removed. */
  deleteSession(id: string): boolean {
    const deleted = this.sessions.delete(id);
    if (deleted) {
      this.dirty = true;
      this.flush();
    }
    return deleted;
  }

  /** List all sessions (a new array holding the live session objects). */
  listSessions(): ProviderSession[] {
    return [...this.sessions.values()];
  }

  /** Get summary stats for logging/status. */
  stats(): { total: number; active: number; idle: number; expired: number } {
    let active = 0, idle = 0, expired = 0;
    for (const s of this.sessions.values()) {
      if (s.state === "active") active++;
      else if (s.state === "idle") idle++;
      else expired++;
    }
    return { total: this.sessions.size, active, idle, expired };
  }

  // ── Lifecycle ────────────────────────────────────────────────────────────

  /** Sweep stale sessions (older than PROVIDER_SESSION_TTL_MS without activity). */
  sweep(): void {
    const now = Date.now();
    let changed = false;
    for (const [id, session] of this.sessions) {
      if (now - session.updatedAt > PROVIDER_SESSION_TTL_MS) {
        // Mark the object as expired so holders of a reference can tell,
        // then drop it from the registry.
        session.state = "expired";
        this.sessions.delete(id);
        changed = true;
      }
    }
    if (changed) {
      this.dirty = true;
      this.flush();
    }
  }

  /** Stop the sweep timer and flush pending changes (for graceful shutdown). */
  stop(): void {
    if (this.sweepTimer) {
      clearInterval(this.sweepTimer);
      this.sweepTimer = null;
    }
    this.flush();
  }

  // ── Persistence ──────────────────────────────────────────────────────────

  /**
   * Runtime check that a value parsed from the sessions file has the shape of
   * a ProviderSession. The file is plain JSON on disk, so it may have been
   * hand-edited, truncated, or written by another version.
   */
  private static isStoredSession(value: unknown): value is ProviderSession {
    if (typeof value !== "object" || value === null) return false;
    const s = value as Record<string, unknown>;
    return (
      typeof s.id === "string" &&
      s.id.length > 0 &&
      typeof s.provider === "string" &&
      typeof s.modelAlias === "string" &&
      typeof s.createdAt === "number" &&
      Number.isFinite(s.createdAt) &&
      typeof s.updatedAt === "number" &&
      Number.isFinite(s.updatedAt) &&
      (s.state === "active" || s.state === "idle" || s.state === "expired") &&
      typeof s.runCount === "number" &&
      typeof s.timeoutCount === "number" &&
      typeof s.meta === "object" &&
      s.meta !== null
    );
  }

  /**
   * Load sessions from disk.
   *
   * A missing or unparsable file leaves the registry empty. Individual
   * entries that are malformed or older than PROVIDER_SESSION_TTL_MS are
   * skipped, so one bad entry neither aborts the load nor ends up in the map
   * under a bogus key.
   */
  private load(): void {
    let parsed: unknown;
    try {
      parsed = JSON.parse(readFileSync(PROVIDER_SESSIONS_FILE, "utf-8"));
    } catch {
      // No file yet or corrupt — start fresh
      return;
    }
    if (typeof parsed !== "object" || parsed === null) return;
    const store = parsed as Partial<SessionStore>;
    if (store.version !== 1 || !Array.isArray(store.sessions)) return;

    const now = Date.now();
    for (const entry of store.sessions as unknown[]) {
      if (!ProviderSessionRegistry.isStoredSession(entry)) continue;
      // Skip expired sessions on load
      if (now - entry.updatedAt > PROVIDER_SESSION_TTL_MS) continue;
      this.sessions.set(entry.id, entry);
    }
  }

  /** Write all sessions to disk if anything changed since the last write. */
  private flush(): void {
    if (!this.dirty) return;
    try {
      mkdirSync(dirname(PROVIDER_SESSIONS_FILE), { recursive: true });
      const store: SessionStore = {
        version: 1,
        sessions: [...this.sessions.values()],
      };
      writeFileSync(PROVIDER_SESSIONS_FILE, JSON.stringify(store, null, 2) + "\n", "utf-8");
      this.dirty = false;
    } catch {
      // Non-fatal — sessions are still in memory; `dirty` stays set so the
      // next flush retries.
    }
  }
}
262
+
263
+ /** Shared singleton instance. Constructed when this module is imported: the constructor loads persisted sessions from disk and starts the unref'd sweep timer. */
264
+ export const providerSessions = new ProviderSessionRegistry();
@@ -20,6 +20,17 @@ import type { BrowserContext } from "playwright";
20
20
  import { renderStatusPage, type StatusProvider } from "./status-template.js";
21
21
  import { sessionManager } from "./session-manager.js";
22
22
  import { metrics } from "./metrics.js";
23
+ import { providerSessions } from "./provider-sessions.js";
24
+ import {
25
+ DEFAULT_PROXY_TIMEOUT_MS,
26
+ MAX_EFFECTIVE_TIMEOUT_MS,
27
+ TIMEOUT_PER_EXTRA_MSG_MS,
28
+ TIMEOUT_PER_TOOL_MS,
29
+ SSE_KEEPALIVE_INTERVAL_MS,
30
+ DEFAULT_BITNET_SERVER_URL,
31
+ BITNET_MAX_MESSAGES,
32
+ BITNET_SYSTEM_PROMPT,
33
+ } from "./config.js";
23
34
 
24
35
  export type GrokCompleteOptions = Parameters<typeof grokComplete>[1];
25
36
  export type GrokCompleteStreamOptions = Parameters<typeof grokCompleteStream>[1];
@@ -82,6 +93,20 @@ export interface ProxyServerOptions {
82
93
  * with the fallback model. Example: "cli-gemini/gemini-2.5-pro" → "cli-gemini/gemini-2.5-flash"
83
94
  */
84
95
  modelFallbacks?: Record<string, string>;
96
+ /**
97
+ * Per-model timeout overrides (ms). Keys are model IDs (without "vllm/" prefix).
98
+ * Use this to give heavy models more time or limit fast models.
99
+ *
100
+ * Example:
101
+ * {
102
+ * "cli-claude/claude-sonnet-4-6": 180_000, // 3 min for interactive chat
103
+ * "cli-claude/claude-opus-4-6": 300_000, // 5 min for heavy tasks
104
+ * "cli-claude/claude-haiku-4-5": 90_000, // 90s for fast responses
105
+ * }
106
+ *
107
+ * When not set for a model, falls back to proxyTimeoutMs (default 300s base).
108
+ */
109
+ modelTimeouts?: Record<string, number>;
85
110
  }
86
111
 
87
112
  /** Available CLI bridge models for GET /v1/models */
@@ -139,10 +164,11 @@ export function startProxyServer(opts: ProxyServerOptions): Promise<http.Server>
139
164
  });
140
165
  });
141
166
 
142
- // Stop the token refresh interval and session manager when the server closes (timer-leak prevention)
167
+ // Stop timers and flush state when the server closes (timer-leak prevention)
143
168
  server.on("close", () => {
144
169
  stopTokenRefresh();
145
170
  sessionManager.stop();
171
+ providerSessions.stop();
146
172
  });
147
173
 
148
174
  server.on("error", (err: NodeJS.ErrnoException) => {
@@ -533,7 +559,7 @@ async function handleRequest(
533
559
 
534
560
  // ── BitNet local inference routing ────────────────────────────────────────
535
561
  if (model.startsWith("local-bitnet/")) {
536
- const bitnetUrl = opts.getBitNetServerUrl?.() ?? "http://127.0.0.1:8082";
562
+ const bitnetUrl = opts.getBitNetServerUrl?.() ?? DEFAULT_BITNET_SERVER_URL;
537
563
  const timeoutMs = opts.timeoutMs ?? 120_000;
538
564
  // llama-server (BitNet build) crashes with std::runtime_error on multi-part
539
565
  // content arrays (ref: https://github.com/ggerganov/llama.cpp/issues/8367).
@@ -550,18 +576,14 @@ async function handleRequest(
550
576
  };
551
577
  // BitNet has a 4096 token context window. Long sessions blow it up and
552
578
  // cause a hard C++ crash (no graceful error). Truncate to system prompt +
553
- // last 10 messages (~2k tokens max) to stay safely within the limit.
554
- const BITNET_MAX_MESSAGES = 6;
555
- // Replace the full system prompt (MEMORY.md etc, ~2k+ tokens) with a
556
- // minimal one so BitNet's 4096-token context isn't blown by the system msg alone.
557
- const BITNET_SYSTEM = "You are Akido, a concise AI assistant. Answer briefly and directly. Current user: Emre. Timezone: Europe/Berlin.";
579
+ // last N messages (~2k tokens max) to stay safely within the limit.
558
580
  const allFlat = parsed.messages.map((m) => ({
559
581
  role: m.role,
560
582
  content: flattenContent(m.content),
561
583
  }));
562
584
  const nonSystemMsgs = allFlat.filter((m) => m.role !== "system");
563
585
  const truncated = nonSystemMsgs.slice(-BITNET_MAX_MESSAGES);
564
- const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM }, ...truncated];
586
+ const bitnetMessages = [{ role: "system", content: BITNET_SYSTEM_PROMPT }, ...truncated];
565
587
  const requestBody = JSON.stringify({ ...parsed, messages: bitnetMessages, tools: undefined });
566
588
 
567
589
  const bitnetStart = Date.now();
@@ -623,13 +645,25 @@ async function handleRequest(
623
645
  // ── CLI runner routing (Gemini / Claude Code / Codex) ──────────────────────
624
646
  let result: CliToolResult;
625
647
  let usedModel = model;
626
- const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined };
648
+ const routeOpts = { workdir, tools: hasTools ? tools : undefined, mediaFiles: mediaFiles.length ? mediaFiles : undefined, log: opts.log };
649
+
650
+ // ── Provider session: ensure a persistent session for this model ────────
651
+ // Extract provider prefix from model (e.g. "cli-claude" from "cli-claude/claude-sonnet-4-6")
652
+ const providerPrefix = model.split("/")[0];
653
+ const incomingSessionId = (parsed as { providerSessionId?: string }).providerSessionId;
654
+ const session = incomingSessionId
655
+ ? (providerSessions.getSession(incomingSessionId) ?? providerSessions.ensureSession(providerPrefix, model))
656
+ : providerSessions.ensureSession(providerPrefix, model);
657
+ providerSessions.touchSession(session.id);
627
658
 
628
659
  // ── Dynamic timeout: scale with conversation size ────────────────────────
629
- const baseTimeout = opts.timeoutMs ?? 300_000; // 5 min default (was 120s)
630
- const msgExtra = Math.max(0, cleanMessages.length - 10) * 2_000;
631
- const toolExtra = (tools?.length ?? 0) * 5_000;
632
- const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, 600_000);
660
+ // Per-model timeout takes precedence, then global proxyTimeoutMs, then 300s default.
661
+ const perModelTimeout = opts.modelTimeouts?.[model];
662
+ const baseTimeout = perModelTimeout ?? opts.timeoutMs ?? DEFAULT_PROXY_TIMEOUT_MS;
663
+ const msgExtra = Math.max(0, cleanMessages.length - 10) * TIMEOUT_PER_EXTRA_MSG_MS;
664
+ const toolExtra = (tools?.length ?? 0) * TIMEOUT_PER_TOOL_MS;
665
+ const effectiveTimeout = Math.min(baseTimeout + msgExtra + toolExtra, MAX_EFFECTIVE_TIMEOUT_MS);
666
+ opts.log(`[cli-bridge] ${model} session=${session.id} timeout: ${Math.round(effectiveTimeout / 1000)}s (base=${Math.round(baseTimeout / 1000)}s${perModelTimeout ? " per-model" : ""}, +${Math.round(msgExtra / 1000)}s msgs, +${Math.round(toolExtra / 1000)}s tools)`);
633
667
 
634
668
  // ── SSE keepalive: send headers early so OpenClaw doesn't read-timeout ──
635
669
  let sseHeadersSent = false;
@@ -643,21 +677,26 @@ async function handleRequest(
643
677
  });
644
678
  sseHeadersSent = true;
645
679
  res.write(": keepalive\n\n");
646
- keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, 15_000);
680
+ keepaliveInterval = setInterval(() => { res.write(": keepalive\n\n"); }, SSE_KEEPALIVE_INTERVAL_MS);
647
681
  }
648
682
 
649
683
  const cliStart = Date.now();
650
684
  try {
651
685
  result = await routeToCliRunner(model, cleanMessages, effectiveTimeout, routeOpts);
652
686
  metrics.recordRequest(model, Date.now() - cliStart, true);
687
+ providerSessions.recordRun(session.id, false);
653
688
  } catch (err) {
654
689
  const primaryDuration = Date.now() - cliStart;
655
690
  const msg = (err as Error).message;
656
691
  // ── Model fallback: retry once with a lighter model if configured ────
692
+ const isTimeout = msg.includes("timeout:") || msg.includes("exit 143") || msg.includes("exited 143");
693
+ // Record the run (with timeout flag) — session is preserved, not deleted
694
+ providerSessions.recordRun(session.id, isTimeout);
657
695
  const fallbackModel = opts.modelFallbacks?.[model];
658
696
  if (fallbackModel) {
659
697
  metrics.recordRequest(model, primaryDuration, false);
660
- opts.warn(`[cli-bridge] ${model} failed (${msg}), falling back to ${fallbackModel}`);
698
+ const reason = isTimeout ? `timeout by supervisor, session=${session.id} preserved` : msg;
699
+ opts.warn(`[cli-bridge] ${model} failed (${reason}), falling back to ${fallbackModel}`);
661
700
  const fallbackStart = Date.now();
662
701
  try {
663
702
  result = await routeToCliRunner(fallbackModel, cleanMessages, effectiveTimeout, routeOpts);
@@ -768,6 +807,8 @@ async function handleRequest(
768
807
  },
769
808
  ],
770
809
  usage: { prompt_tokens: 0, completion_tokens: 0, total_tokens: 0 },
810
+ // Propagate session ID so callers can resume in the same session
811
+ provider_session_id: session.id,
771
812
  };
772
813
 
773
814
  res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
@@ -887,6 +928,26 @@ async function handleRequest(
887
928
  return;
888
929
  }
889
930
 
931
+ // ── Provider session endpoints ──────────────────────────────────────────────
932
+
933
+ // GET /v1/provider-sessions — list all provider sessions with stats
934
+ if (url === "/v1/provider-sessions" && req.method === "GET") {
935
+ const sessions = providerSessions.listSessions();
936
+ const stats = providerSessions.stats();
937
+ res.writeHead(200, { "Content-Type": "application/json", ...corsHeaders() });
938
+ res.end(JSON.stringify({ sessions, stats }));
939
+ return;
940
+ }
941
+
942
+ // DELETE /v1/provider-sessions/:id — delete a specific provider session
943
+ const provSessionMatch = url.match(/^\/v1\/provider-sessions\/([a-zA-Z0-9:_-]+)$/);
944
+ if (provSessionMatch && req.method === "DELETE") {
945
+ const ok = providerSessions.deleteSession(decodeURIComponent(provSessionMatch[1]));
946
+ res.writeHead(ok ? 200 : 404, { "Content-Type": "application/json", ...corsHeaders() });
947
+ res.end(JSON.stringify({ ok }));
948
+ return;
949
+ }
950
+
890
951
  // 404
891
952
  res.writeHead(404, { "Content-Type": "application/json" });
892
953
  res.end(JSON.stringify({ error: { message: `Not found: ${url}`, type: "not_found" } }));