alvin-bot 4.5.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (42) hide show
  1. package/CHANGELOG.md +150 -0
  2. package/README.md +25 -2
  3. package/alvin-bot-4.5.1.tgz +0 -0
  4. package/bin/cli.js +246 -0
  5. package/dist/handlers/commands.js +461 -63
  6. package/dist/handlers/message.js +209 -14
  7. package/dist/i18n.js +470 -13
  8. package/dist/index.js +44 -5
  9. package/dist/providers/claude-sdk-provider.js +106 -14
  10. package/dist/providers/ollama-provider.js +32 -0
  11. package/dist/providers/openai-compatible.js +10 -1
  12. package/dist/providers/registry.js +112 -17
  13. package/dist/providers/types.js +25 -3
  14. package/dist/services/compaction.js +2 -0
  15. package/dist/services/cron.js +53 -42
  16. package/dist/services/heartbeat.js +41 -7
  17. package/dist/services/language-detect.js +12 -2
  18. package/dist/services/ollama-manager.js +339 -0
  19. package/dist/services/personality.js +20 -14
  20. package/dist/services/session.js +21 -3
  21. package/dist/services/subagent-delivery.js +111 -0
  22. package/dist/services/subagents.js +341 -27
  23. package/dist/services/telegram.js +28 -1
  24. package/dist/services/updater.js +158 -0
  25. package/dist/services/usage-tracker.js +11 -4
  26. package/dist/services/users.js +2 -1
  27. package/dist/tui/index.js +36 -30
  28. package/docs/HANDBOOK.md +819 -0
  29. package/package.json +7 -2
  30. package/test/claude-sdk-provider.test.ts +69 -0
  31. package/test/i18n.test.ts +108 -0
  32. package/test/registry.test.ts +201 -0
  33. package/test/subagent-delivery.test.ts +169 -0
  34. package/test/subagents-commands.test.ts +64 -0
  35. package/test/subagents-config.test.ts +108 -0
  36. package/test/subagents-depth.test.ts +58 -0
  37. package/test/subagents-inheritance.test.ts +67 -0
  38. package/test/subagents-name-resolver.test.ts +122 -0
  39. package/test/subagents-priority-reject.test.ts +60 -0
  40. package/test/subagents-shutdown.test.ts +126 -0
  41. package/test/subagents-toolset.test.ts +51 -0
  42. package/vitest.config.ts +17 -0
@@ -10,7 +10,20 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
10
10
  import { readFileSync } from "fs";
11
11
  import { resolve, dirname } from "path";
12
12
  import { fileURLToPath } from "url";
13
+ import { execFile } from "child_process";
14
+ import { promisify } from "util";
13
15
  import { findClaudeBinary } from "../find-claude-binary.js";
16
+ const execFileAsync = promisify(execFile);
17
/**
 * Reports whether a piece of Claude CLI output is the "Not logged in"
 * auth-failure message.
 *
 * The CLI prints that message as ordinary assistant text when it has no
 * valid OAuth token, so callers on the SDK path use this check to turn
 * such output into an error instead of showing it as a reply.
 *
 * @param {string | null | undefined} text - Raw output / first text chunk.
 * @returns {boolean} `true` when the text begins (after optional leading
 *   whitespace, case-insensitively) with the phrase "not logged in" as a
 *   whole phrase; `false` for empty or missing input.
 */
export function isAuthErrorOutput(text) {
    // Empty / null / undefined output can never be the auth message.
    const hasContent = Boolean(text);
    return hasContent && /^\s*not logged in\b/i.test(text);
}
14
27
  const BOT_PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "../..");
15
28
  // Load CLAUDE.md once at startup
16
29
  let botClaudeMd = "";
@@ -26,6 +39,11 @@ const CHECKPOINT_TOOL_THRESHOLD = 15;
26
39
  const CHECKPOINT_MSG_THRESHOLD = 10;
27
40
  export class ClaudeSDKProvider {
28
41
  config;
42
+ // Cache the availability check: execFile on every user message would block
43
+ // the bot for ~0-5s each time. A 60s cache is safe — the CLI binary does
44
+ // not disappear mid-session.
45
+ availabilityCache = null;
46
+ static AVAILABILITY_CACHE_MS = 60_000;
29
47
  constructor(config) {
30
48
  this.config = {
31
49
  type: "claude-sdk",
@@ -46,9 +64,23 @@ export class ClaudeSDKProvider {
46
64
  let prompt = options.prompt;
47
65
  const sessionState = options._sessionState;
48
66
  if (sessionState) {
49
- const needsCheckpoint = sessionState.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
67
+ // Checkpoint reminder injection with COOLDOWN.
68
+ //
69
+ // Old behaviour: once either threshold was crossed, the hint got
70
+ // prepended to EVERY subsequent turn's prompt. That forced Claude
71
+ // to detour through memory-file reads/writes on every single turn,
72
+ // which bloated turn latency in long sessions and was a major
73
+ // contributor to the 5-minute hard timeout firing.
74
+ //
75
+ // New behaviour: inject only every CHECKPOINT_REMINDER_EVERY turns
76
+ // after the threshold is reached. At messageCount 10 → injected,
77
+ // 11/12/13/14 → skipped, 15 → injected again, etc. 80% reduction
78
+ // in per-turn overhead while still giving Claude periodic reminders.
79
+ const CHECKPOINT_REMINDER_EVERY = 5;
80
+ const overThreshold = sessionState.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
50
81
  sessionState.messageCount >= CHECKPOINT_MSG_THRESHOLD;
51
- if (needsCheckpoint) {
82
+ const onCooldownBeat = sessionState.messageCount % CHECKPOINT_REMINDER_EVERY === 0;
83
+ if (overThreshold && onCooldownBeat) {
52
84
  prompt = `[CHECKPOINT] Du hast bereits ${sessionState.toolUseCount} Tool-Aufrufe und ${sessionState.messageCount} Nachrichten in dieser Session. Schreibe jetzt einen Checkpoint in deine Memory-Datei (docs/memory/YYYY-MM-DD.md) bevor du diese Anfrage bearbeitest.\n\n${prompt}`;
53
85
  }
54
86
  }
@@ -56,15 +88,26 @@ export class ClaudeSDKProvider {
56
88
  const systemPrompt = options.systemPrompt
57
89
  ? `${options.systemPrompt}\n\n${botClaudeMd}`
58
90
  : botClaudeMd;
91
+ // Build a real AbortController the SDK can call .abort() on.
92
+ // The previous implementation cast a plain {signal} object to AbortController,
93
+ // which broke SDK-internal cancellation and left orphan subprocesses.
94
+ let internalAbortController;
95
+ if (options.abortSignal) {
96
+ internalAbortController = new AbortController();
97
+ if (options.abortSignal.aborted) {
98
+ internalAbortController.abort();
99
+ }
100
+ else {
101
+ options.abortSignal.addEventListener("abort", () => internalAbortController?.abort(), { once: true });
102
+ }
103
+ }
59
104
  try {
60
105
  const claudePath = findClaudeBinary();
61
106
  const q = query({
62
107
  prompt,
63
108
  options: {
64
109
  cwd: options.workingDir || process.cwd(),
65
- abortController: options.abortSignal
66
- ? { signal: options.abortSignal }
67
- : undefined,
110
+ abortController: internalAbortController,
68
111
  resume: options.sessionId ?? undefined,
69
112
  pathToClaudeCodeExecutable: claudePath,
70
113
  permissionMode: "bypassPermissions",
@@ -76,7 +119,7 @@ export class ClaudeSDKProvider {
76
119
  "WebSearch", "WebFetch", "Task",
77
120
  ],
78
121
  systemPrompt,
79
- effort: (options.effort || "high"),
122
+ effort: (options.effort || "medium"),
80
123
  maxTurns: 50,
81
124
  betas: ["context-1m-2025-08-07"],
82
125
  },
@@ -97,6 +140,17 @@ export class ClaudeSDKProvider {
97
140
  if (assistantMsg.message?.content) {
98
141
  for (const block of assistantMsg.message.content) {
99
142
  if ("text" in block && block.text) {
143
+ // Guard against "Not logged in" leaking as assistant text.
144
+ // If the very first text chunk matches the CLI auth-error
145
+ // pattern, surface it as an error chunk instead of rendering
146
+ // it as a normal response.
147
+ if (!accumulatedText && isAuthErrorOutput(block.text)) {
148
+ yield {
149
+ type: "error",
150
+ error: "Claude CLI is not logged in. Run `claude login` on this machine.",
151
+ };
152
+ return;
153
+ }
100
154
  accumulatedText += block.text;
101
155
  yield {
102
156
  type: "text",
@@ -107,9 +161,25 @@ export class ClaudeSDKProvider {
107
161
  }
108
162
  if ("name" in block) {
109
163
  localToolUseCount++;
164
+ // Serialise the tool input (parameters) so the message
165
+ // handler can surface detail for specific tools — most
166
+ // importantly the "Task" tool where `input.description`
167
+ // describes what sub-task Claude is delegating.
168
+ let toolInputStr;
169
+ if ("input" in block && block.input !== undefined) {
170
+ try {
171
+ const raw = JSON.stringify(block.input);
172
+ // cap at 500 chars to keep status lines manageable
173
+ toolInputStr = raw.length > 500 ? raw.slice(0, 500) + "…" : raw;
174
+ }
175
+ catch {
176
+ // unserializable — skip
177
+ }
178
+ }
110
179
  yield {
111
180
  type: "tool_use",
112
181
  toolName: block.name,
182
+ toolInput: toolInputStr,
113
183
  sessionId: capturedSessionId,
114
184
  };
115
185
  }
@@ -148,19 +218,41 @@ export class ClaudeSDKProvider {
148
218
  }
149
219
  }
150
220
  async isAvailable() {
151
- // Check if native Claude binary exists and responds to --version.
152
- // NOTE: Don't test with `claude -p "ping"` CLI login and SDK auth
153
- // are separate. The SDK uses its own auth via bypassPermissions.
221
+ // Cached availability check. The previous implementation called execSync
222
+ // on every user message, blocking the Node event loop for up to 5s per
223
+ // query. We now use async execFile and cache the result for 60s.
224
+ const now = Date.now();
225
+ if (this.availabilityCache && this.availabilityCache.expiresAt > now) {
226
+ return this.availabilityCache.result;
227
+ }
228
+ const cache = (result) => {
229
+ this.availabilityCache = {
230
+ result,
231
+ expiresAt: now + ClaudeSDKProvider.AVAILABILITY_CACHE_MS,
232
+ };
233
+ return result;
234
+ };
154
235
  try {
155
236
  const claudePath = findClaudeBinary();
156
237
  if (!claudePath)
157
- return false;
158
- const { execSync } = await import("child_process");
159
- execSync(`"${claudePath}" --version`, { stdio: "pipe", timeout: 5000 });
160
- return true;
238
+ return cache(false);
239
+ // Step 1: binary exists?
240
+ // Async execFile doesn't block the event loop. 5s timeout kills
241
+ // runaway probes without hanging the bot.
242
+ await execFileAsync(claudePath, ["--version"], { timeout: 5000 });
243
+ // Step 2: actually authenticated? The Claude Agent SDK shares the
244
+ // same OAuth token as the CLI — if `claude -p` says "Not logged in",
245
+ // the SDK will fail too. Probe with a trivial -p call and surface
246
+ // the failure before the registry hands a request to a broken
247
+ // provider.
248
+ const { stdout } = await execFileAsync(claudePath, ["-p", "ping", "--output-format", "text"], { timeout: 10000 });
249
+ if (isAuthErrorOutput(stdout)) {
250
+ return cache(false);
251
+ }
252
+ return cache(true);
161
253
  }
162
254
  catch {
163
- return false;
255
+ return cache(false);
164
256
  }
165
257
  }
166
258
  getInfo() {
@@ -0,0 +1,32 @@
1
+ /**
2
+ * Ollama Provider — OpenAI-compatible chat-completions with an on-demand
3
+ * daemon lifecycle.
4
+ *
5
+ * Inherits all the request/response handling (streaming, tool-calling,
6
+ * rate-limit extraction, vision, …) from OpenAICompatibleProvider. Only
7
+ * adds the `lifecycle` field so the rest of the bot (heartbeat, /model
8
+ * switch, /status, shutdown) can manage the local daemon generically
9
+ * without any hardcoded "ollama" string-matching.
10
+ *
11
+ * When the architecture needs another local runner (LM Studio, llama.cpp,
12
+ * vLLM, Jan.ai, …), the pattern is the same: subclass
13
+ * OpenAICompatibleProvider, assign a `lifecycle` with its own
14
+ * ensureRunning/ensureStopped/isRunning/isBotManaged implementation.
15
+ */
16
+ import { OpenAICompatibleProvider } from "./openai-compatible.js";
17
+ import { ensureRunning as managerEnsureRunning, ensureStopped as managerEnsureStopped, isDaemonRunning as managerIsDaemonRunning, isBotManaged as managerIsBotManaged, } from "../services/ollama-manager.js";
18
export class OllamaProvider extends OpenAICompatibleProvider {
    /**
     * Daemon lifecycle hooks (ensureRunning / ensureStopped / isRunning /
     * isBotManaged), each delegating to the ollama-manager service.
     */
    lifecycle;
    /**
     * @param config Provider configuration, forwarded unchanged to
     *   OpenAICompatibleProvider. `config.model` is read once here and
     *   handed to the manager whenever the daemon is asked to start.
     */
    constructor(config) {
        super(config);
        // Snapshot the model name now; the hooks below close over this
        // value rather than re-reading this.config on each invocation.
        const { model: modelName } = config;
        const hooks = {
            ensureRunning() {
                return managerEnsureRunning(modelName);
            },
            ensureStopped() {
                return managerEnsureStopped();
            },
            isRunning() {
                return managerIsDaemonRunning();
            },
            isBotManaged() {
                return managerIsBotManaged();
            },
        };
        this.lifecycle = hooks;
    }
}
@@ -286,10 +286,19 @@ export class OpenAICompatibleProvider {
286
286
  }
287
287
  getInfo() {
288
288
  const tools = this.supportsToolUse() ? " 🔧" : "";
289
+ // Local runners (Ollama, LM Studio, …) don't use API keys. Report their
290
+ // status based on whether the local endpoint is reachable at startup,
291
+ // not based on the missing apiKey field which is semantically irrelevant
292
+ // for loopback endpoints.
293
+ const isLocal = this.config.baseUrl?.includes("localhost")
294
+ || this.config.baseUrl?.includes("127.0.0.1");
295
+ const status = isLocal
296
+ ? "💤 on-demand (local)"
297
+ : (this.config.apiKey ? "✅ configured" : "❌ no API key");
289
298
  return {
290
299
  name: this.config.name + tools,
291
300
  model: this.config.model,
292
- status: this.config.apiKey ? "✅ configured" : "❌ no API key",
301
+ status,
293
302
  };
294
303
  }
295
304
  // ── Rate Limit Extraction ───────────────────────────────────────────────
@@ -9,7 +9,20 @@
9
9
  import { ClaudeSDKProvider } from "./claude-sdk-provider.js";
10
10
  import { CodexCLIProvider } from "./codex-cli-provider.js";
11
11
  import { OpenAICompatibleProvider } from "./openai-compatible.js";
12
+ import { OllamaProvider } from "./ollama-provider.js";
12
13
  import { PROVIDER_PRESETS } from "./types.js";
14
+ import { t } from "../i18n.js";
15
/**
 * Identify an Ollama endpoint by its baseUrl rather than by a hardcoded
 * provider key. This lets users define aliases (e.g. `my-ollama`,
 * `ollama-local`) in FALLBACK_PROVIDERS or custom-models.json and still
 * get the on-demand lifecycle behaviour automatically.
 *
 * The URL is parsed and its host and port are compared, instead of doing
 * a substring search on the raw string. A substring search would accept
 * remote hosts such as `http://notlocalhost:11434` or
 * `https://localhost:11434@example.com`, and would reject the IPv6
 * loopback form `http://[::1]:11434` as well as upper-case host names.
 *
 * @param {string | null | undefined} baseUrl - Configured endpoint; a
 *   value without a scheme (e.g. `localhost:11434/v1`) is read as http.
 * @returns {boolean} `true` only when the host is a loopback name/address
 *   and the port is 11434; `false` for missing, non-string or unparsable
 *   input.
 */
function isOllamaEndpoint(baseUrl) {
    if (typeof baseUrl !== "string" || baseUrl === "") {
        return false;
    }
    // Without a scheme the URL parser would take "localhost:" as the
    // protocol, so prepend one before parsing.
    const hasScheme = /^[a-z][a-z\d+.-]*:\/\//i.test(baseUrl);
    const candidate = hasScheme ? baseUrl : `http://${baseUrl}`;
    let parsed;
    try {
        parsed = new URL(candidate);
    }
    catch {
        // Not a URL at all, therefore not an Ollama endpoint.
        return false;
    }
    // The parser lower-cases the hostname and keeps the brackets on IPv6.
    const loopbackHosts = ["localhost", "127.0.0.1", "[::1]"];
    return parsed.port === "11434" && loopbackHosts.includes(parsed.hostname);
}
13
26
  export class ProviderRegistry {
14
27
  providers = new Map();
15
28
  primaryKey;
@@ -80,6 +93,21 @@ export class ProviderRegistry {
80
93
  /**
81
94
  * Query with automatic fallback.
82
95
  * Tries the active provider first, then fallbacks in order.
96
+ *
97
+ * Two invariants beyond the obvious chain-walk:
98
+ *
99
+ * 1. Lifecycle-managed providers (local runners like Ollama) get booted
100
+ * on-demand if they're not already running. Without this, a
101
+ * mid-session Claude failure would silently skip Ollama because its
102
+ * daemon isn't awake yet — the heartbeat's 5-minute cadence can't
103
+ * react fast enough to save an in-flight user request.
104
+ *
105
+ * 2. If the active provider has already emitted text to the user and
106
+ * then errors out mid-stream, we do NOT silently failover to the
107
+ * next provider. Chaining a second model underneath a half-finished
108
+ * Claude response is more confusing than surfacing a clear error
109
+ * and asking the user to retry. The failover is only silent when
110
+ * the failing provider hadn't committed any visible text yet.
83
111
  */
84
112
  async *queryWithFallback(options) {
85
113
  const chain = [this.activeKey, ...this.fallbackKeys.filter(k => k !== this.activeKey)];
@@ -88,35 +116,97 @@ export class ProviderRegistry {
88
116
  const provider = this.providers.get(key);
89
117
  if (!provider)
90
118
  continue;
91
- // Check availability before trying
92
- const available = await provider.isAvailable().catch(() => false);
119
+ // Check availability. For lifecycle-managed providers (Ollama et al.)
120
+ // that are currently asleep, actively try to boot them before giving up.
121
+ let available = await provider.isAvailable().catch(() => false);
122
+ if (!available && provider.lifecycle) {
123
+ console.log(`Provider "${key}" asleep — booting on-demand…`);
124
+ const booted = await provider.lifecycle.ensureRunning().catch(() => false);
125
+ if (booted) {
126
+ available = await provider.isAvailable().catch(() => false);
127
+ }
128
+ }
93
129
  if (!available) {
94
130
  console.log(`Provider "${key}" not available, trying next...`);
95
131
  errors.push({ key, error: "not available (check auth/config)" });
96
132
  continue;
97
133
  }
134
+ // ─── Query with silent retry for transient mid-stream aborts ─────
135
+ // Anthropic occasionally drops streams (network hiccup, server-side
136
+ // flap, rate-limit blip). Rather than surfacing the error on the
137
+ // first failure, we retry the SAME provider once with a short delay.
138
+ // Only mid-stream abort-shaped errors trigger the retry — pre-stream
139
+ // failures and user cancels go straight to the fallback / error path.
140
+ const MAX_ATTEMPTS = 2;
141
+ const RETRY_DELAY_MS = 2_000;
142
+ let attempts = 0;
98
143
  let hadError = false;
99
144
  let lastError = "";
100
- try {
101
- for await (const chunk of provider.query(options)) {
102
- if (chunk.type === "error") {
103
- hadError = true;
104
- lastError = chunk.error || "Unknown error";
105
- break;
145
+ let hadVisibleText = false;
146
+ while (attempts < MAX_ATTEMPTS) {
147
+ attempts++;
148
+ hadError = false;
149
+ lastError = "";
150
+ hadVisibleText = false;
151
+ try {
152
+ for await (const chunk of provider.query(options)) {
153
+ if (chunk.type === "error") {
154
+ hadError = true;
155
+ lastError = chunk.error || "Unknown error";
156
+ break;
157
+ }
158
+ if (chunk.type === "text" && chunk.text && chunk.text.length > 0) {
159
+ hadVisibleText = true;
160
+ }
161
+ yield chunk;
162
+ if (chunk.type === "done")
163
+ return;
106
164
  }
107
- yield chunk;
108
- if (chunk.type === "done")
109
- return;
110
165
  }
111
- }
112
- catch (err) {
113
- hadError = true;
114
- lastError = err instanceof Error ? err.message : String(err);
166
+ catch (err) {
167
+ hadError = true;
168
+ lastError = err instanceof Error ? err.message : String(err);
169
+ }
170
+ if (!hadError) {
171
+ // Loop ended naturally without a done — unusual, fall through.
172
+ break;
173
+ }
174
+ // Retry eligibility:
175
+ // - mid-stream (had visible text before error)
176
+ // - not a user-initiated cancel (abortSignal is externally fired)
177
+ // - error looks transient (contains "abort")
178
+ // - still have attempts left
179
+ const isUserAbort = options.abortSignal?.aborted === true;
180
+ const isTransientLooking = lastError.toLowerCase().includes("abort");
181
+ const shouldRetry = hadVisibleText
182
+ && attempts < MAX_ATTEMPTS
183
+ && !isUserAbort
184
+ && isTransientLooking;
185
+ if (!shouldRetry)
186
+ break;
187
+ console.log(`Provider "${key}" mid-stream abort (attempt ${attempts}/${MAX_ATTEMPTS}) — retrying in ${RETRY_DELAY_MS}ms: ${lastError}`);
188
+ await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS));
189
+ // If the user cancelled during the delay, bail before the next attempt.
190
+ if (options.abortSignal?.aborted === true)
191
+ break;
115
192
  }
116
193
  if (hadError) {
117
- console.log(`Provider "${key}" failed: ${lastError}. Trying next...`);
194
+ console.log(`Provider "${key}" failed: ${lastError}. ${hadVisibleText ? "Mid-stream — surfacing error." : "Trying next..."}`);
118
195
  errors.push({ key, error: lastError });
119
- // Find next provider to notify about fallback
196
+ // Mid-stream failure: the user already has partial text on screen.
197
+ // Yield a terminal error instead of switching to a different model
198
+ // that would write a second, unrelated response underneath.
199
+ if (hadVisibleText) {
200
+ yield {
201
+ type: "error",
202
+ error: t("bot.error.midStream", options.locale, {
203
+ name: provider.getInfo().name,
204
+ detail: lastError,
205
+ }),
206
+ };
207
+ return;
208
+ }
209
+ // Pre-stream failure: safe to silently switch to the next provider.
120
210
  const nextIdx = chain.indexOf(key) + 1;
121
211
  if (nextIdx < chain.length) {
122
212
  const nextProvider = this.providers.get(chain[nextIdx]);
@@ -155,6 +245,11 @@ export class ProviderRegistry {
155
245
  case "codex-cli":
156
246
  return new CodexCLIProvider(config);
157
247
  case "openai-compatible":
248
+ // Local runners that happen to speak the OpenAI-compat protocol
249
+ // get a subclass that layers on-demand lifecycle management.
250
+ if (isOllamaEndpoint(config.baseUrl)) {
251
+ return new OllamaProvider(config);
252
+ }
158
253
  return new OpenAICompatibleProvider(config);
159
254
  default:
160
255
  throw new Error(`Unknown provider type: ${config.type}`);
@@ -14,14 +14,16 @@ export const PROVIDER_PRESETS = {
14
14
  supportsTools: true,
15
15
  supportsVision: false,
16
16
  supportsStreaming: true,
17
+ contextWindow: 400_000,
17
18
  },
18
- // Anthropic (via Agent SDK — full tool use)
19
+ // Anthropic (via Agent SDK — full tool use, 1M-context beta enabled)
19
20
  "claude-sdk": {
20
21
  type: "claude-sdk",
21
22
  name: "Claude (Agent SDK)",
22
23
  supportsTools: true,
23
24
  supportsVision: true,
24
25
  supportsStreaming: true,
26
+ contextWindow: 1_000_000,
25
27
  },
26
28
  // Anthropic API (via OpenAI-compatible endpoint — no Agent SDK needed)
27
29
  "claude-opus": {
@@ -32,6 +34,7 @@ export const PROVIDER_PRESETS = {
32
34
  supportsVision: true,
33
35
  supportsStreaming: true,
34
36
  supportsTools: true,
37
+ contextWindow: 200_000,
35
38
  },
36
39
  "claude-sonnet": {
37
40
  type: "openai-compatible",
@@ -41,6 +44,7 @@ export const PROVIDER_PRESETS = {
41
44
  supportsVision: true,
42
45
  supportsStreaming: true,
43
46
  supportsTools: true,
47
+ contextWindow: 200_000,
44
48
  },
45
49
  "claude-haiku": {
46
50
  type: "openai-compatible",
@@ -50,6 +54,7 @@ export const PROVIDER_PRESETS = {
50
54
  supportsVision: true,
51
55
  supportsStreaming: true,
52
56
  supportsTools: true,
57
+ contextWindow: 200_000,
53
58
  },
54
59
  // Groq (fast inference, free tier, supports function calling)
55
60
  "groq": {
@@ -60,6 +65,7 @@ export const PROVIDER_PRESETS = {
60
65
  supportsVision: false,
61
66
  supportsStreaming: true,
62
67
  supportsTools: true,
68
+ contextWindow: 128_000,
63
69
  },
64
70
  // OpenAI (supports function calling)
65
71
  "gpt-4o": {
@@ -70,6 +76,7 @@ export const PROVIDER_PRESETS = {
70
76
  supportsVision: true,
71
77
  supportsStreaming: true,
72
78
  supportsTools: true,
79
+ contextWindow: 128_000,
73
80
  },
74
81
  "gpt-4o-mini": {
75
82
  type: "openai-compatible",
@@ -79,6 +86,7 @@ export const PROVIDER_PRESETS = {
79
86
  supportsVision: true,
80
87
  supportsStreaming: true,
81
88
  supportsTools: true,
89
+ contextWindow: 128_000,
82
90
  },
83
91
  // Google Gemini (via OpenAI-compatible endpoint, supports function calling)
84
92
  "gemini-2.5-pro": {
@@ -89,6 +97,7 @@ export const PROVIDER_PRESETS = {
89
97
  supportsVision: true,
90
98
  supportsStreaming: true,
91
99
  supportsTools: true,
100
+ contextWindow: 1_000_000,
92
101
  },
93
102
  "gemini-2.5-flash": {
94
103
  type: "openai-compatible",
@@ -98,6 +107,7 @@ export const PROVIDER_PRESETS = {
98
107
  supportsVision: true,
99
108
  supportsStreaming: true,
100
109
  supportsTools: true,
110
+ contextWindow: 1_000_000,
101
111
  },
102
112
  "gemini-3-pro": {
103
113
  type: "openai-compatible",
@@ -107,6 +117,7 @@ export const PROVIDER_PRESETS = {
107
117
  supportsVision: true,
108
118
  supportsStreaming: true,
109
119
  supportsTools: true,
120
+ contextWindow: 2_000_000,
110
121
  },
111
122
  "gemini-3-flash": {
112
123
  type: "openai-compatible",
@@ -116,6 +127,7 @@ export const PROVIDER_PRESETS = {
116
127
  supportsVision: true,
117
128
  supportsStreaming: true,
118
129
  supportsTools: true,
130
+ contextWindow: 1_000_000,
119
131
  },
120
132
  // OpenAI newer models
121
133
  "gpt-4.1": {
@@ -126,6 +138,7 @@ export const PROVIDER_PRESETS = {
126
138
  supportsVision: true,
127
139
  supportsStreaming: true,
128
140
  supportsTools: true,
141
+ contextWindow: 1_000_000,
129
142
  },
130
143
  "gpt-4.1-mini": {
131
144
  type: "openai-compatible",
@@ -135,6 +148,7 @@ export const PROVIDER_PRESETS = {
135
148
  supportsVision: true,
136
149
  supportsStreaming: true,
137
150
  supportsTools: true,
151
+ contextWindow: 1_000_000,
138
152
  },
139
153
  "o3-mini": {
140
154
  type: "openai-compatible",
@@ -144,6 +158,7 @@ export const PROVIDER_PRESETS = {
144
158
  supportsVision: false,
145
159
  supportsStreaming: true,
146
160
  supportsTools: true,
161
+ contextWindow: 200_000,
147
162
  },
148
163
  // Groq additional models
149
164
  "groq-llama-3.1-8b": {
@@ -154,6 +169,7 @@ export const PROVIDER_PRESETS = {
154
169
  supportsVision: false,
155
170
  supportsStreaming: true,
156
171
  supportsTools: true,
172
+ contextWindow: 128_000,
157
173
  },
158
174
  "groq-mixtral": {
159
175
  type: "openai-compatible",
@@ -163,6 +179,7 @@ export const PROVIDER_PRESETS = {
163
179
  supportsVision: false,
164
180
  supportsStreaming: true,
165
181
  supportsTools: true,
182
+ contextWindow: 32_768,
166
183
  },
167
184
  // NVIDIA NIM (150+ free models)
168
185
  "nvidia-llama-3.3-70b": {
@@ -173,6 +190,7 @@ export const PROVIDER_PRESETS = {
173
190
  supportsVision: false,
174
191
  supportsStreaming: true,
175
192
  supportsTools: true,
193
+ contextWindow: 128_000,
176
194
  },
177
195
  "nvidia-kimi-k2.5": {
178
196
  type: "openai-compatible",
@@ -182,8 +200,9 @@ export const PROVIDER_PRESETS = {
182
200
  supportsVision: true,
183
201
  supportsStreaming: true,
184
202
  supportsTools: true,
203
+ contextWindow: 200_000,
185
204
  },
186
- // Ollama (local models)
205
+ // Ollama (local models) — Gemma 4 E4B has an 8k context window
187
206
  "ollama": {
188
207
  type: "openai-compatible",
189
208
  name: "Gemma 4 E4B (Ollama)",
@@ -191,8 +210,10 @@ export const PROVIDER_PRESETS = {
191
210
  baseUrl: "http://localhost:11434/v1",
192
211
  supportsVision: true,
193
212
  supportsStreaming: true,
213
+ contextWindow: 8_192,
194
214
  },
195
- // OpenRouter (any model, one API, supports function calling)
215
+ // OpenRouter (any model, one API, supports function calling).
216
+ // Context window varies by model — default 200k is a middle-ground guess.
196
217
  "openrouter": {
197
218
  type: "openai-compatible",
198
219
  name: "OpenRouter",
@@ -201,5 +222,6 @@ export const PROVIDER_PRESETS = {
201
222
  supportsVision: true,
202
223
  supportsStreaming: true,
203
224
  supportsTools: true,
225
+ contextWindow: 200_000,
204
226
  },
205
227
  };
@@ -96,6 +96,8 @@ export async function compactSession(session) {
96
96
  session.history = [summaryMessage, ...fallbackKeep];
97
97
  }
98
98
  const summaryTokens = Math.ceil(summaryMessage.content.length / 4); // rough estimate
99
+ // Track how many compactions this session has seen, for /status telemetry
100
+ session.compactionCount = (session.compactionCount || 0) + 1;
99
101
  return {
100
102
  removedEntries,
101
103
  summaryTokens,