npm - alvin-bot - Versions diffs - 4.5.0 → 4.6.0 - Mend

alvin-bot 4.5.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/CHANGELOG.md +150 -0
package/README.md +25 -2
package/alvin-bot-4.5.1.tgz +0 -0
package/bin/cli.js +246 -0
package/dist/handlers/commands.js +461 -63
package/dist/handlers/message.js +209 -14
package/dist/i18n.js +470 -13
package/dist/index.js +44 -5
package/dist/providers/claude-sdk-provider.js +106 -14
package/dist/providers/ollama-provider.js +32 -0
package/dist/providers/openai-compatible.js +10 -1
package/dist/providers/registry.js +112 -17
package/dist/providers/types.js +25 -3
package/dist/services/compaction.js +2 -0
package/dist/services/cron.js +53 -42
package/dist/services/heartbeat.js +41 -7
package/dist/services/language-detect.js +12 -2
package/dist/services/ollama-manager.js +339 -0
package/dist/services/personality.js +20 -14
package/dist/services/session.js +21 -3
package/dist/services/subagent-delivery.js +111 -0
package/dist/services/subagents.js +341 -27
package/dist/services/telegram.js +28 -1
package/dist/services/updater.js +158 -0
package/dist/services/usage-tracker.js +11 -4
package/dist/services/users.js +2 -1
package/dist/tui/index.js +36 -30
package/docs/HANDBOOK.md +819 -0
package/package.json +7 -2
package/test/claude-sdk-provider.test.ts +69 -0
package/test/i18n.test.ts +108 -0
package/test/registry.test.ts +201 -0
package/test/subagent-delivery.test.ts +169 -0
package/test/subagents-commands.test.ts +64 -0
package/test/subagents-config.test.ts +108 -0
package/test/subagents-depth.test.ts +58 -0
package/test/subagents-inheritance.test.ts +67 -0
package/test/subagents-name-resolver.test.ts +122 -0
package/test/subagents-priority-reject.test.ts +60 -0
package/test/subagents-shutdown.test.ts +126 -0
package/test/subagents-toolset.test.ts +51 -0
package/vitest.config.ts +17 -0

package/dist/providers/claude-sdk-provider.js CHANGED Viewed

@@ -10,7 +10,20 @@ import { query } from "@anthropic-ai/claude-agent-sdk";
 import { readFileSync } from "fs";
 import { resolve, dirname } from "path";
 import { fileURLToPath } from "url";
+import { execFile } from "child_process";
+import { promisify } from "util";
 import { findClaudeBinary } from "../find-claude-binary.js";
+const execFileAsync = promisify(execFile);
+/**
+ * Detects the Claude CLI "Not logged in" error message. The CLI emits this
+ * as normal assistant text when no valid OAuth token is present, so we have
+ * to treat that output as an error in the SDK path too.
+ *
+ * The check is case-insensitive, allows leading whitespace, and is anchored
+ * to the very start of the text: the phrase appearing later inside an
+ * otherwise normal response does not count as an auth error.
+ *
+ * @param {string | null | undefined} text - Output to inspect. Empty or
+ *   missing input is never an auth error.
+ * @returns {boolean} `true` when the text starts with "not logged in"
+ *   (as a whole phrase), `false` otherwise.
+ */
+export function isAuthErrorOutput(text) {
+    if (!text)
+        return false;
+    return /^\s*not logged in\b/i.test(text);
+}
 const BOT_PROJECT_ROOT = resolve(dirname(fileURLToPath(import.meta.url)), "../..");
 // Load CLAUDE.md once at startup
 let botClaudeMd = "";
@@ -26,6 +39,11 @@ const CHECKPOINT_TOOL_THRESHOLD = 15;
 const CHECKPOINT_MSG_THRESHOLD = 10;
 export class ClaudeSDKProvider {
     config;
+    // Cache the availability check: execFile on every user message would block
+    // the bot for ~0-5s each time. A 60s cache is safe — the CLI binary does
+    // not disappear mid-session.
+    availabilityCache = null;
+    static AVAILABILITY_CACHE_MS = 60_000;
     constructor(config) {
         this.config = {
             type: "claude-sdk",
@@ -46,9 +64,23 @@ export class ClaudeSDKProvider {
         let prompt = options.prompt;
         const sessionState = options._sessionState;
         if (sessionState) {
-            const needsCheckpoint = sessionState.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
+            // Checkpoint reminder injection with COOLDOWN.
+            //
+            // Old behaviour: once either threshold was crossed, the hint got
+            // prepended to EVERY subsequent turn's prompt. That forced Claude
+            // to detour through memory-file reads/writes on every single turn,
+            // which bloated turn latency in long sessions and was a major
+            // contributor to the 5-minute hard timeout firing.
+            //
+            // New behaviour: inject only every CHECKPOINT_REMINDER_EVERY turns
+            // after the threshold is reached. At messageCount 10 → injected,
+            // 11/12/13/14 → skipped, 15 → injected again, etc. 80% reduction
+            // in per-turn overhead while still giving Claude periodic reminders.
+            const CHECKPOINT_REMINDER_EVERY = 5;
+            const overThreshold = sessionState.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
                 sessionState.messageCount >= CHECKPOINT_MSG_THRESHOLD;
-            if (needsCheckpoint) {
+            const onCooldownBeat = sessionState.messageCount % CHECKPOINT_REMINDER_EVERY === 0;
+            if (overThreshold && onCooldownBeat) {
                 prompt = `[CHECKPOINT] Du hast bereits ${sessionState.toolUseCount} Tool-Aufrufe und ${sessionState.messageCount} Nachrichten in dieser Session. Schreibe jetzt einen Checkpoint in deine Memory-Datei (docs/memory/YYYY-MM-DD.md) bevor du diese Anfrage bearbeitest.\n\n${prompt}`;
             }
         }
@@ -56,15 +88,26 @@ export class ClaudeSDKProvider {
         const systemPrompt = options.systemPrompt
             ? `${options.systemPrompt}\n\n${botClaudeMd}`
             : botClaudeMd;
+        // Build a real AbortController the SDK can call .abort() on.
+        // The previous implementation cast a plain {signal} object to AbortController,
+        // which broke SDK-internal cancellation and left orphan subprocesses.
+        let internalAbortController;
+        if (options.abortSignal) {
+            internalAbortController = new AbortController();
+            if (options.abortSignal.aborted) {
+                internalAbortController.abort();
+            }
+            else {
+                options.abortSignal.addEventListener("abort", () => internalAbortController?.abort(), { once: true });
+            }
+        }
         try {
             const claudePath = findClaudeBinary();
             const q = query({
                 prompt,
                 options: {
                     cwd: options.workingDir || process.cwd(),
-                    abortController: options.abortSignal
-                        ? { signal: options.abortSignal }
-                        : undefined,
+                    abortController: internalAbortController,
                     resume: options.sessionId ?? undefined,
                     pathToClaudeCodeExecutable: claudePath,
                     permissionMode: "bypassPermissions",
@@ -76,7 +119,7 @@ export class ClaudeSDKProvider {
                         "WebSearch", "WebFetch", "Task",
                     ],
                     systemPrompt,
-                    effort: (options.effort || "high"),
+                    effort: (options.effort || "medium"),
                     maxTurns: 50,
                     betas: ["context-1m-2025-08-07"],
                 },
@@ -97,6 +140,17 @@ export class ClaudeSDKProvider {
                     if (assistantMsg.message?.content) {
                         for (const block of assistantMsg.message.content) {
                             if ("text" in block && block.text) {
+                                // Guard against "Not logged in" leaking as assistant text.
+                                // If the very first text chunk matches the CLI auth-error
+                                // pattern, surface it as an error chunk instead of rendering
+                                // it as a normal response.
+                                if (!accumulatedText && isAuthErrorOutput(block.text)) {
+                                    yield {
+                                        type: "error",
+                                        error: "Claude CLI is not logged in. Run `claude login` on this machine.",
+                                    };
+                                    return;
+                                }
                                 accumulatedText += block.text;
                                 yield {
                                     type: "text",
@@ -107,9 +161,25 @@ export class ClaudeSDKProvider {
                             }
                             if ("name" in block) {
                                 localToolUseCount++;
+                                // Serialise the tool input (parameters) so the message
+                                // handler can surface detail for specific tools — most
+                                // importantly the "Task" tool where `input.description`
+                                // describes what sub-task Claude is delegating.
+                                let toolInputStr;
+                                if ("input" in block && block.input !== undefined) {
+                                    try {
+                                        const raw = JSON.stringify(block.input);
+                                        // cap at 500 chars to keep status lines manageable
+                                        toolInputStr = raw.length > 500 ? raw.slice(0, 500) + "…" : raw;
+                                    }
+                                    catch {
+                                        // unserializable — skip
+                                    }
+                                }
                                 yield {
                                     type: "tool_use",
                                     toolName: block.name,
+                                    toolInput: toolInputStr,
                                     sessionId: capturedSessionId,
                                 };
                             }
@@ -148,19 +218,41 @@ export class ClaudeSDKProvider {
         }
     }
     async isAvailable() {
-        // Check if native Claude binary exists and responds to --version.
-        // NOTE: Don't test with `claude -p "ping"` — CLI login and SDK auth
-        // are separate. The SDK uses its own auth via bypassPermissions.
+        // Cached availability check, resolving to a boolean. The previous implementation called execSync
+        // on every user message, blocking the Node event loop for up to 5s per query. We now use async execFile
+        // and cache every outcome (true and false alike) for 60s, counted from `now`, which is captured before the probes run.
+        const now = Date.now();
+        if (this.availabilityCache && this.availabilityCache.expiresAt > now) {
+            return this.availabilityCache.result;
+        }
+        const cache = (result) => {
+            this.availabilityCache = {
+                result,
+                expiresAt: now + ClaudeSDKProvider.AVAILABILITY_CACHE_MS,
+            };
+            return result;
+        };
         try {
             const claudePath = findClaudeBinary();
             if (!claudePath)
-                return false;
-            const { execSync } = await import("child_process");
-            execSync(`"${claudePath}" --version`, { stdio: "pipe", timeout: 5000 });
-            return true;
+                return cache(false);
+            // Step 1: does the located binary actually run and answer --version?
+            // Async execFile doesn't block the event loop. 5s timeout kills
+            // runaway probes without hanging the bot.
+            await execFileAsync(claudePath, ["--version"], { timeout: 5000 });
+            // Step 2: actually authenticated? The Claude Agent SDK shares the
+            // same OAuth token as the CLI — if `claude -p` says "Not logged in",
+            // the SDK will fail too. Probe with a trivial -p call (10s timeout) and
+            // surface the failure before the registry hands a request to a broken
+            // provider. Any error thrown in this try block is cached as false below.
+            const { stdout } = await execFileAsync(claudePath, ["-p", "ping", "--output-format", "text"], { timeout: 10000 });
+            if (isAuthErrorOutput(stdout)) {
+                return cache(false);
+            }
+            return cache(true);
         }
         catch {
-            return false;
+            return cache(false);
         }
     }
     getInfo() {

package/dist/providers/ollama-provider.js ADDED Viewed

@@ -0,0 +1,32 @@
+/**
+ * Ollama Provider — OpenAI-compatible chat-completions with an on-demand
+ * daemon lifecycle.
+ *
+ * Inherits all the request/response handling (streaming, tool-calling,
+ * rate-limit extraction, vision, …) from OpenAICompatibleProvider. Only
+ * adds the `lifecycle` field so the rest of the bot (heartbeat, /model
+ * switch, /status, shutdown) can manage the local daemon generically
+ * without any hardcoded "ollama" string-matching.
+ *
+ * When the architecture needs another local runner (LM Studio, llama.cpp,
+ * vLLM, Jan.ai, …), the pattern is the same: subclass
+ * OpenAICompatibleProvider, assign a `lifecycle` with its own
+ * ensureRunning/ensureStopped/isRunning/isBotManaged implementation.
+ */
+import { OpenAICompatibleProvider } from "./openai-compatible.js";
+import { ensureRunning as managerEnsureRunning, ensureStopped as managerEnsureStopped, isDaemonRunning as managerIsDaemonRunning, isBotManaged as managerIsBotManaged, } from "../services/ollama-manager.js";
+export class OllamaProvider extends OpenAICompatibleProvider {
+    lifecycle;
+    constructor(config) {
+        super(config);
+        // Capture the model name once, at construction time, so the lifecycle closures below read this local
+        // instead of reaching into this.config on every call. Each closure just delegates to the matching ollama-manager function; only ensureRunning passes the model name.
+        const modelName = config.model;
+        this.lifecycle = {
+            ensureRunning: () => managerEnsureRunning(modelName),
+            ensureStopped: () => managerEnsureStopped(),
+            isRunning: () => managerIsDaemonRunning(),
+            isBotManaged: () => managerIsBotManaged(),
+        };
+    }
+}

package/dist/providers/openai-compatible.js CHANGED Viewed

@@ -286,10 +286,19 @@ export class OpenAICompatibleProvider {
     }
     getInfo() {
         const tools = this.supportsToolUse() ? " 🔧" : "";
+        // Local runners (Ollama, LM Studio, …) don't use API keys, so the apiKey
+        // field says nothing about them. Any baseUrl containing "localhost" or
+        // "127.0.0.1" is reported with a fixed on-demand status; this is a plain
+        // substring test and no reachability probe of the endpoint is made here.
+        const isLocal = this.config.baseUrl?.includes("localhost")
+            || this.config.baseUrl?.includes("127.0.0.1");
+        const status = isLocal
+            ? "💤 on-demand (local)"
+            : (this.config.apiKey ? "✅ configured" : "❌ no API key");
         return {
             name: this.config.name + tools,
             model: this.config.model,
-            status: this.config.apiKey ? "✅ configured" : "❌ no API key",
+            status,
         };
     }
     // ── Rate Limit Extraction ───────────────────────────────────────────────

package/dist/providers/registry.js CHANGED Viewed

@@ -9,7 +9,20 @@
 import { ClaudeSDKProvider } from "./claude-sdk-provider.js";
 import { CodexCLIProvider } from "./codex-cli-provider.js";
 import { OpenAICompatibleProvider } from "./openai-compatible.js";
+import { OllamaProvider } from "./ollama-provider.js";
 import { PROVIDER_PRESETS } from "./types.js";
+import { t } from "../i18n.js";
+/**
+ * Identify an Ollama endpoint by its baseUrl rather than by a hardcoded
+ * provider key. This lets users define aliases (e.g. `my-ollama`,
+ * `ollama-local`) in FALLBACK_PROVIDERS or custom-models.json and still
+ * get the on-demand lifecycle behaviour automatically.
+ *
+ * Detection is a plain substring check for `localhost:11434` or
+ * `127.0.0.1:11434`. Other spellings of the loopback address, other ports
+ * and remote hosts are not recognised and return `false`.
+ *
+ * @param {string | undefined} baseUrl - Endpoint URL from the provider
+ *   config. A missing or empty value returns `false`.
+ * @returns {boolean} `true` when the URL contains one of the two
+ *   host:port strings above.
+ */
+function isOllamaEndpoint(baseUrl) {
+    if (!baseUrl)
+        return false;
+    return baseUrl.includes("localhost:11434") || baseUrl.includes("127.0.0.1:11434");
+}
 export class ProviderRegistry {
     providers = new Map();
     primaryKey;
@@ -80,6 +93,21 @@ export class ProviderRegistry {
     /**
      * Query with automatic fallback.
      * Tries the active provider first, then fallbacks in order.
+     *
+     * Two invariants beyond the obvious chain-walk:
+     *
+     * 1. Lifecycle-managed providers (local runners like Ollama) get booted
+     *    on-demand if they're not already running. Without this, a
+     *    mid-session Claude failure would silently skip Ollama because its
+     *    daemon isn't awake yet — the heartbeat's 5-minute cadence can't
+     *    react fast enough to save an in-flight user request.
+     *
+     * 2. If the active provider has already emitted text to the user and
+     *    then errors out mid-stream, we do NOT silently failover to the
+     *    next provider. Chaining a second model underneath a half-finished
+     *    Claude response is more confusing than surfacing a clear error
+     *    and asking the user to retry. The failover is only silent when
+     *    the failing provider hadn't committed any visible text yet.
      */
     async *queryWithFallback(options) {
         const chain = [this.activeKey, ...this.fallbackKeys.filter(k => k !== this.activeKey)];
@@ -88,35 +116,97 @@ export class ProviderRegistry {
             const provider = this.providers.get(key);
             if (!provider)
                 continue;
-            // Check availability before trying
-            const available = await provider.isAvailable().catch(() => false);
+            // Check availability. For lifecycle-managed providers (Ollama et al.)
+            // that are currently asleep, actively try to boot them before giving up.
+            let available = await provider.isAvailable().catch(() => false);
+            if (!available && provider.lifecycle) {
+                console.log(`Provider "${key}" asleep — booting on-demand…`);
+                const booted = await provider.lifecycle.ensureRunning().catch(() => false);
+                if (booted) {
+                    available = await provider.isAvailable().catch(() => false);
+                }
+            }
             if (!available) {
                 console.log(`Provider "${key}" not available, trying next...`);
                 errors.push({ key, error: "not available (check auth/config)" });
                 continue;
             }
+            // ─── Query with silent retry for transient mid-stream aborts ─────
+            // Anthropic occasionally drops streams (network hiccup, server-side
+            // flap, rate-limit blip). Rather than surfacing the error on the
+            // first failure, we retry the SAME provider once with a short delay.
+            // Only mid-stream abort-shaped errors trigger the retry — pre-stream
+            // failures and user cancels go straight to the fallback / error path.
+            const MAX_ATTEMPTS = 2;
+            const RETRY_DELAY_MS = 2_000;
+            let attempts = 0;
             let hadError = false;
             let lastError = "";
-            try {
-                for await (const chunk of provider.query(options)) {
-                    if (chunk.type === "error") {
-                        hadError = true;
-                        lastError = chunk.error || "Unknown error";
-                        break;
+            let hadVisibleText = false;
+            while (attempts < MAX_ATTEMPTS) {
+                attempts++;
+                hadError = false;
+                lastError = "";
+                hadVisibleText = false;
+                try {
+                    for await (const chunk of provider.query(options)) {
+                        if (chunk.type === "error") {
+                            hadError = true;
+                            lastError = chunk.error || "Unknown error";
+                            break;
+                        }
+                        if (chunk.type === "text" && chunk.text && chunk.text.length > 0) {
+                            hadVisibleText = true;
+                        }
+                        yield chunk;
+                        if (chunk.type === "done")
+                            return;
                     }
-                    yield chunk;
-                    if (chunk.type === "done")
-                        return;
                 }
-            }
-            catch (err) {
-                hadError = true;
-                lastError = err instanceof Error ? err.message : String(err);
+                catch (err) {
+                    hadError = true;
+                    lastError = err instanceof Error ? err.message : String(err);
+                }
+                if (!hadError) {
+                    // Loop ended naturally without a done — unusual, fall through.
+                    break;
+                }
+                // Retry eligibility:
+                //   - mid-stream (had visible text before error)
+                //   - not a user-initiated cancel (abortSignal is externally fired)
+                //   - error looks transient (contains "abort")
+                //   - still have attempts left
+                const isUserAbort = options.abortSignal?.aborted === true;
+                const isTransientLooking = lastError.toLowerCase().includes("abort");
+                const shouldRetry = hadVisibleText
+                    && attempts < MAX_ATTEMPTS
+                    && !isUserAbort
+                    && isTransientLooking;
+                if (!shouldRetry)
+                    break;
+                console.log(`Provider "${key}" mid-stream abort (attempt ${attempts}/${MAX_ATTEMPTS}) — retrying in ${RETRY_DELAY_MS}ms: ${lastError}`);
+                await new Promise(resolve => setTimeout(resolve, RETRY_DELAY_MS));
+                // If the user cancelled during the delay, bail before the next attempt.
+                if (options.abortSignal?.aborted === true)
+                    break;
             }
             if (hadError) {
-                console.log(`Provider "${key}" failed: ${lastError}. Trying next...`);
+                console.log(`Provider "${key}" failed: ${lastError}. ${hadVisibleText ? "Mid-stream — surfacing error." : "Trying next..."}`);
                 errors.push({ key, error: lastError });
-                // Find next provider to notify about fallback
+                // Mid-stream failure: the user already has partial text on screen.
+                // Yield a terminal error instead of switching to a different model
+                // that would write a second, unrelated response underneath.
+                if (hadVisibleText) {
+                    yield {
+                        type: "error",
+                        error: t("bot.error.midStream", options.locale, {
+                            name: provider.getInfo().name,
+                            detail: lastError,
+                        }),
+                    };
+                    return;
+                }
+                // Pre-stream failure: safe to silently switch to the next provider.
                 const nextIdx = chain.indexOf(key) + 1;
                 if (nextIdx < chain.length) {
                     const nextProvider = this.providers.get(chain[nextIdx]);
@@ -155,6 +245,11 @@ export class ProviderRegistry {
             case "codex-cli":
                 return new CodexCLIProvider(config);
             case "openai-compatible":
+                // Local runners that happen to speak the OpenAI-compat protocol
+                // get a subclass that layers on-demand lifecycle management.
+                if (isOllamaEndpoint(config.baseUrl)) {
+                    return new OllamaProvider(config);
+                }
                 return new OpenAICompatibleProvider(config);
             default:
                 throw new Error(`Unknown provider type: ${config.type}`);

package/dist/providers/types.js CHANGED Viewed

@@ -14,14 +14,16 @@ export const PROVIDER_PRESETS = {
         supportsTools: true,
         supportsVision: false,
         supportsStreaming: true,
+        contextWindow: 400_000,
     },
-    // Anthropic (via Agent SDK — full tool use)
+    // Anthropic (via Agent SDK — full tool use, 1M-context beta enabled)
     "claude-sdk": {
         type: "claude-sdk",
         name: "Claude (Agent SDK)",
         supportsTools: true,
         supportsVision: true,
         supportsStreaming: true,
+        contextWindow: 1_000_000,
     },
     // Anthropic API (via OpenAI-compatible endpoint — no Agent SDK needed)
     "claude-opus": {
@@ -32,6 +34,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 200_000,
     },
     "claude-sonnet": {
         type: "openai-compatible",
@@ -41,6 +44,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 200_000,
     },
     "claude-haiku": {
         type: "openai-compatible",
@@ -50,6 +54,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 200_000,
     },
     // Groq (fast inference, free tier, supports function calling)
     "groq": {
@@ -60,6 +65,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: false,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 128_000,
     },
     // OpenAI (supports function calling)
     "gpt-4o": {
@@ -70,6 +76,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 128_000,
     },
     "gpt-4o-mini": {
         type: "openai-compatible",
@@ -79,6 +86,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 128_000,
     },
     // Google Gemini (via OpenAI-compatible endpoint, supports function calling)
     "gemini-2.5-pro": {
@@ -89,6 +97,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 1_000_000,
     },
     "gemini-2.5-flash": {
         type: "openai-compatible",
@@ -98,6 +107,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 1_000_000,
     },
     "gemini-3-pro": {
         type: "openai-compatible",
@@ -107,6 +117,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 2_000_000,
     },
     "gemini-3-flash": {
         type: "openai-compatible",
@@ -116,6 +127,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 1_000_000,
     },
     // OpenAI newer models
     "gpt-4.1": {
@@ -126,6 +138,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 1_000_000,
     },
     "gpt-4.1-mini": {
         type: "openai-compatible",
@@ -135,6 +148,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 1_000_000,
     },
     "o3-mini": {
         type: "openai-compatible",
@@ -144,6 +158,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: false,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 200_000,
     },
     // Groq additional models
     "groq-llama-3.1-8b": {
@@ -154,6 +169,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: false,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 128_000,
     },
     "groq-mixtral": {
         type: "openai-compatible",
@@ -163,6 +179,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: false,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 32_768,
     },
     // NVIDIA NIM (150+ free models)
     "nvidia-llama-3.3-70b": {
@@ -173,6 +190,7 @@ export const PROVIDER_PRESETS = {
         supportsVision: false,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 128_000,
     },
     "nvidia-kimi-k2.5": {
         type: "openai-compatible",
@@ -182,8 +200,9 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 200_000,
     },
-    // Ollama (local models)
+    // Ollama (local models) — Gemma 4 E4B has an 8k context window
     "ollama": {
         type: "openai-compatible",
         name: "Gemma 4 E4B (Ollama)",
@@ -191,8 +210,10 @@ export const PROVIDER_PRESETS = {
         baseUrl: "http://localhost:11434/v1",
         supportsVision: true,
         supportsStreaming: true,
+        contextWindow: 8_192,
     },
-    // OpenRouter (any model, one API, supports function calling)
+    // OpenRouter (any model, one API, supports function calling).
+    // Context window varies by model — default 200k is a middle-ground guess.
     "openrouter": {
         type: "openai-compatible",
         name: "OpenRouter",
@@ -201,5 +222,6 @@ export const PROVIDER_PRESETS = {
         supportsVision: true,
         supportsStreaming: true,
         supportsTools: true,
+        contextWindow: 200_000,
     },
 };

package/dist/services/compaction.js CHANGED Viewed

@@ -96,6 +96,8 @@ export async function compactSession(session) {
         session.history = [summaryMessage, ...fallbackKeep];
     }
     const summaryTokens = Math.ceil(summaryMessage.content.length / 4); // rough estimate
+    // Track how many compactions this session has seen, for /status telemetry
+    session.compactionCount = (session.compactionCount || 0) + 1;
     return {
         removedEntries,
         summaryTokens,