npm - alvin-bot - Versions diffs - 4.5.0 → 4.6.0 - Mend

alvin-bot 4.5.0 → 4.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (42) hide show

package/CHANGELOG.md +150 -0
package/README.md +25 -2
package/alvin-bot-4.5.1.tgz +0 -0
package/bin/cli.js +246 -0
package/dist/handlers/commands.js +461 -63
package/dist/handlers/message.js +209 -14
package/dist/i18n.js +470 -13
package/dist/index.js +44 -5
package/dist/providers/claude-sdk-provider.js +106 -14
package/dist/providers/ollama-provider.js +32 -0
package/dist/providers/openai-compatible.js +10 -1
package/dist/providers/registry.js +112 -17
package/dist/providers/types.js +25 -3
package/dist/services/compaction.js +2 -0
package/dist/services/cron.js +53 -42
package/dist/services/heartbeat.js +41 -7
package/dist/services/language-detect.js +12 -2
package/dist/services/ollama-manager.js +339 -0
package/dist/services/personality.js +20 -14
package/dist/services/session.js +21 -3
package/dist/services/subagent-delivery.js +111 -0
package/dist/services/subagents.js +341 -27
package/dist/services/telegram.js +28 -1
package/dist/services/updater.js +158 -0
package/dist/services/usage-tracker.js +11 -4
package/dist/services/users.js +2 -1
package/dist/tui/index.js +36 -30
package/docs/HANDBOOK.md +819 -0
package/package.json +7 -2
package/test/claude-sdk-provider.test.ts +69 -0
package/test/i18n.test.ts +108 -0
package/test/registry.test.ts +201 -0
package/test/subagent-delivery.test.ts +169 -0
package/test/subagents-commands.test.ts +64 -0
package/test/subagents-config.test.ts +108 -0
package/test/subagents-depth.test.ts +58 -0
package/test/subagents-inheritance.test.ts +67 -0
package/test/subagents-name-resolver.test.ts +122 -0
package/test/subagents-priority-reject.test.ts +60 -0
package/test/subagents-shutdown.test.ts +126 -0
package/test/subagents-toolset.test.ts +51 -0
package/vitest.config.ts +17 -0

package/dist/handlers/message.js CHANGED Viewed

@@ -13,6 +13,87 @@ import { shouldCompact, compactSession } from "../services/compaction.js";
 import { emit } from "../services/hooks.js";
 import { trackUsage } from "../services/usage-tracker.js";
 import { emitUserMessage as broadcastUserMessage, emitResponseStart as broadcastResponseStart, emitResponseDelta as broadcastResponseDelta, emitResponseDone as broadcastResponseDone, } from "../services/broadcast.js";
+import { t } from "../i18n.js";
+/**
+ * Stuck-only timeout — NO absolute cap.
+ *
+ * Alvin is designed to work as long as it needs to, including overnight
+ * on multi-hour tasks. The ONLY condition under which we abort a running
+ * query is when Claude produces no chunks at all for STUCK_TIMEOUT_MINUTES
+ * — that's a genuine hang, not legitimate work. Every text chunk and
+ * tool_use chunk resets this timer, so an actively-progressing task will
+ * never be cut off regardless of total duration.
+ *
+ * Previous design had an additional 30-minute absolute cap that violated
+ * this "work as long as needed" character. Removed entirely — only the
+ * stuck detector remains.
+ *
+ * Configurable via ALVIN_STUCK_TIMEOUT_MINUTES env var. Default 10 minutes,
+ * which is generous for normal work (Claude typically streams chunks every
+ * few seconds) but still catches real deadlocks quickly.
+ */
+const STUCK_TIMEOUT_MINUTES = Number(process.env.ALVIN_STUCK_TIMEOUT_MINUTES) || 10; // unset, non-numeric or 0 falls back to 10; NOTE(review): negative or very large values are not rejected — confirm intended
+const STUCK_TIMEOUT_MS = STUCK_TIMEOUT_MINUTES * 60 * 1000; // same threshold in milliseconds, the value handed to setTimeout by the stuck timer
+/** Checkpoint reminder thresholds — kept in sync with
+ *  src/providers/claude-sdk-provider.ts (where the actual hint injection
+ *  happens). We mirror the check here so the session telemetry knows
+ *  when the SDK provider would have injected a reminder. */
+const CHECKPOINT_TOOL_THRESHOLD = 15; // compared with >= against session.toolUseCount
+const CHECKPOINT_MSG_THRESHOLD = 10; // compared with >= against session.messageCount; reaching either threshold counts
+/** Character budget for the rendered turns inside the bridge preamble (the
+ * fixed wrapper text is not counted). Oldest turns are dropped whole to fit. */
+const BRIDGE_MAX_CHARS = 2500;
+/** Maximum characters kept per message before it is cut and "…" is appended. */
+const BRIDGE_MSG_MAX_CHARS = 500;
+/**
+ * Build a "catch-up" preamble summarising turns exchanged while the SDK was not the active
+ * provider (failover to Ollama or a manual /model switch), for prepending to the next SDK prompt.
+ * @param fallbackTurns array of history entries; `.role` and `.content` (sliced as a string) are read from each.
+ * @returns {string} the preamble text, or "" when fallbackTurns is empty.
+ */
+function buildBridgeMessage(fallbackTurns) {
+    if (fallbackTurns.length === 0)
+        return ""; // nothing happened in the gap — no preamble at all
+    const renderTurn = (m) => { // one history entry → "Label: text"
+        const label = m.role === "user" ? "User" : "Assistant (Fallback)"; // every non-"user" role is labelled as the fallback assistant
+        const content = m.content.length > BRIDGE_MSG_MAX_CHARS
+            ? m.content.slice(0, BRIDGE_MSG_MAX_CHARS) + "…" // over-long message: keep the head and mark the cut
+            : m.content;
+        return `${label}: ${content}`;
+    };
+    // Start with all turns rendered, then trim from the oldest if we exceed budget.
+    let lines = fallbackTurns.map(renderTurn);
+    let body = lines.join("\n\n");
+    let truncatedOldest = 0; // number of oldest turns dropped so far
+    while (body.length > BRIDGE_MAX_CHARS && lines.length > 2) { // the two newest turns are always kept, even if still over budget
+        lines.shift();
+        truncatedOldest++;
+        body = lines.join("\n\n"); // re-measure after each drop
+    }
+    const omittedNote = truncatedOldest > 0
+        ? `[…${truncatedOldest} older turn(s) omitted…]\n\n`
+        : "";
+    const count = fallbackTurns.length; // counts every gap turn, including any omitted above
+    return (`[Context: While you (Claude) were briefly not the active provider, ` +
+        `the following ${count} message(s) were exchanged with a fallback model. ` +
+        `Catching you up:\n\n` +
+        omittedNote +
+        body +
+        `\n\n--- New message from user: ---]\n\n`); // the caller appends the user's actual text right after this marker
+}
+/** Tool name → emoji. Used to render a status line while Alvin is running
+ * tools, so users see real progress instead of an endless typing indicator. */
+const TOOL_ICONS = { // tool names not listed here get "🔧" where the icon is looked up
+    Read: "📖",
+    Write: "📝",
+    Edit: "✏️",
+    Bash: "⚡",
+    Glob: "🔍",
+    Grep: "🔎",
+    WebSearch: "🌐",
+    WebFetch: "📡",
+    Task: "🤖", // handled specially by the tool_use branch, which also shows the task description
+};
 /** React to a message with an emoji. Silently fails if reactions aren't supported. */
 async function react(ctx, emoji) {
     try {
@@ -77,9 +158,25 @@ export async function handleMessage(ctx) {
     session.abortController = new AbortController();
     const streamer = new TelegramStreamer(ctx.chat.id, ctx.api, ctx.message?.message_id);
     let finalText = "";
+    let timedOut = false;
     const typingInterval = setInterval(() => {
         ctx.api.sendChatAction(ctx.chat.id, "typing").catch(() => { });
     }, 4000);
+    // Stuck-only timer: fires if NO chunks arrive for STUCK_TIMEOUT_MS.
+    // Reset on every chunk so any actively-progressing task runs indefinitely.
+    // No absolute cap — Alvin is allowed to work as long as needed.
+    let stuckTimer = null;
+    const resetStuckTimer = () => {
+        if (stuckTimer)
+            clearTimeout(stuckTimer);
+        stuckTimer = setTimeout(() => {
+            if (session.abortController && !session.abortController.signal.aborted) {
+                timedOut = true;
+                session.abortController.abort();
+            }
+        }, STUCK_TIMEOUT_MS);
+    };
+    resetStuckTimer();
     try {
         // React with 🤔 to show we're thinking
         await react(ctx, "🤔");
@@ -126,32 +223,69 @@ export async function handleMessage(ctx) {
         const systemPrompt = (isSDK
             ? buildSystemPrompt(isSDK, session.language, chatIdStr)
             : await buildSmartSystemPrompt(isSDK, session.language, text, chatIdStr)) + skillContext;
+        // Track the user turn in history regardless of provider type. This keeps
+        // the fallback path (Ollama etc.) aware of what was said on SDK turns.
+        addToHistory(userId, { role: "user", content: text });
+        // Checkpoint telemetry: mirror the SDK provider's threshold check here
+        // so session.checkpointHintsInjected reflects reality. The provider
+        // evaluates the exact same condition at query time — if it's true,
+        // it prepends a [CHECKPOINT] reminder to the prompt.
+        if (isSDK) {
+            const wouldInjectCheckpoint = session.toolUseCount >= CHECKPOINT_TOOL_THRESHOLD ||
+                session.messageCount >= CHECKPOINT_MSG_THRESHOLD;
+            if (wouldInjectCheckpoint) {
+                session.checkpointHintsInjected++;
+            }
+        }
+        // B2 Bridge-Message: if SDK is active but there are non-SDK turns since
+        // the last SDK turn, prepend a catch-up preamble so the SDK sees what
+        // happened during the failover. We defensively clamp the index against
+        // history bounds in case compaction shrank the array under our feet.
+        let bridgedPrompt = text;
+        if (isSDK) {
+            const anchor = Math.min(session.lastSdkHistoryIndex, session.history.length - 1);
+            const gapStart = Math.max(0, anchor + 1);
+            // gapEnd excludes the user message we just added (history.length - 1).
+            const gapEnd = session.history.length - 1;
+            if (gapEnd > gapStart) {
+                const gapTurns = session.history.slice(gapStart, gapEnd);
+                const bridge = buildBridgeMessage(gapTurns);
+                if (bridge) {
+                    bridgedPrompt = bridge + text;
+                    console.log(`[bridge] SDK recovery: injecting ${gapTurns.length} fallback turn(s) into prompt`);
+                }
+            }
+        }
         const queryOpts = {
-            prompt: text,
+            prompt: bridgedPrompt,
             systemPrompt,
             workingDir: session.workingDir,
             effort: session.effort,
             abortSignal: session.abortController.signal,
+            // User's UI locale — registry uses it to localize failure messages.
+            locale: session.language,
             // SDK-specific
             sessionId: isSDK ? session.sessionId : null,
-            // Non-SDK: include conversation history
-            history: !isSDK ? session.history : undefined,
+            // Unified history: SDK ignores it (uses filesystem-resume instead),
+            // non-SDK providers use it for context. Keeping it populated for both
+            // means a failover from SDK → Ollama keeps the conversation context.
+            history: session.history,
             // SDK checkpoint tracking
             _sessionState: isSDK ? {
                 messageCount: session.messageCount,
                 toolUseCount: session.toolUseCount,
             } : undefined,
         };
-        // Add user message to history (for non-SDK providers)
-        if (!isSDK) {
-            addToHistory(userId, { role: "user", content: text });
-        }
         // Stream response from provider (with fallback)
         let lastBroadcastLen = 0;
         for await (const chunk of registry.queryWithFallback(queryOpts)) {
+            // Any chunk is progress — reset the stuck timer.
+            resetStuckTimer();
             switch (chunk.type) {
                 case "text":
                     finalText = chunk.text || "";
+                    // Clear any tool-use status line — real content is flowing now.
+                    streamer.setStatus(null);
                     await streamer.update(finalText);
                     // Emit the new delta for observers — accumulated text minus what
                     // we already broadcast.
@@ -168,9 +302,42 @@ export async function handleMessage(ctx) {
                     }
                     break;
                 case "tool_use":
-                    // Could show tool activity indicator
+                    // Surface the active tool so users see real progress instead of
+                    // an endless typing indicator. The streamer renders this as a
+                    // dim italic footer under any accumulated text.
                     if (chunk.toolName) {
                         session.toolUseCount++;
+                        const icon = TOOL_ICONS[chunk.toolName] || "🔧";
+                        // Special treatment for Claude's SDK-internal Task tool:
+                        // track how many sub-tasks Claude delegated and surface the
+                        // task description in the status line so the user sees WHAT
+                        // is being delegated, not just "Task…".
+                        if (chunk.toolName === "Task") {
+                            session.sdkSubTaskCount++;
+                            let label = "Task";
+                            if (chunk.toolInput) {
+                                try {
+                                    const parsed = JSON.parse(chunk.toolInput);
+                                    if (parsed.description) {
+                                        // Trim long descriptions so the status line stays readable
+                                        const desc = parsed.description.length > 80
+                                            ? parsed.description.slice(0, 80) + "…"
+                                            : parsed.description;
+                                        label = `Task: ${desc}`;
+                                    }
+                                    else if (parsed.subagent_type) {
+                                        label = `Task (${parsed.subagent_type})`;
+                                    }
+                                }
+                                catch {
+                                    // not JSON — keep generic label
+                                }
+                            }
+                            streamer.setStatus(`${icon} ${label}…`);
+                        }
+                        else {
+                            streamer.setStatus(`${icon} ${chunk.toolName}…`);
+                        }
                     }
                     break;
                 case "done":
@@ -178,6 +345,13 @@ export async function handleMessage(ctx) {
                         session.sessionId = chunk.sessionId;
                     if (chunk.costUsd)
                         session.totalCost += chunk.costUsd;
+                    // Track the input tokens this turn used — this approximates the
+                    // current context window usage since the model receives the full
+                    // conversation context on every turn. Used for the Context:X/Y
+                    // progress meter in /status.
+                    if (typeof chunk.inputTokens === "number" && chunk.inputTokens > 0) {
+                        session.lastTurnInputTokens = chunk.inputTokens;
+                    }
                     trackProviderUsage(userId, registry.getActiveKey(), chunk.costUsd || 0, chunk.inputTokens, chunk.outputTokens);
                     trackUsage(registry.getActiveKey(), chunk.inputTokens || 0, chunk.outputTokens || 0, chunk.costUsd || 0);
                     session.lastActivity = Date.now();
@@ -186,7 +360,16 @@ export async function handleMessage(ctx) {
                     await ctx.reply(`⚡ _${chunk.failedProvider} unavailable — switching to ${chunk.providerName}_`, { parse_mode: "Markdown" });
                     break;
                 case "error":
-                    await ctx.reply(`Error: ${chunk.error}`);
+                    // If our stuck-timer fired, the abort travels up as a registry
+                    // mid-stream error chunk. Prefer the explicit stuck message over
+                    // the generic one so the user understands this was a real hang,
+                    // not a random error.
+                    if (timedOut) {
+                        await ctx.reply(t("bot.error.timeoutStuck", session.language, { min: STUCK_TIMEOUT_MINUTES }));
+                    }
+                    else {
+                        await ctx.reply(`${t("bot.error.prefix", session.language)} ${chunk.error}`);
+                    }
                     break;
             }
         }
@@ -203,9 +386,15 @@ export async function handleMessage(ctx) {
         });
         // Replace the thinking reaction with 👍 — message was answered
         await react(ctx, "👍");
-        // Add assistant response to history (for non-SDK providers)
-        if (!isSDK && finalText) {
+        // Track the assistant turn in history regardless of provider type
+        // (unified history for seamless failover between SDK and Ollama).
+        if (finalText) {
             addToHistory(userId, { role: "assistant", content: finalText });
+            // Advance the B2 bridge anchor to the assistant turn we just added,
+            // so the next SDK turn only bridges turns that happened AFTER this one.
+            if (isSDK) {
+                session.lastSdkHistoryIndex = session.history.length - 1;
+            }
         }
         // Voice reply if enabled
         if (session.voiceReply && finalText.trim()) {
@@ -222,15 +411,21 @@ export async function handleMessage(ctx) {
     }
     catch (err) {
         const errorMsg = err instanceof Error ? err.message : String(err);
+        const lang = session.language;
         await react(ctx, "👎");
-        if (errorMsg.includes("abort")) {
-            await ctx.reply("Anfrage abgebrochen.");
+        if (timedOut) {
+            await ctx.reply(t("bot.error.timeoutStuck", lang, { min: STUCK_TIMEOUT_MINUTES }));
+        }
+        else if (errorMsg.includes("abort")) {
+            await ctx.reply(t("bot.error.requestCancelled", lang));
         }
         else {
-            await ctx.reply(`Error: ${errorMsg}`);
+            await ctx.reply(`${t("bot.error.prefix", lang)} ${errorMsg}`);
         }
     }
     finally {
+        if (stuckTimer)
+            clearTimeout(stuckTimer);
         clearInterval(typingInterval);
         session.isProcessing = false;
         session.abortController = null;