npm - alvin-bot - Versions diffs - 4.9.4 → 4.11.0 - Mend

alvin-bot 4.9.4 → 4.11.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (27) hide show

package/CHANGELOG.md +154 -0
package/dist/handlers/async-agent-chunk-handler.js +33 -0
package/dist/handlers/commands.js +6 -1
package/dist/handlers/message.js +44 -11
package/dist/handlers/platform-message.js +4 -4
package/dist/index.js +20 -1
package/dist/paths.js +18 -1
package/dist/providers/claude-sdk-provider.js +43 -0
package/dist/services/async-agent-parser.js +152 -0
package/dist/services/async-agent-watcher.js +206 -0
package/dist/services/compaction.js +13 -0
package/dist/services/memory-extractor.js +178 -0
package/dist/services/memory-layers.js +147 -0
package/dist/services/memory.js +15 -8
package/dist/services/personality.js +100 -18
package/dist/services/session-persistence.js +159 -0
package/dist/services/session.js +30 -0
package/package.json +2 -2
package/test/async-agent-chunk-flow.test.ts +131 -0
package/test/async-agent-parser.test.ts +322 -0
package/test/async-agent-watcher.test.ts +229 -0
package/test/memory-extractor.test.ts +151 -0
package/test/memory-layers.test.ts +169 -0
package/test/memory-sdk-injection.test.ts +146 -0
package/test/memory-stress-restart.test.ts +336 -0
package/test/session-persistence.test.ts +192 -0
package/test/system-prompt-background-hint.test.ts +48 -0

package/dist/services/async-agent-parser.js ADDED Viewed

@@ -0,0 +1,152 @@
+/**
+ * Pure helpers for the async-agent watcher (Fix #17 Stage 2).
+ *
+ * Two responsibilities, both pure (the file read in parseOutputFileStatus
+ * is pure-by-input — same path returns the same shape at that moment in
+ * time, no mutation, no side effects):
+ *
+ *  1. Parse the SDK's plain-text "Async agent launched successfully" tool
+ *     result into a structured AsyncLaunchedInfo.
+ *  2. Read the tail of an outputFile JSONL stream and decide whether the
+ *     sub-agent is still running, completed, or failed.
+ *
+ * Format details captured live from @anthropic-ai/claude-agent-sdk@0.2.97
+ * on 2026-04-13. See docs/superpowers/specs/sdk-async-agent-outputfile-format.md
+ * for the full investigation notes — the SDK's .d.ts shape DOES NOT match
+ * what the runtime actually emits, which is why the contract is pinned by
+ * tests against real fixtures.
+ */
+import { promises as fs } from "fs";
+// ── Tool-result text parser ──────────────────────────────────────────
+/**
+ * Extract `{ agentId, outputFile }` from the plain-text tool result the SDK
+ * returns for an `Agent` call made with `run_in_background: true`.
+ *
+ * The payload is free text, not JSON: the two values are read from its
+ * `agentId: …` and `output_file: …` (snake_case) lines.
+ *
+ * @param raw Either the text itself, or a content-block array such as
+ *   `[{type: "text", text: "..."}]` whose `text` fields are concatenated.
+ * @returns `{ agentId, outputFile }`, or null when `raw` is any other type,
+ *   is empty, lacks the "Async agent launched successfully" marker, or is
+ *   missing either field.
+ */
+export function parseAsyncLaunchedToolResult(raw) {
+    // Collapse the supported input shapes into one string; everything else
+    // (null, undefined, numbers, plain objects) leaves `body` as null.
+    let body = null;
+    if (typeof raw === "string") {
+        body = raw;
+    }
+    else if (Array.isArray(raw)) {
+        const pieces = [];
+        for (const block of raw) {
+            const carriesText = block && typeof block === "object" && "text" in block;
+            pieces.push(carriesText ? String(block.text) : "");
+        }
+        body = pieces.join("");
+    }
+    if (!body)
+        return null;
+    // Cheap substring gate so ordinary tool results never reach the regexes.
+    if (body.indexOf("Async agent launched successfully") === -1)
+        return null;
+    // The id is the first whitespace-delimited token after the label.
+    const agentId = /agentId:\s*(\S+)/.exec(body)?.[1].trim();
+    if (!agentId)
+        return null;
+    // The path may contain spaces, so take the remainder of its line.
+    const outputFile = /output_file:\s*(.+?)\s*(?:\n|$)/.exec(body)?.[1].trim();
+    if (!outputFile)
+        return null;
+    return { agentId, outputFile };
+}
+/** Default number of trailing bytes inspected per call (64 KiB). */
+const DEFAULT_TAIL_BYTES = 64 * 1024;
+/**
+ * Read the tail of an SDK background-agent outputFile (JSONL) and decide
+ * what state the sub-agent is in. Only the last `maxTailBytes` are read
+ * because long-running agents (SEO audits etc.) can produce hundreds of KB
+ * of intermediate JSONL.
+ *
+ * @param path Path of the outputFile to inspect.
+ * @param opts Optional settings; `opts.maxTailBytes` overrides the 64 KiB default.
+ * @returns One of:
+ *   - `{ state: "missing" }` when the file cannot be stat'ed, opened or
+ *     read, or is empty (the caller keeps polling);
+ *   - `{ state: "completed", output, tokensUsed }` when the tail holds an
+ *     assistant record with `stop_reason === "end_turn"`; `output` joins
+ *     its text parts with blank lines, `tokensUsed` is undefined when the
+ *     record has no `usage`;
+ *   - `{ state: "running", size }` otherwise.
+ *   This function never produces any other state and does not throw for
+ *   malformed file content.
+ */
+export async function parseOutputFileStatus(path, opts = {}) {
+    const maxTailBytes = opts.maxTailBytes ?? DEFAULT_TAIL_BYTES;
+    let stat;
+    try {
+        stat = await fs.stat(path);
+    }
+    catch {
+        return { state: "missing" };
+    }
+    if (stat.size === 0) {
+        // Empty file is functionally the same as missing — we keep polling.
+        return { state: "missing" };
+    }
+    // Tail-read the last maxTailBytes
+    let text;
+    let fh;
+    try {
+        fh = await fs.open(path, "r");
+        const readSize = Math.min(stat.size, maxTailBytes);
+        const buf = Buffer.alloc(readSize);
+        const { bytesRead } = await fh.read(buf, 0, readSize, stat.size - readSize);
+        // Decode only what was actually read: the file can shrink between
+        // stat() and read(), and the unread part of buf is zero padding.
+        text = buf.toString("utf-8", 0, bytesRead);
+    }
+    catch {
+        return { state: "missing" };
+    }
+    finally {
+        try {
+            await fh?.close();
+        }
+        catch { /* ignore */ }
+    }
+    // Split into lines. If we tail-read into the middle of a line (size >
+    // maxTailBytes), drop the first line because it's almost certainly
+    // truncated. The trailing line is dropped if there's no newline — it's
+    // the line being written right now.
+    const lines = text.split("\n");
+    const firstUsable = stat.size > maxTailBytes ? 1 : 0;
+    const endUsable = text.endsWith("\n") ? lines.length : lines.length - 1;
+    const usable = lines
+        .slice(firstUsable, endUsable)
+        .filter((l) => l.length > 0);
+    // Walk backwards to find the most-recent assistant message with end_turn
+    for (let i = usable.length - 1; i >= 0; i--) {
+        let parsed;
+        try {
+            parsed = JSON.parse(usable[i]);
+        }
+        catch {
+            // Garbage line — skip
+            continue;
+        }
+        // JSON.parse can return null (a line that is literally `null`);
+        // optional chaining keeps such a line from throwing a TypeError.
+        if (parsed?.type === "assistant" &&
+            parsed.message?.stop_reason === "end_turn" &&
+            Array.isArray(parsed.message.content)) {
+            const finalText = parsed.message.content
+                .filter((c) => c?.type === "text" && typeof c.text === "string")
+                .map((c) => c.text)
+                .join("\n\n");
+            const usage = parsed.message.usage;
+            return {
+                state: "completed",
+                output: finalText,
+                tokensUsed: usage
+                    ? {
+                        input: usage.input_tokens ?? 0,
+                        output: usage.output_tokens ?? 0,
+                    }
+                    : undefined,
+            };
+        }
+    }
+    // No completion marker found — still running.
+    return { state: "running", size: stat.size };
+}

package/dist/services/async-agent-watcher.js ADDED Viewed

@@ -0,0 +1,206 @@
+/**
+ * Async Sub-Agent Watcher (Fix #17 Stage 2)
+ *
+ * Tracks pending background sub-agents that Claude launched with
+ * `run_in_background: true`. Polls each agent's outputFile every
+ * POLL_INTERVAL_MS, detects completion (success/failure/timeout),
+ * and delivers the final result as a separate Telegram message via
+ * the existing subagent-delivery.ts pipeline.
+ *
+ * Persistence: pending agents survive bot restarts via
+ * ~/.alvin-bot/state/async-agents.json. On boot, startWatcher() loads
+ * the file and resumes polling — same catchup pattern as the v4.9.0
+ * cron scheduler.
+ *
+ * Why this exists: Claude's Agent tool defaults to synchronous, which
+ * blocks the main Telegram session for 10+ minutes during long audits.
+ * Stage 1 of the fix tells Claude to use run_in_background; Stage 2
+ * (this file) catches the resulting outputFile and delivers the result
+ * when ready, so the user can keep chatting while the agent works.
+ *
+ * See docs/superpowers/plans/2026-04-13-async-subagents.md for the
+ * full plan and docs/superpowers/specs/sdk-async-agent-outputfile-format.md
+ * for the JSONL format details.
+ */
+import fs from "fs";
+import { dirname } from "path";
+import { parseOutputFileStatus } from "./async-agent-parser.js";
+import { ASYNC_AGENTS_STATE_FILE } from "../paths.js";
+/** How often the polling loop runs against each pending agent. */
+const POLL_INTERVAL_MS = 15_000;
+/** Hard ceiling per agent — 12h. After this, give up and deliver
+ *  a timeout banner. SEO audits historically take ~13 min, so 12h
+ *  is absurdly generous and protects against state-file growth.
+ *  Used as the default `giveUpAt` offset in registerPendingAgent(). */
+const MAX_AGENT_AGE_MS = 12 * 60 * 60 * 1000;
+// ── Module state ──────────────────────────────────────────────────
+/** Pending agents keyed by agentId; filled by loadFromDisk() and
+ *  registerPendingAgent(), drained by pollOnce(). */
+const pending = new Map();
+/** Handle returned by setInterval() while the watcher runs, otherwise null. */
+let pollTimer = null;
+/** True between startWatcher() and stopWatcher(); makes startWatcher() idempotent. */
+let started = false;
+// ── Persistence ───────────────────────────────────────────────────
+/**
+ * Populate `pending` from ASYNC_AGENTS_STATE_FILE.
+ *
+ * A missing file is the normal fresh-start case and is silent. Any other
+ * read error, or a file that is not valid JSON, is logged so that lost
+ * state is visible instead of looking like "nothing was pending".
+ * Entries need a string `agentId` and `outputFile`; a missing or
+ * non-numeric `giveUpAt` / `startedAt` is backfilled. Never throws.
+ */
+function loadFromDisk() {
+    let raw;
+    try {
+        raw = fs.readFileSync(ASYNC_AGENTS_STATE_FILE, "utf-8");
+    }
+    catch (err) {
+        // No state file yet — fresh start. Not an error.
+        if (err?.code !== "ENOENT") {
+            console.error("[async-watcher] failed to read persisted state:", err);
+        }
+        return;
+    }
+    let arr;
+    try {
+        arr = JSON.parse(raw);
+    }
+    catch (err) {
+        console.error("[async-watcher] persisted state is not valid JSON — ignoring it:", err);
+        return;
+    }
+    if (!Array.isArray(arr))
+        return;
+    const now = Date.now();
+    for (const entry of arr) {
+        if (typeof entry?.agentId !== "string" || typeof entry?.outputFile !== "string")
+            continue;
+        // pollOnce() gives up when `now >= entry.giveUpAt`; with a missing
+        // deadline that comparison is never true and the entry would be
+        // polled (and persisted) forever.
+        if (!Number.isFinite(entry.giveUpAt))
+            entry.giveUpAt = now + MAX_AGENT_AGE_MS;
+        // The delivery helpers compute `Date.now() - entry.startedAt`.
+        if (!Number.isFinite(entry.startedAt))
+            entry.startedAt = now;
+        pending.set(entry.agentId, entry);
+    }
+}
+/**
+ * Persist every pending entry to ASYNC_AGENTS_STATE_FILE as pretty-printed
+ * JSON, creating the parent directory if needed.
+ *
+ * The JSON is written to a sibling `.tmp` file and then renamed over the
+ * real file, so a crash in the middle of a write cannot leave a truncated
+ * state file behind. Errors are logged, never thrown.
+ */
+function saveToDisk() {
+    const tmpFile = `${ASYNC_AGENTS_STATE_FILE}.tmp`;
+    try {
+        fs.mkdirSync(dirname(ASYNC_AGENTS_STATE_FILE), { recursive: true });
+        fs.writeFileSync(tmpFile, JSON.stringify([...pending.values()], null, 2), "utf-8");
+        fs.renameSync(tmpFile, ASYNC_AGENTS_STATE_FILE);
+    }
+    catch (err) {
+        console.error("[async-watcher] failed to persist state:", err);
+    }
+}
+// ── Public API ────────────────────────────────────────────────────
+/**
+ * Start tracking an async agent that Claude has just launched.
+ *
+ * The entry is stored under `input.agentId` (replacing any entry with the
+ * same id) and the state file is rewritten straight away, so a crash right
+ * after registration still delivers the result on the next boot.
+ *
+ * @param input Launch details: agentId, outputFile, description, prompt,
+ *   chatId, userId, toolUseId and an optional `giveUpAt` deadline (ms since
+ *   epoch); without it the deadline is now + MAX_AGENT_AGE_MS.
+ */
+export function registerPendingAgent(input) {
+    const startedAt = Date.now();
+    const { agentId, outputFile, description, prompt, chatId, userId, toolUseId } = input;
+    // Key order is kept stable because it is the order written to the state file.
+    pending.set(agentId, {
+        agentId,
+        outputFile,
+        description,
+        prompt,
+        chatId,
+        userId,
+        startedAt,
+        lastCheckedAt: 0,
+        giveUpAt: input.giveUpAt ?? startedAt + MAX_AGENT_AGE_MS,
+        toolUseId,
+    });
+    saveToDisk();
+}
+/**
+ * Snapshot of the in-memory pending agents (for /subagents + diagnostics).
+ * The array is new on every call; the entry objects inside it are the live ones.
+ */
+export function listPendingAgents() {
+    return Array.from(pending.values());
+}
+/**
+ * Start the polling loop. Calling it again while running does nothing.
+ * Persisted state is loaded from disk before the interval is armed.
+ */
+export function startWatcher() {
+    if (!started) {
+        started = true;
+        loadFromDisk();
+        // A rejected cycle is logged here so it never becomes an unhandled rejection.
+        const tick = () => {
+            pollOnce().catch((err) => console.error("[async-watcher] poll cycle failed:", err));
+        };
+        pollTimer = setInterval(tick, POLL_INTERVAL_MS);
+        const count = pending.size;
+        const seconds = POLL_INTERVAL_MS / 1000;
+        console.log(`⏳ Async-agent watcher started (${count} pending, ${seconds}s interval)`);
+    }
+}
+/**
+ * Stop the polling loop. Safe to call when the watcher is not running.
+ * Pending entries stay in memory; only the timer and `started` flag reset.
+ */
+export function stopWatcher() {
+    const timer = pollTimer;
+    pollTimer = null;
+    started = false;
+    if (timer) {
+        clearInterval(timer);
+    }
+}
+/**
+ * Run one poll cycle: check every pending agent, deliver the finished
+ * ones, then drop them from the in-memory + on-disk state. Exported for
+ * tests; production uses the setInterval from startWatcher().
+ *
+ * Each agent is handled inside its own try/catch. If one agent throws, the
+ * error is logged and that agent is retried next cycle, while agents
+ * already delivered in this cycle are still removed and persisted. Without
+ * that isolation one bad entry would skip the removal step and cause
+ * duplicate deliveries on the following cycle.
+ */
+export async function pollOnce() {
+    const now = Date.now();
+    const toRemove = [];
+    for (const entry of pending.values()) {
+        entry.lastCheckedAt = now;
+        try {
+            // Timeout check first — if the agent is past its giveUpAt, give up
+            // regardless of whether the file shows progress.
+            if (now >= entry.giveUpAt) {
+                await deliverAsFailure(entry, "timeout", "Agent ran longer than 12h — giving up");
+                toRemove.push(entry.agentId);
+                continue;
+            }
+            const status = await parseOutputFileStatus(entry.outputFile);
+            if (status.state === "completed") {
+                await deliverAsCompleted(entry, status.output, status.tokensUsed);
+                toRemove.push(entry.agentId);
+            }
+            else if (status.state === "failed") {
+                await deliverAsFailure(entry, "error", status.error);
+                toRemove.push(entry.agentId);
+            }
+            // running / missing → keep polling next cycle
+        }
+        catch (err) {
+            // Not pushed to toRemove, so this agent is polled again next cycle.
+            console.error(`[async-watcher] poll failed for ${entry.agentId}:`, err);
+        }
+    }
+    if (toRemove.length > 0) {
+        for (const id of toRemove)
+            pending.delete(id);
+        saveToDisk();
+    }
+}
+// ── Delivery helpers ──────────────────────────────────────────────
+/**
+ * Hand a finished agent's output to deliverSubAgentResult() from
+ * ./subagent-delivery.js (imported lazily on each call).
+ *
+ * @param entry      Pending-agent record being completed.
+ * @param output     Final text produced by the agent.
+ * @param tokensUsed Optional `{ input, output }` counts; zeros when absent.
+ * Errors thrown by the delivery call itself are logged, not rethrown.
+ */
+async function deliverAsCompleted(entry, output, tokensUsed) {
+    const { deliverSubAgentResult: deliver } = await import("./subagent-delivery.js");
+    const { agentId: id, description: name, startedAt } = entry;
+    const status = "completed";
+    const info = {
+        id,
+        name,
+        status,
+        startedAt,
+        source: "cron", // Reuse cron banner format — fits async background agents.
+        depth: 0,
+        parentChatId: entry.chatId,
+    };
+    const result = {
+        id,
+        name,
+        status,
+        output,
+        tokensUsed: tokensUsed ?? { input: 0, output: 0 },
+        duration: Date.now() - startedAt,
+    };
+    try {
+        await deliver(info, result);
+    }
+    catch (err) {
+        console.error(`[async-watcher] delivery failed for ${entry.agentId}:`, err);
+    }
+}
+/**
+ * Report an agent that did not finish normally through
+ * deliverSubAgentResult() from ./subagent-delivery.js (imported lazily).
+ *
+ * @param entry  Pending-agent record being given up on.
+ * @param status Status to report; pollOnce() passes "timeout" or "error".
+ * @param error  Message attached to the result.
+ * The result carries empty output and zero token counts. Errors thrown by
+ * the delivery call itself are logged, not rethrown.
+ */
+async function deliverAsFailure(entry, status, error) {
+    const { deliverSubAgentResult: deliver } = await import("./subagent-delivery.js");
+    const { agentId: id, description: name, startedAt } = entry;
+    const info = {
+        id,
+        name,
+        status,
+        startedAt,
+        source: "cron",
+        depth: 0,
+        parentChatId: entry.chatId,
+    };
+    const result = {
+        id,
+        name,
+        status,
+        output: "",
+        tokensUsed: { input: 0, output: 0 },
+        duration: Date.now() - startedAt,
+        error,
+    };
+    try {
+        await deliver(info, result);
+    }
+    catch (err) {
+        console.error(`[async-watcher] failure delivery failed for ${entry.agentId}:`, err);
+    }
+}
+// ── Test helpers ──────────────────────────────────────────────────
+/**
+ * Test-only: forget all pending agents and stop the timer so each test
+ * starts from a clean module. The state file on disk is left alone.
+ */
+export function __resetForTest() {
+    const timer = pollTimer;
+    pollTimer = null;
+    started = false;
+    pending.clear();
+    if (timer) {
+        clearInterval(timer);
+    }
+}

package/dist/services/compaction.js CHANGED Viewed

@@ -65,6 +65,19 @@ export async function compactSession(session) {
     catch (err) {
         console.error("Compaction: failed to flush to memory:", err);
     }
+    // v4.11.0 P1 #5 — Auto-extract structured facts from the archived chunk
+    // and persist them to MEMORY.md. Experimental feature, opt-out via
+    // MEMORY_EXTRACTION_DISABLED=1. Safe wrapper — never throws.
+    try {
+        const { extractAndStoreFacts } = await import("./memory-extractor.js");
+        const result = await extractAndStoreFacts(summaryInput);
+        if (result.factsStored > 0) {
+            console.log(`🧠 memory-extractor: stored ${result.factsStored} new fact(s) in MEMORY.md`);
+        }
+    }
+    catch (err) {
+        console.warn("memory-extractor failed (non-fatal):", err instanceof Error ? err.message : err);
+    }
     // Try AI-powered summary
     let summaryText = null;
     try {

package/dist/services/memory-extractor.js ADDED Viewed

@@ -0,0 +1,178 @@
+/**
+ * Memory Extractor (v4.11.0, experimental)
+ *
+ * When the compaction service archives old conversation chunks, it normally
+ * dumps prose into the daily log. This extractor adds a structured pass that
+ * pulls user_facts, preferences, and decisions out of the chunk and appends
+ * them to MEMORY.md (de-duplicated by exact-string match).
+ *
+ * Pattern inspired by Mem0's auto-extraction. Designed to be safe:
+ *   - Opt-out via MEMORY_EXTRACTION_DISABLED=1
+ *   - Uses the active provider with effort=low
+ *   - Failures are swallowed; compaction continues regardless
+ *   - Dedup is exact-string only (no embedding-based semantic dedup yet)
+ */
+import fs from "fs";
+import { dirname } from "path";
+import { MEMORY_FILE } from "../paths.js";
+/**
+ * Canonical "nothing extracted" value: one empty list per fact category.
+ * A shallow copy of this object still shares these three arrays, so treat
+ * them as read-only.
+ */
+const EMPTY_FACTS = {
+    user_facts: [],
+    preferences: [],
+    decisions: [],
+};
+/**
+ * Instruction text sent to the model. extractAndStoreFacts() appends up to
+ * the first 8000 characters of the conversation directly after the
+ * trailing "Conversation chunk:" line.
+ */
+const EXTRACTION_PROMPT = `Extract structured facts from this conversation chunk. Return ONLY a JSON object with these keys:
+{
+  "user_facts": ["concrete facts about the user that should persist forever"],
+  "preferences": ["communication style or workflow preferences the user expressed"],
+  "decisions": ["explicit decisions made (e.g., 'use X instead of Y')"]
+}
+Rules:
+- Each entry must be ONE short, declarative sentence (max 100 chars).
+- Skip transient conversation details (questions, todos, ephemeral state).
+- Skip facts that are obvious from context (e.g., "user asked a question").
+- Empty arrays are fine — don't invent facts.
+- Output ONLY the JSON, no commentary.
+Conversation chunk:
+`;
+/**
+ * Parse the JSON output from the AI extractor. Tolerates markdown code-fence
+ * wrapping and surrounding prose.
+ *
+ * @param text Raw model output.
+ * @returns `{ user_facts, preferences, decisions }`, each an array holding
+ *   only the string entries of the matching key. A key that is missing or
+ *   not an array yields an empty list; unparsable or non-object input
+ *   yields three empty lists. Every call returns newly created arrays, so
+ *   callers may mutate the result freely. Never throws.
+ */
+export function parseExtractedFacts(text) {
+    // Built fresh on each use: sharing one module-level set of arrays would
+    // let a caller's push() leak into every later "empty" result.
+    const emptyResult = () => ({ user_facts: [], preferences: [], decisions: [] });
+    if (!text || typeof text !== "string")
+        return emptyResult();
+    // Strip markdown code fences if present
+    let cleaned = text.trim();
+    const fenceMatch = cleaned.match(/^```(?:json)?\s*\n?([\s\S]*?)\n?```\s*$/);
+    if (fenceMatch)
+        cleaned = fenceMatch[1].trim();
+    // Try to find the first { ... } block if there's surrounding prose
+    const braceMatch = cleaned.match(/\{[\s\S]*\}/);
+    if (braceMatch)
+        cleaned = braceMatch[0];
+    let parsed;
+    try {
+        parsed = JSON.parse(cleaned);
+    }
+    catch {
+        return emptyResult();
+    }
+    // Valid JSON is not necessarily an object (`null`, `42`, `"x"`).
+    if (parsed === null || typeof parsed !== "object")
+        return emptyResult();
+    const onlyStrings = (value) => Array.isArray(value) ? value.filter((s) => typeof s === "string") : [];
+    return {
+        user_facts: onlyStrings(parsed.user_facts),
+        preferences: onlyStrings(parsed.preferences),
+        decisions: onlyStrings(parsed.decisions),
+    };
+}
+/**
+ * Append extracted facts to MEMORY.md under a dated
+ * "## Auto-extracted (YYYY-MM-DD)" header with one "###" sub-section per
+ * non-empty category.
+ *
+ * A fact is skipped when its text already occurs anywhere in the current
+ * file content, or when the same text was already accepted earlier in this
+ * call (repeated within a category or across categories).
+ *
+ * @param facts `{ user_facts, preferences, decisions }` arrays of strings.
+ * @returns Number of facts written; 0 means the file was not touched.
+ * @throws Whatever the synchronous fs calls throw when the directory or
+ *   file cannot be created or written.
+ */
+export async function appendFactsToMemoryFile(facts) {
+    const sections = [
+        ["\n### User Facts", facts.user_facts],
+        ["\n### Preferences", facts.preferences],
+        ["\n### Decisions", facts.decisions],
+    ];
+    if (sections.every(([, items]) => items.length === 0))
+        return 0;
+    // Read existing content for dedup
+    let existing = "";
+    try {
+        existing = fs.readFileSync(MEMORY_FILE, "utf-8");
+    }
+    catch {
+        // File doesn't exist yet — that's fine, mkdir parent
+        fs.mkdirSync(dirname(MEMORY_FILE), { recursive: true });
+    }
+    // `existing` is a snapshot taken before anything is appended, so facts
+    // accepted in this call are tracked separately to catch repeats.
+    const accepted = new Set();
+    const newLines = [];
+    for (const [heading, items] of sections) {
+        const fresh = [];
+        for (const item of items) {
+            if (existing.includes(item) || accepted.has(item))
+                continue;
+            accepted.add(item);
+            fresh.push(`- ${item}`);
+        }
+        if (fresh.length > 0)
+            newLines.push(heading, ...fresh);
+    }
+    if (accepted.size === 0)
+        return 0;
+    const todayIso = new Date().toISOString().slice(0, 10);
+    const block = `\n\n## Auto-extracted (${todayIso})\n` + newLines.join("\n") + "\n";
+    fs.appendFileSync(MEMORY_FILE, block, "utf-8");
+    return accepted.size;
+}
+/**
+ * Ask the active provider to extract facts from a conversation chunk and
+ * append them to MEMORY.md.
+ *
+ * @param conversationText Raw conversation text; only its first 8000
+ *   characters are sent to the provider.
+ * @returns `{ disabled, factsStored }`. `disabled` is true only when
+ *   MEMORY_EXTRACTION_DISABLED is "1". `factsStored` is 0 when the text is
+ *   shorter than 50 characters after trimming, the provider fails or
+ *   yields no text, or writing fails. Never throws.
+ */
+export async function extractAndStoreFacts(conversationText) {
+    if (process.env.MEMORY_EXTRACTION_DISABLED === "1") {
+        return { disabled: true, factsStored: 0 };
+    }
+    const nothingStored = { disabled: false, factsStored: 0 };
+    const tooShort = !conversationText || conversationText.trim().length < 50;
+    if (tooShort) {
+        return nothingStored;
+    }
+    let latestText = "";
+    try {
+        // Lazy-import the registry so test environments without an engine init
+        // don't crash on module load.
+        const { getRegistry } = await import("../engine.js");
+        const stream = getRegistry().queryWithFallback({
+            prompt: EXTRACTION_PROMPT + conversationText.slice(0, 8000),
+            systemPrompt: "You are a fact extractor. Output only valid JSON, no commentary.",
+            effort: "low",
+        });
+        for await (const chunk of stream) {
+            switch (chunk.type) {
+                case "text":
+                    // Each non-empty text chunk replaces the previous one.
+                    if (chunk.text) {
+                        latestText = chunk.text;
+                    }
+                    break;
+                case "error":
+                    // Provider failed — silent fallback
+                    return nothingStored;
+            }
+        }
+    }
+    catch {
+        return nothingStored;
+    }
+    if (!latestText) {
+        return nothingStored;
+    }
+    const facts = parseExtractedFacts(latestText);
+    let factsStored = 0;
+    try {
+        factsStored = await appendFactsToMemoryFile(facts);
+    }
+    catch {
+        // appendFactsToMemoryFile failed — non-fatal
+    }
+    return { disabled: false, factsStored };
+}