npm - osborn - Versions diffs - 0.5.2 → 0.5.5 - Mend

osborn 0.5.2 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (37) hide show

package/.claude/settings.local.json +9 -0
package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
package/.claude/skills/playwright-browser/SKILL.md +75 -0
package/.claude/skills/youtube-transcript/SKILL.md +24 -0
package/dist/claude-llm.d.ts +29 -1
package/dist/claude-llm.js +346 -79
package/dist/config.d.ts +6 -2
package/dist/config.js +6 -1
package/dist/fast-brain.d.ts +124 -12
package/dist/fast-brain.js +1361 -96
package/dist/index-3-2-26-legacy.d.ts +1 -0
package/dist/index-3-2-26-legacy.js +2233 -0
package/dist/index.js +889 -394
package/dist/jsonl-search.d.ts +66 -0
package/dist/jsonl-search.js +274 -0
package/dist/leagcyprompts2.d.ts +0 -0
package/dist/leagcyprompts2.js +573 -0
package/dist/pipeline-direct-llm.d.ts +77 -0
package/dist/pipeline-direct-llm.js +216 -0
package/dist/pipeline-fastbrain.d.ts +45 -0
package/dist/pipeline-fastbrain.js +367 -0
package/dist/prompts-2-25-26.d.ts +0 -0
package/dist/prompts-2-25-26.js +518 -0
package/dist/prompts-3-2-26.d.ts +78 -0
package/dist/prompts-3-2-26.js +1319 -0
package/dist/prompts.d.ts +83 -8
package/dist/prompts.js +1990 -374
package/dist/session-access.d.ts +60 -2
package/dist/session-access.js +172 -2
package/dist/summary-index.d.ts +87 -0
package/dist/summary-index.js +570 -0
package/dist/turn-detector-shim.d.ts +24 -0
package/dist/turn-detector-shim.js +83 -0
package/dist/voice-io.d.ts +9 -3
package/dist/voice-io.js +39 -20
package/package.json +18 -11

package/dist/index.js CHANGED Viewed

@@ -5,13 +5,20 @@ import { Room, RoomEvent } from '@livekit/rtc-node';
 import { AccessToken } from 'livekit-server-sdk';
 // Initialize logger before anything else
 initializeLogger({ pretty: true, level: 'info' });
+// Prevent MaxListenersExceededWarning on AbortSignal from Claude SDK query() calls
+// Each resumed query() adds listeners to the shared signal; default limit is 10
+import { setMaxListeners } from 'node:events';
+setMaxListeners(50);
 import { createServer } from 'http';
-import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts, readSessionSpec, listLibraryFiles } from './config.js';
-import { createSTT, createTTS, createVAD, createRealtimeModelFromConfig } from './voice-io.js';
+import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync } from 'node:fs';
+import { join } from 'node:path';
+import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts } from './config.js';
+import { createSTT, createTTS, createRealtimeModelFromConfig, DIRECT_MODE_STT, DIRECT_MODE_TTS } from './voice-io.js';
 import { createClaudeLLM } from './claude-llm.js';
+import { clearPipelineFastBrainSession, prewarmBM25Index } from './pipeline-fastbrain.js';
 import { createSmitheryProxy, destroySmitheryProxy, parseSmitheryUrl, isSmitheryUrl, SmitheryAuthorizationError } from './smithery-proxy.js';
-import { askHaiku, updateSpecFromJSONL } from './fast-brain.js';
-import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getResearchCompleteInjection, getResearchUpdateInjection, getNotificationInjection } from './prompts.js';
+import { askHaiku, askFastBrain, updateSpecFromJSONL, processResearchCompletion, handleResearchBatch, prepareBriefingScript, prepareRecoveryScript, writeQuestionToSpec, checkOutputAgainstQuestions, generateProactivePrompt, clearFastBrainSession } from './fast-brain.js';
+import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getScriptInjection, getProactiveInjection, getNotificationInjection } from './prompts.js';
 import { MCP_CATALOG } from './config.js';
 import { llm } from '@livekit/agents';
 import { z } from 'zod';
@@ -28,6 +35,32 @@ import { z } from 'zod';
 //   - Voice LLM with tool calling (ask_agent, respond_permission)
 //   - Routes tasks to Claude agents for execution
 // ============================================================
+/**
+ * Load the skills bundled with the agent (name + description) for frontend display.
+ *
+ * Looks for `<agentDir>/.claude/skills/<entry>/SKILL.md`. Entries without a
+ * SKILL.md are ignored.
+ *
+ * @param {string} agentDir - Directory that contains the `.claude` folder.
+ * @returns {{ name: string, description: string }[]} One entry per readable skill;
+ *   empty when the skills directory is missing or cannot be listed.
+ */
+function loadSkillsList(agentDir) {
+    const skillsDir = join(agentDir, '.claude', 'skills');
+    if (!existsSync(skillsDir))
+        return [];
+    let skillNames;
+    try {
+        skillNames = readdirSync(skillsDir);
+    }
+    catch (err) {
+        console.warn('⚠️ Failed to load skills list:', err);
+        return [];
+    }
+    const skills = [];
+    for (const skillName of skillNames) {
+        const skillFile = join(skillsDir, skillName, 'SKILL.md');
+        if (!existsSync(skillFile))
+            continue;
+        // Each skill is read in its own try/catch so that one unreadable SKILL.md
+        // does not hide every skill listed after it.
+        try {
+            const content = readFileSync(skillFile, 'utf-8');
+            // Title: first "# " heading (an optional "Skill:" prefix is dropped), else the folder name
+            const titleMatch = content.match(/^#\s+(?:Skill:\s*)?(.+)/m);
+            const name = titleMatch ? titleMatch[1].trim() : skillName;
+            // Description: first non-heading line that follows a heading. Only the first
+            // character of the line is checked for '#', so text such as "issue #42" or "C#"
+            // is kept whole instead of being cut at the '#'.
+            const descMatch = content.match(/^#[^\n]+\n+(?!#)([^\n]+)/m);
+            const description = descMatch ? descMatch[1].trim() : '';
+            skills.push({ name, description });
+        }
+        catch (err) {
+            console.warn(`⚠️ Failed to load skill "${skillName}":`, err);
+        }
+    }
+    return skills;
+}
 // Generate a short, user-friendly room code
 function generateRoomCode() {
     const chars = 'abcdefghjkmnpqrstuvwxyz23456789';
@@ -70,6 +103,22 @@ process.on('unhandledRejection', (reason) => {
         console.log('⚠️ OpenAI active response collision (will retry on next listening state)');
         return;
     }
+    // LiveKit SDK internal error after participant disconnect — safe to suppress
+    if (msg.includes("reading 'source'") || msg.includes("reading 'type'")) {
+        console.log('⚠️ Post-disconnect cleanup error (harmless)');
+        return;
+    }
+    // generateReply timeout — realtime LLM called a tool instead of speaking (toolChoice:'none' ignored)
+    // or Superseded — new generateReply cancelled a pending one
+    if (msg.includes('generateReply timed out') || msg.includes('generation_created') || msg.includes('Superseded')) {
+        console.log('⚠️ generateReply failed:', msg.substring(0, 80));
+        return;
+    }
+    // AdaptiveInterruptionDetector crash — LiveKit Cloud returns string instead of JSON.
+    // SDK handles this internally (retries → VAD fallback). Suppress residual noise.
+    if (msg.includes('interruption prediction') || msg.includes('AdaptiveInterruptionDetector')) {
+        return;
+    }
     console.error('❌ Unhandled Rejection:', msg);
 });
 process.on('uncaughtException', (error) => {
@@ -148,48 +197,6 @@ function startApiServer(workingDir, port) {
  * Gemini has smaller context limits — cap at 10 exchanges with 500 char content.
  * OpenAI handles full history (30 exchanges, 2000 char content).
  */
-function buildContextBriefing(summary, history, provider) {
-    const isGemini = provider === 'gemini';
-    // Gemini: last 10 exchanges capped at 500 chars. OpenAI: full history.
-    const maxExchanges = isGemini ? 10 : history.length;
-    const maxContentLen = isGemini ? 500 : 2000;
-    const trimmedHistory = history.slice(-maxExchanges);
-    const lines = [
-        `Session ID: ${summary.sessionId.substring(0, 8)}`,
-        `Total messages: ${summary.messageCount}`,
-        '',
-        '=== SESSION CONVERSATION HISTORY ==='
-    ];
-    for (const exchange of trimmedHistory) {
-        const content = exchange.content.length > maxContentLen
-            ? exchange.content.substring(0, maxContentLen) + '...'
-            : exchange.content;
-        lines.push(`${exchange.role === 'user' ? 'User' : 'Assistant'}: ${content}`);
-        lines.push('');
-    }
-    return lines.join('\n');
-}
-/**
- * Read spec.md and format it for the realtime voice model.
- * Truncates to avoid bloating the context window.
- * Returns null if spec doesn't exist or session ID isn't available.
- */
-function getSpecForVoiceModel(workingDir, sessionId) {
-    if (!sessionId)
-        return null;
-    const specContent = readSessionSpec(workingDir, sessionId);
-    if (!specContent)
-        return null;
-    const MAX = 3000;
-    if (specContent.length <= MAX)
-        return specContent;
-    const truncated = specContent.substring(0, MAX);
-    const lastHeading = truncated.lastIndexOf('\n## ');
-    if (lastHeading > MAX * 0.5) {
-        return truncated.substring(0, lastHeading) + '\n\n[... truncated — call read_spec for full content]';
-    }
-    return truncated + '\n\n[... truncated]';
-}
 /**
  * Load full session conversation history into the realtime model's ChatContext.
  * This gives the model persistent memory of what was discussed/researched,
@@ -251,8 +258,20 @@ async function main() {
     if (enabledMcpNames.length > 0) {
         console.log(`🔌 Enabled MCP servers: ${enabledMcpNames.join(', ')}`);
     }
-    const workingDir = config.workingDirectory || process.cwd();
-    console.log(`📂 Working directory: ${workingDir}`);
+    // Two directory concepts:
+    // 1. workingDir (cwd) — where Claude Code operates. Configurable per-session.
+    //    Priority: OSBORN_CWD env > config.workingDirectory > process.cwd()
+    // 2. sessionBaseDir — where session artifacts live (spec.md, library/).
+    //    Always the Osborn agent install directory (where this process started).
+    //    This ensures .osborn/sessions/ doesn't scatter across random directories.
+    const sessionBaseDir = process.cwd(); // Always the Osborn install dir
+    const defaultWorkingDir = process.env.OSBORN_CWD || config.workingDirectory || process.cwd();
+    let workingDir = defaultWorkingDir;
+    console.log(`📂 Working directory (cwd): ${workingDir}`);
+    console.log(`📂 Session base directory: ${sessionBaseDir}`);
+    if (process.env.OSBORN_CWD) {
+        console.log(`   (cwd from OSBORN_CWD env var)`);
+    }
     console.log(`🔬 Mode: RESEARCH`);
     // Determine voice mode
     const voiceMode = getVoiceMode(config);
@@ -305,6 +324,7 @@ async function main() {
     const room = new Room();
     room.setMaxListeners(50); // Prevent MaxListenersExceeded warnings on reconnect
     // Track state
+    let pendingSessionClose = null; // Tracks async session close for reconnect safety
     let currentSession = null;
     let currentAgent = null; // For updateChatCtx() context injection
     let currentLLM = null;
@@ -313,6 +333,9 @@ async function main() {
     let userState = 'listening'; // Track user speech state for queue safety
     let currentVoiceMode = voiceMode; // Track active voice mode for data handlers
     let currentProvider = realtimeConfig.provider; // Track active realtime provider
+    // Track the active resume session ID across scopes (ParticipantConnected + DataReceived)
+    // Updated by resume_session, session_selected, continue_session, switch_session handlers
+    let currentResumeSessionId;
     // Task deduplication guard - prevents Gemini re-execution loops
     let lastTaskRequest = '';
     let lastTaskTime = 0;
@@ -320,8 +343,78 @@ async function main() {
     let haikuInFlight = null;
     // Background research state - tracks async ask_agent execution
     let activeResearch = null;
+    // Persist last completed research context so follow-up questions can reference it
+    // (activeResearch is set to null on completion — this preserves the context)
+    let lastCompletedResearch = null;
     // No manual queuing — the Claude SDK handles sequential queries internally
     // ============================================================
+    // Interruption Tracking (Content Ledger)
+    // ============================================================
+    // When user interrupts TTS, LiveKit truncates chatCtx to what was spoken.
+    // We capture the spoken text (synchronizedTranscript) and on the next user
+    // message, read Claude's full output from JSONL + inject context so Claude
+    // knows what was heard vs lost. Claude decides: side question → answer +
+    // continue, or redirect → follow new direction.
+    // Current SpeechHandle from session.say() — only the latest one matters
+    let currentSpeechHandle = null;
+    // Last interruption context — gathered at interrupt time, consumed when user's message arrives
+    let lastInterruption = null;
+    /**
+     * Done-callback logic for a SpeechHandle produced by session.say().
+     *
+     * A handle that finished without interruption clears any stored interruption.
+     * An interrupted handle records what was being spoken plus the last 10 assistant
+     * messages read from the session JSONL. Nothing is sent to Claude here; the stored
+     * context is picked up later via getAndConsumeInterruptionContext().
+     *
+     * @param handle   SpeechHandle that just finished (its `interrupted` flag is read).
+     * @param fullText The complete text block that was handed to session.say().
+     */
+    async function handleSpeechDone(handle, fullText) {
+        if (handle.interrupted) {
+            // say() has no word-level cutoff, so the whole block is logged/stored as "spoken".
+            console.log(`🔇 Speech interrupted. Was speaking: "${fullText.substring(0, 80)}..."`);
+            const activeSessionId = currentLLM?.sessionId;
+            let assistantTranscript = '';
+            if (activeSessionId) {
+                try {
+                    // Lazy import: session-access is only needed on the interruption path.
+                    const { readSessionHistory } = await import('./session-access.js');
+                    const historyOptions = { lastN: 10, types: ['assistant'] };
+                    const entries = readSessionHistory(activeSessionId, workingDir, historyOptions);
+                    // Keep only entries that carry text, separated by a rule line.
+                    assistantTranscript = entries
+                        .map((entry) => entry.text)
+                        .filter(Boolean)
+                        .join('\n---\n');
+                }
+                catch (err) {
+                    console.warn('⚠️ Failed to read JSONL for interruption context:', err);
+                }
+            }
+            // Held until the user's next message arrives via chat().
+            lastInterruption = {
+                spokenText: fullText,
+                recentMessages: assistantTranscript,
+                timestamp: Date.now(),
+            };
+            console.log(`📋 Interruption context stored (text: ${fullText.length} chars, JSONL: ${assistantTranscript.length} chars)`);
+            return;
+        }
+        // Speech played to the end: nothing was lost, so drop any stale context.
+        lastInterruption = null;
+    }
+    /**
+     * One-shot accessor for the pending interruption context (callback for PipelineDirectLLM).
+     *
+     * Returns `{ spokenText, recentMessages }` when an interruption was recorded within
+     * the last 60 seconds, otherwise null. Any pending context is cleared by this call,
+     * whether it was still fresh or already expired.
+     */
+    function getAndConsumeInterruptionContext() {
+        const pending = lastInterruption;
+        if (!pending)
+            return null;
+        // Consume unconditionally: the context is single-use, fresh or expired.
+        lastInterruption = null;
+        const ageMs = Date.now() - pending.timestamp;
+        // Older than 60s — the user waited too long for it to still be relevant.
+        if (ageMs > 60_000)
+            return null;
+        return { spokenText: pending.spokenText, recentMessages: pending.recentMessages };
+    }
+    // ============================================================
     // Unified Voice Injection Queue
     // ============================================================
     // ALL system injections (research updates, completions, notifications, errors)
@@ -354,43 +447,62 @@ async function main() {
             console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (user speaking)`);
             return;
         }
+        // Don't inject while fast brain tool call is in flight — the tool response will
+        // race with our generateReply, causing Gemini to drop our content and only speak
+        // the tool response. Wait for the tool call to complete first.
+        if (haikuInFlight) {
+            console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (fast brain in flight: "${haikuInFlight.question.substring(0, 40)}...")`);
+            return; // Will be retried when haikuInFlight clears (see tool execute handler)
+        }
         isProcessingQueue = true;
-        // Safety timeout: if agent_state_changed never fires (e.g. Gemini state machine hang),
-        // clear the guard after 30s so the queue isn't permanently stuck
+        // Batch ALL queued items into one generateReply call
+        const items = voiceQueue.splice(0);
+        const batchedInstruction = items.length === 1
+            ? items[0]
+            : items.join('\n\n---\n\n');
+        console.log(`📡 Voice queue: processing ${items.length} batched items (${batchedInstruction.length} chars)`);
+        // Safety timeout: if agent_state_changed never fires (edge case — e.g. Gemini
+        // WebSocket drops, or state machine hangs). 15s gives the model time to process.
         setTimeout(() => {
             if (isProcessingQueue) {
-                console.log('⚠️ Voice queue: isProcessingQueue stuck for 30s, clearing');
+                console.log('⚠️ Voice queue: safety timeout — clearing guard');
                 isProcessingQueue = false;
                 if (voiceQueue.length > 0 && agentState === 'listening') {
                     processVoiceQueue();
                 }
             }
-        }, 30000);
-        // Batch ALL queued items into one generateReply call
-        const items = voiceQueue.splice(0);
-        const batchedInstruction = items.length === 1
-            ? items[0]
-            : items.join('\n\n---\n\n');
-        console.log(`📡 Voice queue: processing ${items.length} batched items (${batchedInstruction.length} chars)`);
+        }, 15000);
         try {
             // Skip interrupt for Gemini — disrupts Gemini's state machine, causing it to
             // never transition back to 'listening' (hangs in speaking state indefinitely)
             if (currentProvider !== 'gemini') {
                 currentSession.interrupt();
             }
-            currentSession.generateReply({
-                instructions: batchedInstruction,
-                toolChoice: 'none',
-            });
+            if (currentProvider === 'gemini') {
+                // LiveKit SDK v1.0.51: generateReply({ instructions }) sends a system turn +
+                // synthetic "." user turn. After Gemini processes a tool call in this flow,
+                // autoToolReplyGeneration does NOT trigger continuation (system-only limitation).
+                // Using userInput instead makes it a "user-initiated" request where auto-continuation
+                // works. The ask_fast_brain injection bypass handles [SCRIPT]/[PROACTIVE]/[NOTIFICATION]
+                // prefixes and returns the content directly as a tool response.
+                currentSession.generateReply({
+                    userInput: batchedInstruction,
+                });
+            }
+            else {
+                // OpenAI respects toolChoice:'none' — speaks instructions directly
+                currentSession.generateReply({
+                    instructions: batchedInstruction,
+                    toolChoice: 'none',
+                });
+            }
             // Model transitions to thinking/speaking after this call.
             // When it returns to 'listening', agent_state_changed triggers processVoiceQueue() again.
             // Also inject into chatCtx as persistent context so the model remembers across turns
             injectIntoChatCtx(batchedInstruction);
         }
         catch (err) {
-            console.log('⚠️ Voice queue generateReply failed, dropping items:', err);
-            // Do NOT re-queue — re-queuing causes infinite retry cascades
-            // The frontend still has the updates via claude_output events
+            console.log('⚠️ Voice queue generateReply failed:', err);
             isProcessingQueue = false;
         }
         // isProcessingQueue is cleared when agent_state_changed fires
@@ -418,6 +530,32 @@ async function main() {
             console.log('⚠️ ChatCtx injection failed:', err);
         }
     }
+    // Collect the most recent user/assistant turns from the current agent's in-memory
+    // ChatContext (replaces the internal conversationHistory array in fast-brain.ts).
+    // Returns [] when there is no agent or the chat context cannot be read.
+    function getChatHistory(maxTurns = 20) {
+        if (!currentAgent)
+            return [];
+        try {
+            const spokenTurns = [];
+            for (const entry of currentAgent.chatCtx.items) {
+                // Only plain messages from the user or the assistant count as turns.
+                const isDialogue = entry.type === 'message'
+                    && (entry.role === 'user' || entry.role === 'assistant');
+                if (!isDialogue)
+                    continue;
+                const trimmed = (entry.textContent ?? '').trim();
+                if (trimmed) {
+                    spokenTurns.push({ role: entry.role, text: trimmed });
+                }
+            }
+            return spokenTurns.slice(-maxTurns);
+        }
+        catch (err) {
+            console.log('⚠️ getChatHistory: failed to read chatCtx:', err);
+            return [];
+        }
+    }
     // Research event batching — debounce rapid-fire tool events into a single voice queue entry
     let researchBatchTimer = null;
     function scheduleResearchBatch() {
@@ -437,10 +575,63 @@ async function main() {
                 isStreaming: true,
                 agentRole: 'research-progress',
             });
-            // Push to unified voice queue (will be spoken when model is available)
-            queueVoiceInjection(getResearchUpdateInjection(batchText));
+            // Route through fast brain — it decides whether to speak (usually silent)
+            if (activeResearch.voiceUpdateCount < 2) {
+                const voiceSid = currentLLM?.sessionId;
+                if (voiceSid) {
+                    const chatHistory = getChatHistory(10);
+                    handleResearchBatch(workingDir, voiceSid, lastTaskRequest || '', updates, activeResearch.researchLog, chatHistory, sessionBaseDir)
+                        .then(script => {
+                        if (script && activeResearch) {
+                            activeResearch.voiceUpdateCount++;
+                            queueVoiceInjection(getScriptInjection(script));
+                        }
+                    })
+                        .catch(() => { }); // Silent fail — updates are optional
+                }
+            }
         }, 8000); // 8s debounce: reduces voice queue flooding during research
     }
+    // Proactive conversational loop — keeps conversation alive during research
+    let proactiveTimer = null;
+    let proactivePromptHistory = [];
+    const PROACTIVE_INTERVAL = 15000; // 15 seconds (offset from 8s batch timer)
+    const MAX_PROACTIVE_PROMPTS = 2; // Cap per research task (reduced from 4 to minimize realtime LLM tokens)
+    /**
+     * Start the proactive prompt loop for a research task.
+     *
+     * Every PROACTIVE_INTERVAL ms, while research is active and the voice pipeline is
+     * idle, asks generateProactivePrompt() for a prompt and queues it for speech, up to
+     * MAX_PROACTIVE_PROMPTS per call. Any previously running loop is stopped first.
+     *
+     * @param task      The research task text passed through to generateProactivePrompt().
+     * @param sessionId Session ID passed through to generateProactivePrompt().
+     */
+    function startProactiveLoop(task, sessionId) {
+        stopProactiveLoop();
+        proactivePromptHistory = [];
+        let proactiveCount = 0;
+        // True while a generateProactivePrompt() call is pending, so a slow call cannot
+        // overlap with the next tick and push the count past MAX_PROACTIVE_PROMPTS.
+        let generating = false;
+        const timer = setInterval(async () => {
+            if (!activeResearch) {
+                stopProactiveLoop();
+                return;
+            }
+            if (generating)
+                return;
+            if (proactiveCount >= MAX_PROACTIVE_PROMPTS)
+                return;
+            if (agentState !== 'listening' || userState === 'speaking')
+                return;
+            if (researchBatchTimer)
+                return; // Don't collide with batch updates
+            if (isProcessingQueue)
+                return; // Don't collide with voice queue
+            generating = true;
+            try {
+                const prompt = await generateProactivePrompt(workingDir, sessionId, task, activeResearch.researchLog, proactivePromptHistory, sessionBaseDir);
+                // The loop may have been stopped or restarted (or research finished) while
+                // the prompt was being generated — a stale prompt must not be spoken.
+                if (proactiveTimer !== timer || !activeResearch)
+                    return;
+                if (prompt && prompt !== 'NOTHING') {
+                    proactivePromptHistory.push(prompt);
+                    proactiveCount++;
+                    queueVoiceInjection(getProactiveInjection(prompt));
+                }
+            }
+            catch (err) {
+                // Proactive prompts are optional: log and carry on with the next tick.
+                console.log('⚠️ Proactive prompt generation failed:', err);
+            }
+            finally {
+                generating = false;
+            }
+        }, PROACTIVE_INTERVAL);
+        proactiveTimer = timer;
+    }
+    // Stop the proactive prompt loop (if one is running) and forget the prompts it produced.
+    function stopProactiveLoop() {
+        proactivePromptHistory = [];
+        if (!proactiveTimer)
+            return;
+        clearInterval(proactiveTimer);
+        proactiveTimer = null;
+    }
     // Helper to send data to frontend (with size limit handling)
     const MAX_MESSAGE_SIZE = 60000;
     async function sendToFrontend(data) {
@@ -490,28 +681,40 @@ async function main() {
         }
     }
     // Create DIRECT session (STT + Claude Agent SDK + TTS)
-    async function createDirectSession(resumeSessionId) {
+    async function createDirectSession(resumeSessionId, llmOverride) {
         console.log('🎯 Creating direct session...');
-        const stt = createSTT({ provider: 'deepgram' });
-        const tts = createTTS({ provider: 'deepgram', voice: 'aura-asteria-en' });
-        const vad = await createVAD();
-        // Create Claude LLM wrapper in research mode
-        const directLLM = createClaudeLLM({
+        const stt = createSTT(DIRECT_MODE_STT);
+        const tts = createTTS(DIRECT_MODE_TTS);
+        // Create Claude LLM wrapper — direct mode uses speech-optimized system prompt
+        // skipTTSQueue: bypass LiveKit's BufferedTokenStream, use session.say() instead
+        // llmOverride: pipeline mode passes PipelineDirectLLM which wraps its own ClaudeLLM
+        const directLLM = llmOverride || createClaudeLLM({
             workingDirectory: workingDir,
+            sessionBaseDir,
             mcpServers,
             resumeSessionId,
+            voiceMode: 'direct',
+            skipTTSQueue: true,
         });
         currentLLM = directLLM;
         // For resumed sessions, eagerly create workspace (we know the real ID)
         if (resumeSessionId) {
-            const workspace = ensureSessionWorkspace(workingDir, resumeSessionId);
+            const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
             console.log(`📁 Session workspace (resumed): ${workspace}`);
         }
         // For new sessions, create workspace when SDK assigns real session ID
         directLLM.events.once('session_id', ({ sessionId }) => {
-            const workspace = ensureSessionWorkspace(workingDir, sessionId);
+            const workspace = ensureSessionWorkspace(sessionBaseDir, sessionId);
             console.log(`📁 Session workspace created: ${workspace}`);
+            // Pipeline mode: pre-warm BM25 index so first fast brain query is fast
+            if (currentVoiceMode === 'pipeline') {
+                prewarmBM25Index(sessionId, workingDir).catch(() => { });
+            }
         });
+        // Also pre-warm for resumed sessions (sessionId already known)
+        if (resumeSessionId && currentVoiceMode === 'pipeline') {
+            prewarmBM25Index(resumeSessionId, workingDir).catch(() => { });
+        }
         // Wire up MCP server changes to frontend
         directLLM.events.on('mcp_servers_changed', (data) => {
             console.log(`🔌 MCP servers changed: ${data.enabledKeys.join(', ') || 'none'}`);
@@ -595,6 +798,50 @@ async function main() {
                 currentSession.say?.(ttsMessage).catch(() => { });
             }
         });
+        // Wire up TTS say — bypass LiveKit's BufferedTokenStream, speak directly via session.say()
+        // Each text block from Claude gets spoken immediately as it arrives, no internal buffering
+        directLLM.events.on('tts_say', (data) => {
+            // Guard: session must be alive — TTS errors can kill the session while background query runs
+            if (!currentSession) {
+                console.warn(`⚠️ tts_say fired but currentSession is null — text dropped: "${data.text?.substring(0, 60)}"`);
+                return;
+            }
+            if (!data.text?.trim()) {
+                console.log(`🔇 tts_say fired but text is empty — skipping`);
+                return;
+            }
+            const sayId = Date.now(); // simple ID to correlate start/end logs
+            console.log(`🗣️ [${sayId}] session.say START (${data.text.length} chars): "${data.text.substring(0, 60)}..."`);
+            try {
+                const handle = currentSession.say(data.text);
+                if (handle && typeof handle.addDoneCallback === 'function') {
+                    // SpeechHandle — track it and register interruption callback
+                    currentSpeechHandle = handle;
+                    handle.addDoneCallback((sh) => {
+                        if (sh.interrupted) {
+                            console.log(`🔇 [${sayId}] session.say INTERRUPTED`);
+                            handleSpeechDone(sh, data.text);
+                        }
+                        else {
+                            console.log(`✅ [${sayId}] session.say DONE`);
+                            if (currentSpeechHandle === sh)
+                                lastInterruption = null;
+                        }
+                    });
+                    console.log(`🗣️ [${sayId}] session.say queued (SpeechHandle tracked)`);
+                }
+                else if (handle && typeof handle.then === 'function') {
+                    // Promise-based fallback (older SDK path)
+                    handle
+                        .then(() => console.log(`✅ [${sayId}] session.say DONE`))
+                        .catch((err) => console.error(`❌ [${sayId}] session.say FAILED:`, err?.message || err));
+                }
+            }
+            catch (err) {
+                // Catch synchronous "AgentSession is not running" errors
+                console.warn(`⚠️ [${sayId}] session.say threw — session likely dead: ${err?.message}`);
+            }
+        });
         // Wire up session resume failure - notify frontend when SDK creates new session instead
         directLLM.events.on('session_resume_failed', (data) => {
             console.error(`❌ Session resume failed: ${data.requestedSessionId} → ${data.actualSessionId}`);
@@ -613,17 +860,18 @@ async function main() {
             });
         });
         // Create the Agent with instructions, STT, LLM, TTS
+        // VAD (Silero ONNX) removed — caused 2-5s inference lag on CPU, making interruption detection worse
+        // Turn detection is server-side (Deepgram endpointing), interruptions handled by STT
         const agent = new voice.Agent({
             instructions: DIRECT_MODE_PROMPT,
             stt,
             llm: directLLM,
             tts,
-            vad,
-            turnDetection: 'vad',
+            turnDetection: 'stt',
         });
-        // Create the session (no longer passes STT/LLM/TTS here)
         const session = new voice.AgentSession({
-            turnDetection: 'vad',
+            turnDetection: 'stt',
+            preemptiveGeneration: false, // Only fire LLM on final committed transcript, not partial preemptives
         });
         return { session, agent };
     }
@@ -639,18 +887,19 @@ async function main() {
         // Create Claude LLM for tool execution (research tasks)
         realtimeClaudeHandler = createClaudeLLM({
             workingDirectory: workingDir,
+            sessionBaseDir,
             mcpServers,
             resumeSessionId,
         });
         currentLLM = realtimeClaudeHandler;
         // For resumed sessions, eagerly create workspace (we know the real ID)
         if (resumeSessionId) {
-            const workspace = ensureSessionWorkspace(workingDir, resumeSessionId);
+            const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
             console.log(`📁 Session workspace (resumed): ${workspace}`);
         }
         // For new sessions, create workspace when SDK assigns real session ID
         realtimeClaudeHandler.events.once('session_id', ({ sessionId }) => {
-            const workspace = ensureSessionWorkspace(workingDir, sessionId);
+            const workspace = ensureSessionWorkspace(sessionBaseDir, sessionId);
             console.log(`📁 Session workspace created: ${workspace}`);
         });
         // Wire up MCP server changes to frontend
@@ -693,8 +942,11 @@ async function main() {
             });
         });
         // Stream Claude's research text to frontend as progress updates
+        // Skips during active research to avoid duplication with per-task onText handler
         realtimeClaudeHandler.events.on('assistant_text', (data) => {
             if (data.text && data.text.trim()) {
+                if (activeResearch)
+                    return;
                 sendToFrontend({
                     type: 'claude_output',
                     text: data.text,
@@ -747,71 +999,24 @@ async function main() {
                 checkpointId: data.checkpointId,
             });
         });
-        // Extract priority content from research results — preserves URLs, code blocks, and key details
-        function extractPriorityContent(result, maxChars = 4000) {
-            if (result.length <= maxChars)
-                return result;
-            // Extract URLs (preserve for voice relay)
-            const urlRegex = /https?:\/\/[^\s\)\"\'>\]]+/g;
-            const urls = [...new Set(result.match(urlRegex) || [])];
-            // Extract code blocks (first 2, up to 400 chars each)
-            const codeBlockRegex = /```[\s\S]*?```/g;
-            const codeBlocks = [];
-            let match;
-            while ((match = codeBlockRegex.exec(result)) !== null && codeBlocks.length < 2) {
-                const block = match[0].length > 400 ? match[0].substring(0, 397) + '```' : match[0];
-                codeBlocks.push(block);
-            }
-            // Build sections
-            const sections = [];
-            // Take the first ~2500 chars of narrative (intro + main findings)
-            const narrativeEnd = Math.min(result.length, 2500);
-            const narrativeTruncated = result.substring(0, narrativeEnd);
-            const lastPeriod = narrativeTruncated.lastIndexOf('.');
-            const narrative = lastPeriod > narrativeEnd * 0.6
-                ? narrativeTruncated.substring(0, lastPeriod + 1)
-                : narrativeTruncated;
-            sections.push(narrative);
-            // Append conclusion (last ~500 chars) if result is long enough
-            if (result.length > 3000) {
-                const tail = result.substring(result.length - 500);
-                const firstPeriod = tail.indexOf('.');
-                const conclusion = firstPeriod > 0 ? tail.substring(firstPeriod + 1).trim() : tail.trim();
-                if (conclusion.length > 50) {
-                    sections.push(`\n\n[CONCLUSION]\n${conclusion}`);
-                }
-            }
-            // Append code blocks if not already in the narrative
-            if (codeBlocks.length > 0) {
-                const codeSection = codeBlocks.filter(cb => !narrative.includes(cb));
-                if (codeSection.length > 0) {
-                    sections.push(`\n\n[CODE EXAMPLES]\n${codeSection.join('\n\n')}`);
-                }
-            }
-            // Append URLs if not already in the narrative
-            const newUrls = urls.filter(u => !narrative.includes(u));
-            if (newUrls.length > 0) {
-                sections.push(`\n\n[LINKS]\n${newUrls.slice(0, 5).join('\n')}`);
-            }
-            let assembled = sections.join('');
-            // Final safety truncation if assembled exceeds maxChars
-            if (assembled.length > maxChars) {
-                const truncated = assembled.substring(0, maxChars);
-                const lp = truncated.lastIndexOf('.');
-                assembled = lp > maxChars * 0.7 ? truncated.substring(0, lp + 1) : truncated + '...';
-            }
-            return assembled;
-        }
         // Extracted research execution — called by ask_agent, SDK handles queuing internally
         function executeResearch(task) {
             sendToFrontend({ type: 'system', text: `Executing: ${task}` });
-            // Clean up previous research listeners to avoid duplicate event handlers
+            // Fire-and-forget: write user question to spec.md BEFORE agent starts
+            const questionSid = currentLLM?.sessionId || resumeSessionId;
+            if (questionSid) {
+                writeQuestionToSpec(sessionBaseDir, questionSid, task).catch(err => console.error('❌ writeQuestionToSpec failed:', err));
+            }
+            // Clean up previous research UI tracking — but let the SDK query complete in background.
+            // The SDK has an internal queue: new query() calls enqueue behind running ones.
+            // Old research results land in JSONL and fast brain can access them later.
             if (activeResearch) {
-                activeResearch.cleanup();
+                activeResearch.cleanup(); // Remove event listeners so UI tracks new task
                 if (researchBatchTimer) {
                     clearTimeout(researchBatchTimer);
                     researchBatchTimer = null;
                 }
+                // NOTE: NOT aborting — old SDK process continues writing to JSONL
             }
             // Set up research log batching — events push to queue for state-driven injection
             const researchLog = [];
@@ -861,12 +1066,30 @@ async function main() {
                 pendingUpdates.push(entry);
                 scheduleResearchBatch();
             };
+            const ANSWER_CHECK_THRESHOLD = 300; // chars — only check substantial outputs
             const onToolResult = (data) => {
                 // Only log to researchLog for the final summary — don't push to pendingUpdates
                 // This prevents redundant "Reading config.ts. Read done." voice updates
                 researchLog.push(`${data.name} completed`);
-                // Content is NOT captured here — JSONL has full untruncated tool results
-                // The fast brain reads JSONL directly on completion via updateSpecFromJSONL()
+                // Fire-and-forget: when the serialized response is longer than ANSWER_CHECK_THRESHOLD chars, check it against the spec questions (rejections of that check are deliberately swallowed)
+                // Note: PostToolUse emits { name, input, response } — use data.response (not data.result); non-string responses are JSON-stringified, a missing response becomes '""'
+                const resultText = typeof data.response === 'string' ? data.response : JSON.stringify(data.response || '');
+                if (resultText.length > ANSWER_CHECK_THRESHOLD) {
+                    const sid = currentLLM?.sessionId || resumeSessionId;
+                    if (sid)
+                        checkOutputAgainstQuestions(sessionBaseDir, sid, resultText, 'tool_result').catch(() => { });
+                }
+                // When AskUserQuestion completes, the user's answer is a decision — forward a question/answer summary to askHaiku so it is tracked in spec (not awaited; a rejection is only logged)
+                if (data.name === 'AskUserQuestion' && data.response) {
+                    const sid = currentLLM?.sessionId || resumeSessionId;
+                    if (sid) {
+                        const questionText = JSON.stringify(data.input?.questions || data.input || {});
+                        const answerText = typeof data.response === 'string' ? data.response : JSON.stringify(data.response);
+                        const specUpdate = `User answered a clarifying question during research.\nQuestion: ${questionText}\nAnswer: ${answerText}\nRecord this as a user decision in spec.md.`;
+                        askHaiku(workingDir, sid, specUpdate, undefined, undefined, undefined, sessionBaseDir).catch(err => console.error('❌ Failed to record AskUserQuestion answer in spec:', err));
+                        console.log(`📝 AskUserQuestion answer forwarded to fast brain for spec tracking`);
+                    }
+                }
             };
             const onText = (data) => {
                 if (data.text?.trim()) {
@@ -876,31 +1099,57 @@ async function main() {
                     researchLog.push(firstSentence);
                     pendingUpdates.push(firstSentence);
                     scheduleResearchBatch();
-                    // Agent reasoning/analysis text is NOT captured here
-                    // JSONL has full untruncated assistant text — read on completion
+                    // Fire-and-forget: check if substantial agent reasoning answers any spec questions
+                    if (text.length > ANSWER_CHECK_THRESHOLD) {
+                        const sid = currentLLM?.sessionId || resumeSessionId;
+                        if (sid)
+                            checkOutputAgainstQuestions(sessionBaseDir, sid, text, 'assistant_text').catch(() => { });
+                    }
+                }
+            };
+            // Capture the SDK's requestId for this query — only the first truthy requestId is kept and logged; later events are ignored once it is set.
+            // The id is meant to identify this research task in the JSONL file for targeted retrieval by fast brain
+            let sdkRequestId = null;
+            const onQueryRequestId = (data) => {
+                if (!sdkRequestId && data.requestId) {
+                    sdkRequestId = data.requestId;
+                    console.log(`📋 [research] SDK requestId: ${sdkRequestId}`);
                 }
             };
             realtimeClaudeHandler.events.on('tool_use', onToolUse);
             realtimeClaudeHandler.events.on('tool_result', onToolResult);
             realtimeClaudeHandler.events.on('assistant_text', onText);
+            realtimeClaudeHandler.events.on('query_request_id', onQueryRequestId);
             const cleanupListeners = () => {
                 realtimeClaudeHandler?.events.off('tool_use', onToolUse);
                 realtimeClaudeHandler?.events.off('tool_result', onToolResult);
                 realtimeClaudeHandler?.events.off('assistant_text', onText);
+                realtimeClaudeHandler?.events.off('query_request_id', onQueryRequestId);
             };
+            // Create AbortController for this research task — abort on disconnect/cleanup
+            const researchAbortController = new AbortController();
             // Track active research — updates drain when model enters 'listening' state
-            activeResearch = {
+            const thisResearch = {
                 researchLog,
                 pendingUpdates,
                 cleanup: cleanupListeners,
                 voiceUpdateCount: 0,
+                abortController: researchAbortController,
             };
+            activeResearch = thisResearch;
+            // Start proactive conversational loop
+            const proactiveSid = currentLLM?.sessionId || resumeSessionId;
+            if (proactiveSid) {
+                startProactiveLoop(task, proactiveSid);
+            }
             // Run research in the background (non-blocking)
+            // Pass AbortController so research can be stopped on disconnect
             const researchPromise = (async () => {
                 const stream = realtimeClaudeHandler.chat({
                     chatCtx: {
                         items: [{ type: 'message', role: 'user', content: [task] }],
                     },
+                    abortController: researchAbortController,
                 });
                 let result = '';
                 for await (const chunk of stream) {
@@ -912,66 +1161,94 @@ async function main() {
             })();
             // Handle completion asynchronously
             researchPromise.then(async (result) => {
-                console.log(`✅ [realtime] Research complete (${result.length} chars)`);
+                // Check if aborted — empty result means clean abort, skip pipeline
+                if (researchAbortController.signal.aborted || !result.trim()) {
+                    console.log(`🛑 [realtime] Research aborted or empty: ${task.substring(0, 60)}`);
+                    cleanupListeners();
+                    if (activeResearch === thisResearch) {
+                        activeResearch = null;
+                    }
+                    return;
+                }
+                const isStillCurrent = activeResearch === thisResearch;
+                console.log(`✅ [realtime] Research complete (${result.length} chars${isStillCurrent ? '' : ', superseded by newer task'})`);
                 // Clean up
                 cleanupListeners();
-                // Send to frontend
-                await sendToFrontend({ type: 'assistant_response', text: result });
+                // Send raw result to frontend as a log entry (not assistant_response — that's reserved
+                // for the voice model's spoken response, avoiding duplication in chat)
+                await sendToFrontend({ type: 'claude_output', text: result, isStreaming: false, agentRole: 'research-result' });
                 const resultPreview = result.length > 150
                     ? result.substring(0, 150) + '...'
                     : result;
                 await sendToFrontend({ type: 'task_completed', task, resultPreview });
-                // Build enhanced return with research log
-                const logSummary = researchLog.length > 0
-                    ? `\n\n[RESEARCH LOG]\n${researchLog.slice(0, 25).join('\n')}`
-                    : '';
-                // Extract priority content — preserves URLs, code blocks, and key details (4000 char limit)
-                const resultForVoice = extractPriorityContent(result);
-                const fullResult = (resultForVoice + logSummary) || 'Research completed successfully.';
-                // Clear active research and timers before injecting final results
-                if (researchBatchTimer) {
-                    clearTimeout(researchBatchTimer);
-                    researchBatchTimer = null;
+                // Only modify global state if we're still the current research task.
+                // If a newer task replaced us, don't clobber its timers/state.
+                if (isStillCurrent) {
+                    if (researchBatchTimer) {
+                        clearTimeout(researchBatchTimer);
+                        researchBatchTimer = null;
+                    }
+                    stopProactiveLoop();
+                }
+                // Preserve research context for follow-up questions
+                lastCompletedResearch = {
+                    task,
+                    researchLog: [...researchLog],
+                    completedAt: Date.now(),
+                };
+                // Only clear activeResearch if we're still the current task
+                if (isStillCurrent) {
+                    activeResearch = null;
                 }
-                activeResearch = null;
-                // Send final results to frontend for visibility
+                // Send research_task_complete to frontend for inline chat tracking
                 await sendToFrontend({
-                    type: 'claude_output',
-                    text: `[Research Complete] Injecting findings into voice model (${fullResult.length} chars)`,
-                    isStreaming: false,
-                    agentRole: 'research-progress',
+                    type: 'research_task_complete',
+                    task,
+                    summary: result.substring(0, 500),
                 });
-                // Queue final results for voice injection — the queue handles availability gating
-                console.log(`📡 [realtime] Queuing final results (${fullResult.length} chars, agentState: ${agentState})`);
-                queueVoiceInjection(getResearchCompleteInjection(task, fullResult));
-                // Inject FULL untruncated result into ChatCtx so voice model can answer
-                // follow-up questions ("tell me more", "what were those links?") from memory
-                injectIntoChatCtx(`[FULL RESEARCH DETAILS for "${task}"]\n${result}`);
+                // Route through fast brain to generate a teleprompter script from the findings
+                // Fast brain reads full JSONL and writes a spoken monologue
+                const voiceSid = currentLLM?.sessionId || resumeSessionId;
+                const chatHistory = getChatHistory(10);
+                console.log(`📡 [realtime] Generating teleprompter script via fast brain (result: ${result.length} chars, agentState: ${agentState})`);
+                // Create sendToChat for research completion to send structured data to frontend
+                const completionSendToChat = (text) => {
+                    sendToFrontend({ type: 'assistant_response', text });
+                };
+                if (voiceSid) {
+                    processResearchCompletion(workingDir, voiceSid, task, result, chatHistory, completionSendToChat, sessionBaseDir)
+                        .then(script => {
+                        queueVoiceInjection(getScriptInjection(script));
+                    })
+                        .catch(() => {
+                        // Fallback: use truncated result directly if fast brain fails
+                        queueVoiceInjection(getScriptInjection(result.substring(0, 500)));
+                    });
+                }
+                else {
+                    queueVoiceInjection(getScriptInjection(result.substring(0, 500)));
+                }
                 // Fire-and-forget JSONL-based refinement pass via fast brain
                 // Reads FULL untruncated data from JSONL — no content buffer, no truncation
                 const postResearchSessionId = currentLLM?.sessionId || resumeSessionId;
                 if (postResearchSessionId) {
-                    updateSpecFromJSONL(workingDir, postResearchSessionId, task, researchLog)
+                    updateSpecFromJSONL(workingDir, postResearchSessionId, task, researchLog, sessionBaseDir)
                         .then(updateResult => {
                         if (!updateResult)
                             return;
                         // Notify frontend about spec.md update
                         if (updateResult.spec) {
-                            const specPath = `${workingDir}/.osborn/sessions/${postResearchSessionId}/spec.md`;
+                            const specPath = `${sessionBaseDir}/.osborn/sessions/${postResearchSessionId}/spec.md`;
                             sendToFrontend({
                                 type: 'research_artifact_updated',
                                 filePath: specPath,
                                 fileName: 'spec.md',
                             });
-                            const truncated = getSpecForVoiceModel(workingDir, postResearchSessionId);
-                            if (truncated) {
-                                injectIntoChatCtx(`[UPDATED SESSION SPEC]\n${truncated}`);
-                                console.log(`📋 Re-injected spec.md into ChatCtx after fast brain update (${truncated.length} chars)`);
-                            }
+                            // Voice model is a teleprompter — fast brain reads spec directly, no ChatCtx injection needed
                         }
                         // Notify frontend about each library file written by the fast brain
                         for (const libFile of updateResult.libraryFiles) {
-                            const libPath = `${workingDir}/.osborn/sessions/${postResearchSessionId}/library/${libFile}`;
+                            const libPath = `${sessionBaseDir}/.osborn/sessions/${postResearchSessionId}/library/${libFile}`;
                             sendToFrontend({
                                 type: 'research_artifact_updated',
                                 filePath: libPath,
@@ -981,177 +1258,148 @@ async function main() {
                     });
                 }
             }).catch(async (err) => {
-                console.error(`❌ [realtime] Research failed:`, err);
                 // Clean up
                 cleanupListeners();
-                if (researchBatchTimer) {
-                    clearTimeout(researchBatchTimer);
-                    researchBatchTimer = null;
+                const isStillCurrent = activeResearch === thisResearch;
+                if (isStillCurrent) {
+                    if (researchBatchTimer) {
+                        clearTimeout(researchBatchTimer);
+                        researchBatchTimer = null;
+                    }
+                    stopProactiveLoop();
+                    activeResearch = null;
                 }
-                activeResearch = null;
+                // If aborted (user disconnected), log quietly
+                if (researchAbortController.signal.aborted) {
+                    console.log(`🛑 [realtime] Research aborted: ${task.substring(0, 60)}`);
+                    return;
+                }
+                console.error(`❌ [realtime] Research failed:`, err);
                 // Queue error notification — will be spoken when model is available
-                queueVoiceInjection(`[NOTIFICATION] The research task encountered an error: ${err.message}. Let the user know briefly and ask if they want to try again. Do NOT call any tools.`);
+                queueVoiceInjection(getNotificationInjection(`Research encountered an error: ${err.message}. You could try asking again.`));
             });
             // Return immediately to unblock the voice model
             return 'Research started. I\'ll relay findings as they come in — you can keep talking to the user while I work.';
         }
         // Create tools for the realtime voice LLM
-        const askAgentTool = llm.tool({
-            description: `Delegate a task to your backend agent (Claude), which has full research, analysis, reasoning, and coding capabilities.
-Use for:
-- Researching topics, technologies, concepts, or ideas in depth
-- Fetching and analyzing web pages, articles, blog posts, YouTube transcripts
-- Reading and summarizing documentation, papers, or reference materials
-- Exploring and analyzing codebases, configs, architecture
-- Comparing options, tools, approaches — with tradeoffs and recommendations
-- Running bash commands, testing implementations
-- Using MCP tools (GitHub, YouTube, and other external tools)
-- Saving findings to the session library and updating the spec
-- Any question requiring research, analysis, verification, or deeper reasoning
-Reformulate the user's spoken request into a clear, specific task.
-The more context you include (topic, constraints, what they want to learn), the better the results.
-If the user wants specific details (examples, URLs, comparisons, step-by-step breakdown), mention that in your request.`,
+        // The realtime model is a thin teleprompter — only 2 tools:
+        // 1. ask_fast_brain: ALL user questions route here (the fast brain decides everything)
+        // 2. respond_permission: voice permission flow for Claude SDK blocked operations
+        const askFastBrainTool = llm.tool({
+            description: `Ask your brain. Call this for EVERY user message — greetings, questions, decisions, requests, everything. No exceptions. Returns what you should say.`,
             parameters: z.object({
-                request: z.string().describe('The task or question to delegate to the agent'),
-            }),
-            execute: async ({ request: task }) => {
-                console.log(`\n🔨 [realtime] Task: "${task}"`);
-                // Guard: if ask_haiku is currently handling a similar question, skip ask_agent
-                // This prevents the double-calling pattern where Gemini fires both in rapid succession
-                if (haikuInFlight && (Date.now() - haikuInFlight.time) < 8000) {
-                    console.log(`⏭️ Skipping ask_agent — ask_haiku is already handling: "${haikuInFlight.question.substring(0, 60)}"`);
-                    return 'The fast brain is already looking into this. Wait for its answer first.';
-                }
-                // Deduplication guard: prevent re-execution of same task within 10s
-                const now = Date.now();
-                if (task === lastTaskRequest && (now - lastTaskTime) < 10000) {
-                    console.log('⏭️ Skipping duplicate task (within 10s window)');
-                    return 'This task was just completed. The results were already relayed.';
-                }
-                lastTaskRequest = task;
-                lastTaskTime = now;
-                return executeResearch(task);
-            },
-        });
-        const respondPermissionTool = llm.tool({
-            description: `Respond to a permission request. Call after hearing user's response.`,
-            parameters: z.object({
-                response: z.enum(['allow', 'deny', 'always_allow']),
-            }),
-            execute: async ({ response }) => {
-                if (!realtimeClaudeHandler?.hasPendingPermission()) {
-                    return 'No pending permission.';
-                }
-                const pending = realtimeClaudeHandler.getPendingPermission();
-                const allow = response === 'allow' || response === 'always_allow';
-                realtimeClaudeHandler.respondToPermission(allow);
-                await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
-                return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
-            },
-        });
-        const readSpecTool = llm.tool({
-            description: `Read the session spec (spec.md) — shared state between you and your backend agent.
-Use when: checking decisions, reading open questions to ask the user, understanding architecture/context, seeing what research has been saved. Updated by your backend agent during research.`,
-            parameters: z.object({}),
-            execute: async () => {
-                const sessionId = currentLLM?.sessionId || resumeSessionId;
-                if (!sessionId)
-                    return 'No session spec yet — session is still initializing.';
-                const specContent = readSessionSpec(workingDir, sessionId);
-                if (!specContent)
-                    return 'Spec is empty — no research done yet.';
-                const libraryFiles = listLibraryFiles(workingDir, sessionId);
-                const libSection = libraryFiles.length > 0
-                    ? `\n\n[LIBRARY FILES: ${libraryFiles.join(', ')}]`
-                    : '';
-                const MAX = 4000;
-                const content = specContent.length > MAX
-                    ? specContent.substring(0, MAX) + '\n\n[... truncated]'
-                    : specContent;
-                return content + libSection;
-            },
-        });
-        const askHaikuTool = llm.tool({
-            description: `Ask your fast brain — a quick knowledge assistant with access to session files and web search (~2 seconds).
-Use for:
-- Questions answerable from the session spec or research library (much faster than ask_agent)
-- Quick web lookups for simple factual questions (definitions, current versions, basic how-to)
-- Recording user decisions: "User decided: [decision]. Update the spec."
-- Recording user preferences: "User prefers: [preference]. Update the spec."
-- Checking what research has been done on a topic
-- Reading specific library files for details
-Do NOT use for: deep research, code analysis, multi-file codebase exploration, complex investigations → use ask_agent.
-If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to look deeper, then call ask_agent with the context it provides.`,
-            parameters: z.object({
-                question: z.string().describe('The question to ask or instruction to execute'),
+                question: z.string().describe('The user\'s question or statement'),
             }),
             execute: async ({ question }) => {
-                const sessionId = currentLLM?.sessionId || resumeSessionId;
-                if (!sessionId)
-                    return 'Session not ready yet. Try ask_agent instead.';
+                // INJECTION BYPASS: When Gemini receives a system injection via generateReply(),
+                // it calls ask_fast_brain with the injection content (Gemini always calls tools).
+                // For Gemini: this is the INTENDED path — we deliberately don't set toolChoice:'none'
+                //   so the tool call goes through and we return the content as a tool response.
+                // For OpenAI: this is a fallback guard — OpenAI normally speaks instructions directly
+                //   with toolChoice:'none', but if it somehow calls the tool, we handle it here.
+                const injectionMatch = question.match(/\[(SCRIPT|PROACTIVE|NOTIFICATION)\]\s*([\s\S]*)/);
+                if (injectionMatch) {
+                    const content = injectionMatch[2].trim();
+                    console.log(`⚡ [fast brain] BYPASS: injection [${injectionMatch[1]}] → returning content directly (${content.length} chars)`);
+                    return content || question;
+                }
+                // Use pending sessionId for fresh sessions where SDK hasn't assigned one yet
+                const sessionId = currentLLM?.sessionId || currentResumeSessionId || resumeSessionId || 'pending';
                 console.log(`🧠 [fast brain] Question: "${question.substring(0, 80)}..."`);
-                // Track in-flight state to prevent ask_agent double-calling
+                // Track in-flight state
                 haikuInFlight = { question, time: Date.now() };
-                // Build live research context if the agent is actively researching
-                // This is a READ of the existing researchLog array — safe, no race conditions
+                // Build research context — from active research or last completed research
                 let researchContext;
                 if (activeResearch && activeResearch.researchLog.length > 0) {
                     const recentLog = activeResearch.researchLog.slice(-15);
                     researchContext = `Research topic: "${lastTaskRequest || 'unknown'}"\nSteps completed (${activeResearch.researchLog.length} total, showing last ${recentLog.length}):\n${recentLog.join('\n')}`;
                 }
+                else if (lastCompletedResearch && (Date.now() - lastCompletedResearch.completedAt) < 600000) {
+                    // Include context from last completed research (within 10 minutes)
+                    const recentLog = lastCompletedResearch.researchLog.slice(-15);
+                    researchContext = `[COMPLETED RESEARCH] Topic: "${lastCompletedResearch.task}"\nSteps completed (${lastCompletedResearch.researchLog.length} total, showing last ${recentLog.length}):\n${recentLog.join('\n')}\n\n(Research completed — results are in JSONL and spec.md. Answer from those, do NOT trigger new research on this topic.)`;
+                }
+                const callbacks = { // Hooks handed to askFastBrain through its options object (see the call below)
+                    triggerResearch: (task) => { // Starts research for `task` unless it repeats the previous request
+                        // Deduplication guard: drop a task identical to the last one when it arrives within 10 s
+                        const now = Date.now();
+                        if (task === lastTaskRequest && (now - lastTaskTime) < 10000) {
+                            console.log('⏭️ Skipping duplicate research task (within 10s window)');
+                            return;
+                        }
+                        lastTaskRequest = task; // Remembered so the next call can be compared against it
+                        lastTaskTime = now;
+                        executeResearch(task); // Result (if any) is not awaited or returned here
+                    },
+                    queueVoice: (script) => { // Wraps the script with getScriptInjection before queueing it for voice
+                        queueVoiceInjection(getScriptInjection(script));
+                    },
+                    sendToFrontend: (data) => { // Pass-through to the outer sendToFrontend; its result is not returned
+                        sendToFrontend(data);
+                    },
+                };
                 try {
-                    const answer = await askHaiku(workingDir, sessionId, question, researchContext);
-                    haikuInFlight = null; // Clear in-flight state
-                    console.log(`🧠 [fast brain] Answer (${answer.length} chars)`);
-                    // Notify frontend if the fast brain likely wrote to spec.md
-                    // (fast brain writes bypass the SDK tool system, so no tool_result event fires)
-                    if (answer.includes('Written: spec.md') || question.toLowerCase().includes('update the spec') || question.toLowerCase().includes('user decided') || question.toLowerCase().includes('user prefers')) {
-                        const specPath = `${workingDir}/.osborn/sessions/${sessionId}/spec.md`;
-                        sendToFrontend({
-                            type: 'research_artifact_updated',
-                            filePath: specPath,
-                            fileName: 'spec.md',
-                        });
+                    const chatHistory = getChatHistory(20);
+                    const result = await askFastBrain(workingDir, sessionId, question, {
+                        chatHistory,
+                        researchContext,
+                        callbacks,
+                        sessionBaseDir,
+                    });
+                    haikuInFlight = null;
+                    // Voice queue items may have been held while fast brain was in flight — retry now
+                    if (voiceQueue.length > 0) {
+                        setTimeout(() => processVoiceQueue(), 500);
                     }
-                    // If research is active and this was a user decision/direction,
-                    // also queue it for the agent SDK so it picks up the context
-                    // when its queue reaches the next query
-                    if (activeResearch && (question.toLowerCase().includes('user decided') ||
-                        question.toLowerCase().includes('user prefers') ||
-                        question.toLowerCase().includes('update the spec') ||
-                        question.toLowerCase().includes('also check') ||
+                    console.log(`🧠 [fast brain] Response type: ${result.type}, script: ${result.script.length} chars`);
+                    // If this was a user direction during active research,
+                    // pass it to the agent SDK so it picks up the context
+                    if (activeResearch && result.type === 'recorded' && (question.toLowerCase().includes('decided') ||
+                        question.toLowerCase().includes('prefers') ||
                         question.toLowerCase().includes('focus on') ||
                         question.toLowerCase().includes('redirect'))) {
-                        console.log(`📨 [fast brain] Passing user direction to agent SDK queue: "${question.substring(0, 60)}..."`);
-                        // Queue as a lightweight context update — agent reads spec.md
-                        // at the start of its next query and will see the updated direction
-                        executeResearch(`[USER DIRECTION during active research] ${question}. The user's spec.md has been updated with this. Acknowledge briefly and incorporate into your current research context.`);
+                        console.log(`📨 [fast brain] Passing user direction to agent SDK queue`);
+                        executeResearch(`[USER DIRECTION during active research] ${question}. The user's spec.md has been updated. Acknowledge briefly and incorporate.`);
                     }
-                    return answer;
+                    return result.script;
                 }
                 catch (err) {
-                    haikuInFlight = null; // Clear in-flight state on error
+                    haikuInFlight = null;
+                    // Voice queue items may have been held while fast brain was in flight — retry now
+                    if (voiceQueue.length > 0) {
+                        setTimeout(() => processVoiceQueue(), 500);
+                    }
                     console.error('❌ Fast brain failed:', err);
-                    return 'Fast brain lookup failed. Try ask_agent for a deeper search.';
+                    return 'I\'m having trouble processing that. Could you try again?';
+                }
+            },
+        });
+        const respondPermissionTool = llm.tool({ // Voice-model tool that relays the user's answer to a pending permission request
+            description: `Respond to a permission request. Call after hearing user's response.`,
+            parameters: z.object({
+                response: z.enum(['allow', 'deny', 'always_allow']), // Only these three literal answers are accepted
+            }),
+            execute: async ({ response }) => { // Returns a short status string in every path
+                if (!realtimeClaudeHandler?.hasPendingPermission()) { // Also taken when realtimeClaudeHandler itself is null/undefined
+                    return 'No pending permission.';
                 }
+                const pending = realtimeClaudeHandler.getPendingPermission(); // Read before responding; toolName is used in the messages below
+                const allow = response === 'allow' || response === 'always_allow'; // NOTE(review): 'always_allow' reaches the handler only as `true`, same as 'allow' — whether the "always" part is persisted elsewhere is not visible here
+                realtimeClaudeHandler.respondToPermission(allow);
+                await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName }); // Frontend receives the original three-way answer
+                return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
             },
         });
         // Instructions for realtime voice LLM
         const realtimeInstructions = getRealtimeInstructions(workingDir);
         // Create realtime model
         const realtimeModel = createRealtimeModelFromConfig(rtConfig, realtimeInstructions);
-        // Create the Agent with realtime model and tools
+        // Create the Agent with MINIMAL tools — fast brain handles all routing
         const agent = new voice.Agent({ // Voice agent for realtime mode, built on the realtime model created just above
             instructions: realtimeInstructions, // Same instruction string that was passed to createRealtimeModelFromConfig
             llm: realtimeModel,
             tools: { // Tool names (keys) exposed to the voice model
-                ask_agent: askAgentTool,
-                ask_haiku: askHaikuTool,
-                read_spec: readSpecTool,
+                ask_fast_brain: askFastBrainTool, // Replaces the three removed tools as the single question entry point
                 respond_permission: respondPermissionTool,
             },
         });
@@ -1171,31 +1419,51 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
         // Clean up active research and voice queue
         voiceQueue.length = 0;
         isProcessingQueue = false;
+        currentSpeechHandle = null;
+        lastInterruption = null;
         if (researchBatchTimer) {
             clearTimeout(researchBatchTimer);
             researchBatchTimer = null;
         }
+        stopProactiveLoop();
         if (activeResearch) {
+            activeResearch.abortController.abort();
             activeResearch.cleanup();
             activeResearch = null;
         }
+        lastCompletedResearch = null;
         currentSession = null;
         currentAgent = null;
         currentLLM = null;
+        clearFastBrainSession();
+        clearPipelineFastBrainSession();
     });
     room.on(RoomEvent.ParticipantConnected, async (participant) => {
         console.log(`\n👤 User joined: ${participant.identity}`);
+        // Wait for previous session's byte stream handler to fully deregister.
+        // Quick reconnects (< ~6s) crash with "byte stream handler already set" without this.
+        if (pendingSessionClose) {
+            console.log('⏳ Waiting for previous session to fully close...');
+            await pendingSessionClose;
+        }
         // Clean up any existing session before creating a new one
         voiceQueue.length = 0;
         isProcessingQueue = false;
+        currentSpeechHandle = null;
+        lastInterruption = null;
         if (researchBatchTimer) {
             clearTimeout(researchBatchTimer);
             researchBatchTimer = null;
         }
+        stopProactiveLoop();
+        clearFastBrainSession();
+        clearPipelineFastBrainSession();
         if (activeResearch) {
+            activeResearch.abortController.abort();
             activeResearch.cleanup();
             activeResearch = null;
         }
+        lastCompletedResearch = null;
         if (currentSession) {
             console.log('🧹 Cleaning up previous session...');
             try {
@@ -1218,7 +1486,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
         try {
             const metadata = JSON.parse(participant.metadata || '{}');
             console.log(`📋 Participant metadata:`, metadata);
-            if (metadata.voiceArch === 'realtime' || metadata.voiceArch === 'direct') {
+            if (metadata.voiceArch === 'realtime' || metadata.voiceArch === 'direct' || metadata.voiceArch === 'pipeline') {
                 sessionVoiceMode = metadata.voiceArch;
                 console.log(`🎙️ Using voice mode from frontend: ${sessionVoiceMode}`);
             }
@@ -1235,6 +1503,15 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                 preSelectedSessionId = metadata.sessionId;
                 console.log(`📂 Pre-selected session from frontend: ${preSelectedSessionId}`);
             }
+            // Read working directory override from frontend
+            if (metadata.workingDirectory && typeof metadata.workingDirectory === 'string' && metadata.workingDirectory.length > 0) {
+                workingDir = metadata.workingDirectory;
+                console.log(`📂 Working directory from frontend: ${workingDir}`);
+            }
+            else {
+                // Reset to default for new connections (in case previous session changed it)
+                workingDir = defaultWorkingDir;
+            }
         }
         catch (err) {
             console.log('⚠️ Could not parse participant metadata, using config voiceMode:', voiceMode);
@@ -1244,6 +1521,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
         currentProvider = sessionRealtimeProvider;
         // Resume session ID — only set when resuming an existing session
         const resumeSessionId = preSelectedSessionId || undefined;
+        currentResumeSessionId = resumeSessionId;
         if (resumeSessionId) {
             console.log(`🆔 Resuming session: ${resumeSessionId}`);
         }
@@ -1261,6 +1539,46 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
             session = result.session;
             agent = result.agent;
         }
+        else if (sessionVoiceMode === 'pipeline') {
+            console.log(`🎯 PIPELINE MODE: Claude SDK + parallel Gemini fast brain observer`);
+            // Pipeline mode = direct mode underneath + parallel fast brain
+            // Fast brain runs in PipelineDirectLLM.chat() — fires Gemini alongside Claude
+            const { createPipelineDirectLLM } = await import('./pipeline-direct-llm.js');
+            const pipelineLLM = createPipelineDirectLLM({ // NOTE(review): the same option set is rebuilt in the close handler's pipeline recovery branch — keep the two in sync
+                workingDirectory: workingDir,
+                sessionBaseDir,
+                mcpServers,
+                resumeSessionId,
+                voiceMode: 'direct', // Pipeline mode runs on top of direct mode
+                skipTTSQueue: true,
+                getChatHistory: () => getChatHistory(20).map(t => ({ role: t.role, content: t.text })), // Last 20 turns, with `text` renamed to `content`
+                getResearchContext: () => { // Returns a context string, or undefined when neither branch applies
+                    if (activeResearch?.researchLog.length) { // Active research with at least one log entry
+                        return `Research: "${lastTaskRequest}"\n${activeResearch.researchLog.slice(-15).join('\n')}`;
+                    }
+                    if (lastCompletedResearch && Date.now() - lastCompletedResearch.completedAt < 600000) { // Completed less than 600000 ms (10 minutes) ago
+                        return `[COMPLETED] "${lastCompletedResearch.task}"\n${lastCompletedResearch.researchLog.slice(-15).join('\n')}`;
+                    }
+                },
+                getAndConsumeInterruptionContext,
+                onFastBrainResult: (result) => { // Logs the fast-brain result and forwards it to the frontend
+                    console.log(`🧠⚡ [FAST_BRAIN ${result.type.toUpperCase()} +${result.elapsedMs}ms]: "${result.answer.substring(0, 60)}"`);
+                    sendToFrontend({ // Fire-and-forget: the returned value is not awaited
+                        type: 'fast_brain_response',
+                        text: result.answer,
+                        responseType: result.type,
+                        elapsedMs: result.elapsedMs,
+                        question: result.question,
+                        toolsUsed: result.toolsUsed,
+                        agentRole: 'pipeline-fast-brain',
+                    });
+                },
+            });
+            // Pass pipelineLLM to createDirectSession so it uses it instead of creating a new ClaudeLLM
+            const result = await createDirectSession(resumeSessionId, pipelineLLM);
+            session = result.session;
+            agent = result.agent;
+        }
         else {
             console.log(`🎯 DIRECT MODE: Claude Agent SDK with full coding capabilities`);
             const result = await createDirectSession(resumeSessionId);
@@ -1273,7 +1591,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
         // Session event wiring — extracted into function for auto-recovery
         // ============================================================
         let lastRecoveryTime = 0;
-        const MIN_RECOVERY_INTERVAL = 10000; // 10 seconds between recovery attempts
+        const MIN_RECOVERY_INTERVAL = 3000; // 3 seconds between recovery attempts
         function wireSessionEvents(sess, agt) {
             // Transcript dedup state (reset per wiring)
             let lastSentUserTranscript = '';
@@ -1286,6 +1604,10 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                     return;
                 if (normalized === '<noise>' || normalized.toLowerCase() === 'thank you')
                     return;
+                // Filter out voice injection content that appears as user transcript
+                // (Gemini v1.0.51: userInput in generateReply creates a user conversation item)
+                if (normalized.startsWith('[SCRIPT]') || normalized.startsWith('[PROACTIVE]') || normalized.startsWith('[NOTIFICATION]'))
+                    return;
                 console.log(`📝 User (${source}): "${transcript.substring(0, 60)}..."`);
                 sendToFrontend({ type: 'user_transcript', text: transcript });
                 lastSentUserTranscript = normalized;
@@ -1342,6 +1664,10 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
             sess.on('user_state_changed', (ev) => {
                 userState = ev.newState;
                 console.log(`👤 User state: ${ev.newState}`);
+                // When user stops speaking, retry voice queue — items may be waiting
+                if (ev.newState === 'listening' && voiceQueue.length > 0) {
+                    setTimeout(() => processVoiceQueue(), 500);
+                }
             });
             // FALLBACK: playout_completed
             sess.on('playout_completed', (ev) => {
@@ -1358,13 +1684,153 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                     console.log('⚠️ OpenAI active response collision — queue will retry on next listening state');
                     return;
                 }
+                // TTS abort from user interruption is normal — not an error, so it is logged and not reported via console.error
+                if (msg.includes('Request was aborted') || msg.includes('APIUserAbortError') || msg.includes('aborted')) { // NOTE(review): the 'aborted' test already covers 'Request was aborted', and it also matches any other message containing that word
+                    console.log('⚠️ LLM request aborted (user interrupted)');
+                    return;
+                }
                 console.error('❌ Session error:', ev.error);
             });
-            // Close handler with auto-recovery for Gemini 1008 crashes
+            // Capture voice mode at session creation — prevents state confusion
+            // if currentVoiceMode changes between session start and crash recovery
+            const sessionVoiceMode = currentVoiceMode;
+            // Close handler with auto-recovery for crashes (both realtime and direct modes)
             sess.on('close', async (ev) => {
                 console.log('🚪 Session closed:', ev.reason);
+                // TTS abort from user interruption — SDK already killed the session internally,
+                // so we MUST recover (can't just reset state — STT pipeline is dead).
+                // Log it distinctly so we know it's an interrupt recovery, not a real crash.
+                const errorMsg = ev.error?.message || ev.error?.error?.message || '';
+                const isTTSAbort = errorMsg.includes('aborted') || errorMsg.includes('APIUserAbortError');
+                if (isTTSAbort) {
+                    console.log('⚠️ TTS abort from user interruption — recovering session (SDK killed it internally)');
+                }
+                // Auto-recover from crashes in direct/pipeline mode (includes TTS abort)
+                if ((ev.reason === 'error' || ev.reason === 'disconnected') && (sessionVoiceMode === 'direct' || sessionVoiceMode === 'pipeline')) {
+                    const now = Date.now();
+                    if (now - lastRecoveryTime < MIN_RECOVERY_INTERVAL) { // Rate limit: the previous recovery started less than MIN_RECOVERY_INTERVAL ms ago
+                        console.log(`⚠️ Recovery too frequent — scheduling retry in ${MIN_RECOVERY_INTERVAL}ms`);
+                        setTimeout(async () => { // Deferred retry; listeners on `sess` are still attached because this path returns before removeAllListeners()
+                            // Re-check: if session was already recovered or user left, skip
+                            if (currentSession || !room.remoteParticipants.size) // NOTE(review): this path returns before `currentSession = null` further down the handler,
+                                return; // so unless something else clears currentSession first, the check is truthy and the retry never runs — confirm
+                            console.log('🔄 Retrying direct mode recovery after guard interval...');
+                            // Trigger recovery by emitting a synthetic close (re-enters this same handler)
+                            sess.emit('close', { reason: 'error' }); // No `error` field, so the handler's TTS-abort check sees an empty message
+                        }, MIN_RECOVERY_INTERVAL);
+                        return; // lastRecoveryTime is left unchanged on this path
+                    }
+                    lastRecoveryTime = now;
+                    console.log(`🔄 Auto-recovering direct mode session (reason: ${ev.reason})...`);
+                    // Clean up dead session — match realtime recovery's thoroughness
+                    try {
+                        sess.removeAllListeners();
+                    }
+                    catch { }
+                    currentSession = null;
+                    currentAgent = null;
+                    // Clear stale state from crashed session
+                    voiceQueue.length = 0;
+                    isProcessingQueue = false;
+                    haikuInFlight = null;
+                    if (researchBatchTimer) {
+                        clearTimeout(researchBatchTimer);
+                        researchBatchTimer = null;
+                    }
+                    stopProactiveLoop();
+                    if (activeResearch) {
+                        activeResearch.abortController.abort();
+                        activeResearch.cleanup();
+                        activeResearch = null;
+                    }
+                    try {
+                        // Reuse existing session ID so Claude SDK resumes where it left off
+                        const recoverySessionId = currentLLM?.sessionId || resumeSessionId;
+                        // Stop old index watcher if it exists
+                        if (currentLLM && 'stopIndexWatcher' in currentLLM) {
+                            currentLLM.stopIndexWatcher();
+                        }
+                        let result;
+                        if (sessionVoiceMode === 'pipeline') {
+                            // Pipeline mode: recreate PipelineDirectLLM wrapper with fast brain
+                            console.log('🔄 Rebuilding pipeline mode (PipelineDirectLLM + fast brain)...');
+                            const { createPipelineDirectLLM } = await import('./pipeline-direct-llm.js');
+                            const pipelineLLM = createPipelineDirectLLM({
+                                workingDirectory: workingDir,
+                                sessionBaseDir,
+                                mcpServers,
+                                resumeSessionId: recoverySessionId,
+                                voiceMode: 'direct',
+                                skipTTSQueue: true,
+                                getChatHistory: () => getChatHistory(20).map(t => ({ role: t.role, content: t.text })),
+                                getResearchContext: () => {
+                                    if (activeResearch?.researchLog.length) {
+                                        return `Research: "${lastTaskRequest}"\n${activeResearch.researchLog.slice(-15).join('\n')}`;
+                                    }
+                                    if (lastCompletedResearch && Date.now() - lastCompletedResearch.completedAt < 600000) {
+                                        return `[COMPLETED] "${lastCompletedResearch.task}"\n${lastCompletedResearch.researchLog.slice(-15).join('\n')}`;
+                                    }
+                                },
+                                getAndConsumeInterruptionContext,
+                                onFastBrainResult: (r) => {
+                                    console.log(`🧠⚡ [FAST_BRAIN ${r.type.toUpperCase()} +${r.elapsedMs}ms]: "${r.answer.substring(0, 60)}"`);
+                                    sendToFrontend({
+                                        type: 'fast_brain_response', text: r.answer, responseType: r.type,
+                                        elapsedMs: r.elapsedMs, question: r.question, toolsUsed: r.toolsUsed,
+                                        agentRole: 'pipeline-fast-brain',
+                                    });
+                                },
+                            });
+                            result = await createDirectSession(recoverySessionId, pipelineLLM);
+                        }
+                        else {
+                            result = await createDirectSession(recoverySessionId);
+                        }
+                        const newSession = result.session;
+                        const newAgent = result.agent;
+                        currentSession = newSession;
+                        currentAgent = newAgent;
+                        // Re-wire event listeners on the new session
+                        wireSessionEvents(newSession, newAgent);
+                        await newSession.start({ agent: newAgent, room });
+                        // Sync state
+                        agentState = 'listening';
+                        sendToFrontend({ type: 'agent_state', state: 'listening' });
+                        // Resume Claude session if one was active
+                        if (currentLLM?.sessionId) {
+                            currentLLM.setContinueSession(true);
+                        }
+                        console.log('✅ Direct mode auto-recovery complete');
+                        // Notify user via TTS
+                        try {
+                            const recoveredId = currentLLM?.sessionId || recoverySessionId;
+                            if (recoveredId) {
+                                const conversationHistory = await getConversationHistory(recoveredId, workingDir, 10);
+                                const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+                                const script = await prepareRecoveryScript(historyForScript);
+                                // Direct mode: use session.say() for recovery notification
+                                newSession.say(script, { allowInterruptions: true });
+                            }
+                            else {
+                                newSession.say('Voice session was briefly interrupted but I\'m back. What were we working on?', { allowInterruptions: true });
+                            }
+                        }
+                        catch (err) {
+                            console.log('⚠️ Failed to generate recovery script:', err);
+                            try {
+                                newSession.say('I\'m back after a brief interruption. What were we working on?', { allowInterruptions: true });
+                            }
+                            catch { }
+                        }
+                    }
+                    catch (err) {
+                        console.error('❌ Direct mode auto-recovery failed:', err);
+                        sendToFrontend({ type: 'agent_state', state: 'error' });
+                    }
+                    return;
+                }
                 // Auto-recover from crashes in realtime mode
-                if (ev.reason === 'error' && currentVoiceMode === 'realtime') {
+                if (ev.reason === 'error' && sessionVoiceMode === 'realtime') {
                     const now = Date.now();
                     if (now - lastRecoveryTime < MIN_RECOVERY_INTERVAL) {
                         console.log('⚠️ Recovery too frequent — skipping to prevent loop');
@@ -1387,7 +1853,9 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                         clearTimeout(researchBatchTimer);
                         researchBatchTimer = null;
                     }
+                    stopProactiveLoop();
                     if (activeResearch) {
+                        activeResearch.abortController.abort();
                         activeResearch.cleanup();
                         activeResearch = null;
                     }
@@ -1411,29 +1879,23 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                         if (currentLLM?.sessionId) {
                             currentLLM.setContinueSession(true);
                         }
-                        // Inject conversation context into the recovered session
+                        // Generate recovery script via fast brain
                         const recoveredSessionId = currentLLM?.sessionId || recoverySessionId;
                         if (recoveredSessionId) {
                             try {
-                                const summary = await getSessionSummary(recoveredSessionId, workingDir);
-                                const conversationHistory = await getConversationHistory(recoveredSessionId, workingDir, 30);
-                                if (summary && conversationHistory.length > 0) {
-                                    const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
-                                    queueVoiceInjection(`[SESSION RECOVERED] The voice session crashed and was auto-recovered. Here's the conversation context from before the crash:\n${contextBriefing}\n\nBriefly tell the user the connection was interrupted and you still have context from the conversation. Ask if they can hear you and what they'd like to continue with. Do NOT call any tools.`);
-                                    console.log('📋 Injected conversation context into recovered session');
-                                }
-                                else {
-                                    queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
-                                }
+                                const conversationHistory = await getConversationHistory(recoveredSessionId, workingDir, 10);
+                                const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+                                const script = await prepareRecoveryScript(historyForScript);
+                                queueVoiceInjection(getScriptInjection(script));
+                                console.log('📋 Injected recovery script into recovered session');
                             }
                             catch (err) {
-                                console.log('⚠️ Failed to load conversation context for recovery:', err);
-                                queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
+                                console.log('⚠️ Failed to generate recovery script:', err);
+                                queueVoiceInjection(getNotificationInjection('Voice session was briefly interrupted but I\'m back. What were we working on?'));
                             }
                         }
                         else {
-                            // No session ID — generic notification
-                            queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
+                            queueVoiceInjection(getNotificationInjection('Voice session was briefly interrupted but I\'m back. What were we working on?'));
                         }
                         console.log('✅ Auto-recovery complete');
                     }
@@ -1481,6 +1943,8 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                     preSelectedSessionId,
                     mcpServers: getMcpServerStatusList(config),
                     enabledMcpServers: enabledMcpNames,
+                    workingDirectory: workingDir,
+                    skills: loadSkillsList(sessionBaseDir),
                 });
             };
             const readyInterval = setInterval(sendReady, 2000);
@@ -1499,8 +1963,8 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
             // For direct mode: use say() which goes through the configured TTS
             const greetViaVoice = async (text) => {
                 if (sessionVoiceMode === 'realtime') {
-                    // Realtime models handle their own speech generation
-                    await session.generateReply({ userInput: text });
+                    // Use instructions (not userInput) to avoid system text appearing as user transcript
+                    await session.generateReply({ instructions: getScriptInjection(text) });
                 }
                 else {
                     await session.say(text);
@@ -1521,7 +1985,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                         success: true,
                     });
                     // Send existing workspace artifacts to frontend (session-scoped)
-                    const preArtifacts = listWorkspaceArtifacts(workingDir, preSelectedSessionId);
+                    const preArtifacts = listWorkspaceArtifacts(sessionBaseDir, preSelectedSessionId);
                     if (preArtifacts.length > 0) {
                         console.log(`📁 Sending ${preArtifacts.length} workspace artifacts to frontend`);
                         await sendToFrontend({
@@ -1535,18 +1999,14 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                             }))
                         });
                     }
-                    // Load full session history into realtime model's context
+                    // Generate briefing script via fast brain
                     if (summary) {
                         loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
-                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
-                        const specContent = getSpecForVoiceModel(workingDir, preSelectedSessionId);
-                        const specSection = specContent
-                            ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
-                            : '';
                         try {
                             if (sessionVoiceMode === 'realtime') {
-                                const contextPrompt = `[SESSION RESUMED] The user chose to continue a previous research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
-                                await session.generateReply({ instructions: contextPrompt });
+                                const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+                                const script = await prepareBriefingScript(sessionBaseDir, preSelectedSessionId, historyForScript);
+                                await session.generateReply({ instructions: getScriptInjection(script) });
                             }
                             else {
                                 await session.say("Welcome back! Ready to continue our previous conversation.");
@@ -1566,7 +2026,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                 // No sessions at all (or new session chosen) — greet as new user
                 try {
                     console.log('👋 Sending greeting...');
-                    await greetViaVoice("The user just connected for the first time. Briefly greet them as Osborn and ask what they're working on.");
+                    await greetViaVoice("Hey! I'm Osborn, your AI research assistant. What are you working on today?");
                     console.log('✅ Greeting sent');
                 }
                 catch (err) {
@@ -1580,11 +2040,41 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
     });
     room.on(RoomEvent.ParticipantDisconnected, (participant) => {
         console.log(`👋 User left: ${participant.identity}`);
+        // Full cleanup — stop all background work to avoid accumulating API usage
+        voiceQueue.length = 0;
+        isProcessingQueue = false;
+        currentSpeechHandle = null;
+        lastInterruption = null;
+        if (researchBatchTimer) {
+            clearTimeout(researchBatchTimer);
+            researchBatchTimer = null;
+        }
+        stopProactiveLoop();
+        if (activeResearch) {
+            activeResearch.abortController.abort();
+            activeResearch.cleanup();
+            activeResearch = null;
+        }
         if (currentSession) {
-            currentSession.removeAllListeners();
+            const sessionToClose = currentSession;
             currentSession = null;
-            currentLLM = null;
+            // Track async close so new connections can wait for byte stream handler to be released
+            pendingSessionClose = (async () => {
+                try {
+                    await sessionToClose.close();
+                }
+                catch { }
+                try {
+                    sessionToClose.removeAllListeners();
+                }
+                catch { }
+                pendingSessionClose = null;
+            })();
         }
+        currentAgent = null;
+        currentLLM = null;
+        clearFastBrainSession();
+        clearPipelineFastBrainSession();
         console.log('⏳ Waiting for new user...\n');
     });
     room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
@@ -1641,20 +2131,21 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                 }
             }
             else if (data.type === 'resume_session' && currentLLM) {
-                // Set session to resume
+                // Lightweight: set resume ID and send artifacts to frontend only
+                // Context injection (generateReply) happens in session_selected handler
+                // to avoid double generateReply calls that cause timeouts
                 const sessionId = data.sessionId;
                 if (sessionId && sessionExists(sessionId, workingDir)) {
                     currentLLM.setResumeSessionId(sessionId);
+                    currentResumeSessionId = sessionId;
                     console.log(`🔄 Will resume session: ${sessionId}`);
-                    const summary = await getSessionSummary(sessionId, workingDir);
-                    const conversationHistory = await getConversationHistory(sessionId, workingDir, 30);
                     await sendToFrontend({
                         type: 'session_resume_set',
                         sessionId,
                         success: true,
                     });
                     // Send existing session artifacts to frontend (session-scoped)
-                    const artifacts = listWorkspaceArtifacts(workingDir, sessionId);
+                    const artifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
                     if (artifacts.length > 0) {
                         console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
                         await sendToFrontend({
@@ -1668,27 +2159,6 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                             }))
                         });
                     }
-                    if (currentSession && summary) {
-                        loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
-                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
-                        const specContent = getSpecForVoiceModel(workingDir, sessionId);
-                        const specSection = specContent
-                            ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
-                            : '';
-                        console.log('📋 Injecting session context into voice agent...');
-                        try {
-                            if (currentVoiceMode === 'realtime') {
-                                const contextPrompt = `[SESSION RESUMED] The user chose to continue a previous research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
-                                await currentSession.generateReply({ instructions: contextPrompt });
-                            }
-                            else {
-                                await currentSession.say("Ready to continue our previous conversation.");
-                            }
-                        }
-                        catch (err) {
-                            console.log('⚠️ Context injection failed:', err);
-                        }
-                    }
                 }
                 else {
                     console.error(`❌ Session not found: ${sessionId}`);
@@ -1704,6 +2174,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                 const recentId = await getMostRecentSessionId(workingDir);
                 if (recentId) {
                     currentLLM.setResumeSessionId(recentId);
+                    currentResumeSessionId = recentId;
                     console.log(`🔄 Continuing most recent session: ${recentId}`);
                     const summary = await getSessionSummary(recentId, workingDir);
                     const conversationHistory = await getConversationHistory(recentId, workingDir, 30);
@@ -1713,7 +2184,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                         success: true,
                     });
                     // Send existing session artifacts to frontend (session-scoped)
-                    const artifacts = listWorkspaceArtifacts(workingDir, recentId);
+                    const artifacts = listWorkspaceArtifacts(sessionBaseDir, recentId);
                     if (artifacts.length > 0) {
                         console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
                         await sendToFrontend({
@@ -1729,16 +2200,12 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                     }
                     if (currentSession && summary) {
                         loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
-                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
-                        const specContent = getSpecForVoiceModel(workingDir, recentId);
-                        const specSection = specContent
-                            ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
-                            : '';
                         console.log('📋 Injecting session context into voice agent...');
                         try {
                             if (currentVoiceMode === 'realtime') {
-                                const contextPrompt = `[SESSION RESUMED] The user chose to continue their most recent research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
-                                await currentSession.generateReply({ instructions: contextPrompt });
+                                const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+                                const script = await prepareBriefingScript(sessionBaseDir, recentId, historyForScript);
+                                await currentSession.generateReply({ instructions: getScriptInjection(script) });
                             }
                             else {
                                 await currentSession.say("Continuing where we left off.");
@@ -1769,6 +2236,9 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                     // Step 2: Reset LLM state and configure for new session
                     currentLLM.resetForSessionSwitch();
                     currentLLM.setResumeSessionId(sessionId);
+                    currentResumeSessionId = sessionId;
+                    clearFastBrainSession();
+                    clearPipelineFastBrainSession();
                     console.log(`🔄 Switched to session: ${sessionId}`);
                     // Step 3: Send full context to frontend (including conversation history)
                     await sendToFrontend({
@@ -1779,7 +2249,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                         conversationHistory,
                     });
                     // Step 3.5: Send existing session artifacts to frontend (session-scoped)
-                    const switchArtifacts = listWorkspaceArtifacts(workingDir, sessionId);
+                    const switchArtifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
                     if (switchArtifacts.length > 0) {
                         console.log(`📁 Sending ${switchArtifacts.length} session artifacts to frontend`);
                         await sendToFrontend({
@@ -1793,14 +2263,14 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                             }))
                         });
                     }
-                    // Step 4: Voice agent acknowledges context
+                    // Step 4: Voice agent acknowledges context via fast brain
                     if (currentSession && summary) {
                         loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
-                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
                         try {
                             if (currentVoiceMode === 'realtime') {
-                                const contextPrompt = `[SESSION SWITCHED] The user switched to a different research session. Here's the context:\n${contextBriefing}\n\nBriefly acknowledge the switch and summarize what was being worked on.`;
-                                await currentSession.generateReply({ instructions: contextPrompt });
+                                const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+                                const briefingScript = await prepareBriefingScript(sessionBaseDir, sessionId, historyForScript, 'switch');
+                                queueVoiceInjection(getScriptInjection(briefingScript));
                             }
                             else {
                                 const acknowledgment = summary.lastMessages.length > 0
@@ -1834,7 +2304,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
             else if (data.type === 'get_session_artifacts') {
                 const sessionId = data.sessionId;
                 if (sessionId) {
-                    const artifacts = listWorkspaceArtifacts(workingDir, sessionId);
+                    const artifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
                     console.log(`📁 Sending ${artifacts.length} session artifacts for ${sessionId.substring(0, 8)}`);
                     await sendToFrontend({
                         type: 'session_artifacts',
@@ -1871,7 +2341,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                         const fs = await import('fs');
                         const fileName = filePath.split('/').pop() || '';
                         const ext = fileName.split('.').pop()?.toLowerCase() || '';
-                        const isImage = ['png', 'jpg', 'jpeg', 'svg', 'gif', 'webp'].includes(ext);
+                        const isImage = ['png', 'jpg', 'jpeg', 'gif', 'webp'].includes(ext);
                         if (isImage) {
                             const base64 = fs.readFileSync(filePath, 'base64');
                             await sendToFrontend({ type: 'research_artifact_content', filePath, content: base64, fileName, isImage: true, mimeType: `image/${ext}` });
@@ -1970,12 +2440,40 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                     enabledKeys,
                 });
             }
+            else if (data.type === 'get_skills') {
+                await sendToFrontend({
+                    type: 'skills_status',
+                    skills: loadSkillsList(sessionBaseDir),
+                });
+            }
+            else if (data.type === 'skill_add') {
+                const skillName = (data.name || '').trim().toLowerCase().replace(/[^a-z0-9-]/g, '-');
+                const skillContent = (data.content || '').trim();
+                if (!skillName || !skillContent) {
+                    await sendToFrontend({ type: 'skill_add_result', success: false, error: 'Name and content are required' });
+                }
+                else {
+                    try {
+                        const skillDir = join(sessionBaseDir, '.claude', 'skills', skillName);
+                        mkdirSync(skillDir, { recursive: true });
+                        writeFileSync(join(skillDir, 'SKILL.md'), skillContent, 'utf-8');
+                        console.log(`📚 Skill added: ${skillName}`);
+                        const skills = loadSkillsList(sessionBaseDir);
+                        await sendToFrontend({ type: 'skill_add_result', success: true, skills });
+                    }
+                    catch (err) {
+                        console.error('❌ Failed to add skill:', err);
+                        await sendToFrontend({ type: 'skill_add_result', success: false, error: String(err) });
+                    }
+                }
+            }
             else if (data.type === 'session_selected') {
                 const sessionId = data.sessionId;
                 console.log(`🚪 Session gate completed: ${sessionId ? `resume ${sessionId}` : 'fresh start'}`);
                 if (sessionId && currentLLM && sessionExists(sessionId, workingDir)) {
                     // Resume the selected session
                     currentLLM.setResumeSessionId(sessionId);
+                    currentResumeSessionId = sessionId;
                     console.log(`🔄 Resuming session: ${sessionId}`);
                     // Fetch context and greet with it
                     const summary = await getSessionSummary(sessionId, workingDir);
@@ -1986,7 +2484,7 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                         success: true,
                     });
                     // Send existing session artifacts to frontend (session-scoped)
-                    const gateArtifacts = listWorkspaceArtifacts(workingDir, sessionId);
+                    const gateArtifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
                     if (gateArtifacts.length > 0) {
                         console.log(`📁 Sending ${gateArtifacts.length} session artifacts to frontend`);
                         await sendToFrontend({
@@ -2000,18 +2498,14 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                             }))
                         });
                     }
-                    // Load full session history and greet with context
+                    // Load full session history and greet with context via fast brain
                     if (currentSession && summary) {
                         loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
-                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
-                        const specContent = getSpecForVoiceModel(workingDir, sessionId);
-                        const specSection = specContent
-                            ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
-                            : '';
                         try {
                             if (currentVoiceMode === 'realtime') {
-                                const contextPrompt = `[SESSION RESUMED] The user chose to continue a previous research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
-                                await currentSession.generateReply({ instructions: contextPrompt });
+                                const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+                                const briefingScript = await prepareBriefingScript(sessionBaseDir, sessionId, historyForScript, 'resume');
+                                queueVoiceInjection(getScriptInjection(briefingScript));
                             }
                             else {
                                 await currentSession.say("Welcome back! Ready to continue our previous conversation.");
@@ -2023,12 +2517,13 @@ If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to
                     }
                 }
                 else {
-                    // Fresh start - just greet normally
+                    // Fresh start - greet via voice queue (not userInput, which creates a user transcript)
+                    currentResumeSessionId = undefined;
                     console.log('🆕 Starting fresh session');
                     if (currentSession) {
                         try {
                             if (currentVoiceMode === 'realtime') {
-                                await currentSession.generateReply({ userInput: "The user just connected and chose to start a fresh session. Briefly greet them as Osborn and ask what they're working on." });
+                                queueVoiceInjection(getScriptInjection("Hey! I'm Osborn, your AI research assistant. What are you working on today?"));
                             }
                             else {
                                 await currentSession.say("Hey! I'm Osborn. What are you working on?");