osborn 0.5.3 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. package/.claude/settings.local.json +9 -0
  2. package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
  3. package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
  4. package/.claude/skills/playwright-browser/SKILL.md +75 -0
  5. package/.claude/skills/youtube-transcript/SKILL.md +24 -0
  6. package/dist/claude-llm.d.ts +29 -1
  7. package/dist/claude-llm.js +334 -78
  8. package/dist/config.d.ts +5 -1
  9. package/dist/config.js +4 -1
  10. package/dist/fast-brain.d.ts +70 -16
  11. package/dist/fast-brain.js +662 -99
  12. package/dist/index-3-2-26-legacy.d.ts +1 -0
  13. package/dist/index-3-2-26-legacy.js +2233 -0
  14. package/dist/index.js +752 -423
  15. package/dist/jsonl-search.d.ts +66 -0
  16. package/dist/jsonl-search.js +274 -0
  17. package/dist/leagcyprompts2.d.ts +0 -0
  18. package/dist/leagcyprompts2.js +573 -0
  19. package/dist/pipeline-direct-llm.d.ts +77 -0
  20. package/dist/pipeline-direct-llm.js +216 -0
  21. package/dist/pipeline-fastbrain.d.ts +45 -0
  22. package/dist/pipeline-fastbrain.js +367 -0
  23. package/dist/prompts-2-25-26.d.ts +0 -0
  24. package/dist/prompts-2-25-26.js +518 -0
  25. package/dist/prompts-3-2-26.d.ts +78 -0
  26. package/dist/prompts-3-2-26.js +1319 -0
  27. package/dist/prompts.d.ts +83 -12
  28. package/dist/prompts.js +1991 -588
  29. package/dist/session-access.d.ts +24 -0
  30. package/dist/session-access.js +74 -0
  31. package/dist/summary-index.d.ts +87 -0
  32. package/dist/summary-index.js +570 -0
  33. package/dist/turn-detector-shim.d.ts +24 -0
  34. package/dist/turn-detector-shim.js +83 -0
  35. package/dist/voice-io.d.ts +9 -3
  36. package/dist/voice-io.js +39 -20
  37. package/package.json +13 -10
package/dist/index.js CHANGED
@@ -5,13 +5,20 @@ import { Room, RoomEvent } from '@livekit/rtc-node';
5
5
  import { AccessToken } from 'livekit-server-sdk';
6
6
  // Initialize logger before anything else
7
7
  initializeLogger({ pretty: true, level: 'info' });
8
+ // Prevent MaxListenersExceededWarning on AbortSignal from Claude SDK query() calls
9
+ // Each resumed query() adds listeners to the shared signal; default limit is 10
10
+ import { setMaxListeners } from 'node:events';
11
+ setMaxListeners(50);
8
12
  import { createServer } from 'http';
9
- import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts, readSessionSpec, listLibraryFiles } from './config.js';
10
- import { createSTT, createTTS, createVAD, createRealtimeModelFromConfig } from './voice-io.js';
13
+ import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync } from 'node:fs';
14
+ import { join } from 'node:path';
15
+ import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts } from './config.js';
16
+ import { createSTT, createTTS, createRealtimeModelFromConfig, DIRECT_MODE_STT, DIRECT_MODE_TTS } from './voice-io.js';
11
17
  import { createClaudeLLM } from './claude-llm.js';
18
+ import { clearPipelineFastBrainSession, prewarmBM25Index } from './pipeline-fastbrain.js';
12
19
  import { createSmitheryProxy, destroySmitheryProxy, parseSmitheryUrl, isSmitheryUrl, SmitheryAuthorizationError } from './smithery-proxy.js';
13
- import { askHaiku, updateSpecFromJSONL, augmentResearchResult, writeQuestionToSpec, checkOutputAgainstQuestions, contextualizeResearchUpdate, generateProactivePrompt, generateVisualDocument, clearFastBrainHistory } from './fast-brain.js';
14
- import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getResearchCompleteInjection, getResearchUpdateInjection, getNotificationInjection } from './prompts.js';
20
+ import { askHaiku, askFastBrain, updateSpecFromJSONL, processResearchCompletion, handleResearchBatch, prepareBriefingScript, prepareRecoveryScript, writeQuestionToSpec, checkOutputAgainstQuestions, generateProactivePrompt, clearFastBrainSession } from './fast-brain.js';
21
+ import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getScriptInjection, getProactiveInjection, getNotificationInjection } from './prompts.js';
15
22
  import { MCP_CATALOG } from './config.js';
16
23
  import { llm } from '@livekit/agents';
17
24
  import { z } from 'zod';
@@ -28,6 +35,32 @@ import { z } from 'zod';
28
35
  // - Voice LLM with tool calling (ask_agent, respond_permission)
29
36
  // - Routes tasks to Claude agents for execution
30
37
  // ============================================================
38
+ // Load skills list with name + description for frontend display
39
+ function loadSkillsList(agentDir) {
40
+ const skillsDir = join(agentDir, '.claude', 'skills');
41
+ if (!existsSync(skillsDir))
42
+ return [];
43
+ const skills = [];
44
+ try {
45
+ for (const skillName of readdirSync(skillsDir)) {
46
+ const skillFile = join(skillsDir, skillName, 'SKILL.md');
47
+ if (existsSync(skillFile)) {
48
+ const content = readFileSync(skillFile, 'utf-8');
49
+ // Extract title from first # heading, or use folder name
50
+ const titleMatch = content.match(/^#\s+(?:Skill:\s*)?(.+)/m);
51
+ const name = titleMatch ? titleMatch[1].trim() : skillName;
52
+ // Extract description from first paragraph after heading
53
+ const descMatch = content.match(/^#[^\n]+\n+([^\n#]+)/m);
54
+ const description = descMatch ? descMatch[1].trim() : '';
55
+ skills.push({ name, description });
56
+ }
57
+ }
58
+ }
59
+ catch (err) {
60
+ console.warn('⚠️ Failed to load skills list:', err);
61
+ }
62
+ return skills;
63
+ }
31
64
  // Generate a short, user-friendly room code
32
65
  function generateRoomCode() {
33
66
  const chars = 'abcdefghjkmnpqrstuvwxyz23456789';
@@ -75,9 +108,15 @@ process.on('unhandledRejection', (reason) => {
75
108
  console.log('⚠️ Post-disconnect cleanup error (harmless)');
76
109
  return;
77
110
  }
78
- // generateReply timeout — usually from racing concurrent injections
79
- if (msg.includes('generateReply timed out') || msg.includes('generation_created')) {
80
- console.log('⚠️ generateReply timed out (concurrent injection race)');
111
+ // generateReply timeout — realtime LLM called a tool instead of speaking (toolChoice:'none' ignored)
112
+ // or Superseded — new generateReply cancelled a pending one
113
+ if (msg.includes('generateReply timed out') || msg.includes('generation_created') || msg.includes('Superseded')) {
114
+ console.log('⚠️ generateReply failed:', msg.substring(0, 80));
115
+ return;
116
+ }
117
+ // AdaptiveInterruptionDetector crash — LiveKit Cloud returns string instead of JSON.
118
+ // SDK handles this internally (retries → VAD fallback). Suppress residual noise.
119
+ if (msg.includes('interruption prediction') || msg.includes('AdaptiveInterruptionDetector')) {
81
120
  return;
82
121
  }
83
122
  console.error('❌ Unhandled Rejection:', msg);
@@ -158,48 +197,6 @@ function startApiServer(workingDir, port) {
158
197
  * Gemini has smaller context limits — cap at 10 exchanges with 500 char content.
159
198
  * OpenAI handles full history (30 exchanges, 2000 char content).
160
199
  */
161
- function buildContextBriefing(summary, history, provider) {
162
- const isGemini = provider === 'gemini';
163
- // Gemini: last 10 exchanges capped at 500 chars. OpenAI: full history.
164
- const maxExchanges = isGemini ? 10 : history.length;
165
- const maxContentLen = isGemini ? 500 : 2000;
166
- const trimmedHistory = history.slice(-maxExchanges);
167
- const lines = [
168
- `Session ID: ${summary.sessionId.substring(0, 8)}`,
169
- `Total messages: ${summary.messageCount}`,
170
- '',
171
- '=== SESSION CONVERSATION HISTORY ==='
172
- ];
173
- for (const exchange of trimmedHistory) {
174
- const content = exchange.content.length > maxContentLen
175
- ? exchange.content.substring(0, maxContentLen) + '...'
176
- : exchange.content;
177
- lines.push(`${exchange.role === 'user' ? 'User' : 'Assistant'}: ${content}`);
178
- lines.push('');
179
- }
180
- return lines.join('\n');
181
- }
182
- /**
183
- * Read spec.md and format it for the realtime voice model.
184
- * Truncates to avoid bloating the context window.
185
- * Returns null if spec doesn't exist or session ID isn't available.
186
- */
187
- function getSpecForVoiceModel(workingDir, sessionId) {
188
- if (!sessionId)
189
- return null;
190
- const specContent = readSessionSpec(workingDir, sessionId);
191
- if (!specContent)
192
- return null;
193
- const MAX = 3000;
194
- if (specContent.length <= MAX)
195
- return specContent;
196
- const truncated = specContent.substring(0, MAX);
197
- const lastHeading = truncated.lastIndexOf('\n## ');
198
- if (lastHeading > MAX * 0.5) {
199
- return truncated.substring(0, lastHeading) + '\n\n[... truncated — call read_spec for full content]';
200
- }
201
- return truncated + '\n\n[... truncated]';
202
- }
203
200
  /**
204
201
  * Load full session conversation history into the realtime model's ChatContext.
205
202
  * This gives the model persistent memory of what was discussed/researched,
@@ -261,8 +258,20 @@ async function main() {
261
258
  if (enabledMcpNames.length > 0) {
262
259
  console.log(`🔌 Enabled MCP servers: ${enabledMcpNames.join(', ')}`);
263
260
  }
264
- const workingDir = config.workingDirectory || process.cwd();
265
- console.log(`📂 Working directory: ${workingDir}`);
261
+ // Two directory concepts:
262
+ // 1. workingDir (cwd) where Claude Code operates. Configurable per-session.
263
+ // Priority: OSBORN_CWD env > config.workingDirectory > process.cwd()
264
+ // 2. sessionBaseDir — where session artifacts live (spec.md, library/).
265
+ // Always the Osborn agent install directory (where this process started).
266
+ // This ensures .osborn/sessions/ doesn't scatter across random directories.
267
+ const sessionBaseDir = process.cwd(); // Always the Osborn install dir
268
+ const defaultWorkingDir = process.env.OSBORN_CWD || config.workingDirectory || process.cwd();
269
+ let workingDir = defaultWorkingDir;
270
+ console.log(`📂 Working directory (cwd): ${workingDir}`);
271
+ console.log(`📂 Session base directory: ${sessionBaseDir}`);
272
+ if (process.env.OSBORN_CWD) {
273
+ console.log(` (cwd from OSBORN_CWD env var)`);
274
+ }
266
275
  console.log(`🔬 Mode: RESEARCH`);
267
276
  // Determine voice mode
268
277
  const voiceMode = getVoiceMode(config);
@@ -315,6 +324,7 @@ async function main() {
315
324
  const room = new Room();
316
325
  room.setMaxListeners(50); // Prevent MaxListenersExceeded warnings on reconnect
317
326
  // Track state
327
+ let pendingSessionClose = null; // Tracks async session close for reconnect safety
318
328
  let currentSession = null;
319
329
  let currentAgent = null; // For updateChatCtx() context injection
320
330
  let currentLLM = null;
@@ -323,6 +333,9 @@ async function main() {
323
333
  let userState = 'listening'; // Track user speech state for queue safety
324
334
  let currentVoiceMode = voiceMode; // Track active voice mode for data handlers
325
335
  let currentProvider = realtimeConfig.provider; // Track active realtime provider
336
+ // Track the active resume session ID across scopes (ParticipantConnected + DataReceived)
337
+ // Updated by resume_session, session_selected, continue_session, switch_session handlers
338
+ let currentResumeSessionId;
326
339
  // Task deduplication guard - prevents Gemini re-execution loops
327
340
  let lastTaskRequest = '';
328
341
  let lastTaskTime = 0;
@@ -330,8 +343,78 @@ async function main() {
330
343
  let haikuInFlight = null;
331
344
  // Background research state - tracks async ask_agent execution
332
345
  let activeResearch = null;
346
+ // Persist last completed research context so follow-up questions can reference it
347
+ // (activeResearch is set to null on completion — this preserves the context)
348
+ let lastCompletedResearch = null;
333
349
  // No manual queuing — the Claude SDK handles sequential queries internally
334
350
  // ============================================================
351
+ // Interruption Tracking (Content Ledger)
352
+ // ============================================================
353
+ // When user interrupts TTS, LiveKit truncates chatCtx to what was spoken.
354
+ // We capture the spoken text (synchronizedTranscript) and on the next user
355
+ // message, read Claude's full output from JSONL + inject context so Claude
356
+ // knows what was heard vs lost. Claude decides: side question → answer +
357
+ // continue, or redirect → follow new direction.
358
+ // Current SpeechHandle from session.say() — only the latest one matters
359
+ let currentSpeechHandle = null;
360
+ // Last interruption context — gathered at interrupt time, consumed when user's message arrives
361
+ let lastInterruption = null;
362
+ /**
363
+ * Called when a SpeechHandle finishes (interrupted or not).
364
+ * If interrupted: gather spoken text + JSONL context. Does NOT send to Claude yet —
365
+ * that happens when the user's transcribed message arrives via chat().
366
+ */
367
+ async function handleSpeechDone(handle, fullText) {
368
+ if (!handle.interrupted) {
369
+ lastInterruption = null;
370
+ return;
371
+ }
372
+ // fullText is what was being spoken when interrupted (passed from tts_say handler).
373
+ // No word-level cutoff for say() — only generateReply pipeline has that — but Claude
374
+ // knows its own output from JSONL, so the full block is enough context.
375
+ console.log(`🔇 Speech interrupted. Was speaking: "${fullText.substring(0, 80)}..."`);
376
+ // Read last 10 assistant messages from JSONL (Claude's full untruncated output).
377
+ // SessionMessage.text is pre-joined from all text content blocks.
378
+ let recentMessages = '';
379
+ const sessionId = currentLLM?.sessionId;
380
+ if (sessionId) {
381
+ try {
382
+ const { readSessionHistory } = await import('./session-access.js');
383
+ const history = readSessionHistory(sessionId, workingDir, {
384
+ lastN: 10,
385
+ types: ['assistant'],
386
+ });
387
+ recentMessages = history
388
+ .filter((m) => m.text)
389
+ .map((m) => m.text)
390
+ .join('\n---\n');
391
+ }
392
+ catch (err) {
393
+ console.warn('⚠️ Failed to read JSONL for interruption context:', err);
394
+ }
395
+ }
396
+ // Store — consumed when user's next message arrives via chat()
397
+ lastInterruption = { spokenText: fullText, recentMessages, timestamp: Date.now() };
398
+ console.log(`📋 Interruption context stored (text: ${fullText.length} chars, JSONL: ${recentMessages.length} chars)`);
399
+ }
400
+ /**
401
+ * Callback for PipelineDirectLLM — returns pending interruption context and clears it.
402
+ * Called in chat() when user's transcribed message arrives.
403
+ * PipelineDirectLLM enriches the user message with this context before sending to Claude.
404
+ */
405
+ function getAndConsumeInterruptionContext() {
406
+ if (!lastInterruption)
407
+ return null;
408
+ // Expire after 60s — user may have waited too long
409
+ if (Date.now() - lastInterruption.timestamp > 60_000) {
410
+ lastInterruption = null;
411
+ return null;
412
+ }
413
+ const ctx = { spokenText: lastInterruption.spokenText, recentMessages: lastInterruption.recentMessages };
414
+ lastInterruption = null;
415
+ return ctx;
416
+ }
417
+ // ============================================================
335
418
  // Unified Voice Injection Queue
336
419
  // ============================================================
337
420
  // ALL system injections (research updates, completions, notifications, errors)
@@ -364,43 +447,62 @@ async function main() {
364
447
  console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (user speaking)`);
365
448
  return;
366
449
  }
450
+ // Don't inject while fast brain tool call is in flight — the tool response will
451
+ // race with our generateReply, causing Gemini to drop our content and only speak
452
+ // the tool response. Wait for the tool call to complete first.
453
+ if (haikuInFlight) {
454
+ console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (fast brain in flight: "${haikuInFlight.question.substring(0, 40)}...")`);
455
+ return; // Will be retried when haikuInFlight clears (see tool execute handler)
456
+ }
367
457
  isProcessingQueue = true;
368
- // Safety timeout: if agent_state_changed never fires (e.g. Gemini state machine hang),
369
- // clear the guard after 30s so the queue isn't permanently stuck
458
+ // Batch ALL queued items into one generateReply call
459
+ const items = voiceQueue.splice(0);
460
+ const batchedInstruction = items.length === 1
461
+ ? items[0]
462
+ : items.join('\n\n---\n\n');
463
+ console.log(`📡 Voice queue: processing ${items.length} batched items (${batchedInstruction.length} chars)`);
464
+ // Safety timeout: if agent_state_changed never fires (edge case — e.g. Gemini
465
+ // WebSocket drops, or state machine hangs). 15s gives the model time to process.
370
466
  setTimeout(() => {
371
467
  if (isProcessingQueue) {
372
- console.log('⚠️ Voice queue: isProcessingQueue stuck for 30s, clearing');
468
+ console.log('⚠️ Voice queue: safety timeout clearing guard');
373
469
  isProcessingQueue = false;
374
470
  if (voiceQueue.length > 0 && agentState === 'listening') {
375
471
  processVoiceQueue();
376
472
  }
377
473
  }
378
- }, 30000);
379
- // Batch ALL queued items into one generateReply call
380
- const items = voiceQueue.splice(0);
381
- const batchedInstruction = items.length === 1
382
- ? items[0]
383
- : items.join('\n\n---\n\n');
384
- console.log(`📡 Voice queue: processing ${items.length} batched items (${batchedInstruction.length} chars)`);
474
+ }, 15000);
385
475
  try {
386
476
  // Skip interrupt for Gemini — disrupts Gemini's state machine, causing it to
387
477
  // never transition back to 'listening' (hangs in speaking state indefinitely)
388
478
  if (currentProvider !== 'gemini') {
389
479
  currentSession.interrupt();
390
480
  }
391
- currentSession.generateReply({
392
- instructions: batchedInstruction,
393
- toolChoice: 'none',
394
- });
481
+ if (currentProvider === 'gemini') {
482
+ // LiveKit SDK v1.0.51: generateReply({ instructions }) sends a system turn +
483
+ // synthetic "." user turn. After Gemini processes a tool call in this flow,
484
+ // autoToolReplyGeneration does NOT trigger continuation (system-only limitation).
485
+ // Using userInput instead makes it a "user-initiated" request where auto-continuation
486
+ // works. The ask_fast_brain injection bypass handles [SCRIPT]/[PROACTIVE]/[NOTIFICATION]
487
+ // prefixes and returns the content directly as a tool response.
488
+ currentSession.generateReply({
489
+ userInput: batchedInstruction,
490
+ });
491
+ }
492
+ else {
493
+ // OpenAI respects toolChoice:'none' — speaks instructions directly
494
+ currentSession.generateReply({
495
+ instructions: batchedInstruction,
496
+ toolChoice: 'none',
497
+ });
498
+ }
395
499
  // Model transitions to thinking/speaking after this call.
396
500
  // When it returns to 'listening', agent_state_changed triggers processVoiceQueue() again.
397
501
  // Also inject into chatCtx as persistent context so the model remembers across turns
398
502
  injectIntoChatCtx(batchedInstruction);
399
503
  }
400
504
  catch (err) {
401
- console.log('⚠️ Voice queue generateReply failed, dropping items:', err);
402
- // Do NOT re-queue — re-queuing causes infinite retry cascades
403
- // The frontend still has the updates via claude_output events
505
+ console.log('⚠️ Voice queue generateReply failed:', err);
404
506
  isProcessingQueue = false;
405
507
  }
406
508
  // isProcessingQueue is cleared when agent_state_changed fires
@@ -473,17 +575,16 @@ async function main() {
473
575
  isStreaming: true,
474
576
  agentRole: 'research-progress',
475
577
  });
476
- // COMMENTED OUT voice narration disabled, research progress goes to -frontend logs only
477
- // // queueVoiceInjection(getResearchUpdateInjection(batchText))
478
- // Route through fast brain for contextual voice updates (capped at 3 per task)
479
- if (activeResearch.voiceUpdateCount < 3) {
578
+ // Route through fast brain it decides whether to speak (usually silent)
579
+ if (activeResearch.voiceUpdateCount < 2) {
480
580
  const voiceSid = currentLLM?.sessionId;
481
581
  if (voiceSid) {
482
- contextualizeResearchUpdate(workingDir, voiceSid, lastTaskRequest || '', updates, activeResearch.researchLog)
483
- .then(update => {
484
- if (update && update !== 'NOTHING' && activeResearch) {
582
+ const chatHistory = getChatHistory(10);
583
+ handleResearchBatch(workingDir, voiceSid, lastTaskRequest || '', updates, activeResearch.researchLog, chatHistory, sessionBaseDir)
584
+ .then(script => {
585
+ if (script && activeResearch) {
485
586
  activeResearch.voiceUpdateCount++;
486
- queueVoiceInjection(getResearchUpdateInjection(update));
587
+ queueVoiceInjection(getScriptInjection(script));
487
588
  }
488
589
  })
489
590
  .catch(() => { }); // Silent fail — updates are optional
@@ -495,7 +596,7 @@ async function main() {
495
596
  let proactiveTimer = null;
496
597
  let proactivePromptHistory = [];
497
598
  const PROACTIVE_INTERVAL = 15000; // 15 seconds (offset from 8s batch timer)
498
- const MAX_PROACTIVE_PROMPTS = 4; // Cap per research task
599
+ const MAX_PROACTIVE_PROMPTS = 2; // Cap per research task (reduced from 4 to minimize realtime LLM tokens)
499
600
  function startProactiveLoop(task, sessionId) {
500
601
  stopProactiveLoop();
501
602
  proactivePromptHistory = [];
@@ -514,11 +615,11 @@ async function main() {
514
615
  if (isProcessingQueue)
515
616
  return; // Don't collide with voice queue
516
617
  try {
517
- const prompt = await generateProactivePrompt(workingDir, sessionId, task, activeResearch.researchLog, proactivePromptHistory);
618
+ const prompt = await generateProactivePrompt(workingDir, sessionId, task, activeResearch.researchLog, proactivePromptHistory, sessionBaseDir);
518
619
  if (prompt && prompt !== 'NOTHING') {
519
620
  proactivePromptHistory.push(prompt);
520
621
  proactiveCount++;
521
- queueVoiceInjection(`[PROACTIVE CONTEXT] ${prompt}. Say this naturally to the user. Do NOT call any tools.`);
622
+ queueVoiceInjection(getProactiveInjection(prompt));
522
623
  }
523
624
  }
524
625
  catch { } // Silent fail — proactive prompts are optional
@@ -580,28 +681,40 @@ async function main() {
580
681
  }
581
682
  }
582
683
  // Create DIRECT session (STT + Claude Agent SDK + TTS)
583
- async function createDirectSession(resumeSessionId) {
684
+ async function createDirectSession(resumeSessionId, llmOverride) {
584
685
  console.log('🎯 Creating direct session...');
585
- const stt = createSTT({ provider: 'deepgram' });
586
- const tts = createTTS({ provider: 'deepgram', voice: 'aura-asteria-en' });
587
- const vad = await createVAD();
588
- // Create Claude LLM wrapper in research mode
589
- const directLLM = createClaudeLLM({
686
+ const stt = createSTT(DIRECT_MODE_STT);
687
+ const tts = createTTS(DIRECT_MODE_TTS);
688
+ // Create Claude LLM wrapper — direct mode uses speech-optimized system prompt
689
+ // skipTTSQueue: bypass LiveKit's BufferedTokenStream, use session.say() instead
690
+ // llmOverride: pipeline mode passes PipelineDirectLLM which wraps its own ClaudeLLM
691
+ const directLLM = llmOverride || createClaudeLLM({
590
692
  workingDirectory: workingDir,
693
+ sessionBaseDir,
591
694
  mcpServers,
592
695
  resumeSessionId,
696
+ voiceMode: 'direct',
697
+ skipTTSQueue: true,
593
698
  });
594
699
  currentLLM = directLLM;
595
700
  // For resumed sessions, eagerly create workspace (we know the real ID)
596
701
  if (resumeSessionId) {
597
- const workspace = ensureSessionWorkspace(workingDir, resumeSessionId);
702
+ const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
598
703
  console.log(`📁 Session workspace (resumed): ${workspace}`);
599
704
  }
600
705
  // For new sessions, create workspace when SDK assigns real session ID
601
706
  directLLM.events.once('session_id', ({ sessionId }) => {
602
- const workspace = ensureSessionWorkspace(workingDir, sessionId);
707
+ const workspace = ensureSessionWorkspace(sessionBaseDir, sessionId);
603
708
  console.log(`📁 Session workspace created: ${workspace}`);
709
+ // Pipeline mode: pre-warm BM25 index so first fast brain query is fast
710
+ if (currentVoiceMode === 'pipeline') {
711
+ prewarmBM25Index(sessionId, workingDir).catch(() => { });
712
+ }
604
713
  });
714
+ // Also pre-warm for resumed sessions (sessionId already known)
715
+ if (resumeSessionId && currentVoiceMode === 'pipeline') {
716
+ prewarmBM25Index(resumeSessionId, workingDir).catch(() => { });
717
+ }
605
718
  // Wire up MCP server changes to frontend
606
719
  directLLM.events.on('mcp_servers_changed', (data) => {
607
720
  console.log(`🔌 MCP servers changed: ${data.enabledKeys.join(', ') || 'none'}`);
@@ -685,6 +798,50 @@ async function main() {
685
798
  currentSession.say?.(ttsMessage).catch(() => { });
686
799
  }
687
800
  });
801
+ // Wire up TTS say — bypass LiveKit's BufferedTokenStream, speak directly via session.say()
802
+ // Each text block from Claude gets spoken immediately as it arrives, no internal buffering
803
+ directLLM.events.on('tts_say', (data) => {
804
+ // Guard: session must be alive — TTS errors can kill the session while background query runs
805
+ if (!currentSession) {
806
+ console.warn(`⚠️ tts_say fired but currentSession is null — text dropped: "${data.text?.substring(0, 60)}"`);
807
+ return;
808
+ }
809
+ if (!data.text?.trim()) {
810
+ console.log(`🔇 tts_say fired but text is empty — skipping`);
811
+ return;
812
+ }
813
+ const sayId = Date.now(); // simple ID to correlate start/end logs
814
+ console.log(`🗣️ [${sayId}] session.say START (${data.text.length} chars): "${data.text.substring(0, 60)}..."`);
815
+ try {
816
+ const handle = currentSession.say(data.text);
817
+ if (handle && typeof handle.addDoneCallback === 'function') {
818
+ // SpeechHandle — track it and register interruption callback
819
+ currentSpeechHandle = handle;
820
+ handle.addDoneCallback((sh) => {
821
+ if (sh.interrupted) {
822
+ console.log(`🔇 [${sayId}] session.say INTERRUPTED`);
823
+ handleSpeechDone(sh, data.text);
824
+ }
825
+ else {
826
+ console.log(`✅ [${sayId}] session.say DONE`);
827
+ if (currentSpeechHandle === sh)
828
+ lastInterruption = null;
829
+ }
830
+ });
831
+ console.log(`🗣️ [${sayId}] session.say queued (SpeechHandle tracked)`);
832
+ }
833
+ else if (handle && typeof handle.then === 'function') {
834
+ // Promise-based fallback (older SDK path)
835
+ handle
836
+ .then(() => console.log(`✅ [${sayId}] session.say DONE`))
837
+ .catch((err) => console.error(`❌ [${sayId}] session.say FAILED:`, err?.message || err));
838
+ }
839
+ }
840
+ catch (err) {
841
+ // Catch synchronous "AgentSession is not running" errors
842
+ console.warn(`⚠️ [${sayId}] session.say threw — session likely dead: ${err?.message}`);
843
+ }
844
+ });
688
845
  // Wire up session resume failure - notify frontend when SDK creates new session instead
689
846
  directLLM.events.on('session_resume_failed', (data) => {
690
847
  console.error(`❌ Session resume failed: ${data.requestedSessionId} → ${data.actualSessionId}`);
@@ -703,17 +860,18 @@ async function main() {
703
860
  });
704
861
  });
705
862
  // Create the Agent with instructions, STT, LLM, TTS
863
+ // VAD (Silero ONNX) removed — caused 2-5s inference lag on CPU, making interruption detection worse
864
+ // Turn detection is server-side (Deepgram endpointing), interruptions handled by STT
706
865
  const agent = new voice.Agent({
707
866
  instructions: DIRECT_MODE_PROMPT,
708
867
  stt,
709
868
  llm: directLLM,
710
869
  tts,
711
- vad,
712
- turnDetection: 'vad',
870
+ turnDetection: 'stt',
713
871
  });
714
- // Create the session (no longer passes STT/LLM/TTS here)
715
872
  const session = new voice.AgentSession({
716
- turnDetection: 'vad',
873
+ turnDetection: 'stt',
874
+ preemptiveGeneration: false, // Only fire LLM on final committed transcript, not partial preemptives
717
875
  });
718
876
  return { session, agent };
719
877
  }
@@ -729,18 +887,19 @@ async function main() {
729
887
  // Create Claude LLM for tool execution (research tasks)
730
888
  realtimeClaudeHandler = createClaudeLLM({
731
889
  workingDirectory: workingDir,
890
+ sessionBaseDir,
732
891
  mcpServers,
733
892
  resumeSessionId,
734
893
  });
735
894
  currentLLM = realtimeClaudeHandler;
736
895
  // For resumed sessions, eagerly create workspace (we know the real ID)
737
896
  if (resumeSessionId) {
738
- const workspace = ensureSessionWorkspace(workingDir, resumeSessionId);
897
+ const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
739
898
  console.log(`📁 Session workspace (resumed): ${workspace}`);
740
899
  }
741
900
  // For new sessions, create workspace when SDK assigns real session ID
742
901
  realtimeClaudeHandler.events.once('session_id', ({ sessionId }) => {
743
- const workspace = ensureSessionWorkspace(workingDir, sessionId);
902
+ const workspace = ensureSessionWorkspace(sessionBaseDir, sessionId);
744
903
  console.log(`📁 Session workspace created: ${workspace}`);
745
904
  });
746
905
  // Wire up MCP server changes to frontend
@@ -840,76 +999,24 @@ async function main() {
840
999
  checkpointId: data.checkpointId,
841
1000
  });
842
1001
  });
843
- // Extract priority content from research results — preserves URLs, code blocks, and key details
844
- function extractPriorityContent(result, maxChars = 4000) {
845
- if (result.length <= maxChars)
846
- return result;
847
- // Extract URLs (preserve for voice relay)
848
- const urlRegex = /https?:\/\/[^\s\)\"\'>\]]+/g;
849
- const urls = [...new Set(result.match(urlRegex) || [])];
850
- // Extract code blocks (first 2, up to 400 chars each)
851
- const codeBlockRegex = /```[\s\S]*?```/g;
852
- const codeBlocks = [];
853
- let match;
854
- while ((match = codeBlockRegex.exec(result)) !== null && codeBlocks.length < 2) {
855
- const block = match[0].length > 400 ? match[0].substring(0, 397) + '```' : match[0];
856
- codeBlocks.push(block);
857
- }
858
- // Build sections
859
- const sections = [];
860
- // Take the first ~2500 chars of narrative (intro + main findings)
861
- const narrativeEnd = Math.min(result.length, 2500);
862
- const narrativeTruncated = result.substring(0, narrativeEnd);
863
- const lastPeriod = narrativeTruncated.lastIndexOf('.');
864
- const narrative = lastPeriod > narrativeEnd * 0.6
865
- ? narrativeTruncated.substring(0, lastPeriod + 1)
866
- : narrativeTruncated;
867
- sections.push(narrative);
868
- // Append conclusion (last ~500 chars) if result is long enough
869
- if (result.length > 3000) {
870
- const tail = result.substring(result.length - 500);
871
- const firstPeriod = tail.indexOf('.');
872
- const conclusion = firstPeriod > 0 ? tail.substring(firstPeriod + 1).trim() : tail.trim();
873
- if (conclusion.length > 50) {
874
- sections.push(`\n\n[CONCLUSION]\n${conclusion}`);
875
- }
876
- }
877
- // Append code blocks if not already in the narrative
878
- if (codeBlocks.length > 0) {
879
- const codeSection = codeBlocks.filter(cb => !narrative.includes(cb));
880
- if (codeSection.length > 0) {
881
- sections.push(`\n\n[CODE EXAMPLES]\n${codeSection.join('\n\n')}`);
882
- }
883
- }
884
- // Append URLs if not already in the narrative
885
- const newUrls = urls.filter(u => !narrative.includes(u));
886
- if (newUrls.length > 0) {
887
- sections.push(`\n\n[LINKS]\n${newUrls.slice(0, 5).join('\n')}`);
888
- }
889
- let assembled = sections.join('');
890
- // Final safety truncation if assembled exceeds maxChars
891
- if (assembled.length > maxChars) {
892
- const truncated = assembled.substring(0, maxChars);
893
- const lp = truncated.lastIndexOf('.');
894
- assembled = lp > maxChars * 0.7 ? truncated.substring(0, lp + 1) : truncated + '...';
895
- }
896
- return assembled;
897
- }
898
1002
  // Extracted research execution — called by ask_agent, SDK handles queuing internally
899
1003
  function executeResearch(task) {
900
1004
  sendToFrontend({ type: 'system', text: `Executing: ${task}` });
901
1005
  // Fire-and-forget: write user question to spec.md BEFORE agent starts
902
1006
  const questionSid = currentLLM?.sessionId || resumeSessionId;
903
1007
  if (questionSid) {
904
- writeQuestionToSpec(workingDir, questionSid, task).catch(err => console.error('❌ writeQuestionToSpec failed:', err));
1008
+ writeQuestionToSpec(sessionBaseDir, questionSid, task).catch(err => console.error('❌ writeQuestionToSpec failed:', err));
905
1009
  }
906
- // Clean up previous research listeners to avoid duplicate event handlers
1010
+ // Clean up previous research UI tracking but let the SDK query complete in background.
1011
+ // The SDK has an internal queue: new query() calls enqueue behind running ones.
1012
+ // Old research results land in JSONL and fast brain can access them later.
907
1013
  if (activeResearch) {
908
- activeResearch.cleanup();
1014
+ activeResearch.cleanup(); // Remove event listeners so UI tracks new task
909
1015
  if (researchBatchTimer) {
910
1016
  clearTimeout(researchBatchTimer);
911
1017
  researchBatchTimer = null;
912
1018
  }
1019
+ // NOTE: NOT aborting — old SDK process continues writing to JSONL
913
1020
  }
914
1021
  // Set up research log batching — events push to queue for state-driven injection
915
1022
  const researchLog = [];
@@ -970,7 +1077,7 @@ async function main() {
970
1077
  if (resultText.length > ANSWER_CHECK_THRESHOLD) {
971
1078
  const sid = currentLLM?.sessionId || resumeSessionId;
972
1079
  if (sid)
973
- checkOutputAgainstQuestions(workingDir, sid, resultText, 'tool_result').catch(() => { });
1080
+ checkOutputAgainstQuestions(sessionBaseDir, sid, resultText, 'tool_result').catch(() => { });
974
1081
  }
975
1082
  // When AskUserQuestion completes, the user's answer is a decision — track it in spec
976
1083
  if (data.name === 'AskUserQuestion' && data.response) {
@@ -979,7 +1086,7 @@ async function main() {
979
1086
  const questionText = JSON.stringify(data.input?.questions || data.input || {});
980
1087
  const answerText = typeof data.response === 'string' ? data.response : JSON.stringify(data.response);
981
1088
  const specUpdate = `User answered a clarifying question during research.\nQuestion: ${questionText}\nAnswer: ${answerText}\nRecord this as a user decision in spec.md.`;
982
- askHaiku(workingDir, sid, specUpdate).catch(err => console.error('❌ Failed to record AskUserQuestion answer in spec:', err));
1089
+ askHaiku(workingDir, sid, specUpdate, undefined, undefined, undefined, sessionBaseDir).catch(err => console.error('❌ Failed to record AskUserQuestion answer in spec:', err));
983
1090
  console.log(`📝 AskUserQuestion answer forwarded to fast brain for spec tracking`);
984
1091
  }
985
1092
  }
@@ -996,36 +1103,53 @@ async function main() {
996
1103
  if (text.length > ANSWER_CHECK_THRESHOLD) {
997
1104
  const sid = currentLLM?.sessionId || resumeSessionId;
998
1105
  if (sid)
999
- checkOutputAgainstQuestions(workingDir, sid, text, 'assistant_text').catch(() => { });
1106
+ checkOutputAgainstQuestions(sessionBaseDir, sid, text, 'assistant_text').catch(() => { });
1000
1107
  }
1001
1108
  }
1002
1109
  };
1110
+ // Capture the SDK's requestId for this query — identifies this research task
1111
+ // in the JSONL file for targeted retrieval by fast brain
1112
+ let sdkRequestId = null;
1113
+ const onQueryRequestId = (data) => {
1114
+ if (!sdkRequestId && data.requestId) {
1115
+ sdkRequestId = data.requestId;
1116
+ console.log(`📋 [research] SDK requestId: ${sdkRequestId}`);
1117
+ }
1118
+ };
1003
1119
  realtimeClaudeHandler.events.on('tool_use', onToolUse);
1004
1120
  realtimeClaudeHandler.events.on('tool_result', onToolResult);
1005
1121
  realtimeClaudeHandler.events.on('assistant_text', onText);
1122
+ realtimeClaudeHandler.events.on('query_request_id', onQueryRequestId);
1006
1123
  const cleanupListeners = () => {
1007
1124
  realtimeClaudeHandler?.events.off('tool_use', onToolUse);
1008
1125
  realtimeClaudeHandler?.events.off('tool_result', onToolResult);
1009
1126
  realtimeClaudeHandler?.events.off('assistant_text', onText);
1127
+ realtimeClaudeHandler?.events.off('query_request_id', onQueryRequestId);
1010
1128
  };
1129
+ // Create AbortController for this research task — abort on disconnect/cleanup
1130
+ const researchAbortController = new AbortController();
1011
1131
  // Track active research — updates drain when model enters 'listening' state
1012
- activeResearch = {
1132
+ const thisResearch = {
1013
1133
  researchLog,
1014
1134
  pendingUpdates,
1015
1135
  cleanup: cleanupListeners,
1016
1136
  voiceUpdateCount: 0,
1137
+ abortController: researchAbortController,
1017
1138
  };
1139
+ activeResearch = thisResearch;
1018
1140
  // Start proactive conversational loop
1019
1141
  const proactiveSid = currentLLM?.sessionId || resumeSessionId;
1020
1142
  if (proactiveSid) {
1021
1143
  startProactiveLoop(task, proactiveSid);
1022
1144
  }
1023
1145
  // Run research in the background (non-blocking)
1146
+ // Pass AbortController so research can be stopped on disconnect
1024
1147
  const researchPromise = (async () => {
1025
1148
  const stream = realtimeClaudeHandler.chat({
1026
1149
  chatCtx: {
1027
1150
  items: [{ type: 'message', role: 'user', content: [task] }],
1028
1151
  },
1152
+ abortController: researchAbortController,
1029
1153
  });
1030
1154
  let result = '';
1031
1155
  for await (const chunk of stream) {
@@ -1037,7 +1161,17 @@ async function main() {
1037
1161
  })();
1038
1162
  // Handle completion asynchronously
1039
1163
  researchPromise.then(async (result) => {
1040
- console.log(`✅ [realtime] Research complete (${result.length} chars)`);
1164
+ // Check if aborted — empty result means clean abort, skip pipeline
1165
+ if (researchAbortController.signal.aborted || !result.trim()) {
1166
+ console.log(`🛑 [realtime] Research aborted or empty: ${task.substring(0, 60)}`);
1167
+ cleanupListeners();
1168
+ if (activeResearch === thisResearch) {
1169
+ activeResearch = null;
1170
+ }
1171
+ return;
1172
+ }
1173
+ const isStillCurrent = activeResearch === thisResearch;
1174
+ console.log(`✅ [realtime] Research complete (${result.length} chars${isStillCurrent ? '' : ', superseded by newer task'})`);
1041
1175
  // Clean up
1042
1176
  cleanupListeners();
1043
1177
  // Send raw result to frontend as a log entry (not assistant_response — that's reserved
@@ -1047,72 +1181,74 @@ async function main() {
1047
1181
  ? result.substring(0, 150) + '...'
1048
1182
  : result;
1049
1183
  await sendToFrontend({ type: 'task_completed', task, resultPreview });
1050
- // Build enhanced return with research log
1051
- const logSummary = researchLog.length > 0
1052
- ? `\n\n[RESEARCH LOG]\n${researchLog.slice(0, 25).join('\n')}`
1053
- : '';
1054
- // Extract priority content — preserves URLs, code blocks, and key details (4000 char limit)
1055
- const resultForVoice = extractPriorityContent(result);
1056
- const fullResult = (resultForVoice + logSummary) || 'Research completed successfully.';
1057
- // Clear active research and timers before injecting final results
1058
- if (researchBatchTimer) {
1059
- clearTimeout(researchBatchTimer);
1060
- researchBatchTimer = null;
1184
+ // Only modify global state if we're still the current research task.
1185
+ // If a newer task replaced us, don't clobber its timers/state.
1186
+ if (isStillCurrent) {
1187
+ if (researchBatchTimer) {
1188
+ clearTimeout(researchBatchTimer);
1189
+ researchBatchTimer = null;
1190
+ }
1191
+ stopProactiveLoop();
1061
1192
  }
1062
- stopProactiveLoop();
1063
- activeResearch = null;
1064
- // Send final results to frontend for visibility
1193
+ // Preserve research context for follow-up questions
1194
+ lastCompletedResearch = {
1195
+ task,
1196
+ researchLog: [...researchLog],
1197
+ completedAt: Date.now(),
1198
+ };
1199
+ // Only clear activeResearch if we're still the current task
1200
+ if (isStillCurrent) {
1201
+ activeResearch = null;
1202
+ }
1203
+ // Send research_task_complete to frontend for inline chat tracking
1065
1204
  await sendToFrontend({
1066
- type: 'claude_output',
1067
- text: `[Research Complete] Injecting findings into voice model (${fullResult.length} chars)`,
1068
- isStreaming: false,
1069
- agentRole: 'research-progress',
1205
+ type: 'research_task_complete',
1206
+ task,
1207
+ summary: result.substring(0, 500),
1070
1208
  });
1071
- // Route through fast brain for context augmentation before voice injection
1072
- // Fast brain adds spec context but does NOT summarize — passes details through verbatim
1209
+ // Route through fast brain to generate a teleprompter script from the findings
1210
+ // Fast brain reads full JSONL and writes a spoken monologue
1073
1211
  const voiceSid = currentLLM?.sessionId || resumeSessionId;
1074
- console.log(`📡 [realtime] Augmenting results via fast brain (${fullResult.length} chars, agentState: ${agentState})`);
1212
+ const chatHistory = getChatHistory(10);
1213
+ console.log(`📡 [realtime] Generating teleprompter script via fast brain (result: ${result.length} chars, agentState: ${agentState})`);
1214
+ // Create sendToChat for research completion to send structured data to frontend
1215
+ const completionSendToChat = (text) => {
1216
+ sendToFrontend({ type: 'assistant_response', text });
1217
+ };
1075
1218
  if (voiceSid) {
1076
- augmentResearchResult(workingDir, voiceSid, task, fullResult)
1077
- .then(augmented => {
1078
- queueVoiceInjection(getResearchCompleteInjection(task, augmented));
1219
+ processResearchCompletion(workingDir, voiceSid, task, result, chatHistory, completionSendToChat, sessionBaseDir)
1220
+ .then(script => {
1221
+ queueVoiceInjection(getScriptInjection(script));
1079
1222
  })
1080
1223
  .catch(() => {
1081
- // Fallback: use result directly if fast brain fails
1082
- queueVoiceInjection(getResearchCompleteInjection(task, fullResult));
1224
+ // Fallback: use truncated result directly if fast brain fails
1225
+ queueVoiceInjection(getScriptInjection(result.substring(0, 500)));
1083
1226
  });
1084
1227
  }
1085
1228
  else {
1086
- queueVoiceInjection(getResearchCompleteInjection(task, fullResult));
1229
+ queueVoiceInjection(getScriptInjection(result.substring(0, 500)));
1087
1230
  }
1088
- // Inject FULL untruncated result into ChatCtx so voice model can answer
1089
- // follow-up questions ("tell me more", "what were those links?") from memory
1090
- injectIntoChatCtx(`[FULL RESEARCH DETAILS for "${task}"]\n${result}`);
1091
1231
  // Fire-and-forget JSONL-based refinement pass via fast brain
1092
1232
  // Reads FULL untruncated data from JSONL — no content buffer, no truncation
1093
1233
  const postResearchSessionId = currentLLM?.sessionId || resumeSessionId;
1094
1234
  if (postResearchSessionId) {
1095
- updateSpecFromJSONL(workingDir, postResearchSessionId, task, researchLog)
1235
+ updateSpecFromJSONL(workingDir, postResearchSessionId, task, researchLog, sessionBaseDir)
1096
1236
  .then(updateResult => {
1097
1237
  if (!updateResult)
1098
1238
  return;
1099
1239
  // Notify frontend about spec.md update
1100
1240
  if (updateResult.spec) {
1101
- const specPath = `${workingDir}/.osborn/sessions/${postResearchSessionId}/spec.md`;
1241
+ const specPath = `${sessionBaseDir}/.osborn/sessions/${postResearchSessionId}/spec.md`;
1102
1242
  sendToFrontend({
1103
1243
  type: 'research_artifact_updated',
1104
1244
  filePath: specPath,
1105
1245
  fileName: 'spec.md',
1106
1246
  });
1107
- const truncated = getSpecForVoiceModel(workingDir, postResearchSessionId);
1108
- if (truncated) {
1109
- injectIntoChatCtx(`[UPDATED SESSION SPEC]\n${truncated}`);
1110
- console.log(`📋 Re-injected spec.md into ChatCtx after fast brain update (${truncated.length} chars)`);
1111
- }
1247
+ // Voice model is a teleprompter — fast brain reads spec directly, no ChatCtx injection needed
1112
1248
  }
1113
1249
  // Notify frontend about each library file written by the fast brain
1114
1250
  for (const libFile of updateResult.libraryFiles) {
1115
- const libPath = `${workingDir}/.osborn/sessions/${postResearchSessionId}/library/${libFile}`;
1251
+ const libPath = `${sessionBaseDir}/.osborn/sessions/${postResearchSessionId}/library/${libFile}`;
1116
1252
  sendToFrontend({
1117
1253
  type: 'research_artifact_updated',
1118
1254
  filePath: libPath,
@@ -1122,217 +1258,148 @@ async function main() {
1122
1258
  });
1123
1259
  }
1124
1260
  }).catch(async (err) => {
1125
- console.error(`❌ [realtime] Research failed:`, err);
1126
1261
  // Clean up
1127
1262
  cleanupListeners();
1128
- if (researchBatchTimer) {
1129
- clearTimeout(researchBatchTimer);
1130
- researchBatchTimer = null;
1263
+ const isStillCurrent = activeResearch === thisResearch;
1264
+ if (isStillCurrent) {
1265
+ if (researchBatchTimer) {
1266
+ clearTimeout(researchBatchTimer);
1267
+ researchBatchTimer = null;
1268
+ }
1269
+ stopProactiveLoop();
1270
+ activeResearch = null;
1131
1271
  }
1132
- stopProactiveLoop();
1133
- activeResearch = null;
1272
+ // If aborted (user disconnected), log quietly
1273
+ if (researchAbortController.signal.aborted) {
1274
+ console.log(`🛑 [realtime] Research aborted: ${task.substring(0, 60)}`);
1275
+ return;
1276
+ }
1277
+ console.error(`❌ [realtime] Research failed:`, err);
1134
1278
  // Queue error notification — will be spoken when model is available
1135
- queueVoiceInjection(`[NOTIFICATION] The research task encountered an error: ${err.message}. Let the user know briefly and ask if they want to try again. Do NOT call any tools.`);
1279
+ queueVoiceInjection(getNotificationInjection(`Research encountered an error: ${err.message}. You could try asking again.`));
1136
1280
  });
1137
1281
  // Return immediately to unblock the voice model
1138
1282
  return 'Research started. I\'ll relay findings as they come in — you can keep talking to the user while I work.';
1139
1283
  }
1140
1284
  // Create tools for the realtime voice LLM
1141
- const askAgentTool = llm.tool({
1142
- description: `Delegate a task to your backend agent (Claude), which has full research, analysis, reasoning, and coding capabilities.
1143
-
1144
- Use for:
1145
- - Researching topics, technologies, concepts, or ideas in depth
1146
- - Fetching and analyzing web pages, articles, blog posts, YouTube transcripts
1147
- - Reading and summarizing documentation, papers, or reference materials
1148
- - Exploring and analyzing codebases, configs, architecture
1149
- - Comparing options, tools, approaches — with tradeoffs and recommendations
1150
- - Running bash commands, testing implementations
1151
- - Using MCP tools (GitHub, YouTube, and other external tools)
1152
- - Saving findings to the session library and updating the spec
1153
- - Any question requiring research, analysis, verification, or deeper reasoning
1154
-
1155
- Reformulate the user's spoken request into a clear, specific task.
1156
- The more context you include (topic, constraints, what they want to learn), the better the results.
1157
- If the user wants specific details (examples, URLs, comparisons, step-by-step breakdown), mention that in your request.`,
1158
- parameters: z.object({
1159
- request: z.string().describe('The task or question to delegate to the agent'),
1160
- }),
1161
- execute: async ({ request: task }) => {
1162
- console.log(`\n🔨 [realtime] Task: "${task}"`);
1163
- // Guard: if ask_haiku is currently handling a similar question, skip ask_agent
1164
- // This prevents the double-calling pattern where Gemini fires both in rapid succession
1165
- if (haikuInFlight && (Date.now() - haikuInFlight.time) < 8000) {
1166
- console.log(`⏭️ Skipping ask_agent — ask_haiku is already handling: "${haikuInFlight.question.substring(0, 60)}"`);
1167
- return 'The fast brain is already looking into this. Wait for its answer first.';
1168
- }
1169
- // Deduplication guard: prevent re-execution of same task within 10s
1170
- const now = Date.now();
1171
- if (task === lastTaskRequest && (now - lastTaskTime) < 10000) {
1172
- console.log('⏭️ Skipping duplicate task (within 10s window)');
1173
- return 'This task was just completed. The results were already relayed.';
1174
- }
1175
- lastTaskRequest = task;
1176
- lastTaskTime = now;
1177
- return executeResearch(task);
1178
- },
1179
- });
1180
- const respondPermissionTool = llm.tool({
1181
- description: `Respond to a permission request. Call after hearing user's response.`,
1285
+ // The realtime model is a thin teleprompter — only 2 tools:
1286
+ // 1. ask_fast_brain: ALL user questions route here (the fast brain decides everything)
1287
+ // 2. respond_permission: voice permission flow for Claude SDK blocked operations
1288
+ const askFastBrainTool = llm.tool({
1289
+ description: `Ask your brain. Call this for EVERY user message — greetings, questions, decisions, requests, everything. No exceptions. Returns what you should say.`,
1182
1290
  parameters: z.object({
1183
- response: z.enum(['allow', 'deny', 'always_allow']),
1184
- }),
1185
- execute: async ({ response }) => {
1186
- if (!realtimeClaudeHandler?.hasPendingPermission()) {
1187
- return 'No pending permission.';
1188
- }
1189
- const pending = realtimeClaudeHandler.getPendingPermission();
1190
- const allow = response === 'allow' || response === 'always_allow';
1191
- realtimeClaudeHandler.respondToPermission(allow);
1192
- await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
1193
- return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
1194
- },
1195
- });
1196
- const readSpecTool = llm.tool({
1197
- description: `Read the session spec (spec.md) — shared state between you and your backend agent.
1198
- Use when: checking decisions, reading open questions to ask the user, understanding architecture/context, seeing what research has been saved. Updated by your backend agent during research.`,
1199
- parameters: z.object({}),
1200
- execute: async () => {
1201
- const sessionId = currentLLM?.sessionId || resumeSessionId;
1202
- if (!sessionId)
1203
- return 'No session spec yet — session is still initializing.';
1204
- const specContent = readSessionSpec(workingDir, sessionId);
1205
- if (!specContent)
1206
- return 'Spec is empty — no research done yet.';
1207
- const libraryFiles = listLibraryFiles(workingDir, sessionId);
1208
- const libSection = libraryFiles.length > 0
1209
- ? `\n\n[LIBRARY FILES: ${libraryFiles.join(', ')}]`
1210
- : '';
1211
- const MAX = 4000;
1212
- const content = specContent.length > MAX
1213
- ? specContent.substring(0, MAX) + '\n\n[... truncated]'
1214
- : specContent;
1215
- return content + libSection;
1216
- },
1217
- });
1218
- const askHaikuTool = llm.tool({
1219
- description: `Ask your fast brain — a quick knowledge assistant with access to session files and web search (~2 seconds).
1220
-
1221
- Use for:
1222
- - Questions answerable from the session spec or research library (much faster than ask_agent)
1223
- - Quick web lookups for simple factual questions (definitions, current versions, basic how-to)
1224
- - Recording user decisions: "User decided: [decision]. Update the spec."
1225
- - Recording user preferences: "User prefers: [preference]. Update the spec."
1226
- - Checking what research has been done on a topic
1227
- - Reading specific library files for details
1228
-
1229
- Do NOT use for: deep research, code analysis, multi-file codebase exploration, complex investigations → use ask_agent.
1230
- If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to look deeper, then call ask_agent with the context it provides.`,
1231
- parameters: z.object({
1232
- question: z.string().describe('The question to ask or instruction to execute'),
1291
+ question: z.string().describe('The user\'s question or statement'),
1233
1292
  }),
1234
1293
  execute: async ({ question }) => {
1235
- const sessionId = currentLLM?.sessionId || resumeSessionId;
1236
- if (!sessionId)
1237
- return 'Session not ready yet. Try ask_agent instead.';
1294
+ // INJECTION BYPASS: When Gemini receives a system injection via generateReply(),
1295
+ // it calls ask_fast_brain with the injection content (Gemini always calls tools).
1296
+ // For Gemini: this is the INTENDED path — we deliberately don't set toolChoice:'none'
1297
+ // so the tool call goes through and we return the content as a tool response.
1298
+ // For OpenAI: this is a fallback guard — OpenAI normally speaks instructions directly
1299
+ // with toolChoice:'none', but if it somehow calls the tool, we handle it here.
1300
+ const injectionMatch = question.match(/\[(SCRIPT|PROACTIVE|NOTIFICATION)\]\s*([\s\S]*)/);
1301
+ if (injectionMatch) {
1302
+ const content = injectionMatch[2].trim();
1303
+ console.log(`⚡ [fast brain] BYPASS: injection [${injectionMatch[1]}] → returning content directly (${content.length} chars)`);
1304
+ return content || question;
1305
+ }
1306
+ // Use pending sessionId for fresh sessions where SDK hasn't assigned one yet
1307
+ const sessionId = currentLLM?.sessionId || currentResumeSessionId || resumeSessionId || 'pending';
1238
1308
  console.log(`🧠 [fast brain] Question: "${question.substring(0, 80)}..."`);
1239
- // Track in-flight state to prevent ask_agent double-calling
1309
+ // Track in-flight state
1240
1310
  haikuInFlight = { question, time: Date.now() };
1241
- // Build live research context if the agent is actively researching
1242
- // This is a READ of the existing researchLog array — safe, no race conditions
1311
+ // Build research context from active research or last completed research
1243
1312
  let researchContext;
1244
1313
  if (activeResearch && activeResearch.researchLog.length > 0) {
1245
1314
  const recentLog = activeResearch.researchLog.slice(-15);
1246
1315
  researchContext = `Research topic: "${lastTaskRequest || 'unknown'}"\nSteps completed (${activeResearch.researchLog.length} total, showing last ${recentLog.length}):\n${recentLog.join('\n')}`;
1247
1316
  }
1317
+ else if (lastCompletedResearch && (Date.now() - lastCompletedResearch.completedAt) < 600000) {
1318
+ // Include context from last completed research (within 10 minutes)
1319
+ const recentLog = lastCompletedResearch.researchLog.slice(-15);
1320
+ researchContext = `[COMPLETED RESEARCH] Topic: "${lastCompletedResearch.task}"\nSteps completed (${lastCompletedResearch.researchLog.length} total, showing last ${recentLog.length}):\n${recentLog.join('\n')}\n\n(Research completed — results are in JSONL and spec.md. Answer from those, do NOT trigger new research on this topic.)`;
1321
+ }
1322
+ const callbacks = {
1323
+ triggerResearch: (task) => {
1324
+ // Deduplication guard
1325
+ const now = Date.now();
1326
+ if (task === lastTaskRequest && (now - lastTaskTime) < 10000) {
1327
+ console.log('⏭️ Skipping duplicate research task (within 10s window)');
1328
+ return;
1329
+ }
1330
+ lastTaskRequest = task;
1331
+ lastTaskTime = now;
1332
+ executeResearch(task);
1333
+ },
1334
+ queueVoice: (script) => {
1335
+ queueVoiceInjection(getScriptInjection(script));
1336
+ },
1337
+ sendToFrontend: (data) => {
1338
+ sendToFrontend(data);
1339
+ },
1340
+ };
1248
1341
  try {
  const chatHistory = getChatHistory(20);
- const answer = await askHaiku(workingDir, sessionId, question, researchContext, chatHistory);
- haikuInFlight = null; // Clear in-flight state
- console.log(`🧠 [fast brain] Answer (${answer.length} chars)`);
- // Notify frontend if the fast brain likely wrote to spec.md
- // (fast brain writes bypass the SDK tool system, so no tool_result event fires)
- if (answer.includes('Written: spec.md') || question.toLowerCase().includes('update the spec') || question.toLowerCase().includes('user decided') || question.toLowerCase().includes('user prefers')) {
- const specPath = `${workingDir}/.osborn/sessions/${sessionId}/spec.md`;
- sendToFrontend({
- type: 'research_artifact_updated',
- filePath: specPath,
- fileName: 'spec.md',
- });
+ const result = await askFastBrain(workingDir, sessionId, question, {
+ chatHistory,
+ researchContext,
+ callbacks,
+ sessionBaseDir,
+ });
+ haikuInFlight = null;
+ // Voice queue items may have been held while fast brain was in flight — retry now
+ if (voiceQueue.length > 0) {
+ setTimeout(() => processVoiceQueue(), 500);
  }
- // If research is active and this was a user decision/direction,
- // also queue it for the agent SDK so it picks up the context
- // when its queue reaches the next query
- if (activeResearch && (question.toLowerCase().includes('user decided') ||
- question.toLowerCase().includes('user prefers') ||
- question.toLowerCase().includes('update the spec') ||
- question.toLowerCase().includes('also check') ||
+ console.log(`🧠 [fast brain] Response type: ${result.type}, script: ${result.script.length} chars`);
+ // If this was a user direction during active research,
+ // pass it to the agent SDK so it picks up the context
+ if (activeResearch && result.type === 'recorded' && (question.toLowerCase().includes('decided') ||
+ question.toLowerCase().includes('prefers') ||
  question.toLowerCase().includes('focus on') ||
  question.toLowerCase().includes('redirect'))) {
- console.log(`📨 [fast brain] Passing user direction to agent SDK queue: "${question.substring(0, 60)}..."`);
- // Queue as a lightweight context update agent reads spec.md
- // at the start of its next query and will see the updated direction
- executeResearch(`[USER DIRECTION during active research] ${question}. The user's spec.md has been updated with this. Acknowledge briefly and incorporate into your current research context.`);
+ console.log(`📨 [fast brain] Passing user direction to agent SDK queue`);
+ executeResearch(`[USER DIRECTION during active research] ${question}. The user's spec.md has been updated. Acknowledge briefly and incorporate.`);
  }
- return answer;
+ return result.script;
  }
  catch (err) {
- haikuInFlight = null; // Clear in-flight state on error
+ haikuInFlight = null;
+ // Voice queue items may have been held while fast brain was in flight — retry now
+ if (voiceQueue.length > 0) {
+ setTimeout(() => processVoiceQueue(), 500);
+ }
  console.error('❌ Fast brain failed:', err);
- return 'Fast brain lookup failed. Try ask_agent for a deeper search.';
+ return 'I\'m having trouble processing that. Could you try again?';
  }
  },
  });
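
From these call sites, askFastBrain resolves to a typed result rather than a bare string; a hypothetical JSDoc sketch of the shape implied here (the real definition lives in fast-brain.d.ts, which this excerpt doesn't show):

```js
// Assumed shape, inferred from the result.type / result.script usage above.
/**
 * @typedef {Object} FastBrainResult
 * @property {string} type   - routing outcome, e.g. 'recorded' when the fast brain
 *                             logged a user direction instead of answering directly
 * @property {string} script - speakable text handed back to the realtime voice model
 */
```
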
- const generateDocumentTool = llm.tool({
- description: `Generate a visual document (comparison table, Mermaid diagram, structured analysis, summary) from research findings. Saved to the session library as a markdown file.
-
- Use when the user asks for:
- - "Compare X and Y" → type: 'comparison' (markdown table with features, pros, cons)
- - "Draw a diagram" / "Show the architecture" / "Map out the flow" → type: 'diagram' (Mermaid flowchart/sequence/architecture)
- - "Analyze the tradeoffs" / "Break down the options" → type: 'analysis' (structured pros/cons, decision matrix)
- - "Summarize what we found" / "Give me an overview document" → type: 'summary' (organized findings with key takeaways)
-
- For actual images (photos, illustrations, screenshots), use ask_agent instead — this tool generates text-based visual documents only.`,
+ const respondPermissionTool = llm.tool({
+ description: `Respond to a permission request. Call after hearing user's response.`,
  parameters: z.object({
- request: z.string().describe('What to generate — be specific about the topic and what aspects to cover'),
- type: z.enum(['comparison', 'diagram', 'analysis', 'summary']).describe('Document type'),
+ response: z.enum(['allow', 'deny', 'always_allow']),
  }),
- execute: async ({ request, type }) => {
- const sid = currentLLM?.sessionId || resumeSessionId;
- if (!sid)
- return 'Session not ready yet.';
- console.log(`📊 [generate_document] Type: ${type}, Request: "${request.substring(0, 60)}..."`);
- try {
- const result = await generateVisualDocument(workingDir, sid, request, type);
- if (!result)
- return 'Could not generate document — not enough research context available.';
- const fullPath = `${workingDir}/.osborn/sessions/${sid}/library/${result.fileName}`;
- sendToFrontend({
- type: 'research_artifact_updated',
- filePath: fullPath,
- fileName: result.fileName,
- });
- return `Generated: ${result.fileName} (${result.content.length} chars) — saved to session library. The document contains a ${type} with the requested information.`;
- }
- catch (err) {
- console.error('❌ Document generation failed:', err);
- return 'Document generation failed. Try asking the research agent for a more detailed analysis.';
+ execute: async ({ response }) => {
+ if (!realtimeClaudeHandler?.hasPendingPermission()) {
+ return 'No pending permission.';
  }
+ const pending = realtimeClaudeHandler.getPendingPermission();
+ const allow = response === 'allow' || response === 'always_allow';
+ realtimeClaudeHandler.respondToPermission(allow);
+ await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
+ return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
  },
  });
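
The three-way enum collapses to a boolean for the handler; a tiny sketch of that mapping (whether 'always_allow' persists the grant is up to realtimeClaudeHandler, which isn't shown here):

```js
// Mirrors the mapping inside execute() above; illustration only.
const toAllow = (response) => response === 'allow' || response === 'always_allow';

toAllow('allow');        // true
toAllow('always_allow'); // true:  treated as allow; persistence is the handler's concern
toAllow('deny');         // false: the pending tool call is rejected
```
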
  // Instructions for realtime voice LLM
  const realtimeInstructions = getRealtimeInstructions(workingDir);
  // Create realtime model
  const realtimeModel = createRealtimeModelFromConfig(rtConfig, realtimeInstructions);
- // Create the Agent with realtime model and tools
+ // Create the Agent with MINIMAL tools: fast brain handles all routing
  const agent = new voice.Agent({
  instructions: realtimeInstructions,
  llm: realtimeModel,
  tools: {
- ask_agent: askAgentTool,
- ask_haiku: askHaikuTool,
- read_spec: readSpecTool,
- generate_document: generateDocumentTool,
+ ask_fast_brain: askFastBrainTool,
  respond_permission: respondPermissionTool,
  },
  });
@@ -1352,35 +1419,51 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  // Clean up active research and voice queue
  voiceQueue.length = 0;
  isProcessingQueue = false;
+ currentSpeechHandle = null;
+ lastInterruption = null;
  if (researchBatchTimer) {
  clearTimeout(researchBatchTimer);
  researchBatchTimer = null;
  }
  stopProactiveLoop();
  if (activeResearch) {
+ activeResearch.abortController.abort();
  activeResearch.cleanup();
  activeResearch = null;
  }
+ lastCompletedResearch = null;
  currentSession = null;
  currentAgent = null;
  currentLLM = null;
- clearFastBrainHistory();
+ clearFastBrainSession();
+ clearPipelineFastBrainSession();
  });
  room.on(RoomEvent.ParticipantConnected, async (participant) => {
  console.log(`\n👤 User joined: ${participant.identity}`);
+ // Wait for previous session's byte stream handler to fully deregister.
+ // Quick reconnects (< ~6s) crash with "byte stream handler already set" without this.
+ if (pendingSessionClose) {
+ console.log('⏳ Waiting for previous session to fully close...');
+ await pendingSessionClose;
+ }
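
pendingSessionClose pairs this await with the async close started in the ParticipantDisconnected handler later in this diff; a minimal sketch of the handoff, simplified but using the same names:

```js
// Sketch of the close/connect handoff (condensed from this diff's two handlers).
let pendingSessionClose = null;

function onParticipantDisconnected(session) {
  pendingSessionClose = (async () => {
    try { await session.close(); }          // releases the byte stream handler
    catch { }
    finally { pendingSessionClose = null; }
  })();
}

async function onParticipantConnected() {
  if (pendingSessionClose) await pendingSessionClose; // quick reconnects wait here
  // ...now safe to register a new byte stream handler for the fresh session
}
```
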
  // Clean up any existing session before creating a new one
  voiceQueue.length = 0;
  isProcessingQueue = false;
+ currentSpeechHandle = null;
+ lastInterruption = null;
  if (researchBatchTimer) {
  clearTimeout(researchBatchTimer);
  researchBatchTimer = null;
  }
  stopProactiveLoop();
- clearFastBrainHistory();
+ clearFastBrainSession();
+ clearPipelineFastBrainSession();
  if (activeResearch) {
+ activeResearch.abortController.abort();
  activeResearch.cleanup();
  activeResearch = null;
  }
+ lastCompletedResearch = null;
  if (currentSession) {
  console.log('🧹 Cleaning up previous session...');
  try {
@@ -1403,7 +1486,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  try {
  const metadata = JSON.parse(participant.metadata || '{}');
  console.log(`📋 Participant metadata:`, metadata);
- if (metadata.voiceArch === 'realtime' || metadata.voiceArch === 'direct') {
+ if (metadata.voiceArch === 'realtime' || metadata.voiceArch === 'direct' || metadata.voiceArch === 'pipeline') {
  sessionVoiceMode = metadata.voiceArch;
  console.log(`🎙️ Using voice mode from frontend: ${sessionVoiceMode}`);
  }
@@ -1420,6 +1503,15 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  preSelectedSessionId = metadata.sessionId;
  console.log(`📂 Pre-selected session from frontend: ${preSelectedSessionId}`);
  }
+ // Read working directory override from frontend
+ if (metadata.workingDirectory && typeof metadata.workingDirectory === 'string' && metadata.workingDirectory.length > 0) {
+ workingDir = metadata.workingDirectory;
+ console.log(`📂 Working directory from frontend: ${workingDir}`);
+ }
+ else {
+ // Reset to default for new connections (in case previous session changed it)
+ workingDir = defaultWorkingDir;
+ }
  }
  catch (err) {
  console.log('⚠️ Could not parse participant metadata, using config voiceMode:', voiceMode);
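
Putting these hunks together, the participant metadata the agent now accepts looks roughly like this (field names come from the diff; values are illustrative):

```js
// Example metadata set on the frontend's participant token (values hypothetical).
const participantMetadata = {
  voiceArch: 'pipeline',                // 'realtime' | 'direct' | 'pipeline' (new in 0.5.5)
  sessionId: 'a1b2c3d4',                // optional: resume this session
  workingDirectory: '/home/me/project', // optional: overrides defaultWorkingDir
};
```
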
@@ -1429,6 +1521,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  currentProvider = sessionRealtimeProvider;
  // Resume session ID — only set when resuming an existing session
  const resumeSessionId = preSelectedSessionId || undefined;
+ currentResumeSessionId = resumeSessionId;
  if (resumeSessionId) {
  console.log(`🆔 Resuming session: ${resumeSessionId}`);
  }
@@ -1446,6 +1539,46 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  session = result.session;
  agent = result.agent;
  }
+ else if (sessionVoiceMode === 'pipeline') {
+ console.log(`🎯 PIPELINE MODE: Claude SDK + parallel Gemini fast brain observer`);
+ // Pipeline mode = direct mode underneath + parallel fast brain
+ // Fast brain runs in PipelineDirectLLM.chat() — fires Gemini alongside Claude
+ const { createPipelineDirectLLM } = await import('./pipeline-direct-llm.js');
+ const pipelineLLM = createPipelineDirectLLM({
+ workingDirectory: workingDir,
+ sessionBaseDir,
+ mcpServers,
+ resumeSessionId,
+ voiceMode: 'direct',
+ skipTTSQueue: true,
+ getChatHistory: () => getChatHistory(20).map(t => ({ role: t.role, content: t.text })),
+ getResearchContext: () => {
+ if (activeResearch?.researchLog.length) {
+ return `Research: "${lastTaskRequest}"\n${activeResearch.researchLog.slice(-15).join('\n')}`;
+ }
+ if (lastCompletedResearch && Date.now() - lastCompletedResearch.completedAt < 600000) {
+ return `[COMPLETED] "${lastCompletedResearch.task}"\n${lastCompletedResearch.researchLog.slice(-15).join('\n')}`;
+ }
+ },
+ getAndConsumeInterruptionContext,
+ onFastBrainResult: (result) => {
+ console.log(`🧠⚡ [FAST_BRAIN ${result.type.toUpperCase()} +${result.elapsedMs}ms]: "${result.answer.substring(0, 60)}"`);
+ sendToFrontend({
+ type: 'fast_brain_response',
+ text: result.answer,
+ responseType: result.type,
+ elapsedMs: result.elapsedMs,
+ question: result.question,
+ toolsUsed: result.toolsUsed,
+ agentRole: 'pipeline-fast-brain',
+ });
+ },
+ });
+ // Pass pipelineLLM to createDirectSession so it uses it instead of creating a new ClaudeLLM
+ const result = await createDirectSession(resumeSessionId, pipelineLLM);
+ session = result.session;
+ agent = result.agent;
+ }
  else {
  console.log(`🎯 DIRECT MODE: Claude Agent SDK with full coding capabilities`);
  const result = await createDirectSession(resumeSessionId);
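
Conceptually, pipeline mode fans each user turn out to both models at once; a sketch of the pattern (function names hypothetical, result shape from the onFastBrainResult wiring above, not the PipelineDirectLLM implementation):

```js
// Conceptual sketch of a pipeline-mode turn, assuming askClaude/askGemini helpers.
async function pipelineTurn(userText, { askClaude, askGemini, onFastBrainResult }) {
  const started = Date.now();
  // Fast brain is fire-and-forget: its result goes to the frontend, never to TTS.
  const fast = askGemini(userText)
    .then(answer => onFastBrainResult({
      type: 'answer', answer, question: userText, elapsedMs: Date.now() - started,
    }))
    .catch(() => { /* observer failures must not break the voice turn */ });
  const reply = await askClaude(userText); // the spoken reply still comes from Claude
  await fast;
  return reply;
}
```
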
@@ -1458,7 +1591,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  // Session event wiring — extracted into function for auto-recovery
  // ============================================================
  let lastRecoveryTime = 0;
- const MIN_RECOVERY_INTERVAL = 10000; // 10 seconds between recovery attempts
+ const MIN_RECOVERY_INTERVAL = 3000; // 3 seconds between recovery attempts
  function wireSessionEvents(sess, agt) {
  // Transcript dedup state (reset per wiring)
  let lastSentUserTranscript = '';
@@ -1471,6 +1604,10 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  return;
  if (normalized === '<noise>' || normalized.toLowerCase() === 'thank you')
  return;
+ // Filter out voice injection content that appears as user transcript
+ // (Gemini v1.0.51: userInput in generateReply creates a user conversation item)
+ if (normalized.startsWith('[SCRIPT]') || normalized.startsWith('[PROACTIVE]') || normalized.startsWith('[NOTIFICATION]'))
+ return;
  console.log(`📝 User (${source}): "${transcript.substring(0, 60)}..."`);
  sendToFrontend({ type: 'user_transcript', text: transcript });
  lastSentUserTranscript = normalized;
@@ -1527,6 +1664,10 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  sess.on('user_state_changed', (ev) => {
  userState = ev.newState;
  console.log(`👤 User state: ${ev.newState}`);
+ // When user stops speaking, retry voice queue — items may be waiting
+ if (ev.newState === 'listening' && voiceQueue.length > 0) {
+ setTimeout(() => processVoiceQueue(), 500);
+ }
  });
  // FALLBACK: playout_completed
  sess.on('playout_completed', (ev) => {
@@ -1543,13 +1684,153 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  console.log('⚠️ OpenAI active response collision — queue will retry on next listening state');
  return;
  }
+ // TTS abort from user interruption is normal — not an error
+ if (msg.includes('Request was aborted') || msg.includes('APIUserAbortError') || msg.includes('aborted')) {
+ console.log('⚠️ LLM request aborted (user interrupted)');
+ return;
+ }
  console.error('❌ Session error:', ev.error);
  });
- // Close handler with auto-recovery for Gemini 1008 crashes
+ // Capture voice mode at session creation: prevents state confusion
+ // if currentVoiceMode changes between session start and crash recovery
+ const sessionVoiceMode = currentVoiceMode;
+ // Close handler with auto-recovery for crashes (both realtime and direct modes)
  sess.on('close', async (ev) => {
  console.log('🚪 Session closed:', ev.reason);
+ // TTS abort from user interruption — SDK already killed the session internally,
+ // so we MUST recover (can't just reset state — STT pipeline is dead).
+ // Log it distinctly so we know it's an interrupt recovery, not a real crash.
+ const errorMsg = ev.error?.message || ev.error?.error?.message || '';
+ const isTTSAbort = errorMsg.includes('aborted') || errorMsg.includes('APIUserAbortError');
+ if (isTTSAbort) {
+ console.log('⚠️ TTS abort from user interruption — recovering session (SDK killed it internally)');
+ }
+ // Auto-recover from crashes in direct/pipeline mode (includes TTS abort)
+ if ((ev.reason === 'error' || ev.reason === 'disconnected') && (sessionVoiceMode === 'direct' || sessionVoiceMode === 'pipeline')) {
+ const now = Date.now();
+ if (now - lastRecoveryTime < MIN_RECOVERY_INTERVAL) {
+ console.log(`⚠️ Recovery too frequent — scheduling retry in ${MIN_RECOVERY_INTERVAL}ms`);
+ setTimeout(async () => {
+ // Re-check: if session was already recovered or user left, skip
+ if (currentSession || !room.remoteParticipants.size)
+ return;
+ console.log('🔄 Retrying direct mode recovery after guard interval...');
+ // Trigger recovery by emitting a synthetic close
+ sess.emit('close', { reason: 'error' });
+ }, MIN_RECOVERY_INTERVAL);
+ return;
+ }
+ lastRecoveryTime = now;
+ console.log(`🔄 Auto-recovering direct mode session (reason: ${ev.reason})...`);
+ // Clean up dead session — match realtime recovery's thoroughness
+ try {
+ sess.removeAllListeners();
+ }
+ catch { }
+ currentSession = null;
+ currentAgent = null;
+ // Clear stale state from crashed session
+ voiceQueue.length = 0;
+ isProcessingQueue = false;
+ haikuInFlight = null;
+ if (researchBatchTimer) {
+ clearTimeout(researchBatchTimer);
+ researchBatchTimer = null;
+ }
+ stopProactiveLoop();
+ if (activeResearch) {
+ activeResearch.abortController.abort();
+ activeResearch.cleanup();
+ activeResearch = null;
+ }
+ try {
+ // Reuse existing session ID so Claude SDK resumes where it left off
+ const recoverySessionId = currentLLM?.sessionId || resumeSessionId;
+ // Stop old index watcher if it exists
+ if (currentLLM && 'stopIndexWatcher' in currentLLM) {
+ currentLLM.stopIndexWatcher();
+ }
+ let result;
+ if (sessionVoiceMode === 'pipeline') {
+ // Pipeline mode: recreate PipelineDirectLLM wrapper with fast brain
+ console.log('🔄 Rebuilding pipeline mode (PipelineDirectLLM + fast brain)...');
+ const { createPipelineDirectLLM } = await import('./pipeline-direct-llm.js');
+ const pipelineLLM = createPipelineDirectLLM({
+ workingDirectory: workingDir,
+ sessionBaseDir,
+ mcpServers,
+ resumeSessionId: recoverySessionId,
+ voiceMode: 'direct',
+ skipTTSQueue: true,
+ getChatHistory: () => getChatHistory(20).map(t => ({ role: t.role, content: t.text })),
+ getResearchContext: () => {
+ if (activeResearch?.researchLog.length) {
+ return `Research: "${lastTaskRequest}"\n${activeResearch.researchLog.slice(-15).join('\n')}`;
+ }
+ if (lastCompletedResearch && Date.now() - lastCompletedResearch.completedAt < 600000) {
+ return `[COMPLETED] "${lastCompletedResearch.task}"\n${lastCompletedResearch.researchLog.slice(-15).join('\n')}`;
+ }
+ },
+ getAndConsumeInterruptionContext,
+ onFastBrainResult: (r) => {
+ console.log(`🧠⚡ [FAST_BRAIN ${r.type.toUpperCase()} +${r.elapsedMs}ms]: "${r.answer.substring(0, 60)}"`);
+ sendToFrontend({
+ type: 'fast_brain_response', text: r.answer, responseType: r.type,
+ elapsedMs: r.elapsedMs, question: r.question, toolsUsed: r.toolsUsed,
+ agentRole: 'pipeline-fast-brain',
+ });
+ },
+ });
+ result = await createDirectSession(recoverySessionId, pipelineLLM);
+ }
+ else {
+ result = await createDirectSession(recoverySessionId);
+ }
+ const newSession = result.session;
+ const newAgent = result.agent;
+ currentSession = newSession;
+ currentAgent = newAgent;
+ // Re-wire event listeners on the new session
+ wireSessionEvents(newSession, newAgent);
+ await newSession.start({ agent: newAgent, room });
+ // Sync state
+ agentState = 'listening';
+ sendToFrontend({ type: 'agent_state', state: 'listening' });
+ // Resume Claude session if one was active
+ if (currentLLM?.sessionId) {
+ currentLLM.setContinueSession(true);
+ }
+ console.log('✅ Direct mode auto-recovery complete');
+ // Notify user via TTS
+ try {
+ const recoveredId = currentLLM?.sessionId || recoverySessionId;
+ if (recoveredId) {
+ const conversationHistory = await getConversationHistory(recoveredId, workingDir, 10);
+ const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+ const script = await prepareRecoveryScript(historyForScript);
+ // Direct mode: use session.say() for recovery notification
+ newSession.say(script, { allowInterruptions: true });
+ }
+ else {
+ newSession.say('Voice session was briefly interrupted but I\'m back. What were we working on?', { allowInterruptions: true });
+ }
+ }
+ catch (err) {
+ console.log('⚠️ Failed to generate recovery script:', err);
+ try {
+ newSession.say('I\'m back after a brief interruption. What were we working on?', { allowInterruptions: true });
+ }
+ catch { }
+ }
+ }
+ catch (err) {
+ console.error('❌ Direct mode auto-recovery failed:', err);
+ sendToFrontend({ type: 'agent_state', state: 'error' });
+ }
+ return;
+ }
  // Auto-recover from crashes in realtime mode
- if (ev.reason === 'error' && currentVoiceMode === 'realtime') {
+ if (ev.reason === 'error' && sessionVoiceMode === 'realtime') {
  const now = Date.now();
  if (now - lastRecoveryTime < MIN_RECOVERY_INTERVAL) {
  console.log('⚠️ Recovery too frequent — skipping to prevent loop');
@@ -1574,6 +1855,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  }
  stopProactiveLoop();
  if (activeResearch) {
+ activeResearch.abortController.abort();
  activeResearch.cleanup();
  activeResearch = null;
  }
@@ -1597,29 +1879,23 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  if (currentLLM?.sessionId) {
  currentLLM.setContinueSession(true);
  }
- // Inject conversation context into the recovered session
+ // Generate recovery script via fast brain
  const recoveredSessionId = currentLLM?.sessionId || recoverySessionId;
  if (recoveredSessionId) {
  try {
- const summary = await getSessionSummary(recoveredSessionId, workingDir);
- const conversationHistory = await getConversationHistory(recoveredSessionId, workingDir, 30);
- if (summary && conversationHistory.length > 0) {
- const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
- queueVoiceInjection(`[SESSION RECOVERED] The voice session crashed and was auto-recovered. Here's the conversation context from before the crash:\n${contextBriefing}\n\nBriefly tell the user the connection was interrupted and you still have context from the conversation. Ask if they can hear you and what they'd like to continue with. Do NOT call any tools.`);
- console.log('📋 Injected conversation context into recovered session');
- }
- else {
- queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
- }
+ const conversationHistory = await getConversationHistory(recoveredSessionId, workingDir, 10);
+ const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+ const script = await prepareRecoveryScript(historyForScript);
+ queueVoiceInjection(getScriptInjection(script));
+ console.log('📋 Injected recovery script into recovered session');
  }
  catch (err) {
- console.log('⚠️ Failed to load conversation context for recovery:', err);
- queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
+ console.log('⚠️ Failed to generate recovery script:', err);
+ queueVoiceInjection(getNotificationInjection('Voice session was briefly interrupted but I\'m back. What were we working on?'));
  }
  }
  else {
- // No session ID generic notification
- queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
+ queueVoiceInjection(getNotificationInjection('Voice session was briefly interrupted but I\'m back. What were we working on?'));
  }
  console.log('✅ Auto-recovery complete');
  }
@@ -1667,6 +1943,8 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  preSelectedSessionId,
  mcpServers: getMcpServerStatusList(config),
  enabledMcpServers: enabledMcpNames,
+ workingDirectory: workingDir,
+ skills: loadSkillsList(sessionBaseDir),
  });
  };
  const readyInterval = setInterval(sendReady, 2000);
@@ -1685,8 +1963,8 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  // For direct mode: use say() which goes through the configured TTS
  const greetViaVoice = async (text) => {
  if (sessionVoiceMode === 'realtime') {
- // Realtime models handle their own speech generation
- await session.generateReply({ userInput: text });
+ // Use instructions (not userInput) to avoid system text appearing as user transcript
+ await session.generateReply({ instructions: getScriptInjection(text) });
  }
  else {
  await session.say(text);
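
Both generateReply options appear in this file, and the hunk's comment is the whole story: userInput creates a user conversation item that later resurfaces through the transcript filter added earlier, while instructions steers the next spoken reply without adding a user turn. A side-by-side illustration (payload text hypothetical):

```js
// Contrast of the two generateReply payloads used in this file.
await session.generateReply({ userInput: '[SCRIPT] Welcome back.' });
// → adds a user item; it can echo back as a "[SCRIPT] ..." user transcript

await session.generateReply({ instructions: getScriptInjection('Welcome back.') });
// → the model speaks the script; no synthetic user turn enters the conversation
```
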
@@ -1707,7 +1985,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  success: true,
  });
  // Send existing workspace artifacts to frontend (session-scoped)
- const preArtifacts = listWorkspaceArtifacts(workingDir, preSelectedSessionId);
+ const preArtifacts = listWorkspaceArtifacts(sessionBaseDir, preSelectedSessionId);
  if (preArtifacts.length > 0) {
  console.log(`📁 Sending ${preArtifacts.length} workspace artifacts to frontend`);
  await sendToFrontend({
@@ -1721,18 +1999,14 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  }))
  });
  }
- // Load full session history into realtime model's context
+ // Generate briefing script via fast brain
  if (summary) {
  loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
- const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
- const specContent = getSpecForVoiceModel(workingDir, preSelectedSessionId);
- const specSection = specContent
- ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
- : '';
  try {
  if (sessionVoiceMode === 'realtime') {
- const contextPrompt = `[SESSION RESUMED] The user chose to continue a previous research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
- await session.generateReply({ instructions: contextPrompt });
+ const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
+ const script = await prepareBriefingScript(sessionBaseDir, preSelectedSessionId, historyForScript);
+ await session.generateReply({ instructions: getScriptInjection(script) });
  }
  else {
  await session.say("Welcome back! Ready to continue our previous conversation.");
@@ -1752,7 +2026,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  // No sessions at all (or new session chosen) — greet as new user
  try {
  console.log('👋 Sending greeting...');
- await greetViaVoice("The user just connected for the first time. Briefly greet them as Osborn and ask what they're working on.");
+ await greetViaVoice("Hey! I'm Osborn, your AI research assistant. What are you working on today?");
  console.log('✅ Greeting sent');
  }
  catch (err) {
@@ -1766,11 +2040,41 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  });
  room.on(RoomEvent.ParticipantDisconnected, (participant) => {
  console.log(`👋 User left: ${participant.identity}`);
+ // Full cleanup — stop all background work to avoid accumulating API usage
+ voiceQueue.length = 0;
+ isProcessingQueue = false;
+ currentSpeechHandle = null;
+ lastInterruption = null;
+ if (researchBatchTimer) {
+ clearTimeout(researchBatchTimer);
+ researchBatchTimer = null;
+ }
+ stopProactiveLoop();
+ if (activeResearch) {
+ activeResearch.abortController.abort();
+ activeResearch.cleanup();
+ activeResearch = null;
+ }
  if (currentSession) {
- currentSession.removeAllListeners();
+ const sessionToClose = currentSession;
  currentSession = null;
- currentLLM = null;
+ // Track async close so new connections can wait for byte stream handler to be released
+ pendingSessionClose = (async () => {
+ try {
+ await sessionToClose.close();
+ }
+ catch { }
+ try {
+ sessionToClose.removeAllListeners();
+ }
+ catch { }
+ pendingSessionClose = null;
+ })();
  }
+ currentAgent = null;
+ currentLLM = null;
+ clearFastBrainSession();
+ clearPipelineFastBrainSession();
  console.log('⏳ Waiting for new user...\n');
  });
  room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
  room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
@@ -1833,6 +2137,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1833
2137
  const sessionId = data.sessionId;
1834
2138
  if (sessionId && sessionExists(sessionId, workingDir)) {
1835
2139
  currentLLM.setResumeSessionId(sessionId);
2140
+ currentResumeSessionId = sessionId;
1836
2141
  console.log(`🔄 Will resume session: ${sessionId}`);
1837
2142
  await sendToFrontend({
1838
2143
  type: 'session_resume_set',
@@ -1840,7 +2145,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1840
2145
  success: true,
1841
2146
  });
1842
2147
  // Send existing session artifacts to frontend (session-scoped)
1843
- const artifacts = listWorkspaceArtifacts(workingDir, sessionId);
2148
+ const artifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
1844
2149
  if (artifacts.length > 0) {
1845
2150
  console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
1846
2151
  await sendToFrontend({
@@ -1869,6 +2174,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1869
2174
  const recentId = await getMostRecentSessionId(workingDir);
1870
2175
  if (recentId) {
1871
2176
  currentLLM.setResumeSessionId(recentId);
2177
+ currentResumeSessionId = recentId;
1872
2178
  console.log(`🔄 Continuing most recent session: ${recentId}`);
1873
2179
  const summary = await getSessionSummary(recentId, workingDir);
1874
2180
  const conversationHistory = await getConversationHistory(recentId, workingDir, 30);
@@ -1878,7 +2184,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1878
2184
  success: true,
1879
2185
  });
1880
2186
  // Send existing session artifacts to frontend (session-scoped)
1881
- const artifacts = listWorkspaceArtifacts(workingDir, recentId);
2187
+ const artifacts = listWorkspaceArtifacts(sessionBaseDir, recentId);
1882
2188
  if (artifacts.length > 0) {
1883
2189
  console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
1884
2190
  await sendToFrontend({
@@ -1894,16 +2200,12 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1894
2200
  }
1895
2201
  if (currentSession && summary) {
1896
2202
  loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
1897
- const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
1898
- const specContent = getSpecForVoiceModel(workingDir, recentId);
1899
- const specSection = specContent
1900
- ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
1901
- : '';
1902
2203
  console.log('📋 Injecting session context into voice agent...');
1903
2204
  try {
1904
2205
  if (currentVoiceMode === 'realtime') {
1905
- const contextPrompt = `[SESSION RESUMED] The user chose to continue their most recent research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
1906
- await currentSession.generateReply({ instructions: contextPrompt });
2206
+ const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
2207
+ const script = await prepareBriefingScript(sessionBaseDir, recentId, historyForScript);
2208
+ await currentSession.generateReply({ instructions: getScriptInjection(script) });
1907
2209
  }
1908
2210
  else {
1909
2211
  await currentSession.say("Continuing where we left off.");
@@ -1934,7 +2236,9 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1934
2236
  // Step 2: Reset LLM state and configure for new session
1935
2237
  currentLLM.resetForSessionSwitch();
1936
2238
  currentLLM.setResumeSessionId(sessionId);
1937
- clearFastBrainHistory();
2239
+ currentResumeSessionId = sessionId;
2240
+ clearFastBrainSession();
2241
+ clearPipelineFastBrainSession();
1938
2242
  console.log(`🔄 Switched to session: ${sessionId}`);
1939
2243
  // Step 3: Send full context to frontend (including conversation history)
1940
2244
  await sendToFrontend({
@@ -1945,7 +2249,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1945
2249
  conversationHistory,
1946
2250
  });
1947
2251
  // Step 3.5: Send existing session artifacts to frontend (session-scoped)
1948
- const switchArtifacts = listWorkspaceArtifacts(workingDir, sessionId);
2252
+ const switchArtifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
1949
2253
  if (switchArtifacts.length > 0) {
1950
2254
  console.log(`📁 Sending ${switchArtifacts.length} session artifacts to frontend`);
1951
2255
  await sendToFrontend({
@@ -1959,14 +2263,14 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
1959
2263
  }))
1960
2264
  });
1961
2265
  }
1962
- // Step 4: Voice agent acknowledges context
2266
+ // Step 4: Voice agent acknowledges context via fast brain
1963
2267
  if (currentSession && summary) {
1964
2268
  loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
1965
- const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
1966
2269
  try {
1967
2270
  if (currentVoiceMode === 'realtime') {
1968
- const contextPrompt = `[SESSION SWITCHED] The user switched to a different research session. Here's the context:\n${contextBriefing}\n\nBriefly acknowledge the switch and summarize what was being worked on.`;
1969
- await currentSession.generateReply({ instructions: contextPrompt });
2271
+ const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
2272
+ const briefingScript = await prepareBriefingScript(sessionBaseDir, sessionId, historyForScript, 'switch');
2273
+ queueVoiceInjection(getScriptInjection(briefingScript));
1970
2274
  }
1971
2275
  else {
1972
2276
  const acknowledgment = summary.lastMessages.length > 0
@@ -2000,7 +2304,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
2000
2304
  else if (data.type === 'get_session_artifacts') {
2001
2305
  const sessionId = data.sessionId;
2002
2306
  if (sessionId) {
2003
- const artifacts = listWorkspaceArtifacts(workingDir, sessionId);
2307
+ const artifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
2004
2308
  console.log(`📁 Sending ${artifacts.length} session artifacts for ${sessionId.substring(0, 8)}`);
2005
2309
  await sendToFrontend({
2006
2310
  type: 'session_artifacts',
@@ -2136,12 +2440,40 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
  enabledKeys,
  });
  }
+ else if (data.type === 'get_skills') {
+ await sendToFrontend({
+ type: 'skills_status',
+ skills: loadSkillsList(sessionBaseDir),
+ });
+ }
+ else if (data.type === 'skill_add') {
+ const skillName = (data.name || '').trim().toLowerCase().replace(/[^a-z0-9-]/g, '-');
+ const skillContent = (data.content || '').trim();
+ if (!skillName || !skillContent) {
+ await sendToFrontend({ type: 'skill_add_result', success: false, error: 'Name and content are required' });
+ }
+ else {
+ try {
+ const skillDir = join(sessionBaseDir, '.claude', 'skills', skillName);
+ mkdirSync(skillDir, { recursive: true });
+ writeFileSync(join(skillDir, 'SKILL.md'), skillContent, 'utf-8');
+ console.log(`📚 Skill added: ${skillName}`);
+ const skills = loadSkillsList(sessionBaseDir);
+ await sendToFrontend({ type: 'skill_add_result', success: true, skills });
+ }
+ catch (err) {
+ console.error('❌ Failed to add skill:', err);
+ await sendToFrontend({ type: 'skill_add_result', success: false, error: String(err) });
+ }
+ }
+ }
  else if (data.type === 'session_selected') {
  const sessionId = data.sessionId;
  console.log(`🚪 Session gate completed: ${sessionId ? `resume ${sessionId}` : 'fresh start'}`);
  if (sessionId && currentLLM && sessionExists(sessionId, workingDir)) {
  // Resume the selected session
  currentLLM.setResumeSessionId(sessionId);
+ currentResumeSessionId = sessionId;
  console.log(`🔄 Resuming session: ${sessionId}`);
  // Fetch context and greet with it
  const summary = await getSessionSummary(sessionId, workingDir);
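
loadSkillsList is called by the skill handlers above and the ready payload but isn't shown in this diff. Given the layout skill_add writes (SKILL.md inside .claude/skills/<name>/ under sessionBaseDir) and the node:fs imports at the top of this file, a plausible sketch, not the shipped implementation:

```js
// Hypothetical reconstruction of loadSkillsList; the shipped version may differ.
import { existsSync, readdirSync, readFileSync } from 'node:fs';
import { join } from 'node:path';

function loadSkillsListSketch(baseDir) {
  const root = join(baseDir, '.claude', 'skills');
  if (!existsSync(root)) return [];
  return readdirSync(root, { withFileTypes: true })
    .filter(entry => entry.isDirectory() && existsSync(join(root, entry.name, 'SKILL.md')))
    .map(entry => ({
      name: entry.name,
      // first line of SKILL.md doubles as a short description in this sketch
      description: readFileSync(join(root, entry.name, 'SKILL.md'), 'utf-8').split('\n')[0],
    }));
}
```
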
@@ -2152,7 +2484,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
2152
2484
  success: true,
2153
2485
  });
2154
2486
  // Send existing session artifacts to frontend (session-scoped)
2155
- const gateArtifacts = listWorkspaceArtifacts(workingDir, sessionId);
2487
+ const gateArtifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
2156
2488
  if (gateArtifacts.length > 0) {
2157
2489
  console.log(`📁 Sending ${gateArtifacts.length} session artifacts to frontend`);
2158
2490
  await sendToFrontend({
@@ -2166,18 +2498,14 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
2166
2498
  }))
2167
2499
  });
2168
2500
  }
2169
- // Load full session history and greet with context
2501
+ // Load full session history and greet with context via fast brain
2170
2502
  if (currentSession && summary) {
2171
2503
  loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
2172
- const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
2173
- const specContent = getSpecForVoiceModel(workingDir, sessionId);
2174
- const specSection = specContent
2175
- ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
2176
- : '';
2177
2504
  try {
2178
2505
  if (currentVoiceMode === 'realtime') {
2179
- const contextPrompt = `[SESSION RESUMED] The user chose to continue a previous research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
2180
- await currentSession.generateReply({ instructions: contextPrompt });
2506
+ const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
2507
+ const briefingScript = await prepareBriefingScript(sessionBaseDir, sessionId, historyForScript, 'resume');
2508
+ queueVoiceInjection(getScriptInjection(briefingScript));
2181
2509
  }
2182
2510
  else {
2183
2511
  await currentSession.say("Welcome back! Ready to continue our previous conversation.");
@@ -2189,12 +2517,13 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
2189
2517
  }
2190
2518
  }
2191
2519
  else {
2192
- // Fresh start - just greet normally
2520
+ // Fresh start - greet via voice queue (not userInput, which creates a user transcript)
2521
+ currentResumeSessionId = undefined;
2193
2522
  console.log('🆕 Starting fresh session');
2194
2523
  if (currentSession) {
2195
2524
  try {
2196
2525
  if (currentVoiceMode === 'realtime') {
2197
- await currentSession.generateReply({ userInput: "The user just connected and chose to start a fresh session. Briefly greet them as Osborn and ask what they're working on." });
2526
+ queueVoiceInjection(getScriptInjection("Hey! I'm Osborn, your AI research assistant. What are you working on today?"));
2198
2527
  }
2199
2528
  else {
2200
2529
  await currentSession.say("Hey! I'm Osborn. What are you working on?");