osborn 0.5.3 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
- package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
- package/.claude/skills/playwright-browser/SKILL.md +90 -0
- package/.claude/skills/shadcn/SKILL.md +232 -0
- package/.claude/skills/shadcn/image.png +0 -0
- package/.claude/skills/youtube-transcript/SKILL.md +24 -0
- package/.dockerignore +13 -0
- package/Dockerfile +103 -0
- package/deploy.sh +70 -0
- package/dist/claude-auth.d.ts +60 -0
- package/dist/claude-auth.js +334 -0
- package/dist/claude-llm.d.ts +51 -2
- package/dist/claude-llm.js +619 -86
- package/dist/config.d.ts +5 -1
- package/dist/config.js +4 -1
- package/dist/fast-brain.d.ts +70 -16
- package/dist/fast-brain.js +662 -99
- package/dist/index-3-2-26-legacy.d.ts +1 -0
- package/dist/index-3-2-26-legacy.js +2233 -0
- package/dist/index.js +979 -429
- package/dist/jsonl-search.d.ts +66 -0
- package/dist/jsonl-search.js +274 -0
- package/dist/leagcyprompts2.d.ts +0 -0
- package/dist/leagcyprompts2.js +573 -0
- package/dist/pipeline-direct-llm.d.ts +77 -0
- package/dist/pipeline-direct-llm.js +221 -0
- package/dist/pipeline-fastbrain.d.ts +45 -0
- package/dist/pipeline-fastbrain.js +373 -0
- package/dist/prompts-2-25-26.d.ts +0 -0
- package/dist/prompts-2-25-26.js +518 -0
- package/dist/prompts-3-2-26.d.ts +78 -0
- package/dist/prompts-3-2-26.js +1319 -0
- package/dist/prompts.d.ts +83 -12
- package/dist/prompts.js +2064 -587
- package/dist/recall-client.d.ts +33 -0
- package/dist/recall-client.js +101 -0
- package/dist/session-access.d.ts +24 -0
- package/dist/session-access.js +74 -0
- package/dist/summary-index.d.ts +87 -0
- package/dist/summary-index.js +570 -0
- package/dist/turn-detector-shim.d.ts +24 -0
- package/dist/turn-detector-shim.js +83 -0
- package/dist/voice-io.d.ts +15 -5
- package/dist/voice-io.js +52 -20
- package/fly.toml +30 -0
- package/package.json +18 -13
package/dist/index.js
CHANGED
|
@@ -5,14 +5,24 @@ import { Room, RoomEvent } from '@livekit/rtc-node';
|
|
|
5
5
|
import { AccessToken } from 'livekit-server-sdk';
|
|
6
6
|
// Initialize logger before anything else
|
|
7
7
|
initializeLogger({ pretty: true, level: 'info' });
|
|
8
|
+
// Prevent MaxListenersExceededWarning on AbortSignal from Claude SDK query() calls
|
|
9
|
+
// Each resumed query() adds listeners to the shared signal; default limit is 10
|
|
10
|
+
import { setMaxListeners } from 'node:events';
|
|
11
|
+
setMaxListeners(50);
|
|
8
12
|
import { createServer } from 'http';
|
|
9
|
-
import {
|
|
10
|
-
import {
|
|
13
|
+
import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync } from 'node:fs';
|
|
14
|
+
import { join } from 'node:path';
|
|
15
|
+
import { createPatch } from 'diff';
|
|
16
|
+
import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts } from './config.js';
|
|
17
|
+
import { createSTT, createTTS, createRealtimeModelFromConfig, DIRECT_MODE_STT, DIRECT_MODE_TTS } from './voice-io.js';
|
|
11
18
|
import { createClaudeLLM } from './claude-llm.js';
|
|
19
|
+
import { clearPipelineFastBrainSession, prewarmBM25Index } from './pipeline-fastbrain.js';
|
|
20
|
+
import { ensureClaudeAuth } from './claude-auth.js';
|
|
12
21
|
import { createSmitheryProxy, destroySmitheryProxy, parseSmitheryUrl, isSmitheryUrl, SmitheryAuthorizationError } from './smithery-proxy.js';
|
|
13
|
-
import { askHaiku, updateSpecFromJSONL,
|
|
14
|
-
import { DIRECT_MODE_PROMPT, getRealtimeInstructions,
|
|
22
|
+
import { askHaiku, askFastBrain, updateSpecFromJSONL, processResearchCompletion, handleResearchBatch, prepareBriefingScript, prepareRecoveryScript, writeQuestionToSpec, checkOutputAgainstQuestions, generateProactivePrompt, clearFastBrainSession } from './fast-brain.js';
|
|
23
|
+
import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getScriptInjection, getProactiveInjection, getNotificationInjection } from './prompts.js';
|
|
15
24
|
import { MCP_CATALOG } from './config.js';
|
|
25
|
+
import { getRecallClient } from './recall-client.js';
|
|
16
26
|
import { llm } from '@livekit/agents';
|
|
17
27
|
import { z } from 'zod';
|
|
18
28
|
// ============================================================
|
|
@@ -28,6 +38,32 @@ import { z } from 'zod';
|
|
|
28
38
|
// - Voice LLM with tool calling (ask_agent, respond_permission)
|
|
29
39
|
// - Routes tasks to Claude agents for execution
|
|
30
40
|
// ============================================================
|
|
41
|
+
// Load skills list with name + description for frontend display
|
|
42
|
+
function loadSkillsList(agentDir) {
|
|
43
|
+
const skillsDir = join(agentDir, '.claude', 'skills');
|
|
44
|
+
if (!existsSync(skillsDir))
|
|
45
|
+
return [];
|
|
46
|
+
const skills = [];
|
|
47
|
+
try {
|
|
48
|
+
for (const skillName of readdirSync(skillsDir)) {
|
|
49
|
+
const skillFile = join(skillsDir, skillName, 'SKILL.md');
|
|
50
|
+
if (existsSync(skillFile)) {
|
|
51
|
+
const content = readFileSync(skillFile, 'utf-8');
|
|
52
|
+
// Extract title from first # heading, or use folder name
|
|
53
|
+
const titleMatch = content.match(/^#\s+(?:Skill:\s*)?(.+)/m);
|
|
54
|
+
const name = titleMatch ? titleMatch[1].trim() : skillName;
|
|
55
|
+
// Extract description from first paragraph after heading
|
|
56
|
+
const descMatch = content.match(/^#[^\n]+\n+([^\n#]+)/m);
|
|
57
|
+
const description = descMatch ? descMatch[1].trim() : '';
|
|
58
|
+
skills.push({ name, description });
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
}
|
|
62
|
+
catch (err) {
|
|
63
|
+
console.warn('⚠️ Failed to load skills list:', err);
|
|
64
|
+
}
|
|
65
|
+
return skills;
|
|
66
|
+
}
|
|
31
67
|
// Generate a short, user-friendly room code
|
|
32
68
|
function generateRoomCode() {
|
|
33
69
|
const chars = 'abcdefghjkmnpqrstuvwxyz23456789';
|
|
@@ -75,9 +111,15 @@ process.on('unhandledRejection', (reason) => {
|
|
|
75
111
|
console.log('⚠️ Post-disconnect cleanup error (harmless)');
|
|
76
112
|
return;
|
|
77
113
|
}
|
|
78
|
-
// generateReply timeout —
|
|
79
|
-
|
|
80
|
-
|
|
114
|
+
// generateReply timeout — realtime LLM called a tool instead of speaking (toolChoice:'none' ignored)
|
|
115
|
+
// or Superseded — new generateReply cancelled a pending one
|
|
116
|
+
if (msg.includes('generateReply timed out') || msg.includes('generation_created') || msg.includes('Superseded')) {
|
|
117
|
+
console.log('⚠️ generateReply failed:', msg.substring(0, 80));
|
|
118
|
+
return;
|
|
119
|
+
}
|
|
120
|
+
// AdaptiveInterruptionDetector crash — LiveKit Cloud returns string instead of JSON.
|
|
121
|
+
// SDK handles this internally (retries → VAD fallback). Suppress residual noise.
|
|
122
|
+
if (msg.includes('interruption prediction') || msg.includes('AdaptiveInterruptionDetector')) {
|
|
81
123
|
return;
|
|
82
124
|
}
|
|
83
125
|
console.error('❌ Unhandled Rejection:', msg);
|
|
@@ -92,11 +134,13 @@ process.on('uncaughtException', (error) => {
|
|
|
92
134
|
// ============================================================
|
|
93
135
|
// HTTP API SERVER - Exposes session data to cloud-deployed frontend
|
|
94
136
|
// ============================================================
|
|
137
|
+
// Module-level room code so the HTTP server can expose it via GET /room-code
|
|
138
|
+
let currentRoomCode = null;
|
|
95
139
|
function startApiServer(workingDir, port) {
|
|
96
140
|
const server = createServer(async (req, res) => {
|
|
97
141
|
// CORS headers for cloud frontend
|
|
98
142
|
res.setHeader('Access-Control-Allow-Origin', '*');
|
|
99
|
-
res.setHeader('Access-Control-Allow-Methods', 'GET, OPTIONS');
|
|
143
|
+
res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
|
|
100
144
|
res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
|
|
101
145
|
if (req.method === 'OPTIONS') {
|
|
102
146
|
res.writeHead(204);
|
|
@@ -132,12 +176,52 @@ function startApiServer(workingDir, port) {
|
|
|
132
176
|
res.end(JSON.stringify({ status: 'ok', workingDir }));
|
|
133
177
|
return;
|
|
134
178
|
}
|
|
179
|
+
// POST /webhook/recall — Recall.ai real-time transcript webhooks
|
|
180
|
+
if (req.method === 'POST' && url.pathname === '/webhook/recall') {
|
|
181
|
+
// Respond 200 immediately — never block or Node delays next webhooks
|
|
182
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
183
|
+
res.end('{"ok":true}');
|
|
184
|
+
let body = '';
|
|
185
|
+
req.on('data', (chunk) => { body += chunk.toString(); });
|
|
186
|
+
req.on('end', () => {
|
|
187
|
+
try {
|
|
188
|
+
const payload = JSON.parse(body);
|
|
189
|
+
const recall = getRecallClient();
|
|
190
|
+
if (recall)
|
|
191
|
+
recall.handleWebhook(payload);
|
|
192
|
+
}
|
|
193
|
+
catch (e) {
|
|
194
|
+
console.error('Recall webhook parse error:', e);
|
|
195
|
+
}
|
|
196
|
+
});
|
|
197
|
+
return;
|
|
198
|
+
}
|
|
199
|
+
// GET /meeting-output — Output Media webpage for Recall.ai bot audio
|
|
200
|
+
if (req.method === 'GET' && url.pathname === '/meeting-output') {
|
|
201
|
+
const htmlPath = join(process.cwd(), 'src', 'meeting-output.html');
|
|
202
|
+
try {
|
|
203
|
+
const html = readFileSync(htmlPath, 'utf-8');
|
|
204
|
+
res.writeHead(200, { 'Content-Type': 'text/html' });
|
|
205
|
+
res.end(html);
|
|
206
|
+
}
|
|
207
|
+
catch {
|
|
208
|
+
res.writeHead(404, { 'Content-Type': 'text/plain' });
|
|
209
|
+
res.end('meeting-output.html not found');
|
|
210
|
+
}
|
|
211
|
+
return;
|
|
212
|
+
}
|
|
213
|
+
if (req.method === 'GET' && url.pathname === '/room-code') {
|
|
214
|
+
res.writeHead(200, { 'Content-Type': 'application/json' });
|
|
215
|
+
res.end(JSON.stringify({ roomCode: currentRoomCode }));
|
|
216
|
+
return;
|
|
217
|
+
}
|
|
135
218
|
res.writeHead(404, { 'Content-Type': 'application/json' });
|
|
136
219
|
res.end(JSON.stringify({ error: 'Not found' }));
|
|
137
220
|
});
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
console.log(
|
|
221
|
+
const host = process.env.HOST || '0.0.0.0';
|
|
222
|
+
server.listen(port, host, () => {
|
|
223
|
+
console.log(`🌐 API server listening on http://${host}:${port}`);
|
|
224
|
+
console.log(` Sessions: http://${host}:${port}/sessions`);
|
|
141
225
|
});
|
|
142
226
|
server.on('error', (err) => {
|
|
143
227
|
if (err.code === 'EADDRINUSE') {
|
|
@@ -158,48 +242,6 @@ function startApiServer(workingDir, port) {
|
|
|
158
242
|
* Gemini has smaller context limits — cap at 10 exchanges with 500 char content.
|
|
159
243
|
* OpenAI handles full history (30 exchanges, 2000 char content).
|
|
160
244
|
*/
|
|
161
|
-
function buildContextBriefing(summary, history, provider) {
|
|
162
|
-
const isGemini = provider === 'gemini';
|
|
163
|
-
// Gemini: last 10 exchanges capped at 500 chars. OpenAI: full history.
|
|
164
|
-
const maxExchanges = isGemini ? 10 : history.length;
|
|
165
|
-
const maxContentLen = isGemini ? 500 : 2000;
|
|
166
|
-
const trimmedHistory = history.slice(-maxExchanges);
|
|
167
|
-
const lines = [
|
|
168
|
-
`Session ID: ${summary.sessionId.substring(0, 8)}`,
|
|
169
|
-
`Total messages: ${summary.messageCount}`,
|
|
170
|
-
'',
|
|
171
|
-
'=== SESSION CONVERSATION HISTORY ==='
|
|
172
|
-
];
|
|
173
|
-
for (const exchange of trimmedHistory) {
|
|
174
|
-
const content = exchange.content.length > maxContentLen
|
|
175
|
-
? exchange.content.substring(0, maxContentLen) + '...'
|
|
176
|
-
: exchange.content;
|
|
177
|
-
lines.push(`${exchange.role === 'user' ? 'User' : 'Assistant'}: ${content}`);
|
|
178
|
-
lines.push('');
|
|
179
|
-
}
|
|
180
|
-
return lines.join('\n');
|
|
181
|
-
}
|
|
182
|
-
/**
|
|
183
|
-
* Read spec.md and format it for the realtime voice model.
|
|
184
|
-
* Truncates to avoid bloating the context window.
|
|
185
|
-
* Returns null if spec doesn't exist or session ID isn't available.
|
|
186
|
-
*/
|
|
187
|
-
function getSpecForVoiceModel(workingDir, sessionId) {
|
|
188
|
-
if (!sessionId)
|
|
189
|
-
return null;
|
|
190
|
-
const specContent = readSessionSpec(workingDir, sessionId);
|
|
191
|
-
if (!specContent)
|
|
192
|
-
return null;
|
|
193
|
-
const MAX = 3000;
|
|
194
|
-
if (specContent.length <= MAX)
|
|
195
|
-
return specContent;
|
|
196
|
-
const truncated = specContent.substring(0, MAX);
|
|
197
|
-
const lastHeading = truncated.lastIndexOf('\n## ');
|
|
198
|
-
if (lastHeading > MAX * 0.5) {
|
|
199
|
-
return truncated.substring(0, lastHeading) + '\n\n[... truncated — call read_spec for full content]';
|
|
200
|
-
}
|
|
201
|
-
return truncated + '\n\n[... truncated]';
|
|
202
|
-
}
|
|
203
245
|
/**
|
|
204
246
|
* Load full session conversation history into the realtime model's ChatContext.
|
|
205
247
|
* This gives the model persistent memory of what was discussed/researched,
|
|
@@ -261,8 +303,20 @@ async function main() {
|
|
|
261
303
|
if (enabledMcpNames.length > 0) {
|
|
262
304
|
console.log(`🔌 Enabled MCP servers: ${enabledMcpNames.join(', ')}`);
|
|
263
305
|
}
|
|
264
|
-
|
|
265
|
-
|
|
306
|
+
// Two directory concepts:
|
|
307
|
+
// 1. workingDir (cwd) — where Claude Code operates. Configurable per-session.
|
|
308
|
+
// Priority: OSBORN_CWD env > config.workingDirectory > process.cwd()
|
|
309
|
+
// 2. sessionBaseDir — where session artifacts live (spec.md, library/).
|
|
310
|
+
// Always the Osborn agent install directory (where this process started).
|
|
311
|
+
// This ensures .osborn/sessions/ doesn't scatter across random directories.
|
|
312
|
+
const sessionBaseDir = process.cwd(); // Always the Osborn install dir
|
|
313
|
+
const defaultWorkingDir = process.env.OSBORN_CWD || config.workingDirectory || process.cwd();
|
|
314
|
+
let workingDir = defaultWorkingDir;
|
|
315
|
+
console.log(`📂 Working directory (cwd): ${workingDir}`);
|
|
316
|
+
console.log(`📂 Session base directory: ${sessionBaseDir}`);
|
|
317
|
+
if (process.env.OSBORN_CWD) {
|
|
318
|
+
console.log(` (cwd from OSBORN_CWD env var)`);
|
|
319
|
+
}
|
|
266
320
|
console.log(`🔬 Mode: RESEARCH`);
|
|
267
321
|
// Determine voice mode
|
|
268
322
|
const voiceMode = getVoiceMode(config);
|
|
@@ -278,6 +332,7 @@ async function main() {
|
|
|
278
332
|
}
|
|
279
333
|
// Determine room code
|
|
280
334
|
const roomCode = cliArgs.roomCode || generateRoomCode();
|
|
335
|
+
currentRoomCode = roomCode;
|
|
281
336
|
const roomName = `osborn-${roomCode}`;
|
|
282
337
|
if (cliArgs.roomCode) {
|
|
283
338
|
console.log(`🔗 Joining room: ${roomCode}`);
|
|
@@ -315,14 +370,22 @@ async function main() {
|
|
|
315
370
|
const room = new Room();
|
|
316
371
|
room.setMaxListeners(50); // Prevent MaxListenersExceeded warnings on reconnect
|
|
317
372
|
// Track state
|
|
373
|
+
let pendingSessionClose = null; // Tracks async session close for reconnect safety
|
|
318
374
|
let currentSession = null;
|
|
319
375
|
let currentAgent = null; // For updateChatCtx() context injection
|
|
320
376
|
let currentLLM = null;
|
|
321
377
|
let localParticipant = null;
|
|
322
378
|
let agentState = 'initializing';
|
|
379
|
+
// Session-level always-allow list: paths the user has approved for this session without prompting
|
|
380
|
+
let sessionAlwaysAllowPaths = new Set();
|
|
323
381
|
let userState = 'listening'; // Track user speech state for queue safety
|
|
324
382
|
let currentVoiceMode = voiceMode; // Track active voice mode for data handlers
|
|
325
383
|
let currentProvider = realtimeConfig.provider; // Track active realtime provider
|
|
384
|
+
// Track the active resume session ID across scopes (ParticipantConnected + DataReceived)
|
|
385
|
+
// Updated by resume_session, session_selected, continue_session, switch_session handlers
|
|
386
|
+
let currentResumeSessionId;
|
|
387
|
+
// Claude auth code submission handler (set during OAuth flow, cleared after)
|
|
388
|
+
let pendingAuthSubmitCode = null;
|
|
326
389
|
// Task deduplication guard - prevents Gemini re-execution loops
|
|
327
390
|
let lastTaskRequest = '';
|
|
328
391
|
let lastTaskTime = 0;
|
|
@@ -330,8 +393,103 @@ async function main() {
|
|
|
330
393
|
let haikuInFlight = null;
|
|
331
394
|
// Background research state - tracks async ask_agent execution
|
|
332
395
|
let activeResearch = null;
|
|
396
|
+
// Persist last completed research context so follow-up questions can reference it
|
|
397
|
+
// (activeResearch is set to null on completion — this preserves the context)
|
|
398
|
+
let lastCompletedResearch = null;
|
|
333
399
|
// No manual queuing — the Claude SDK handles sequential queries internally
|
|
334
400
|
// ============================================================
|
|
401
|
+
// Recall.ai — Meeting Transcript Routing
|
|
402
|
+
// ============================================================
|
|
403
|
+
const recall = getRecallClient();
|
|
404
|
+
if (recall) {
|
|
405
|
+
console.log('🎥 Recall.ai client initialized (RECALL_API_KEY present)');
|
|
406
|
+
recall.on('transcript', ({ botId, speaker, text }) => {
|
|
407
|
+
console.log(`📝 Meeting transcript [${speaker}]: ${text}`);
|
|
408
|
+
// Route meeting transcripts to Claude as user text with speaker attribution
|
|
409
|
+
if (currentLLM && currentSession) {
|
|
410
|
+
const meetingText = `[Meeting — ${speaker}]: ${text}`;
|
|
411
|
+
// Use the same pipeline as user_text data channel messages
|
|
412
|
+
try {
|
|
413
|
+
if (currentVoiceMode === 'pipeline' || currentVoiceMode === 'direct') {
|
|
414
|
+
const chatCtx = new llm.ChatContext();
|
|
415
|
+
chatCtx.addMessage({ role: 'user', content: meetingText });
|
|
416
|
+
currentLLM.chat({ chatCtx });
|
|
417
|
+
}
|
|
418
|
+
}
|
|
419
|
+
catch (err) {
|
|
420
|
+
console.error('❌ Failed to route meeting transcript:', err);
|
|
421
|
+
}
|
|
422
|
+
}
|
|
423
|
+
});
|
|
424
|
+
}
|
|
425
|
+
// ============================================================
|
|
426
|
+
// Interruption Tracking (Content Ledger)
|
|
427
|
+
// ============================================================
|
|
428
|
+
// When user interrupts TTS, LiveKit truncates chatCtx to what was spoken.
|
|
429
|
+
// We capture the spoken text (synchronizedTranscript) and on the next user
|
|
430
|
+
// message, read Claude's full output from JSONL + inject context so Claude
|
|
431
|
+
// knows what was heard vs lost. Claude decides: side question → answer +
|
|
432
|
+
// continue, or redirect → follow new direction.
|
|
433
|
+
// Current SpeechHandle from session.say() — only the latest one matters
|
|
434
|
+
let currentSpeechHandle = null;
|
|
435
|
+
// Last interruption context — gathered at interrupt time, consumed when user's message arrives
|
|
436
|
+
let lastInterruption = null;
|
|
437
|
+
/**
|
|
438
|
+
* Called when a SpeechHandle finishes (interrupted or not).
|
|
439
|
+
* If interrupted: gather spoken text + JSONL context. Does NOT send to Claude yet —
|
|
440
|
+
* that happens when the user's transcribed message arrives via chat().
|
|
441
|
+
*/
|
|
442
|
+
async function handleSpeechDone(handle, fullText) {
|
|
443
|
+
if (!handle.interrupted) {
|
|
444
|
+
lastInterruption = null;
|
|
445
|
+
return;
|
|
446
|
+
}
|
|
447
|
+
// fullText is what was being spoken when interrupted (passed from tts_say handler).
|
|
448
|
+
// No word-level cutoff for say() — only generateReply pipeline has that — but Claude
|
|
449
|
+
// knows its own output from JSONL, so the full block is enough context.
|
|
450
|
+
console.log(`🔇 Speech interrupted. Was speaking: "${fullText.substring(0, 80)}..."`);
|
|
451
|
+
// Read last 10 assistant messages from JSONL (Claude's full untruncated output).
|
|
452
|
+
// SessionMessage.text is pre-joined from all text content blocks.
|
|
453
|
+
let recentMessages = '';
|
|
454
|
+
const sessionId = currentLLM?.sessionId;
|
|
455
|
+
if (sessionId) {
|
|
456
|
+
try {
|
|
457
|
+
const { readSessionHistory } = await import('./session-access.js');
|
|
458
|
+
const history = readSessionHistory(sessionId, workingDir, {
|
|
459
|
+
lastN: 10,
|
|
460
|
+
types: ['assistant'],
|
|
461
|
+
});
|
|
462
|
+
recentMessages = history
|
|
463
|
+
.filter((m) => m.text)
|
|
464
|
+
.map((m) => m.text)
|
|
465
|
+
.join('\n---\n');
|
|
466
|
+
}
|
|
467
|
+
catch (err) {
|
|
468
|
+
console.warn('⚠️ Failed to read JSONL for interruption context:', err);
|
|
469
|
+
}
|
|
470
|
+
}
|
|
471
|
+
// Store — consumed when user's next message arrives via chat()
|
|
472
|
+
lastInterruption = { spokenText: fullText, recentMessages, timestamp: Date.now() };
|
|
473
|
+
console.log(`📋 Interruption context stored (text: ${fullText.length} chars, JSONL: ${recentMessages.length} chars)`);
|
|
474
|
+
}
|
|
475
|
+
/**
|
|
476
|
+
* Callback for PipelineDirectLLM — returns pending interruption context and clears it.
|
|
477
|
+
* Called in chat() when user's transcribed message arrives.
|
|
478
|
+
* PipelineDirectLLM enriches the user message with this context before sending to Claude.
|
|
479
|
+
*/
|
|
480
|
+
function getAndConsumeInterruptionContext() {
|
|
481
|
+
if (!lastInterruption)
|
|
482
|
+
return null;
|
|
483
|
+
// Expire after 60s — user may have waited too long
|
|
484
|
+
if (Date.now() - lastInterruption.timestamp > 60_000) {
|
|
485
|
+
lastInterruption = null;
|
|
486
|
+
return null;
|
|
487
|
+
}
|
|
488
|
+
const ctx = { spokenText: lastInterruption.spokenText, recentMessages: lastInterruption.recentMessages };
|
|
489
|
+
lastInterruption = null;
|
|
490
|
+
return ctx;
|
|
491
|
+
}
|
|
492
|
+
// ============================================================
|
|
335
493
|
// Unified Voice Injection Queue
|
|
336
494
|
// ============================================================
|
|
337
495
|
// ALL system injections (research updates, completions, notifications, errors)
|
|
@@ -364,43 +522,62 @@ async function main() {
|
|
|
364
522
|
console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (user speaking)`);
|
|
365
523
|
return;
|
|
366
524
|
}
|
|
525
|
+
// Don't inject while fast brain tool call is in flight — the tool response will
|
|
526
|
+
// race with our generateReply, causing Gemini to drop our content and only speak
|
|
527
|
+
// the tool response. Wait for the tool call to complete first.
|
|
528
|
+
if (haikuInFlight) {
|
|
529
|
+
console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (fast brain in flight: "${haikuInFlight.question.substring(0, 40)}...")`);
|
|
530
|
+
return; // Will be retried when haikuInFlight clears (see tool execute handler)
|
|
531
|
+
}
|
|
367
532
|
isProcessingQueue = true;
|
|
368
|
-
//
|
|
369
|
-
|
|
533
|
+
// Batch ALL queued items into one generateReply call
|
|
534
|
+
const items = voiceQueue.splice(0);
|
|
535
|
+
const batchedInstruction = items.length === 1
|
|
536
|
+
? items[0]
|
|
537
|
+
: items.join('\n\n---\n\n');
|
|
538
|
+
console.log(`📡 Voice queue: processing ${items.length} batched items (${batchedInstruction.length} chars)`);
|
|
539
|
+
// Safety timeout: if agent_state_changed never fires (edge case — e.g. Gemini
|
|
540
|
+
// WebSocket drops, or state machine hangs). 15s gives the model time to process.
|
|
370
541
|
setTimeout(() => {
|
|
371
542
|
if (isProcessingQueue) {
|
|
372
|
-
console.log('⚠️ Voice queue:
|
|
543
|
+
console.log('⚠️ Voice queue: safety timeout — clearing guard');
|
|
373
544
|
isProcessingQueue = false;
|
|
374
545
|
if (voiceQueue.length > 0 && agentState === 'listening') {
|
|
375
546
|
processVoiceQueue();
|
|
376
547
|
}
|
|
377
548
|
}
|
|
378
|
-
},
|
|
379
|
-
// Batch ALL queued items into one generateReply call
|
|
380
|
-
const items = voiceQueue.splice(0);
|
|
381
|
-
const batchedInstruction = items.length === 1
|
|
382
|
-
? items[0]
|
|
383
|
-
: items.join('\n\n---\n\n');
|
|
384
|
-
console.log(`📡 Voice queue: processing ${items.length} batched items (${batchedInstruction.length} chars)`);
|
|
549
|
+
}, 15000);
|
|
385
550
|
try {
|
|
386
551
|
// Skip interrupt for Gemini — disrupts Gemini's state machine, causing it to
|
|
387
552
|
// never transition back to 'listening' (hangs in speaking state indefinitely)
|
|
388
553
|
if (currentProvider !== 'gemini') {
|
|
389
554
|
currentSession.interrupt();
|
|
390
555
|
}
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
556
|
+
if (currentProvider === 'gemini') {
|
|
557
|
+
// LiveKit SDK v1.0.51: generateReply({ instructions }) sends a system turn +
|
|
558
|
+
// synthetic "." user turn. After Gemini processes a tool call in this flow,
|
|
559
|
+
// autoToolReplyGeneration does NOT trigger continuation (system-only limitation).
|
|
560
|
+
// Using userInput instead makes it a "user-initiated" request where auto-continuation
|
|
561
|
+
// works. The ask_fast_brain injection bypass handles [SCRIPT]/[PROACTIVE]/[NOTIFICATION]
|
|
562
|
+
// prefixes and returns the content directly as a tool response.
|
|
563
|
+
currentSession.generateReply({
|
|
564
|
+
userInput: batchedInstruction,
|
|
565
|
+
});
|
|
566
|
+
}
|
|
567
|
+
else {
|
|
568
|
+
// OpenAI respects toolChoice:'none' — speaks instructions directly
|
|
569
|
+
currentSession.generateReply({
|
|
570
|
+
instructions: batchedInstruction,
|
|
571
|
+
toolChoice: 'none',
|
|
572
|
+
});
|
|
573
|
+
}
|
|
395
574
|
// Model transitions to thinking/speaking after this call.
|
|
396
575
|
// When it returns to 'listening', agent_state_changed triggers processVoiceQueue() again.
|
|
397
576
|
// Also inject into chatCtx as persistent context so the model remembers across turns
|
|
398
577
|
injectIntoChatCtx(batchedInstruction);
|
|
399
578
|
}
|
|
400
579
|
catch (err) {
|
|
401
|
-
console.log('⚠️ Voice queue generateReply failed
|
|
402
|
-
// Do NOT re-queue — re-queuing causes infinite retry cascades
|
|
403
|
-
// The frontend still has the updates via claude_output events
|
|
580
|
+
console.log('⚠️ Voice queue generateReply failed:', err);
|
|
404
581
|
isProcessingQueue = false;
|
|
405
582
|
}
|
|
406
583
|
// isProcessingQueue is cleared when agent_state_changed fires
|
|
@@ -473,17 +650,16 @@ async function main() {
|
|
|
473
650
|
isStreaming: true,
|
|
474
651
|
agentRole: 'research-progress',
|
|
475
652
|
});
|
|
476
|
-
//
|
|
477
|
-
|
|
478
|
-
// Route through fast brain for contextual voice updates (capped at 3 per task)
|
|
479
|
-
if (activeResearch.voiceUpdateCount < 3) {
|
|
653
|
+
// Route through fast brain — it decides whether to speak (usually silent)
|
|
654
|
+
if (activeResearch.voiceUpdateCount < 2) {
|
|
480
655
|
const voiceSid = currentLLM?.sessionId;
|
|
481
656
|
if (voiceSid) {
|
|
482
|
-
|
|
483
|
-
|
|
484
|
-
|
|
657
|
+
const chatHistory = getChatHistory(10);
|
|
658
|
+
handleResearchBatch(workingDir, voiceSid, lastTaskRequest || '', updates, activeResearch.researchLog, chatHistory, sessionBaseDir)
|
|
659
|
+
.then(script => {
|
|
660
|
+
if (script && activeResearch) {
|
|
485
661
|
activeResearch.voiceUpdateCount++;
|
|
486
|
-
queueVoiceInjection(
|
|
662
|
+
queueVoiceInjection(getScriptInjection(script));
|
|
487
663
|
}
|
|
488
664
|
})
|
|
489
665
|
.catch(() => { }); // Silent fail — updates are optional
|
|
@@ -495,7 +671,7 @@ async function main() {
|
|
|
495
671
|
let proactiveTimer = null;
|
|
496
672
|
let proactivePromptHistory = [];
|
|
497
673
|
const PROACTIVE_INTERVAL = 15000; // 15 seconds (offset from 8s batch timer)
|
|
498
|
-
const MAX_PROACTIVE_PROMPTS =
|
|
674
|
+
const MAX_PROACTIVE_PROMPTS = 2; // Cap per research task (reduced from 4 to minimize realtime LLM tokens)
|
|
499
675
|
function startProactiveLoop(task, sessionId) {
|
|
500
676
|
stopProactiveLoop();
|
|
501
677
|
proactivePromptHistory = [];
|
|
@@ -514,11 +690,11 @@ async function main() {
|
|
|
514
690
|
if (isProcessingQueue)
|
|
515
691
|
return; // Don't collide with voice queue
|
|
516
692
|
try {
|
|
517
|
-
const prompt = await generateProactivePrompt(workingDir, sessionId, task, activeResearch.researchLog, proactivePromptHistory);
|
|
693
|
+
const prompt = await generateProactivePrompt(workingDir, sessionId, task, activeResearch.researchLog, proactivePromptHistory, sessionBaseDir);
|
|
518
694
|
if (prompt && prompt !== 'NOTHING') {
|
|
519
695
|
proactivePromptHistory.push(prompt);
|
|
520
696
|
proactiveCount++;
|
|
521
|
-
queueVoiceInjection(
|
|
697
|
+
queueVoiceInjection(getProactiveInjection(prompt));
|
|
522
698
|
}
|
|
523
699
|
}
|
|
524
700
|
catch { } // Silent fail — proactive prompts are optional
|
|
@@ -580,28 +756,42 @@ async function main() {
|
|
|
580
756
|
}
|
|
581
757
|
}
|
|
582
758
|
// Create DIRECT session (STT + Claude Agent SDK + TTS)
|
|
583
|
-
async function createDirectSession(resumeSessionId) {
|
|
759
|
+
async function createDirectSession(resumeSessionId, llmOverride) {
|
|
584
760
|
console.log('🎯 Creating direct session...');
|
|
585
|
-
const stt = createSTT(
|
|
586
|
-
const tts = createTTS(
|
|
587
|
-
|
|
588
|
-
//
|
|
589
|
-
|
|
761
|
+
const stt = createSTT(DIRECT_MODE_STT);
|
|
762
|
+
const tts = createTTS(DIRECT_MODE_TTS);
|
|
763
|
+
// Create Claude LLM wrapper — direct mode uses speech-optimized system prompt
|
|
764
|
+
// skipTTSQueue: bypass LiveKit's BufferedTokenStream, use session.say() instead
|
|
765
|
+
// llmOverride: pipeline mode passes PipelineDirectLLM which wraps its own ClaudeLLM
|
|
766
|
+
const directLLM = llmOverride || createClaudeLLM({
|
|
590
767
|
workingDirectory: workingDir,
|
|
768
|
+
sessionBaseDir,
|
|
591
769
|
mcpServers,
|
|
592
770
|
resumeSessionId,
|
|
771
|
+
voiceMode: 'direct',
|
|
772
|
+
skipTTSQueue: true,
|
|
593
773
|
});
|
|
594
774
|
currentLLM = directLLM;
|
|
775
|
+
// Reset the session always-allow list for each new direct session
|
|
776
|
+
sessionAlwaysAllowPaths = new Set();
|
|
595
777
|
// For resumed sessions, eagerly create workspace (we know the real ID)
|
|
596
778
|
if (resumeSessionId) {
|
|
597
|
-
const workspace = ensureSessionWorkspace(
|
|
779
|
+
const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
|
|
598
780
|
console.log(`📁 Session workspace (resumed): ${workspace}`);
|
|
599
781
|
}
|
|
600
782
|
// For new sessions, create workspace when SDK assigns real session ID
|
|
601
783
|
directLLM.events.once('session_id', ({ sessionId }) => {
|
|
602
|
-
const workspace = ensureSessionWorkspace(
|
|
784
|
+
const workspace = ensureSessionWorkspace(sessionBaseDir, sessionId);
|
|
603
785
|
console.log(`📁 Session workspace created: ${workspace}`);
|
|
786
|
+
// Pipeline mode: pre-warm BM25 index so first fast brain query is fast
|
|
787
|
+
if (currentVoiceMode === 'pipeline') {
|
|
788
|
+
prewarmBM25Index(sessionId, workingDir).catch(() => { });
|
|
789
|
+
}
|
|
604
790
|
});
|
|
791
|
+
// Also pre-warm for resumed sessions (sessionId already known)
|
|
792
|
+
if (resumeSessionId && currentVoiceMode === 'pipeline') {
|
|
793
|
+
prewarmBM25Index(resumeSessionId, workingDir).catch(() => { });
|
|
794
|
+
}
|
|
605
795
|
// Wire up MCP server changes to frontend
|
|
606
796
|
directLLM.events.on('mcp_servers_changed', (data) => {
|
|
607
797
|
console.log(`🔌 MCP servers changed: ${data.enabledKeys.join(', ') || 'none'}`);
|
|
@@ -657,6 +847,15 @@ async function main() {
|
|
|
657
847
|
console.log(`⚠️ Permission needed: ${data.toolName}`);
|
|
658
848
|
const toolName = data.toolName;
|
|
659
849
|
const input = data.input || {};
|
|
850
|
+
// Check session always-allow list before showing dialog
|
|
851
|
+
if (toolName === 'Write' || toolName === 'Edit' || toolName === 'MultiEdit') {
|
|
852
|
+
const filePath = String(input?.file_path || '');
|
|
853
|
+
if (filePath && sessionAlwaysAllowPaths.has(filePath)) {
|
|
854
|
+
console.log(`✅ Session always-allow: ${filePath}`);
|
|
855
|
+
directLLM.respondToPermission(true);
|
|
856
|
+
return;
|
|
857
|
+
}
|
|
858
|
+
}
|
|
660
859
|
// Build descriptive message based on tool type
|
|
661
860
|
let description = `I need permission to use ${toolName}.`;
|
|
662
861
|
if (toolName === 'Bash' && input.command) {
|
|
@@ -672,17 +871,120 @@ async function main() {
|
|
|
672
871
|
else if (toolName === 'WebFetch' && input.url) {
|
|
673
872
|
description = `I want to fetch content from: ${input.url}`;
|
|
674
873
|
}
|
|
874
|
+
// Generate diff for Write/Edit/MultiEdit tools
|
|
875
|
+
let diffString;
|
|
876
|
+
if (toolName === 'Write' || toolName === 'Edit' || toolName === 'MultiEdit') {
|
|
877
|
+
const diffStart = performance.now();
|
|
878
|
+
try {
|
|
879
|
+
const filePath = String(input?.file_path || '');
|
|
880
|
+
let beforeContent = '';
|
|
881
|
+
const readStart = performance.now();
|
|
882
|
+
try {
|
|
883
|
+
beforeContent = readFileSync(filePath, 'utf-8');
|
|
884
|
+
}
|
|
885
|
+
catch {
|
|
886
|
+
beforeContent = ''; // new file
|
|
887
|
+
}
|
|
888
|
+
const readMs = (performance.now() - readStart).toFixed(2);
|
|
889
|
+
console.log(`⏱️ diff read: ${readMs}ms (${beforeContent.length} chars, ${filePath.split('/').pop()})`);
|
|
890
|
+
let afterContent = beforeContent;
|
|
891
|
+
if (toolName === 'Write') {
|
|
892
|
+
afterContent = String(input?.content || '');
|
|
893
|
+
}
|
|
894
|
+
else if (toolName === 'Edit') {
|
|
895
|
+
const oldStr = String(input?.old_string || '');
|
|
896
|
+
const newStr = String(input?.new_string || '');
|
|
897
|
+
const replaceAll = Boolean(input?.replace_all);
|
|
898
|
+
if (replaceAll) {
|
|
899
|
+
afterContent = beforeContent.split(oldStr).join(newStr);
|
|
900
|
+
}
|
|
901
|
+
else {
|
|
902
|
+
afterContent = beforeContent.replace(oldStr, newStr);
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
else if (toolName === 'MultiEdit') {
|
|
906
|
+
afterContent = beforeContent;
|
|
907
|
+
const edits = Array.isArray(input?.edits) ? input.edits : [];
|
|
908
|
+
for (const edit of edits) {
|
|
909
|
+
if (edit.replace_all) {
|
|
910
|
+
afterContent = afterContent.split(edit.old_string).join(edit.new_string);
|
|
911
|
+
}
|
|
912
|
+
else {
|
|
913
|
+
afterContent = afterContent.replace(edit.old_string, edit.new_string);
|
|
914
|
+
}
|
|
915
|
+
}
|
|
916
|
+
}
|
|
917
|
+
const patchStart = performance.now();
|
|
918
|
+
const fileName = filePath.split('/').pop() || filePath;
|
|
919
|
+
diffString = createPatch(fileName, beforeContent, afterContent, '', '', { context: 4 });
|
|
920
|
+
const patchMs = (performance.now() - patchStart).toFixed(2);
|
|
921
|
+
const totalMs = (performance.now() - diffStart).toFixed(2);
|
|
922
|
+
console.log(`⏱️ diff patch: ${patchMs}ms | total: ${totalMs}ms (before: ${beforeContent.length} chars, after: ${afterContent.length} chars, diff: ${diffString.length} chars)`);
|
|
923
|
+
}
|
|
924
|
+
catch (e) {
|
|
925
|
+
const totalMs = (performance.now() - diffStart).toFixed(2);
|
|
926
|
+
console.log(`⏱️ diff failed after ${totalMs}ms:`, e);
|
|
927
|
+
// diff generation failed — proceed without diff
|
|
928
|
+
diffString = undefined;
|
|
929
|
+
}
|
|
930
|
+
}
|
|
931
|
+
console.log(`🔍 perm payload: diff=${diffString ? `✅ ${diffString.length} chars` : '❌ NONE'} toolName=${toolName}`);
|
|
675
932
|
sendToFrontend({
|
|
676
933
|
type: 'permission_request',
|
|
677
934
|
toolName: data.toolName,
|
|
678
935
|
input: data.input,
|
|
679
936
|
description,
|
|
680
937
|
agentRole: 'direct',
|
|
938
|
+
diff: diffString,
|
|
681
939
|
});
|
|
682
940
|
// Speak the descriptive request so user knows to respond
|
|
683
941
|
if (currentSession) {
|
|
684
942
|
const ttsMessage = `${description} Say yes, no, or always.`;
|
|
685
|
-
currentSession.say?.(ttsMessage)
|
|
943
|
+
currentSession.say?.(ttsMessage);
|
|
944
|
+
}
|
|
945
|
+
});
|
|
946
|
+
// Wire up TTS say — bypass LiveKit's BufferedTokenStream, speak directly via session.say()
|
|
947
|
+
// Each text block from Claude gets spoken immediately as it arrives, no internal buffering
|
|
948
|
+
directLLM.events.on('tts_say', (data) => {
|
|
949
|
+
// Guard: session must be alive — TTS errors can kill the session while background query runs
|
|
950
|
+
if (!currentSession) {
|
|
951
|
+
console.warn(`⚠️ tts_say fired but currentSession is null — text dropped: "${data.text?.substring(0, 60)}"`);
|
|
952
|
+
return;
|
|
953
|
+
}
|
|
954
|
+
if (!data.text?.trim()) {
|
|
955
|
+
console.log(`🔇 tts_say fired but text is empty — skipping`);
|
|
956
|
+
return;
|
|
957
|
+
}
|
|
958
|
+
const sayId = Date.now(); // simple ID to correlate start/end logs
|
|
959
|
+
console.log(`🗣️ [${sayId}] session.say START (${data.text.length} chars): "${data.text.substring(0, 60)}..."`);
|
|
960
|
+
try {
|
|
961
|
+
const handle = currentSession.say(data.text);
|
|
962
|
+
if (handle && typeof handle.addDoneCallback === 'function') {
|
|
963
|
+
// SpeechHandle — track it and register interruption callback
|
|
964
|
+
currentSpeechHandle = handle;
|
|
965
|
+
handle.addDoneCallback((sh) => {
|
|
966
|
+
if (sh.interrupted) {
|
|
967
|
+
console.log(`🔇 [${sayId}] session.say INTERRUPTED`);
|
|
968
|
+
handleSpeechDone(sh, data.text);
|
|
969
|
+
}
|
|
970
|
+
else {
|
|
971
|
+
console.log(`✅ [${sayId}] session.say DONE`);
|
|
972
|
+
if (currentSpeechHandle === sh)
|
|
973
|
+
lastInterruption = null;
|
|
974
|
+
}
|
|
975
|
+
});
|
|
976
|
+
console.log(`🗣️ [${sayId}] session.say queued (SpeechHandle tracked)`);
|
|
977
|
+
}
|
|
978
|
+
else if (handle && typeof handle.then === 'function') {
|
|
979
|
+
// Promise-based fallback (older SDK path)
|
|
980
|
+
handle
|
|
981
|
+
.then(() => console.log(`✅ [${sayId}] session.say DONE`))
|
|
982
|
+
.catch((err) => console.error(`❌ [${sayId}] session.say FAILED:`, err?.message || err));
|
|
983
|
+
}
|
|
984
|
+
}
|
|
985
|
+
catch (err) {
|
|
986
|
+
// Catch synchronous "AgentSession is not running" errors
|
|
987
|
+
console.warn(`⚠️ [${sayId}] session.say threw — session likely dead: ${err?.message}`);
|
|
686
988
|
}
|
|
687
989
|
});
|
|
688
990
|
// Wire up session resume failure - notify frontend when SDK creates new session instead
|
|
@@ -703,17 +1005,25 @@ async function main() {
|
|
|
703
1005
|
});
|
|
704
1006
|
});
|
|
705
1007
|
// Create the Agent with instructions, STT, LLM, TTS
|
|
1008
|
+
// VAD (Silero ONNX) removed — caused 2-5s inference lag on CPU, making interruption detection worse
|
|
1009
|
+
// Turn detection is server-side (Deepgram endpointing), interruptions handled by STT
|
|
706
1010
|
const agent = new voice.Agent({
|
|
707
1011
|
instructions: DIRECT_MODE_PROMPT,
|
|
708
1012
|
stt,
|
|
709
1013
|
llm: directLLM,
|
|
710
1014
|
tts,
|
|
711
|
-
|
|
712
|
-
turnDetection: 'vad',
|
|
1015
|
+
turnDetection: 'stt',
|
|
713
1016
|
});
|
|
714
|
-
// Create the session (no longer passes STT/LLM/TTS here)
|
|
715
1017
|
const session = new voice.AgentSession({
|
|
716
|
-
turnDetection: '
|
|
1018
|
+
turnDetection: 'stt',
|
|
1019
|
+
preemptiveGeneration: false, // Only fire LLM on final committed transcript, not partial preemptives
|
|
1020
|
+
turnHandling: {
|
|
1021
|
+
endpointing: {
|
|
1022
|
+
mode: 'fixed',
|
|
1023
|
+
minDelay: 500, // Wait 500ms after STT commits before generating reply
|
|
1024
|
+
maxDelay: 2000, // Force end-of-turn after 2s to prevent hangs
|
|
1025
|
+
},
|
|
1026
|
+
},
|
|
717
1027
|
});
|
|
718
1028
|
return { session, agent };
|
|
719
1029
|
}
|
|
@@ -729,18 +1039,19 @@ async function main() {
|
|
|
729
1039
|
// Create Claude LLM for tool execution (research tasks)
|
|
730
1040
|
realtimeClaudeHandler = createClaudeLLM({
|
|
731
1041
|
workingDirectory: workingDir,
|
|
1042
|
+
sessionBaseDir,
|
|
732
1043
|
mcpServers,
|
|
733
1044
|
resumeSessionId,
|
|
734
1045
|
});
|
|
735
1046
|
currentLLM = realtimeClaudeHandler;
|
|
736
1047
|
// For resumed sessions, eagerly create workspace (we know the real ID)
|
|
737
1048
|
if (resumeSessionId) {
|
|
738
|
-
const workspace = ensureSessionWorkspace(
|
|
1049
|
+
const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
|
|
739
1050
|
console.log(`📁 Session workspace (resumed): ${workspace}`);
|
|
740
1051
|
}
|
|
741
1052
|
// For new sessions, create workspace when SDK assigns real session ID
|
|
742
1053
|
realtimeClaudeHandler.events.once('session_id', ({ sessionId }) => {
|
|
743
|
-
const workspace = ensureSessionWorkspace(
|
|
1054
|
+
const workspace = ensureSessionWorkspace(sessionBaseDir, sessionId);
|
|
744
1055
|
console.log(`📁 Session workspace created: ${workspace}`);
|
|
745
1056
|
});
|
|
746
1057
|
// Wire up MCP server changes to frontend
|
|
@@ -840,76 +1151,24 @@ async function main() {
|
|
|
840
1151
|
checkpointId: data.checkpointId,
|
|
841
1152
|
});
|
|
842
1153
|
});
|
|
843
|
-
// Extract priority content from research results — preserves URLs, code blocks, and key details
|
|
844
|
-
function extractPriorityContent(result, maxChars = 4000) {
|
|
845
|
-
if (result.length <= maxChars)
|
|
846
|
-
return result;
|
|
847
|
-
// Extract URLs (preserve for voice relay)
|
|
848
|
-
const urlRegex = /https?:\/\/[^\s\)\"\'>\]]+/g;
|
|
849
|
-
const urls = [...new Set(result.match(urlRegex) || [])];
|
|
850
|
-
// Extract code blocks (first 2, up to 400 chars each)
|
|
851
|
-
const codeBlockRegex = /```[\s\S]*?```/g;
|
|
852
|
-
const codeBlocks = [];
|
|
853
|
-
let match;
|
|
854
|
-
while ((match = codeBlockRegex.exec(result)) !== null && codeBlocks.length < 2) {
|
|
855
|
-
const block = match[0].length > 400 ? match[0].substring(0, 397) + '```' : match[0];
|
|
856
|
-
codeBlocks.push(block);
|
|
857
|
-
}
|
|
858
|
-
// Build sections
|
|
859
|
-
const sections = [];
|
|
860
|
-
// Take the first ~2500 chars of narrative (intro + main findings)
|
|
861
|
-
const narrativeEnd = Math.min(result.length, 2500);
|
|
862
|
-
const narrativeTruncated = result.substring(0, narrativeEnd);
|
|
863
|
-
const lastPeriod = narrativeTruncated.lastIndexOf('.');
|
|
864
|
-
const narrative = lastPeriod > narrativeEnd * 0.6
|
|
865
|
-
? narrativeTruncated.substring(0, lastPeriod + 1)
|
|
866
|
-
: narrativeTruncated;
|
|
867
|
-
sections.push(narrative);
|
|
868
|
-
// Append conclusion (last ~500 chars) if result is long enough
|
|
869
|
-
if (result.length > 3000) {
|
|
870
|
-
const tail = result.substring(result.length - 500);
|
|
871
|
-
const firstPeriod = tail.indexOf('.');
|
|
872
|
-
const conclusion = firstPeriod > 0 ? tail.substring(firstPeriod + 1).trim() : tail.trim();
|
|
873
|
-
if (conclusion.length > 50) {
|
|
874
|
-
sections.push(`\n\n[CONCLUSION]\n${conclusion}`);
|
|
875
|
-
}
|
|
876
|
-
}
|
|
877
|
-
// Append code blocks if not already in the narrative
|
|
878
|
-
if (codeBlocks.length > 0) {
|
|
879
|
-
const codeSection = codeBlocks.filter(cb => !narrative.includes(cb));
|
|
880
|
-
if (codeSection.length > 0) {
|
|
881
|
-
sections.push(`\n\n[CODE EXAMPLES]\n${codeSection.join('\n\n')}`);
|
|
882
|
-
}
|
|
883
|
-
}
|
|
884
|
-
// Append URLs if not already in the narrative
|
|
885
|
-
const newUrls = urls.filter(u => !narrative.includes(u));
|
|
886
|
-
if (newUrls.length > 0) {
|
|
887
|
-
sections.push(`\n\n[LINKS]\n${newUrls.slice(0, 5).join('\n')}`);
|
|
888
|
-
}
|
|
889
|
-
let assembled = sections.join('');
|
|
890
|
-
// Final safety truncation if assembled exceeds maxChars
|
|
891
|
-
if (assembled.length > maxChars) {
|
|
892
|
-
const truncated = assembled.substring(0, maxChars);
|
|
893
|
-
const lp = truncated.lastIndexOf('.');
|
|
894
|
-
assembled = lp > maxChars * 0.7 ? truncated.substring(0, lp + 1) : truncated + '...';
|
|
895
|
-
}
|
|
896
|
-
return assembled;
|
|
897
|
-
}
|
|
898
1154
|
// Extracted research execution — called by ask_agent, SDK handles queuing internally
|
|
899
1155
|
function executeResearch(task) {
|
|
900
1156
|
sendToFrontend({ type: 'system', text: `Executing: ${task}` });
|
|
901
1157
|
// Fire-and-forget: write user question to spec.md BEFORE agent starts
|
|
902
1158
|
const questionSid = currentLLM?.sessionId || resumeSessionId;
|
|
903
1159
|
if (questionSid) {
|
|
904
|
-
writeQuestionToSpec(
|
|
1160
|
+
writeQuestionToSpec(sessionBaseDir, questionSid, task).catch(err => console.error('❌ writeQuestionToSpec failed:', err));
|
|
905
1161
|
}
|
|
906
|
-
// Clean up previous research
|
|
1162
|
+
// Clean up previous research UI tracking — but let the SDK query complete in background.
|
|
1163
|
+
// The SDK has an internal queue: new query() calls enqueue behind running ones.
|
|
1164
|
+
// Old research results land in JSONL and fast brain can access them later.
|
|
907
1165
|
if (activeResearch) {
|
|
908
|
-
activeResearch.cleanup();
|
|
1166
|
+
activeResearch.cleanup(); // Remove event listeners so UI tracks new task
|
|
909
1167
|
if (researchBatchTimer) {
|
|
910
1168
|
clearTimeout(researchBatchTimer);
|
|
911
1169
|
researchBatchTimer = null;
|
|
912
1170
|
}
|
|
1171
|
+
// NOTE: NOT aborting — old SDK process continues writing to JSONL
|
|
913
1172
|
}
|
|
914
1173
|
// Set up research log batching — events push to queue for state-driven injection
|
|
915
1174
|
const researchLog = [];
|
|
@@ -970,7 +1229,7 @@ async function main() {
|
|
|
970
1229
|
if (resultText.length > ANSWER_CHECK_THRESHOLD) {
|
|
971
1230
|
const sid = currentLLM?.sessionId || resumeSessionId;
|
|
972
1231
|
if (sid)
|
|
973
|
-
checkOutputAgainstQuestions(
|
|
1232
|
+
checkOutputAgainstQuestions(sessionBaseDir, sid, resultText, 'tool_result').catch(() => { });
|
|
974
1233
|
}
|
|
975
1234
|
// When AskUserQuestion completes, the user's answer is a decision — track it in spec
|
|
976
1235
|
if (data.name === 'AskUserQuestion' && data.response) {
|
|
@@ -979,7 +1238,7 @@ async function main() {
|
|
|
979
1238
|
const questionText = JSON.stringify(data.input?.questions || data.input || {});
|
|
980
1239
|
const answerText = typeof data.response === 'string' ? data.response : JSON.stringify(data.response);
|
|
981
1240
|
const specUpdate = `User answered a clarifying question during research.\nQuestion: ${questionText}\nAnswer: ${answerText}\nRecord this as a user decision in spec.md.`;
|
|
982
|
-
askHaiku(workingDir, sid, specUpdate).catch(err => console.error('❌ Failed to record AskUserQuestion answer in spec:', err));
|
|
1241
|
+
askHaiku(workingDir, sid, specUpdate, undefined, undefined, undefined, sessionBaseDir).catch(err => console.error('❌ Failed to record AskUserQuestion answer in spec:', err));
|
|
983
1242
|
console.log(`📝 AskUserQuestion answer forwarded to fast brain for spec tracking`);
|
|
984
1243
|
}
|
|
985
1244
|
}
|
|
@@ -996,36 +1255,53 @@ async function main() {
|
|
|
996
1255
|
if (text.length > ANSWER_CHECK_THRESHOLD) {
|
|
997
1256
|
const sid = currentLLM?.sessionId || resumeSessionId;
|
|
998
1257
|
if (sid)
|
|
999
|
-
checkOutputAgainstQuestions(
|
|
1258
|
+
checkOutputAgainstQuestions(sessionBaseDir, sid, text, 'assistant_text').catch(() => { });
|
|
1000
1259
|
}
|
|
1001
1260
|
}
|
|
1002
1261
|
};
|
|
1262
|
+
// Capture the SDK's requestId for this query — identifies this research task
|
|
1263
|
+
// in the JSONL file for targeted retrieval by fast brain
|
|
1264
|
+
let sdkRequestId = null;
|
|
1265
|
+
const onQueryRequestId = (data) => {
|
|
1266
|
+
if (!sdkRequestId && data.requestId) {
|
|
1267
|
+
sdkRequestId = data.requestId;
|
|
1268
|
+
console.log(`📋 [research] SDK requestId: ${sdkRequestId}`);
|
|
1269
|
+
}
|
|
1270
|
+
};
|
|
1003
1271
|
realtimeClaudeHandler.events.on('tool_use', onToolUse);
|
|
1004
1272
|
realtimeClaudeHandler.events.on('tool_result', onToolResult);
|
|
1005
1273
|
realtimeClaudeHandler.events.on('assistant_text', onText);
|
|
1274
|
+
realtimeClaudeHandler.events.on('query_request_id', onQueryRequestId);
|
|
1006
1275
|
const cleanupListeners = () => {
|
|
1007
1276
|
realtimeClaudeHandler?.events.off('tool_use', onToolUse);
|
|
1008
1277
|
realtimeClaudeHandler?.events.off('tool_result', onToolResult);
|
|
1009
1278
|
realtimeClaudeHandler?.events.off('assistant_text', onText);
|
|
1279
|
+
realtimeClaudeHandler?.events.off('query_request_id', onQueryRequestId);
|
|
1010
1280
|
};
|
|
1281
|
+
// Create AbortController for this research task — abort on disconnect/cleanup
|
|
1282
|
+
const researchAbortController = new AbortController();
|
|
1011
1283
|
// Track active research — updates drain when model enters 'listening' state
|
|
1012
|
-
|
|
1284
|
+
const thisResearch = {
|
|
1013
1285
|
researchLog,
|
|
1014
1286
|
pendingUpdates,
|
|
1015
1287
|
cleanup: cleanupListeners,
|
|
1016
1288
|
voiceUpdateCount: 0,
|
|
1289
|
+
abortController: researchAbortController,
|
|
1017
1290
|
};
|
|
1291
|
+
activeResearch = thisResearch;
|
|
1018
1292
|
// Start proactive conversational loop
|
|
1019
1293
|
const proactiveSid = currentLLM?.sessionId || resumeSessionId;
|
|
1020
1294
|
if (proactiveSid) {
|
|
1021
1295
|
startProactiveLoop(task, proactiveSid);
|
|
1022
1296
|
}
|
|
1023
1297
|
// Run research in the background (non-blocking)
|
|
1298
|
+
// Pass AbortController so research can be stopped on disconnect
|
|
1024
1299
|
const researchPromise = (async () => {
|
|
1025
1300
|
const stream = realtimeClaudeHandler.chat({
|
|
1026
1301
|
chatCtx: {
|
|
1027
1302
|
items: [{ type: 'message', role: 'user', content: [task] }],
|
|
1028
1303
|
},
|
|
1304
|
+
abortController: researchAbortController,
|
|
1029
1305
|
});
|
|
1030
1306
|
let result = '';
|
|
1031
1307
|
for await (const chunk of stream) {
|
|
@@ -1037,7 +1313,17 @@ async function main() {
|
|
|
1037
1313
|
})();
|
|
1038
1314
|
// Handle completion asynchronously
|
|
1039
1315
|
researchPromise.then(async (result) => {
|
|
1040
|
-
|
|
1316
|
+
// Check if aborted — empty result means clean abort, skip pipeline
|
|
1317
|
+
if (researchAbortController.signal.aborted || !result.trim()) {
|
|
1318
|
+
console.log(`🛑 [realtime] Research aborted or empty: ${task.substring(0, 60)}`);
|
|
1319
|
+
cleanupListeners();
|
|
1320
|
+
if (activeResearch === thisResearch) {
|
|
1321
|
+
activeResearch = null;
|
|
1322
|
+
}
|
|
1323
|
+
return;
|
|
1324
|
+
}
|
|
1325
|
+
const isStillCurrent = activeResearch === thisResearch;
|
|
1326
|
+
console.log(`✅ [realtime] Research complete (${result.length} chars${isStillCurrent ? '' : ', superseded by newer task'})`);
|
|
1041
1327
|
// Clean up
|
|
1042
1328
|
cleanupListeners();
|
|
1043
1329
|
// Send raw result to frontend as a log entry (not assistant_response — that's reserved
|
|
@@ -1047,72 +1333,74 @@ async function main() {
|
|
|
1047
1333
|
? result.substring(0, 150) + '...'
|
|
1048
1334
|
: result;
|
|
1049
1335
|
await sendToFrontend({ type: 'task_completed', task, resultPreview });
|
|
1050
|
-
//
|
|
1051
|
-
|
|
1052
|
-
|
|
1053
|
-
|
|
1054
|
-
|
|
1055
|
-
|
|
1056
|
-
|
|
1057
|
-
|
|
1058
|
-
if (researchBatchTimer) {
|
|
1059
|
-
clearTimeout(researchBatchTimer);
|
|
1060
|
-
researchBatchTimer = null;
|
|
1336
|
+
// Only modify global state if we're still the current research task.
|
|
1337
|
+
// If a newer task replaced us, don't clobber its timers/state.
|
|
1338
|
+
if (isStillCurrent) {
|
|
1339
|
+
if (researchBatchTimer) {
|
|
1340
|
+
clearTimeout(researchBatchTimer);
|
|
1341
|
+
researchBatchTimer = null;
|
|
1342
|
+
}
|
|
1343
|
+
stopProactiveLoop();
|
|
1061
1344
|
}
|
|
1062
|
-
|
|
1063
|
-
|
|
1064
|
-
|
|
1345
|
+
// Preserve research context for follow-up questions
|
|
1346
|
+
lastCompletedResearch = {
|
|
1347
|
+
task,
|
|
1348
|
+
researchLog: [...researchLog],
|
|
1349
|
+
completedAt: Date.now(),
|
|
1350
|
+
};
|
|
1351
|
+
// Only clear activeResearch if we're still the current task
|
|
1352
|
+
if (isStillCurrent) {
|
|
1353
|
+
activeResearch = null;
|
|
1354
|
+
}
|
|
1355
|
+
// Send research_task_complete to frontend for inline chat tracking
|
|
1065
1356
|
await sendToFrontend({
|
|
1066
|
-
type: '
|
|
1067
|
-
|
|
1068
|
-
|
|
1069
|
-
agentRole: 'research-progress',
|
|
1357
|
+
type: 'research_task_complete',
|
|
1358
|
+
task,
|
|
1359
|
+
summary: result.substring(0, 500),
|
|
1070
1360
|
});
|
|
1071
|
-
// Route through fast brain
|
|
1072
|
-
// Fast brain
|
|
1361
|
+
// Route through fast brain to generate a teleprompter script from the findings
|
|
1362
|
+
// Fast brain reads full JSONL and writes a spoken monologue
|
|
1073
1363
|
const voiceSid = currentLLM?.sessionId || resumeSessionId;
|
|
1074
|
-
|
|
1364
|
+
const chatHistory = getChatHistory(10);
|
|
1365
|
+
console.log(`📡 [realtime] Generating teleprompter script via fast brain (result: ${result.length} chars, agentState: ${agentState})`);
|
|
1366
|
+
// Create sendToChat for research completion to send structured data to frontend
|
|
1367
|
+
const completionSendToChat = (text) => {
|
|
1368
|
+
sendToFrontend({ type: 'assistant_response', text });
|
|
1369
|
+
};
|
|
1075
1370
|
if (voiceSid) {
|
|
1076
|
-
|
|
1077
|
-
.then(
|
|
1078
|
-
queueVoiceInjection(
|
|
1371
|
+
processResearchCompletion(workingDir, voiceSid, task, result, chatHistory, completionSendToChat, sessionBaseDir)
|
|
1372
|
+
.then(script => {
|
|
1373
|
+
queueVoiceInjection(getScriptInjection(script));
|
|
1079
1374
|
})
|
|
1080
1375
|
.catch(() => {
|
|
1081
|
-
// Fallback: use result directly if fast brain fails
|
|
1082
|
-
queueVoiceInjection(
|
|
1376
|
+
// Fallback: use truncated result directly if fast brain fails
|
|
1377
|
+
queueVoiceInjection(getScriptInjection(result.substring(0, 500)));
|
|
1083
1378
|
});
|
|
1084
1379
|
}
|
|
1085
1380
|
else {
|
|
1086
|
-
queueVoiceInjection(
|
|
1381
|
+
queueVoiceInjection(getScriptInjection(result.substring(0, 500)));
|
|
1087
1382
|
}
|
|
1088
|
-
// Inject FULL untruncated result into ChatCtx so voice model can answer
|
|
1089
|
-
// follow-up questions ("tell me more", "what were those links?") from memory
|
|
1090
|
-
injectIntoChatCtx(`[FULL RESEARCH DETAILS for "${task}"]\n${result}`);
|
|
1091
1383
|
// Fire-and-forget JSONL-based refinement pass via fast brain
|
|
1092
1384
|
// Reads FULL untruncated data from JSONL — no content buffer, no truncation
|
|
1093
1385
|
const postResearchSessionId = currentLLM?.sessionId || resumeSessionId;
|
|
1094
1386
|
if (postResearchSessionId) {
|
|
1095
|
-
updateSpecFromJSONL(workingDir, postResearchSessionId, task, researchLog)
|
|
1387
|
+
updateSpecFromJSONL(workingDir, postResearchSessionId, task, researchLog, sessionBaseDir)
|
|
1096
1388
|
.then(updateResult => {
|
|
1097
1389
|
if (!updateResult)
|
|
1098
1390
|
return;
|
|
1099
1391
|
// Notify frontend about spec.md update
|
|
1100
1392
|
if (updateResult.spec) {
|
|
1101
|
-
const specPath = `${
|
|
1393
|
+
const specPath = `${sessionBaseDir}/.osborn/sessions/${postResearchSessionId}/spec.md`;
|
|
1102
1394
|
sendToFrontend({
|
|
1103
1395
|
type: 'research_artifact_updated',
|
|
1104
1396
|
filePath: specPath,
|
|
1105
1397
|
fileName: 'spec.md',
|
|
1106
1398
|
});
|
|
1107
|
-
|
|
1108
|
-
if (truncated) {
|
|
1109
|
-
injectIntoChatCtx(`[UPDATED SESSION SPEC]\n${truncated}`);
|
|
1110
|
-
console.log(`📋 Re-injected spec.md into ChatCtx after fast brain update (${truncated.length} chars)`);
|
|
1111
|
-
}
|
|
1399
|
+
// Voice model is a teleprompter — fast brain reads spec directly, no ChatCtx injection needed
|
|
1112
1400
|
}
|
|
1113
1401
|
// Notify frontend about each library file written by the fast brain
|
|
1114
1402
|
for (const libFile of updateResult.libraryFiles) {
|
|
1115
|
-
const libPath = `${
|
|
1403
|
+
const libPath = `${sessionBaseDir}/.osborn/sessions/${postResearchSessionId}/library/${libFile}`;
|
|
1116
1404
|
sendToFrontend({
|
|
1117
1405
|
type: 'research_artifact_updated',
|
|
1118
1406
|
filePath: libPath,
|
|
@@ -1122,217 +1410,148 @@ async function main() {
|
|
|
1122
1410
|
});
|
|
1123
1411
|
}
|
|
1124
1412
|
}).catch(async (err) => {
|
|
1125
|
-
console.error(`❌ [realtime] Research failed:`, err);
|
|
1126
1413
|
// Clean up
|
|
1127
1414
|
cleanupListeners();
|
|
1128
|
-
|
|
1129
|
-
|
|
1130
|
-
researchBatchTimer
|
|
1415
|
+
const isStillCurrent = activeResearch === thisResearch;
|
|
1416
|
+
if (isStillCurrent) {
|
|
1417
|
+
if (researchBatchTimer) {
|
|
1418
|
+
clearTimeout(researchBatchTimer);
|
|
1419
|
+
researchBatchTimer = null;
|
|
1420
|
+
}
|
|
1421
|
+
stopProactiveLoop();
|
|
1422
|
+
activeResearch = null;
|
|
1131
1423
|
}
|
|
1132
|
-
|
|
1133
|
-
|
|
1424
|
+
// If aborted (user disconnected), log quietly
|
|
1425
|
+
if (researchAbortController.signal.aborted) {
|
|
1426
|
+
console.log(`🛑 [realtime] Research aborted: ${task.substring(0, 60)}`);
|
|
1427
|
+
return;
|
|
1428
|
+
}
|
|
1429
|
+
console.error(`❌ [realtime] Research failed:`, err);
|
|
1134
1430
|
// Queue error notification — will be spoken when model is available
|
|
1135
|
-
queueVoiceInjection(`
|
|
1431
|
+
queueVoiceInjection(getNotificationInjection(`Research encountered an error: ${err.message}. You could try asking again.`));
|
|
1136
1432
|
});
|
|
1137
1433
|
// Return immediately to unblock the voice model
|
|
1138
1434
|
return 'Research started. I\'ll relay findings as they come in — you can keep talking to the user while I work.';
|
|
1139
1435
|
}
|
|
1140
1436
|
// Create tools for the realtime voice LLM
|
|
1141
|
-
|
|
1142
|
-
|
|
1143
|
-
|
|
1144
|
-
|
|
1145
|
-
|
|
1146
|
-
- Fetching and analyzing web pages, articles, blog posts, YouTube transcripts
|
|
1147
|
-
- Reading and summarizing documentation, papers, or reference materials
|
|
1148
|
-
- Exploring and analyzing codebases, configs, architecture
|
|
1149
|
-
- Comparing options, tools, approaches — with tradeoffs and recommendations
|
|
1150
|
-
- Running bash commands, testing implementations
|
|
1151
|
-
- Using MCP tools (GitHub, YouTube, and other external tools)
|
|
1152
|
-
- Saving findings to the session library and updating the spec
|
|
1153
|
-
- Any question requiring research, analysis, verification, or deeper reasoning
|
|
1154
|
-
|
|
1155
|
-
Reformulate the user's spoken request into a clear, specific task.
|
|
1156
|
-
The more context you include (topic, constraints, what they want to learn), the better the results.
|
|
1157
|
-
If the user wants specific details (examples, URLs, comparisons, step-by-step breakdown), mention that in your request.`,
|
|
1158
|
-
parameters: z.object({
|
|
1159
|
-
request: z.string().describe('The task or question to delegate to the agent'),
|
|
1160
|
-
}),
|
|
1161
|
-
execute: async ({ request: task }) => {
|
|
1162
|
-
console.log(`\n🔨 [realtime] Task: "${task}"`);
|
|
1163
|
-
// Guard: if ask_haiku is currently handling a similar question, skip ask_agent
|
|
1164
|
-
// This prevents the double-calling pattern where Gemini fires both in rapid succession
|
|
1165
|
-
if (haikuInFlight && (Date.now() - haikuInFlight.time) < 8000) {
|
|
1166
|
-
console.log(`⏭️ Skipping ask_agent — ask_haiku is already handling: "${haikuInFlight.question.substring(0, 60)}"`);
|
|
1167
|
-
return 'The fast brain is already looking into this. Wait for its answer first.';
|
|
1168
|
-
}
|
|
1169
|
-
// Deduplication guard: prevent re-execution of same task within 10s
|
|
1170
|
-
const now = Date.now();
|
|
1171
|
-
if (task === lastTaskRequest && (now - lastTaskTime) < 10000) {
|
|
1172
|
-
console.log('⏭️ Skipping duplicate task (within 10s window)');
|
|
1173
|
-
return 'This task was just completed. The results were already relayed.';
|
|
1174
|
-
}
|
|
1175
|
-
lastTaskRequest = task;
|
|
1176
|
-
lastTaskTime = now;
|
|
1177
|
-
return executeResearch(task);
|
|
1178
|
-
},
|
|
1179
|
-
});
|
|
1180
|
-
const respondPermissionTool = llm.tool({
|
|
1181
|
-
description: `Respond to a permission request. Call after hearing user's response.`,
|
|
1182
|
-
parameters: z.object({
|
|
1183
|
-
response: z.enum(['allow', 'deny', 'always_allow']),
|
|
1184
|
-
}),
|
|
1185
|
-
execute: async ({ response }) => {
|
|
1186
|
-
if (!realtimeClaudeHandler?.hasPendingPermission()) {
|
|
1187
|
-
return 'No pending permission.';
|
|
1188
|
-
}
|
|
1189
|
-
const pending = realtimeClaudeHandler.getPendingPermission();
|
|
1190
|
-
const allow = response === 'allow' || response === 'always_allow';
|
|
1191
|
-
realtimeClaudeHandler.respondToPermission(allow);
|
|
1192
|
-
await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
|
|
1193
|
-
return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
|
|
1194
|
-
},
|
|
1195
|
-
});
|
|
1196
|
-
const readSpecTool = llm.tool({
|
|
1197
|
-
description: `Read the session spec (spec.md) — shared state between you and your backend agent.
|
|
1198
|
-
Use when: checking decisions, reading open questions to ask the user, understanding architecture/context, seeing what research has been saved. Updated by your backend agent during research.`,
|
|
1199
|
-
parameters: z.object({}),
|
|
1200
|
-
execute: async () => {
|
|
1201
|
-
const sessionId = currentLLM?.sessionId || resumeSessionId;
|
|
1202
|
-
if (!sessionId)
|
|
1203
|
-
return 'No session spec yet — session is still initializing.';
|
|
1204
|
-
const specContent = readSessionSpec(workingDir, sessionId);
|
|
1205
|
-
if (!specContent)
|
|
1206
|
-
return 'Spec is empty — no research done yet.';
|
|
1207
|
-
const libraryFiles = listLibraryFiles(workingDir, sessionId);
|
|
1208
|
-
const libSection = libraryFiles.length > 0
|
|
1209
|
-
? `\n\n[LIBRARY FILES: ${libraryFiles.join(', ')}]`
|
|
1210
|
-
: '';
|
|
1211
|
-
const MAX = 4000;
|
|
1212
|
-
const content = specContent.length > MAX
|
|
1213
|
-
? specContent.substring(0, MAX) + '\n\n[... truncated]'
|
|
1214
|
-
: specContent;
|
|
1215
|
-
return content + libSection;
|
|
1216
|
-
},
|
|
1217
|
-
});
|
|
1218
|
-
const askHaikuTool = llm.tool({
|
|
1219
|
-
description: `Ask your fast brain — a quick knowledge assistant with access to session files and web search (~2 seconds).
|
|
1220
|
-
|
|
1221
|
-
Use for:
|
|
1222
|
-
- Questions answerable from the session spec or research library (much faster than ask_agent)
|
|
1223
|
-
- Quick web lookups for simple factual questions (definitions, current versions, basic how-to)
|
|
1224
|
-
- Recording user decisions: "User decided: [decision]. Update the spec."
|
|
1225
|
-
- Recording user preferences: "User prefers: [preference]. Update the spec."
|
|
1226
|
-
- Checking what research has been done on a topic
|
|
1227
|
-
- Reading specific library files for details
|
|
1228
|
-
|
|
1229
|
-
Do NOT use for: deep research, code analysis, multi-file codebase exploration, complex investigations → use ask_agent.
|
|
1230
|
-
If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to look deeper, then call ask_agent with the context it provides.`,
|
|
1437
|
+
// The realtime model is a thin teleprompter — only 2 tools:
|
|
1438
|
+
// 1. ask_fast_brain: ALL user questions route here (the fast brain decides everything)
|
|
1439
|
+
// 2. respond_permission: voice permission flow for Claude SDK blocked operations
|
|
1440
|
+
const askFastBrainTool = llm.tool({
|
|
1441
|
+
description: `Ask your brain. Call this for EVERY user message — greetings, questions, decisions, requests, everything. No exceptions. Returns what you should say.`,
|
|
1231
1442
|
parameters: z.object({
|
|
1232
|
-
question: z.string().describe('The question
|
|
1443
|
+
question: z.string().describe('The user\'s question or statement'),
|
|
1233
1444
|
}),
|
|
1234
1445
|
execute: async ({ question }) => {
|
|
1235
|
-
|
|
1236
|
-
|
|
1237
|
-
|
|
1446
|
+
// INJECTION BYPASS: When Gemini receives a system injection via generateReply(),
|
|
1447
|
+
// it calls ask_fast_brain with the injection content (Gemini always calls tools).
|
|
1448
|
+
// For Gemini: this is the INTENDED path — we deliberately don't set toolChoice:'none'
|
|
1449
|
+
// so the tool call goes through and we return the content as a tool response.
|
|
1450
|
+
// For OpenAI: this is a fallback guard — OpenAI normally speaks instructions directly
|
|
1451
|
+
// with toolChoice:'none', but if it somehow calls the tool, we handle it here.
|
|
1452
|
+
const injectionMatch = question.match(/\[(SCRIPT|PROACTIVE|NOTIFICATION)\]\s*([\s\S]*)/);
|
|
1453
|
+
if (injectionMatch) {
|
|
1454
|
+
const content = injectionMatch[2].trim();
|
|
1455
|
+
console.log(`⚡ [fast brain] BYPASS: injection [${injectionMatch[1]}] → returning content directly (${content.length} chars)`);
|
|
1456
|
+
return content || question;
|
|
1457
|
+
}
|
|
1458
|
+
// Use pending sessionId for fresh sessions where SDK hasn't assigned one yet
|
|
1459
|
+
const sessionId = currentLLM?.sessionId || currentResumeSessionId || resumeSessionId || 'pending';
|
|
1238
1460
|
console.log(`🧠 [fast brain] Question: "${question.substring(0, 80)}..."`);
|
|
1239
|
-
// Track in-flight state
|
|
1461
|
+
// Track in-flight state
|
|
1240
1462
|
haikuInFlight = { question, time: Date.now() };
|
|
1241
|
-
// Build
|
|
1242
|
-
// This is a READ of the existing researchLog array — safe, no race conditions
|
|
1463
|
+
// Build research context — from active research or last completed research
|
|
1243
1464
|
let researchContext;
|
|
1244
1465
|
if (activeResearch && activeResearch.researchLog.length > 0) {
|
|
1245
1466
|
const recentLog = activeResearch.researchLog.slice(-15);
|
|
1246
1467
|
researchContext = `Research topic: "${lastTaskRequest || 'unknown'}"\nSteps completed (${activeResearch.researchLog.length} total, showing last ${recentLog.length}):\n${recentLog.join('\n')}`;
|
|
1247
1468
|
}
|
|
1469
|
+
else if (lastCompletedResearch && (Date.now() - lastCompletedResearch.completedAt) < 600000) {
|
|
1470
|
+
// Include context from last completed research (within 10 minutes)
|
|
1471
|
+
const recentLog = lastCompletedResearch.researchLog.slice(-15);
|
|
1472
|
+
researchContext = `[COMPLETED RESEARCH] Topic: "${lastCompletedResearch.task}"\nSteps completed (${lastCompletedResearch.researchLog.length} total, showing last ${recentLog.length}):\n${recentLog.join('\n')}\n\n(Research completed — results are in JSONL and spec.md. Answer from those, do NOT trigger new research on this topic.)`;
|
|
1473
|
+
}
|
|
1474
|
+
const callbacks = {
|
|
1475
|
+
triggerResearch: (task) => {
|
|
1476
|
+
// Deduplication guard
|
|
1477
|
+
const now = Date.now();
|
|
1478
|
+
if (task === lastTaskRequest && (now - lastTaskTime) < 10000) {
|
|
1479
|
+
console.log('⏭️ Skipping duplicate research task (within 10s window)');
|
|
1480
|
+
return;
|
|
1481
|
+
}
|
|
1482
|
+
lastTaskRequest = task;
|
|
1483
|
+
lastTaskTime = now;
|
|
1484
|
+
executeResearch(task);
|
|
1485
|
+
},
|
|
1486
|
+
queueVoice: (script) => {
|
|
1487
|
+
queueVoiceInjection(getScriptInjection(script));
|
|
1488
|
+
},
|
|
1489
|
+
sendToFrontend: (data) => {
|
|
1490
|
+
sendToFrontend(data);
|
|
1491
|
+
},
|
|
1492
|
+
};
|
|
1248
1493
|
try {
|
|
1249
1494
|
const chatHistory = getChatHistory(20);
|
|
1250
|
-
const
|
|
1251
|
-
|
|
1252
|
-
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1256
|
-
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1260
|
-
fileName: 'spec.md',
|
|
1261
|
-
});
|
|
1495
|
+
const result = await askFastBrain(workingDir, sessionId, question, {
|
|
1496
|
+
chatHistory,
|
|
1497
|
+
researchContext,
|
|
1498
|
+
callbacks,
|
|
1499
|
+
sessionBaseDir,
|
|
1500
|
+
});
|
|
1501
|
+
haikuInFlight = null;
|
|
1502
|
+
// Voice queue items may have been held while fast brain was in flight — retry now
|
|
1503
|
+
if (voiceQueue.length > 0) {
|
|
1504
|
+
setTimeout(() => processVoiceQueue(), 500);
|
|
1262
1505
|
}
|
|
1263
|
-
|
|
1264
|
-
//
|
|
1265
|
-
//
|
|
1266
|
-
if (activeResearch && (question.toLowerCase().includes('
|
|
1267
|
-
question.toLowerCase().includes('
|
|
1268
|
-
question.toLowerCase().includes('update the spec') ||
|
|
1269
|
-
question.toLowerCase().includes('also check') ||
|
|
1506
|
+
console.log(`🧠 [fast brain] Response type: ${result.type}, script: ${result.script.length} chars`);
|
|
1507
|
+
// If this was a user direction during active research,
|
|
1508
|
+
// pass it to the agent SDK so it picks up the context
|
|
1509
|
+
if (activeResearch && result.type === 'recorded' && (question.toLowerCase().includes('decided') ||
|
|
1510
|
+
question.toLowerCase().includes('prefers') ||
|
|
1270
1511
|
question.toLowerCase().includes('focus on') ||
|
|
1271
1512
|
question.toLowerCase().includes('redirect'))) {
|
|
1272
|
-
console.log(`📨 [fast brain] Passing user direction to agent SDK queue
|
|
1273
|
-
|
|
1274
|
-
// at the start of its next query and will see the updated direction
|
|
1275
|
-
executeResearch(`[USER DIRECTION during active research] ${question}. The user's spec.md has been updated with this. Acknowledge briefly and incorporate into your current research context.`);
|
|
1513
|
+
console.log(`📨 [fast brain] Passing user direction to agent SDK queue`);
|
|
1514
|
+
executeResearch(`[USER DIRECTION during active research] ${question}. The user's spec.md has been updated. Acknowledge briefly and incorporate.`);
|
|
1276
1515
|
}
|
|
1277
|
-
return
|
|
1516
|
+
return result.script;
|
|
1278
1517
|
}
|
|
1279
1518
|
catch (err) {
|
|
1280
|
-
haikuInFlight = null;
|
|
1519
|
+
haikuInFlight = null;
|
|
1520
|
+
// Voice queue items may have been held while fast brain was in flight — retry now
|
|
1521
|
+
if (voiceQueue.length > 0) {
|
|
1522
|
+
setTimeout(() => processVoiceQueue(), 500);
|
|
1523
|
+
}
|
|
1281
1524
|
console.error('❌ Fast brain failed:', err);
|
|
1282
|
-
return '
|
|
1525
|
+
return 'I\'m having trouble processing that. Could you try again?';
|
|
1283
1526
|
}
|
|
1284
1527
|
},
|
|
1285
1528
|
});
|
|
1286
|
-
const
|
|
1287
|
-
description: `
|
|
1288
|
-
|
|
1289
|
-
Use when the user asks for:
|
|
1290
|
-
- "Compare X and Y" → type: 'comparison' (markdown table with features, pros, cons)
|
|
1291
|
-
- "Draw a diagram" / "Show the architecture" / "Map out the flow" → type: 'diagram' (Mermaid flowchart/sequence/architecture)
|
|
1292
|
-
- "Analyze the tradeoffs" / "Break down the options" → type: 'analysis' (structured pros/cons, decision matrix)
|
|
1293
|
-
- "Summarize what we found" / "Give me an overview document" → type: 'summary' (organized findings with key takeaways)
|
|
1294
|
-
|
|
1295
|
-
For actual images (photos, illustrations, screenshots), use ask_agent instead — this tool generates text-based visual documents only.`,
|
|
1529
|
+
const respondPermissionTool = llm.tool({
|
|
1530
|
+
description: `Respond to a permission request. Call after hearing user's response.`,
|
|
1296
1531
|
parameters: z.object({
|
|
1297
|
-
|
|
1298
|
-
type: z.enum(['comparison', 'diagram', 'analysis', 'summary']).describe('Document type'),
|
|
1532
|
+
response: z.enum(['allow', 'deny', 'always_allow']),
|
|
1299
1533
|
}),
|
|
1300
|
-
execute: async ({
|
|
1301
|
-
|
|
1302
|
-
|
|
1303
|
-
return 'Session not ready yet.';
|
|
1304
|
-
console.log(`📊 [generate_document] Type: ${type}, Request: "${request.substring(0, 60)}..."`);
|
|
1305
|
-
try {
|
|
1306
|
-
const result = await generateVisualDocument(workingDir, sid, request, type);
|
|
1307
|
-
if (!result)
|
|
1308
|
-
return 'Could not generate document — not enough research context available.';
|
|
1309
|
-
const fullPath = `${workingDir}/.osborn/sessions/${sid}/library/${result.fileName}`;
|
|
1310
|
-
sendToFrontend({
|
|
1311
|
-
type: 'research_artifact_updated',
|
|
1312
|
-
filePath: fullPath,
|
|
1313
|
-
fileName: result.fileName,
|
|
1314
|
-
});
|
|
1315
|
-
return `Generated: ${result.fileName} (${result.content.length} chars) — saved to session library. The document contains a ${type} with the requested information.`;
|
|
1316
|
-
}
|
|
1317
|
-
catch (err) {
|
|
1318
|
-
console.error('❌ Document generation failed:', err);
|
|
1319
|
-
return 'Document generation failed. Try asking the research agent for a more detailed analysis.';
|
|
1534
|
+
execute: async ({ response }) => {
|
|
1535
|
+
if (!realtimeClaudeHandler?.hasPendingPermission()) {
|
|
1536
|
+
return 'No pending permission.';
|
|
1320
1537
|
}
|
|
1538
|
+
const pending = realtimeClaudeHandler.getPendingPermission();
|
|
1539
|
+
const allow = response === 'allow' || response === 'always_allow';
|
|
1540
|
+
realtimeClaudeHandler.respondToPermission(allow);
|
|
1541
|
+
await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
|
|
1542
|
+
return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
|
|
1321
1543
|
},
|
|
1322
1544
|
});
|
|
1323
1545
|
// Instructions for realtime voice LLM
|
|
1324
1546
|
const realtimeInstructions = getRealtimeInstructions(workingDir);
|
|
1325
1547
|
// Create realtime model
|
|
1326
1548
|
const realtimeModel = createRealtimeModelFromConfig(rtConfig, realtimeInstructions);
|
|
1327
|
-
// Create the Agent with
|
|
1549
|
+
// Create the Agent with MINIMAL tools — fast brain handles all routing
|
|
1328
1550
|
const agent = new voice.Agent({
|
|
1329
1551
|
instructions: realtimeInstructions,
|
|
1330
1552
|
llm: realtimeModel,
|
|
1331
1553
|
tools: {
|
|
1332
|
-
|
|
1333
|
-
ask_haiku: askHaikuTool,
|
|
1334
|
-
read_spec: readSpecTool,
|
|
1335
|
-
generate_document: generateDocumentTool,
|
|
1554
|
+
ask_fast_brain: askFastBrainTool,
|
|
1336
1555
|
respond_permission: respondPermissionTool,
|
|
1337
1556
|
},
|
|
1338
1557
|
});
|
|
@@ -1352,35 +1571,51 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1352
1571
|
// Clean up active research and voice queue
|
|
1353
1572
|
voiceQueue.length = 0;
|
|
1354
1573
|
isProcessingQueue = false;
|
|
1574
|
+
currentSpeechHandle = null;
|
|
1575
|
+
lastInterruption = null;
|
|
1355
1576
|
if (researchBatchTimer) {
|
|
1356
1577
|
clearTimeout(researchBatchTimer);
|
|
1357
1578
|
researchBatchTimer = null;
|
|
1358
1579
|
}
|
|
1359
1580
|
stopProactiveLoop();
|
|
1360
1581
|
if (activeResearch) {
|
|
1582
|
+
activeResearch.abortController.abort();
|
|
1361
1583
|
activeResearch.cleanup();
|
|
1362
1584
|
activeResearch = null;
|
|
1363
1585
|
}
|
|
1586
|
+
lastCompletedResearch = null;
|
|
1364
1587
|
currentSession = null;
|
|
1365
1588
|
currentAgent = null;
|
|
1366
1589
|
currentLLM = null;
|
|
1367
|
-
|
|
1590
|
+
clearFastBrainSession();
|
|
1591
|
+
clearPipelineFastBrainSession();
|
|
1368
1592
|
});
|
|
1369
1593
|
room.on(RoomEvent.ParticipantConnected, async (participant) => {
|
|
1370
1594
|
console.log(`\n👤 User joined: ${participant.identity}`);
|
|
1595
|
+
// Wait for previous session's byte stream handler to fully deregister.
|
|
1596
|
+
// Quick reconnects (< ~6s) crash with "byte stream handler already set" without this.
|
|
1597
|
+
if (pendingSessionClose) {
|
|
1598
|
+
console.log('⏳ Waiting for previous session to fully close...');
|
|
1599
|
+
await pendingSessionClose;
|
|
1600
|
+
}
|
|
1371
1601
|
// Clean up any existing session before creating a new one
|
|
1372
1602
|
voiceQueue.length = 0;
|
|
1373
1603
|
isProcessingQueue = false;
|
|
1604
|
+
currentSpeechHandle = null;
|
|
1605
|
+
lastInterruption = null;
|
|
1374
1606
|
if (researchBatchTimer) {
|
|
1375
1607
|
clearTimeout(researchBatchTimer);
|
|
1376
1608
|
researchBatchTimer = null;
|
|
1377
1609
|
}
|
|
1378
1610
|
stopProactiveLoop();
|
|
1379
|
-
|
|
1611
|
+
clearFastBrainSession();
|
|
1612
|
+
clearPipelineFastBrainSession();
|
|
1380
1613
|
if (activeResearch) {
|
|
1614
|
+
activeResearch.abortController.abort();
|
|
1381
1615
|
activeResearch.cleanup();
|
|
1382
1616
|
activeResearch = null;
|
|
1383
1617
|
}
|
|
1618
|
+
lastCompletedResearch = null;
|
|
1384
1619
|
if (currentSession) {
|
|
1385
1620
|
console.log('🧹 Cleaning up previous session...');
|
|
1386
1621
|
try {
|
|
@@ -1403,7 +1638,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1403
1638
|
try {
|
|
1404
1639
|
const metadata = JSON.parse(participant.metadata || '{}');
|
|
1405
1640
|
console.log(`📋 Participant metadata:`, metadata);
|
|
1406
|
-
if (metadata.voiceArch === 'realtime' || metadata.voiceArch === 'direct') {
|
|
1641
|
+
if (metadata.voiceArch === 'realtime' || metadata.voiceArch === 'direct' || metadata.voiceArch === 'pipeline') {
|
|
1407
1642
|
sessionVoiceMode = metadata.voiceArch;
|
|
1408
1643
|
console.log(`🎙️ Using voice mode from frontend: ${sessionVoiceMode}`);
|
|
1409
1644
|
}
|
|
@@ -1420,6 +1655,15 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1420
1655
|
preSelectedSessionId = metadata.sessionId;
|
|
1421
1656
|
console.log(`📂 Pre-selected session from frontend: ${preSelectedSessionId}`);
|
|
1422
1657
|
}
|
|
1658
|
+
// Read working directory override from frontend
|
|
1659
|
+
if (metadata.workingDirectory && typeof metadata.workingDirectory === 'string' && metadata.workingDirectory.length > 0) {
|
|
1660
|
+
workingDir = metadata.workingDirectory;
|
|
1661
|
+
console.log(`📂 Working directory from frontend: ${workingDir}`);
|
|
1662
|
+
}
|
|
1663
|
+
else {
|
|
1664
|
+
// Reset to default for new connections (in case previous session changed it)
|
|
1665
|
+
workingDir = defaultWorkingDir;
|
|
1666
|
+
}
|
|
1423
1667
|
}
|
|
1424
1668
|
catch (err) {
|
|
1425
1669
|
console.log('⚠️ Could not parse participant metadata, using config voiceMode:', voiceMode);
|
|
@@ -1429,12 +1673,33 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1429
1673
|
currentProvider = sessionRealtimeProvider;
|
|
1430
1674
|
// Resume session ID — only set when resuming an existing session
|
|
1431
1675
|
const resumeSessionId = preSelectedSessionId || undefined;
|
|
1676
|
+
currentResumeSessionId = resumeSessionId;
|
|
1432
1677
|
if (resumeSessionId) {
|
|
1433
1678
|
console.log(`🆔 Resuming session: ${resumeSessionId}`);
|
|
1434
1679
|
}
|
|
1435
1680
|
else {
|
|
1436
1681
|
console.log(`🆔 New session (ID assigned by SDK)`);
|
|
1437
1682
|
}
|
|
1683
|
+
// Ensure Claude is authenticated before creating voice session
|
|
1684
|
+
// In cloud deployments (Fly.io), this triggers OAuth flow on first boot:
|
|
1685
|
+
// captures login URL → sends to frontend → user clicks → gets code → pastes in frontend → auth completes
|
|
1686
|
+
try {
|
|
1687
|
+
const authResult = await ensureClaudeAuth((type, payload) => {
|
|
1688
|
+
sendToFrontend({ type, ...payload });
|
|
1689
|
+
});
|
|
1690
|
+
// If auth flow is running, store the submitCode handler for the DataReceived handler
|
|
1691
|
+
if (authResult.submitCode && authResult.done) {
|
|
1692
|
+
pendingAuthSubmitCode = authResult.submitCode;
|
|
1693
|
+
await authResult.done;
|
|
1694
|
+
pendingAuthSubmitCode = null;
|
|
1695
|
+
}
|
|
1696
|
+
}
|
|
1697
|
+
catch (err) {
|
|
1698
|
+
console.error('❌ Claude authentication failed:', err?.message);
|
|
1699
|
+
sendToFrontend({ type: 'claude_auth_error', message: err?.message || 'Authentication failed' });
|
|
1700
|
+
pendingAuthSubmitCode = null;
|
|
1701
|
+
// Continue anyway — the agent SDK will use ANTHROPIC_API_KEY if available
|
|
1702
|
+
}
|
|
1438
1703
|
// Create session based on voice mode (from frontend or config)
|
|
1439
1704
|
let session;
|
|
1440
1705
|
let agent;
|
|
@@ -1446,6 +1711,46 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1446
1711
|
session = result.session;
|
|
1447
1712
|
agent = result.agent;
|
|
1448
1713
|
}
|
|
1714
|
+
else if (sessionVoiceMode === 'pipeline') {
|
|
1715
|
+
console.log(`🎯 PIPELINE MODE: Claude SDK + parallel Gemini fast brain observer`);
|
|
1716
|
+
// Pipeline mode = direct mode underneath + parallel fast brain
|
|
1717
|
+
// Fast brain runs in PipelineDirectLLM.chat() — fires Gemini alongside Claude
|
|
1718
|
+
const { createPipelineDirectLLM } = await import('./pipeline-direct-llm.js');
|
|
1719
|
+
const pipelineLLM = createPipelineDirectLLM({
|
|
1720
|
+
workingDirectory: workingDir,
|
|
1721
|
+
sessionBaseDir,
|
|
1722
|
+
mcpServers,
|
|
1723
|
+
resumeSessionId,
|
|
1724
|
+
voiceMode: 'direct',
|
|
1725
|
+
skipTTSQueue: true,
|
|
1726
|
+
getChatHistory: () => getChatHistory(20).map(t => ({ role: t.role, content: t.text })),
|
|
1727
|
+
getResearchContext: () => {
|
|
1728
|
+
if (activeResearch?.researchLog.length) {
|
|
1729
|
+
return `Research: "${lastTaskRequest}"\n${activeResearch.researchLog.slice(-15).join('\n')}`;
|
|
1730
|
+
}
|
|
1731
|
+
if (lastCompletedResearch && Date.now() - lastCompletedResearch.completedAt < 600000) {
|
|
1732
|
+
return `[COMPLETED] "${lastCompletedResearch.task}"\n${lastCompletedResearch.researchLog.slice(-15).join('\n')}`;
|
|
1733
|
+
}
|
|
1734
|
+
},
|
|
1735
|
+
getAndConsumeInterruptionContext,
|
|
1736
|
+
onFastBrainResult: (result) => {
|
|
1737
|
+
console.log(`🧠⚡ [FAST_BRAIN ${result.type.toUpperCase()} +${result.elapsedMs}ms]: "${result.answer.substring(0, 60)}"`);
|
|
1738
|
+
sendToFrontend({
|
|
1739
|
+
type: 'fast_brain_response',
|
|
1740
|
+
text: result.answer,
|
|
1741
|
+
responseType: result.type,
|
|
1742
|
+
elapsedMs: result.elapsedMs,
|
|
1743
|
+
question: result.question,
|
|
1744
|
+
toolsUsed: result.toolsUsed,
|
|
1745
|
+
agentRole: 'pipeline-fast-brain',
|
|
1746
|
+
});
|
|
1747
|
+
},
|
|
1748
|
+
});
|
|
1749
|
+
// Pass pipelineLLM to createDirectSession so it uses it instead of creating a new ClaudeLLM
|
|
1750
|
+
const result = await createDirectSession(resumeSessionId, pipelineLLM);
|
|
1751
|
+
session = result.session;
|
|
1752
|
+
agent = result.agent;
|
|
1753
|
+
}
|
|
1449
1754
|
else {
|
|
1450
1755
|
console.log(`🎯 DIRECT MODE: Claude Agent SDK with full coding capabilities`);
|
|
1451
1756
|
const result = await createDirectSession(resumeSessionId);
|
|
@@ -1458,7 +1763,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1458
1763
|
// Session event wiring — extracted into function for auto-recovery
|
|
1459
1764
|
// ============================================================
|
|
1460
1765
|
let lastRecoveryTime = 0;
|
|
1461
|
-
const MIN_RECOVERY_INTERVAL =
|
|
1766
|
+
const MIN_RECOVERY_INTERVAL = 3000; // 3 seconds between recovery attempts
|
|
1462
1767
|
function wireSessionEvents(sess, agt) {
|
|
1463
1768
|
// Transcript dedup state (reset per wiring)
|
|
1464
1769
|
let lastSentUserTranscript = '';
|
|
@@ -1471,6 +1776,10 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1471
1776
|
return;
|
|
1472
1777
|
if (normalized === '<noise>' || normalized.toLowerCase() === 'thank you')
|
|
1473
1778
|
return;
|
|
1779
|
+
// Filter out voice injection content that appears as user transcript
|
|
1780
|
+
// (Gemini v1.0.51: userInput in generateReply creates a user conversation item)
|
|
1781
|
+
if (normalized.startsWith('[SCRIPT]') || normalized.startsWith('[PROACTIVE]') || normalized.startsWith('[NOTIFICATION]'))
|
|
1782
|
+
return;
|
|
1474
1783
|
console.log(`📝 User (${source}): "${transcript.substring(0, 60)}..."`);
|
|
1475
1784
|
sendToFrontend({ type: 'user_transcript', text: transcript });
|
|
1476
1785
|
lastSentUserTranscript = normalized;
|
|
@@ -1527,6 +1836,10 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1527
1836
|
sess.on('user_state_changed', (ev) => {
|
|
1528
1837
|
userState = ev.newState;
|
|
1529
1838
|
console.log(`👤 User state: ${ev.newState}`);
|
|
1839
|
+
// When user stops speaking, retry voice queue — items may be waiting
|
|
1840
|
+
if (ev.newState === 'listening' && voiceQueue.length > 0) {
|
|
1841
|
+
setTimeout(() => processVoiceQueue(), 500);
|
|
1842
|
+
}
|
|
1530
1843
|
});
|
|
1531
1844
|
// FALLBACK: playout_completed
|
|
1532
1845
|
sess.on('playout_completed', (ev) => {
|
|
@@ -1543,13 +1856,153 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1543
1856
|
console.log('⚠️ OpenAI active response collision — queue will retry on next listening state');
|
|
1544
1857
|
return;
|
|
1545
1858
|
}
|
|
1859
|
+
// TTS abort from user interruption is normal — not an error
|
|
1860
|
+
if (msg.includes('Request was aborted') || msg.includes('APIUserAbortError') || msg.includes('aborted')) {
|
|
1861
|
+
console.log('⚠️ LLM request aborted (user interrupted)');
|
|
1862
|
+
return;
|
|
1863
|
+
}
|
|
1546
1864
|
console.error('❌ Session error:', ev.error);
|
|
1547
1865
|
});
|
|
1548
|
-
//
|
|
1866
|
+
// Capture voice mode at session creation — prevents state confusion
|
|
1867
|
+
// if currentVoiceMode changes between session start and crash recovery
|
|
1868
|
+
const sessionVoiceMode = currentVoiceMode;
|
|
1869
|
+
// Close handler with auto-recovery for crashes (both realtime and direct modes)
|
|
1549
1870
|
sess.on('close', async (ev) => {
|
|
1550
1871
|
console.log('🚪 Session closed:', ev.reason);
|
|
1872
|
+
// TTS abort from user interruption — SDK already killed the session internally,
|
|
1873
|
+
// so we MUST recover (can't just reset state — STT pipeline is dead).
|
|
1874
|
+
// Log it distinctly so we know it's an interrupt recovery, not a real crash.
|
|
1875
|
+
const errorMsg = ev.error?.message || ev.error?.error?.message || '';
|
|
1876
|
+
const isTTSAbort = errorMsg.includes('aborted') || errorMsg.includes('APIUserAbortError');
|
|
1877
|
+
if (isTTSAbort) {
|
|
1878
|
+
console.log('⚠️ TTS abort from user interruption — recovering session (SDK killed it internally)');
|
|
1879
|
+
}
|
|
1880
|
+
// Auto-recover from crashes in direct/pipeline mode (includes TTS abort)
|
|
1881
|
+
if ((ev.reason === 'error' || ev.reason === 'disconnected') && (sessionVoiceMode === 'direct' || sessionVoiceMode === 'pipeline')) {
|
|
1882
|
+
const now = Date.now();
|
|
1883
|
+
if (now - lastRecoveryTime < MIN_RECOVERY_INTERVAL) {
|
|
1884
|
+
console.log(`⚠️ Recovery too frequent — scheduling retry in ${MIN_RECOVERY_INTERVAL}ms`);
|
|
1885
|
+
setTimeout(async () => {
|
|
1886
|
+
// Re-check: if session was already recovered or user left, skip
|
|
1887
|
+
if (currentSession || !room.remoteParticipants.size)
|
|
1888
|
+
return;
|
|
1889
|
+
console.log('🔄 Retrying direct mode recovery after guard interval...');
|
|
1890
|
+
// Trigger recovery by emitting a synthetic close
|
|
1891
|
+
sess.emit('close', { reason: 'error' });
|
|
1892
|
+
}, MIN_RECOVERY_INTERVAL);
|
|
1893
|
+
return;
|
|
1894
|
+
}
|
|
1895
|
+
lastRecoveryTime = now;
|
|
1896
|
+
console.log(`🔄 Auto-recovering direct mode session (reason: ${ev.reason})...`);
|
|
1897
|
+
// Clean up dead session — match realtime recovery's thoroughness
|
|
1898
|
+
try {
|
|
1899
|
+
sess.removeAllListeners();
|
|
1900
|
+
}
|
|
1901
|
+
catch { }
|
|
1902
|
+
currentSession = null;
|
|
1903
|
+
currentAgent = null;
|
|
1904
|
+
// Clear stale state from crashed session
|
|
1905
|
+
voiceQueue.length = 0;
|
|
1906
|
+
isProcessingQueue = false;
|
|
1907
|
+
haikuInFlight = null;
|
|
1908
|
+
if (researchBatchTimer) {
|
|
1909
|
+
clearTimeout(researchBatchTimer);
|
|
1910
|
+
researchBatchTimer = null;
|
|
1911
|
+
}
|
|
1912
|
+
stopProactiveLoop();
|
|
1913
|
+
if (activeResearch) {
|
|
1914
|
+
activeResearch.abortController.abort();
|
|
1915
|
+
activeResearch.cleanup();
|
|
1916
|
+
activeResearch = null;
|
|
1917
|
+
}
|
|
1918
|
+
try {
|
|
1919
|
+
// Reuse existing session ID so Claude SDK resumes where it left off
|
|
1920
|
+
const recoverySessionId = currentLLM?.sessionId || resumeSessionId;
|
|
1921
|
+
// Stop old index watcher if it exists
|
|
1922
|
+
if (currentLLM && 'stopIndexWatcher' in currentLLM) {
|
|
1923
|
+
currentLLM.stopIndexWatcher();
|
|
1924
|
+
}
|
|
1925
|
+
let result;
|
|
1926
|
+
if (sessionVoiceMode === 'pipeline') {
|
|
1927
|
+
// Pipeline mode: recreate PipelineDirectLLM wrapper with fast brain
|
|
1928
|
+
console.log('🔄 Rebuilding pipeline mode (PipelineDirectLLM + fast brain)...');
|
|
1929
|
+
const { createPipelineDirectLLM } = await import('./pipeline-direct-llm.js');
|
|
1930
|
+
const pipelineLLM = createPipelineDirectLLM({
|
|
1931
|
+
workingDirectory: workingDir,
|
|
1932
|
+
sessionBaseDir,
|
|
1933
|
+
mcpServers,
|
|
1934
|
+
resumeSessionId: recoverySessionId,
|
|
1935
|
+
voiceMode: 'direct',
|
|
1936
|
+
skipTTSQueue: true,
|
|
1937
|
+
getChatHistory: () => getChatHistory(20).map(t => ({ role: t.role, content: t.text })),
|
|
1938
|
+
getResearchContext: () => {
|
|
1939
|
+
if (activeResearch?.researchLog.length) {
|
|
1940
|
+
return `Research: "${lastTaskRequest}"\n${activeResearch.researchLog.slice(-15).join('\n')}`;
|
|
1941
|
+
}
|
|
1942
|
+
if (lastCompletedResearch && Date.now() - lastCompletedResearch.completedAt < 600000) {
|
|
1943
|
+
return `[COMPLETED] "${lastCompletedResearch.task}"\n${lastCompletedResearch.researchLog.slice(-15).join('\n')}`;
|
|
1944
|
+
}
|
|
1945
|
+
},
|
|
1946
|
+
getAndConsumeInterruptionContext,
|
|
1947
|
+
onFastBrainResult: (r) => {
|
|
1948
|
+
console.log(`🧠⚡ [FAST_BRAIN ${r.type.toUpperCase()} +${r.elapsedMs}ms]: "${r.answer.substring(0, 60)}"`);
|
|
1949
|
+
sendToFrontend({
|
|
1950
|
+
type: 'fast_brain_response', text: r.answer, responseType: r.type,
|
|
1951
|
+
elapsedMs: r.elapsedMs, question: r.question, toolsUsed: r.toolsUsed,
|
|
1952
|
+
agentRole: 'pipeline-fast-brain',
|
|
1953
|
+
});
|
|
1954
|
+
},
|
|
1955
|
+
});
|
|
1956
|
+
result = await createDirectSession(recoverySessionId, pipelineLLM);
|
|
1957
|
+
}
|
|
1958
|
+
else {
|
|
1959
|
+
result = await createDirectSession(recoverySessionId);
|
|
1960
|
+
}
|
|
1961
|
+
const newSession = result.session;
|
|
1962
|
+
const newAgent = result.agent;
|
|
1963
|
+
currentSession = newSession;
|
|
1964
|
+
currentAgent = newAgent;
|
|
1965
|
+
// Re-wire event listeners on the new session
|
|
1966
|
+
wireSessionEvents(newSession, newAgent);
|
|
1967
|
+
await newSession.start({ agent: newAgent, room });
|
|
1968
|
+
// Sync state
|
|
1969
|
+
agentState = 'listening';
|
|
1970
|
+
sendToFrontend({ type: 'agent_state', state: 'listening' });
|
|
1971
|
+
// Resume Claude session if one was active
|
|
1972
|
+
if (currentLLM?.sessionId) {
|
|
1973
|
+
currentLLM.setContinueSession(true);
|
|
1974
|
+
}
|
|
1975
|
+
console.log('✅ Direct mode auto-recovery complete');
|
|
1976
|
+
// Notify user via TTS
|
|
1977
|
+
try {
|
|
1978
|
+
const recoveredId = currentLLM?.sessionId || recoverySessionId;
|
|
1979
|
+
if (recoveredId) {
|
|
1980
|
+
const conversationHistory = await getConversationHistory(recoveredId, workingDir, 10);
|
|
1981
|
+
const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
|
|
1982
|
+
const script = await prepareRecoveryScript(historyForScript);
|
|
1983
|
+
// Direct mode: use session.say() for recovery notification
|
|
1984
|
+
newSession.say(script, { allowInterruptions: true });
|
|
1985
|
+
}
|
|
1986
|
+
else {
|
|
1987
|
+
newSession.say('Voice session was briefly interrupted but I\'m back. What were we working on?', { allowInterruptions: true });
|
|
1988
|
+
}
|
|
1989
|
+
}
|
|
1990
|
+
catch (err) {
|
|
1991
|
+
console.log('⚠️ Failed to generate recovery script:', err);
|
|
1992
|
+
try {
|
|
1993
|
+
newSession.say('I\'m back after a brief interruption. What were we working on?', { allowInterruptions: true });
|
|
1994
|
+
}
|
|
1995
|
+
catch { }
|
|
1996
|
+
}
|
|
1997
|
+
}
|
|
1998
|
+
catch (err) {
|
|
1999
|
+
console.error('❌ Direct mode auto-recovery failed:', err);
|
|
2000
|
+
sendToFrontend({ type: 'agent_state', state: 'error' });
|
|
2001
|
+
}
|
|
2002
|
+
return;
|
|
2003
|
+
}
|
|
1551
2004
|
// Auto-recover from crashes in realtime mode
|
|
1552
|
-
if (ev.reason === 'error' &&
|
|
2005
|
+
if (ev.reason === 'error' && sessionVoiceMode === 'realtime') {
|
|
1553
2006
|
const now = Date.now();
|
|
1554
2007
|
if (now - lastRecoveryTime < MIN_RECOVERY_INTERVAL) {
|
|
1555
2008
|
console.log('⚠️ Recovery too frequent — skipping to prevent loop');
|
|
@@ -1574,6 +2027,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1574
2027
|
}
|
|
1575
2028
|
stopProactiveLoop();
|
|
1576
2029
|
if (activeResearch) {
|
|
2030
|
+
activeResearch.abortController.abort();
|
|
1577
2031
|
activeResearch.cleanup();
|
|
1578
2032
|
activeResearch = null;
|
|
1579
2033
|
}
|
|
@@ -1597,29 +2051,23 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1597
2051
|
if (currentLLM?.sessionId) {
|
|
1598
2052
|
currentLLM.setContinueSession(true);
|
|
1599
2053
|
}
|
|
1600
|
-
//
|
|
2054
|
+
// Generate recovery script via fast brain
|
|
1601
2055
|
const recoveredSessionId = currentLLM?.sessionId || recoverySessionId;
|
|
1602
2056
|
if (recoveredSessionId) {
|
|
1603
2057
|
try {
|
|
1604
|
-
const
|
|
1605
|
-
const
|
|
1606
|
-
|
|
1607
|
-
|
|
1608
|
-
|
|
1609
|
-
console.log('📋 Injected conversation context into recovered session');
|
|
1610
|
-
}
|
|
1611
|
-
else {
|
|
1612
|
-
queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
|
|
1613
|
-
}
|
|
2058
|
+
const conversationHistory = await getConversationHistory(recoveredSessionId, workingDir, 10);
|
|
2059
|
+
const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
|
|
2060
|
+
const script = await prepareRecoveryScript(historyForScript);
|
|
2061
|
+
queueVoiceInjection(getScriptInjection(script));
|
|
2062
|
+
console.log('📋 Injected recovery script into recovered session');
|
|
1614
2063
|
}
|
|
1615
2064
|
catch (err) {
|
|
1616
|
-
console.log('⚠️ Failed to
|
|
1617
|
-
queueVoiceInjection('
|
|
2065
|
+
console.log('⚠️ Failed to generate recovery script:', err);
|
|
2066
|
+
queueVoiceInjection(getNotificationInjection('Voice session was briefly interrupted but I\'m back. What were we working on?'));
|
|
1618
2067
|
}
|
|
1619
2068
|
}
|
|
1620
2069
|
else {
|
|
1621
|
-
|
|
1622
|
-
queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
|
|
2070
|
+
queueVoiceInjection(getNotificationInjection('Voice session was briefly interrupted but I\'m back. What were we working on?'));
|
|
1623
2071
|
}
|
|
1624
2072
|
console.log('✅ Auto-recovery complete');
|
|
1625
2073
|
}
|
|
@@ -1667,6 +2115,8 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1667
2115
|
preSelectedSessionId,
|
|
1668
2116
|
mcpServers: getMcpServerStatusList(config),
|
|
1669
2117
|
enabledMcpServers: enabledMcpNames,
|
|
2118
|
+
workingDirectory: workingDir,
|
|
2119
|
+
skills: loadSkillsList(sessionBaseDir),
|
|
1670
2120
|
});
|
|
1671
2121
|
};
|
|
1672
2122
|
const readyInterval = setInterval(sendReady, 2000);
|
|
@@ -1685,8 +2135,8 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1685
2135
|
// For direct mode: use say() which goes through the configured TTS
|
|
1686
2136
|
const greetViaVoice = async (text) => {
|
|
1687
2137
|
if (sessionVoiceMode === 'realtime') {
|
|
1688
|
-
//
|
|
1689
|
-
await session.generateReply({
|
|
2138
|
+
// Use instructions (not userInput) to avoid system text appearing as user transcript
|
|
2139
|
+
await session.generateReply({ instructions: getScriptInjection(text) });
|
|
1690
2140
|
}
|
|
1691
2141
|
else {
|
|
1692
2142
|
await session.say(text);
|
|
@@ -1707,7 +2157,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1707
2157
|
success: true,
|
|
1708
2158
|
});
|
|
1709
2159
|
// Send existing workspace artifacts to frontend (session-scoped)
|
|
1710
|
-
const preArtifacts = listWorkspaceArtifacts(
|
|
2160
|
+
const preArtifacts = listWorkspaceArtifacts(sessionBaseDir, preSelectedSessionId);
|
|
1711
2161
|
if (preArtifacts.length > 0) {
|
|
1712
2162
|
console.log(`📁 Sending ${preArtifacts.length} workspace artifacts to frontend`);
|
|
1713
2163
|
await sendToFrontend({
|
|
@@ -1721,18 +2171,14 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1721
2171
|
}))
|
|
1722
2172
|
});
|
|
1723
2173
|
}
|
|
1724
|
-
//
|
|
2174
|
+
// Generate briefing script via fast brain
|
|
1725
2175
|
if (summary) {
|
|
1726
2176
|
loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
|
|
1727
|
-
const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
|
|
1728
|
-
const specContent = getSpecForVoiceModel(workingDir, preSelectedSessionId);
|
|
1729
|
-
const specSection = specContent
|
|
1730
|
-
? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
|
|
1731
|
-
: '';
|
|
1732
2177
|
try {
|
|
1733
2178
|
if (sessionVoiceMode === 'realtime') {
|
|
1734
|
-
const
|
|
1735
|
-
await
|
|
2179
|
+
const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
|
|
2180
|
+
const script = await prepareBriefingScript(sessionBaseDir, preSelectedSessionId, historyForScript);
|
|
2181
|
+
await session.generateReply({ instructions: getScriptInjection(script) });
|
|
1736
2182
|
}
|
|
1737
2183
|
else {
|
|
1738
2184
|
await session.say("Welcome back! Ready to continue our previous conversation.");
|
|
@@ -1752,7 +2198,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1752
2198
|
// No sessions at all (or new session chosen) — greet as new user
|
|
1753
2199
|
try {
|
|
1754
2200
|
console.log('👋 Sending greeting...');
|
|
1755
|
-
await greetViaVoice("
|
|
2201
|
+
await greetViaVoice("Hey! I'm Osborn, your AI research assistant. What are you working on today?");
|
|
1756
2202
|
console.log('✅ Greeting sent');
|
|
1757
2203
|
}
|
|
1758
2204
|
catch (err) {
|
|
@@ -1766,11 +2212,41 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1766
2212
|
});
|
|
1767
2213
|
room.on(RoomEvent.ParticipantDisconnected, (participant) => {
|
|
1768
2214
|
console.log(`👋 User left: ${participant.identity}`);
|
|
2215
|
+
// Full cleanup — stop all background work to avoid accumulating API usage
|
|
2216
|
+
voiceQueue.length = 0;
|
|
2217
|
+
isProcessingQueue = false;
|
|
2218
|
+
currentSpeechHandle = null;
|
|
2219
|
+
lastInterruption = null;
|
|
2220
|
+
if (researchBatchTimer) {
|
|
2221
|
+
clearTimeout(researchBatchTimer);
|
|
2222
|
+
researchBatchTimer = null;
|
|
2223
|
+
}
|
|
2224
|
+
stopProactiveLoop();
|
|
2225
|
+
if (activeResearch) {
|
|
2226
|
+
activeResearch.abortController.abort();
|
|
2227
|
+
activeResearch.cleanup();
|
|
2228
|
+
activeResearch = null;
|
|
2229
|
+
}
|
|
1769
2230
|
if (currentSession) {
|
|
1770
|
-
currentSession
|
|
2231
|
+
const sessionToClose = currentSession;
|
|
1771
2232
|
currentSession = null;
|
|
1772
|
-
|
|
2233
|
+
// Track async close so new connections can wait for byte stream handler to be released
|
|
2234
|
+
pendingSessionClose = (async () => {
|
|
2235
|
+
try {
|
|
2236
|
+
await sessionToClose.close();
|
|
2237
|
+
}
|
|
2238
|
+
catch { }
|
|
2239
|
+
try {
|
|
2240
|
+
sessionToClose.removeAllListeners();
|
|
2241
|
+
}
|
|
2242
|
+
catch { }
|
|
2243
|
+
pendingSessionClose = null;
|
|
2244
|
+
})();
|
|
1773
2245
|
}
|
|
2246
|
+
currentAgent = null;
|
|
2247
|
+
currentLLM = null;
|
|
2248
|
+
clearFastBrainSession();
|
|
2249
|
+
clearPipelineFastBrainSession();
|
|
1774
2250
|
console.log('⏳ Waiting for new user...\n');
|
|
1775
2251
|
});
|
|
1776
2252
|
room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
|
|
@@ -1779,10 +2255,20 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1779
2255
|
try {
|
|
1780
2256
|
const data = JSON.parse(new TextDecoder().decode(payload));
|
|
1781
2257
|
console.log('📨 Data:', data.type);
|
|
1782
|
-
if (data.type === '
|
|
2258
|
+
if (data.type === 'claude_auth_code' && pendingAuthSubmitCode) {
|
|
2259
|
+
console.log('🔑 Received auth code from frontend');
|
|
2260
|
+
sendToFrontend({ type: 'claude_auth_submitting', message: 'Submitting code to Claude CLI...' });
|
|
2261
|
+
pendingAuthSubmitCode(data.code);
|
|
2262
|
+
}
|
|
2263
|
+
else if (data.type === 'permission_response') {
|
|
1783
2264
|
// Handle permission response for direct mode
|
|
1784
2265
|
if (currentLLM && currentLLM.hasPendingPermission?.()) {
|
|
1785
2266
|
const allow = data.response === 'allow' || data.response === 'always_allow';
|
|
2267
|
+
// Track always_allow paths for this session so future requests auto-approve
|
|
2268
|
+
if (data.response === 'always_allow' && data.filePath) {
|
|
2269
|
+
sessionAlwaysAllowPaths.add(String(data.filePath));
|
|
2270
|
+
console.log(`🔒 Always-allow added for session: ${data.filePath}`);
|
|
2271
|
+
}
|
|
1786
2272
|
currentLLM.respondToPermission(allow);
|
|
1787
2273
|
console.log(`✅ Permission: ${data.response}`);
|
|
1788
2274
|
}
|
|
@@ -1833,6 +2319,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1833
2319
|
const sessionId = data.sessionId;
|
|
1834
2320
|
if (sessionId && sessionExists(sessionId, workingDir)) {
|
|
1835
2321
|
currentLLM.setResumeSessionId(sessionId);
|
|
2322
|
+
currentResumeSessionId = sessionId;
|
|
1836
2323
|
console.log(`🔄 Will resume session: ${sessionId}`);
|
|
1837
2324
|
await sendToFrontend({
|
|
1838
2325
|
type: 'session_resume_set',
|
|
@@ -1840,7 +2327,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1840
2327
|
success: true,
|
|
1841
2328
|
});
|
|
1842
2329
|
// Send existing session artifacts to frontend (session-scoped)
|
|
1843
|
-
const artifacts = listWorkspaceArtifacts(
|
|
2330
|
+
const artifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
|
|
1844
2331
|
if (artifacts.length > 0) {
|
|
1845
2332
|
console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
|
|
1846
2333
|
await sendToFrontend({
|
|
@@ -1869,6 +2356,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1869
2356
|
const recentId = await getMostRecentSessionId(workingDir);
|
|
1870
2357
|
if (recentId) {
|
|
1871
2358
|
currentLLM.setResumeSessionId(recentId);
|
|
2359
|
+
currentResumeSessionId = recentId;
|
|
1872
2360
|
console.log(`🔄 Continuing most recent session: ${recentId}`);
|
|
1873
2361
|
const summary = await getSessionSummary(recentId, workingDir);
|
|
1874
2362
|
const conversationHistory = await getConversationHistory(recentId, workingDir, 30);
|
|
@@ -1878,7 +2366,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1878
2366
|
success: true,
|
|
1879
2367
|
});
|
|
1880
2368
|
// Send existing session artifacts to frontend (session-scoped)
|
|
1881
|
-
const artifacts = listWorkspaceArtifacts(
|
|
2369
|
+
const artifacts = listWorkspaceArtifacts(sessionBaseDir, recentId);
|
|
1882
2370
|
if (artifacts.length > 0) {
|
|
1883
2371
|
console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
|
|
1884
2372
|
await sendToFrontend({
|
|
@@ -1894,16 +2382,12 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1894
2382
|
}
|
|
1895
2383
|
if (currentSession && summary) {
|
|
1896
2384
|
loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
|
|
1897
|
-
const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
|
|
1898
|
-
const specContent = getSpecForVoiceModel(workingDir, recentId);
|
|
1899
|
-
const specSection = specContent
|
|
1900
|
-
? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
|
|
1901
|
-
: '';
|
|
1902
2385
|
console.log('📋 Injecting session context into voice agent...');
|
|
1903
2386
|
try {
|
|
1904
2387
|
if (currentVoiceMode === 'realtime') {
|
|
1905
|
-
const
|
|
1906
|
-
await
|
|
2388
|
+
const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
|
|
2389
|
+
const script = await prepareBriefingScript(sessionBaseDir, recentId, historyForScript);
|
|
2390
|
+
await currentSession.generateReply({ instructions: getScriptInjection(script) });
|
|
1907
2391
|
}
|
|
1908
2392
|
else {
|
|
1909
2393
|
await currentSession.say("Continuing where we left off.");
|
|
@@ -1934,7 +2418,9 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1934
2418
|
// Step 2: Reset LLM state and configure for new session
|
|
1935
2419
|
currentLLM.resetForSessionSwitch();
|
|
1936
2420
|
currentLLM.setResumeSessionId(sessionId);
|
|
1937
|
-
|
|
2421
|
+
currentResumeSessionId = sessionId;
|
|
2422
|
+
clearFastBrainSession();
|
|
2423
|
+
clearPipelineFastBrainSession();
|
|
1938
2424
|
console.log(`🔄 Switched to session: ${sessionId}`);
|
|
1939
2425
|
// Step 3: Send full context to frontend (including conversation history)
|
|
1940
2426
|
await sendToFrontend({
|
|
@@ -1945,7 +2431,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1945
2431
|
conversationHistory,
|
|
1946
2432
|
});
|
|
1947
2433
|
// Step 3.5: Send existing session artifacts to frontend (session-scoped)
|
|
1948
|
-
const switchArtifacts = listWorkspaceArtifacts(
|
|
2434
|
+
const switchArtifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
|
|
1949
2435
|
if (switchArtifacts.length > 0) {
|
|
1950
2436
|
console.log(`📁 Sending ${switchArtifacts.length} session artifacts to frontend`);
|
|
1951
2437
|
await sendToFrontend({
|
|
@@ -1959,14 +2445,14 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
1959
2445
|
}))
|
|
1960
2446
|
});
|
|
1961
2447
|
}
|
|
1962
|
-
// Step 4: Voice agent acknowledges context
|
|
2448
|
+
// Step 4: Voice agent acknowledges context via fast brain
|
|
1963
2449
|
if (currentSession && summary) {
|
|
1964
2450
|
loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
|
|
1965
|
-
const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
|
|
1966
2451
|
try {
|
|
1967
2452
|
if (currentVoiceMode === 'realtime') {
|
|
1968
|
-
const
|
|
1969
|
-
await
|
|
2453
|
+
const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
|
|
2454
|
+
const briefingScript = await prepareBriefingScript(sessionBaseDir, sessionId, historyForScript, 'switch');
|
|
2455
|
+
queueVoiceInjection(getScriptInjection(briefingScript));
|
|
1970
2456
|
}
|
|
1971
2457
|
else {
|
|
1972
2458
|
const acknowledgment = summary.lastMessages.length > 0
|
|
@@ -2000,7 +2486,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
2000
2486
|
else if (data.type === 'get_session_artifacts') {
|
|
2001
2487
|
const sessionId = data.sessionId;
|
|
2002
2488
|
if (sessionId) {
|
|
2003
|
-
const artifacts = listWorkspaceArtifacts(
|
|
2489
|
+
const artifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
|
|
2004
2490
|
console.log(`📁 Sending ${artifacts.length} session artifacts for ${sessionId.substring(0, 8)}`);
|
|
2005
2491
|
await sendToFrontend({
|
|
2006
2492
|
type: 'session_artifacts',
|
|
@@ -2136,12 +2622,79 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
2136
2622
|
enabledKeys,
|
|
2137
2623
|
});
|
|
2138
2624
|
}
|
|
2625
|
+
else if (data.type === 'get_skills') {
|
|
2626
|
+
await sendToFrontend({
|
|
2627
|
+
type: 'skills_status',
|
|
2628
|
+
skills: loadSkillsList(sessionBaseDir),
|
|
2629
|
+
});
|
|
2630
|
+
}
|
|
2631
|
+
else if (data.type === 'skill_add') {
|
|
2632
|
+
const skillName = (data.name || '').trim().toLowerCase().replace(/[^a-z0-9-]/g, '-');
|
|
2633
|
+
const skillContent = (data.content || '').trim();
|
|
2634
|
+
if (!skillName || !skillContent) {
|
|
2635
|
+
await sendToFrontend({ type: 'skill_add_result', success: false, error: 'Name and content are required' });
|
|
2636
|
+
}
|
|
2637
|
+
else {
|
|
2638
|
+
try {
|
|
2639
|
+
const skillDir = join(sessionBaseDir, '.claude', 'skills', skillName);
|
|
2640
|
+
mkdirSync(skillDir, { recursive: true });
|
|
2641
|
+
writeFileSync(join(skillDir, 'SKILL.md'), skillContent, 'utf-8');
|
|
2642
|
+
console.log(`📚 Skill added: ${skillName}`);
|
|
2643
|
+
const skills = loadSkillsList(sessionBaseDir);
|
|
2644
|
+
await sendToFrontend({ type: 'skill_add_result', success: true, skills });
|
|
2645
|
+
}
|
|
2646
|
+
catch (err) {
|
|
2647
|
+
console.error('❌ Failed to add skill:', err);
|
|
2648
|
+
await sendToFrontend({ type: 'skill_add_result', success: false, error: String(err) });
|
|
2649
|
+
}
|
|
2650
|
+
}
|
|
2651
|
+
}
|
|
2652
|
+
else if (data.type === 'join_meeting') {
|
|
2653
|
+
const meetingUrl = data.url;
|
|
2654
|
+
if (meetingUrl) {
|
|
2655
|
+
const recallJoin = getRecallClient();
|
|
2656
|
+
if (!recallJoin) {
|
|
2657
|
+
await sendToFrontend({ type: 'meeting_error', message: 'Recall.ai not configured — set RECALL_API_KEY in .env' });
|
|
2658
|
+
}
|
|
2659
|
+
else {
|
|
2660
|
+
try {
|
|
2661
|
+
const webhookBase = process.env.FLY_APP_NAME
|
|
2662
|
+
? `https://${process.env.FLY_APP_NAME}.fly.dev`
|
|
2663
|
+
: `http://localhost:${apiPort}`;
|
|
2664
|
+
await sendToFrontend({ type: 'meeting_joining', message: 'Osborn is joining your meeting...' });
|
|
2665
|
+
const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase);
|
|
2666
|
+
const sessionId = currentLLM?.sessionId || currentResumeSessionId || 'default';
|
|
2667
|
+
recallJoin.registerBot(botId, sessionId);
|
|
2668
|
+
await sendToFrontend({ type: 'meeting_joined', botId, message: 'Osborn has joined the meeting' });
|
|
2669
|
+
}
|
|
2670
|
+
catch (err) {
|
|
2671
|
+
console.error('❌ Recall.ai join error:', err);
|
|
2672
|
+
await sendToFrontend({ type: 'meeting_error', message: err.message });
|
|
2673
|
+
}
|
|
2674
|
+
}
|
|
2675
|
+
}
|
|
2676
|
+
}
|
|
2677
|
+
else if (data.type === 'leave_meeting') {
|
|
2678
|
+
const botId = data.botId;
|
|
2679
|
+
const recallLeave = getRecallClient();
|
|
2680
|
+
if (recallLeave && botId) {
|
|
2681
|
+
try {
|
|
2682
|
+
await recallLeave.leaveMeeting(botId);
|
|
2683
|
+
await sendToFrontend({ type: 'meeting_left', botId });
|
|
2684
|
+
}
|
|
2685
|
+
catch (err) {
|
|
2686
|
+
console.error('❌ Recall.ai leave error:', err);
|
|
2687
|
+
await sendToFrontend({ type: 'meeting_error', message: err.message });
|
|
2688
|
+
}
|
|
2689
|
+
}
|
|
2690
|
+
}
|
|
2139
2691
|
else if (data.type === 'session_selected') {
|
|
2140
2692
|
const sessionId = data.sessionId;
|
|
2141
2693
|
console.log(`🚪 Session gate completed: ${sessionId ? `resume ${sessionId}` : 'fresh start'}`);
|
|
2142
2694
|
if (sessionId && currentLLM && sessionExists(sessionId, workingDir)) {
|
|
2143
2695
|
// Resume the selected session
|
|
2144
2696
|
currentLLM.setResumeSessionId(sessionId);
|
|
2697
|
+
currentResumeSessionId = sessionId;
|
|
2145
2698
|
console.log(`🔄 Resuming session: ${sessionId}`);
|
|
2146
2699
|
// Fetch context and greet with it
|
|
2147
2700
|
const summary = await getSessionSummary(sessionId, workingDir);
|
|
@@ -2152,7 +2705,7 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
2152
2705
|
success: true,
|
|
2153
2706
|
});
|
|
2154
2707
|
// Send existing session artifacts to frontend (session-scoped)
|
|
2155
|
-
const gateArtifacts = listWorkspaceArtifacts(
|
|
2708
|
+
const gateArtifacts = listWorkspaceArtifacts(sessionBaseDir, sessionId);
|
|
2156
2709
|
if (gateArtifacts.length > 0) {
|
|
2157
2710
|
console.log(`📁 Sending ${gateArtifacts.length} session artifacts to frontend`);
|
|
2158
2711
|
await sendToFrontend({
|
|
@@ -2166,18 +2719,14 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
2166
2719
|
}))
|
|
2167
2720
|
});
|
|
2168
2721
|
}
|
|
2169
|
-
// Load full session history and greet with context
|
|
2722
|
+
// Load full session history and greet with context via fast brain
|
|
2170
2723
|
if (currentSession && summary) {
|
|
2171
2724
|
loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
|
|
2172
|
-
const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
|
|
2173
|
-
const specContent = getSpecForVoiceModel(workingDir, sessionId);
|
|
2174
|
-
const specSection = specContent
|
|
2175
|
-
? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
|
|
2176
|
-
: '';
|
|
2177
2725
|
try {
|
|
2178
2726
|
if (currentVoiceMode === 'realtime') {
|
|
2179
|
-
const
|
|
2180
|
-
await
|
|
2727
|
+
const historyForScript = conversationHistory.map(e => ({ role: e.role, text: e.content }));
|
|
2728
|
+
const briefingScript = await prepareBriefingScript(sessionBaseDir, sessionId, historyForScript, 'resume');
|
|
2729
|
+
queueVoiceInjection(getScriptInjection(briefingScript));
|
|
2181
2730
|
}
|
|
2182
2731
|
else {
|
|
2183
2732
|
await currentSession.say("Welcome back! Ready to continue our previous conversation.");
|
|
@@ -2189,12 +2738,13 @@ For actual images (photos, illustrations, screenshots), use ask_agent instead
|
|
|
2189
2738
|
}
|
|
2190
2739
|
}
|
|
2191
2740
|
else {
|
|
2192
|
-
// Fresh start -
|
|
2741
|
+
// Fresh start - greet via voice queue (not userInput, which creates a user transcript)
|
|
2742
|
+
currentResumeSessionId = undefined;
|
|
2193
2743
|
console.log('🆕 Starting fresh session');
|
|
2194
2744
|
if (currentSession) {
|
|
2195
2745
|
try {
|
|
2196
2746
|
if (currentVoiceMode === 'realtime') {
|
|
2197
|
-
|
|
2747
|
+
queueVoiceInjection(getScriptInjection("Hey! I'm Osborn, your AI research assistant. What are you working on today?"));
|
|
2198
2748
|
}
|
|
2199
2749
|
else {
|
|
2200
2750
|
await currentSession.say("Hey! I'm Osborn. What are you working on?");
|