osborn 0.5.3 → 0.5.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.claude/settings.local.json +9 -0
- package/.claude/skills/markdown-to-pdf/SKILL.md +29 -0
- package/.claude/skills/pdf-to-markdown/SKILL.md +28 -0
- package/.claude/skills/playwright-browser/SKILL.md +75 -0
- package/.claude/skills/youtube-transcript/SKILL.md +24 -0
- package/dist/claude-llm.d.ts +29 -1
- package/dist/claude-llm.js +334 -78
- package/dist/config.d.ts +5 -1
- package/dist/config.js +4 -1
- package/dist/fast-brain.d.ts +70 -16
- package/dist/fast-brain.js +662 -99
- package/dist/index-3-2-26-legacy.d.ts +1 -0
- package/dist/index-3-2-26-legacy.js +2233 -0
- package/dist/index.js +752 -423
- package/dist/jsonl-search.d.ts +66 -0
- package/dist/jsonl-search.js +274 -0
- package/dist/leagcyprompts2.d.ts +0 -0
- package/dist/leagcyprompts2.js +573 -0
- package/dist/pipeline-direct-llm.d.ts +77 -0
- package/dist/pipeline-direct-llm.js +216 -0
- package/dist/pipeline-fastbrain.d.ts +45 -0
- package/dist/pipeline-fastbrain.js +367 -0
- package/dist/prompts-2-25-26.d.ts +0 -0
- package/dist/prompts-2-25-26.js +518 -0
- package/dist/prompts-3-2-26.d.ts +78 -0
- package/dist/prompts-3-2-26.js +1319 -0
- package/dist/prompts.d.ts +83 -12
- package/dist/prompts.js +1991 -588
- package/dist/session-access.d.ts +24 -0
- package/dist/session-access.js +74 -0
- package/dist/summary-index.d.ts +87 -0
- package/dist/summary-index.js +570 -0
- package/dist/turn-detector-shim.d.ts +24 -0
- package/dist/turn-detector-shim.js +83 -0
- package/dist/voice-io.d.ts +9 -3
- package/dist/voice-io.js +39 -20
- package/package.json +13 -10
@@ -0,0 +1,2233 @@
// Load environment variables FIRST before any other imports
import 'dotenv/config';
import { voice, initializeLogger } from '@livekit/agents';
import { Room, RoomEvent } from '@livekit/rtc-node';
import { AccessToken } from 'livekit-server-sdk';
// Initialize logger before anything else
initializeLogger({ pretty: true, level: 'info' });
import { createServer } from 'http';
import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts, readSessionSpec, listLibraryFiles } from './config.js';
import { createSTT, createTTS, createVAD, createRealtimeModelFromConfig } from './voice-io.js';
import { createClaudeLLM } from './claude-llm.js';
import { createSmitheryProxy, destroySmitheryProxy, parseSmitheryUrl, isSmitheryUrl, SmitheryAuthorizationError } from './smithery-proxy.js';
import { askHaiku, updateSpecFromJSONL, augmentResearchResult, writeQuestionToSpec, checkOutputAgainstQuestions, contextualizeResearchUpdate, generateProactivePrompt, generateVisualDocument, clearFastBrainHistory } from './fast-brain.js';
import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getResearchCompleteInjection, getResearchUpdateInjection, getNotificationInjection } from './prompts.js';
import { MCP_CATALOG } from './config.js';
import { llm } from '@livekit/agents';
import { z } from 'zod';
// ============================================================
// DUAL MODE VOICE ARCHITECTURE
// ============================================================
// DIRECT MODE (default): STT → Claude Agent SDK → TTS
// - Full coding capabilities via Claude Agent SDK
// - Permission system flows to frontend
// - Best for actual coding tasks
//
// REALTIME MODE: OpenAI/Gemini native speech-to-speech
// - Faster response, lower latency
// - Voice LLM with tool calling (ask_agent, respond_permission)
// - Routes tasks to Claude agents for execution
// ============================================================
// Generate a short, user-friendly room code
function generateRoomCode() {
    const chars = 'abcdefghjkmnpqrstuvwxyz23456789';
    let code = '';
    for (let i = 0; i < 6; i++) {
        code += chars[Math.floor(Math.random() * chars.length)];
    }
    return code;
}
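// Note (derived from the alphabet above): it omits the easily-confused characters
// i, l, o, 0, and 1, so generated codes stay unambiguous when read aloud or typed.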
// Parse CLI arguments
function parseArgs() {
    const args = process.argv.slice(2);
    let roomCode;
    for (let i = 0; i < args.length; i++) {
        if (args[i] === '--room' && args[i + 1]) {
            roomCode = args[i + 1];
        }
        // Short code detection (e.g., `npm run dev abc123`)
        if (!args[i].startsWith('-') && args[i].length >= 4 && args[i].length <= 10 &&
            !['dev', 'start'].includes(args[i])) {
            roomCode = args[i];
        }
    }
    return { roomCode };
}
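// Illustrative invocations (a sketch of the parsing rules above, not from the diff):
// `--room abc123` and a bare `abc123` token both resolve to roomCode === 'abc123'.
// Bare tokens must be 4–10 characters, not flag-prefixed, and not the npm script
// names 'dev' or 'start'.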
// Global error handlers
process.on('unhandledRejection', (reason) => {
    const msg = reason?.message || String(reason);
    if (msg.includes('aborted') || msg.includes('AbortError')) {
        console.log('⚠️ LLM request aborted (user interrupted)');
        return;
    }
    // Gemini plugin intentionally supersedes generate_reply calls — safe to suppress
    if (msg.includes('Superseded')) {
        console.log('⚠️ generateReply superseded (expected during concurrent injections)');
        return;
    }
    // OpenAI race: voice queue fired while server-side VAD already created a response
    if (msg.includes('conversation_already_has_active_response') || msg.includes('active_response')) {
        console.log('⚠️ OpenAI active response collision (will retry on next listening state)');
        return;
    }
    // LiveKit SDK internal error after participant disconnect — safe to suppress
    if (msg.includes("reading 'source'") || msg.includes("reading 'type'")) {
        console.log('⚠️ Post-disconnect cleanup error (harmless)');
        return;
    }
    // generateReply timeout — usually from racing concurrent injections
    if (msg.includes('generateReply timed out') || msg.includes('generation_created')) {
        console.log('⚠️ generateReply timed out (concurrent injection race)');
        return;
    }
    console.error('❌ Unhandled Rejection:', msg);
});
process.on('uncaughtException', (error) => {
    if (error.message?.includes('aborted') || error.message?.includes('AbortError')) {
        console.log('⚠️ Operation aborted');
        return;
    }
    console.error('❌ Uncaught Exception:', error);
});
// ============================================================
// HTTP API SERVER - Exposes session data to cloud-deployed frontend
// ============================================================
function startApiServer(workingDir, port) {
    const server = createServer(async (req, res) => {
        // CORS headers for cloud frontend
        res.setHeader('Access-Control-Allow-Origin', '*');
        res.setHeader('Access-Control-Allow-Methods', 'GET, OPTIONS');
        res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
        if (req.method === 'OPTIONS') {
            res.writeHead(204);
            res.end();
            return;
        }
        const url = new URL(req.url || '/', `http://localhost:${port}`);
        if (req.method === 'GET' && url.pathname === '/sessions') {
            try {
                await cleanupOrphanedMetadata(workingDir);
                const sessions = await listSessions(workingDir);
                const payload = {
                    sessions: sessions.map(s => ({
                        sessionId: s.sessionId,
                        timestamp: s.timestamp.toISOString(),
                        lastMessage: s.lastMessage,
                        messageCount: s.messageCount,
                    })),
                    total: sessions.length,
                };
                res.writeHead(200, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify(payload));
            }
            catch (err) {
                console.error('API /sessions error:', err);
                res.writeHead(500, { 'Content-Type': 'application/json' });
                res.end(JSON.stringify({ sessions: [], total: 0, error: 'Failed to list sessions' }));
            }
            return;
        }
        if (req.method === 'GET' && url.pathname === '/health') {
            res.writeHead(200, { 'Content-Type': 'application/json' });
            res.end(JSON.stringify({ status: 'ok', workingDir }));
            return;
        }
        res.writeHead(404, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify({ error: 'Not found' }));
    });
    server.listen(port, () => {
        console.log(`🌐 API server listening on http://localhost:${port}`);
        console.log(`   Sessions: http://localhost:${port}/sessions`);
    });
    server.on('error', (err) => {
        if (err.code === 'EADDRINUSE') {
            console.warn(`⚠️ API port ${port} in use, trying ${port + 1}...`);
            startApiServer(workingDir, port + 1);
        }
        else {
            console.error('❌ API server error:', err);
        }
    });
}
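// Illustrative usage (assumed; endpoints and the default port 8741 come from this file):
//   curl http://localhost:8741/health
//   → { "status": "ok", "workingDir": "/path/to/project" }
//   curl http://localhost:8741/sessions
//   → { "sessions": [{ "sessionId": "...", "timestamp": "...", "lastMessage": "...", "messageCount": 12 }], "total": 1 }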
// ============================================================
// SESSION CONTEXT HELPERS
// ============================================================
/**
 * Build a context briefing string for the realtime agent.
 * Loads session conversation history so the model has deep context.
 * Gemini has smaller context limits — cap at 10 exchanges with 500 char content.
 * OpenAI handles the full history, with content capped at 2000 chars per exchange.
 */
function buildContextBriefing(summary, history, provider) {
    const isGemini = provider === 'gemini';
    // Gemini: last 10 exchanges capped at 500 chars. OpenAI: full history.
    const maxExchanges = isGemini ? 10 : history.length;
    const maxContentLen = isGemini ? 500 : 2000;
    const trimmedHistory = history.slice(-maxExchanges);
    const lines = [
        `Session ID: ${summary.sessionId.substring(0, 8)}`,
        `Total messages: ${summary.messageCount}`,
        '',
        '=== SESSION CONVERSATION HISTORY ==='
    ];
    for (const exchange of trimmedHistory) {
        const content = exchange.content.length > maxContentLen
            ? exchange.content.substring(0, maxContentLen) + '...'
            : exchange.content;
        lines.push(`${exchange.role === 'user' ? 'User' : 'Assistant'}: ${content}`);
        lines.push('');
    }
    return lines.join('\n');
}
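// Example output (illustrative values, shape derived from the code above):
//   Session ID: a1b2c3d4
//   Total messages: 42
//
//   === SESSION CONVERSATION HISTORY ===
//   User: how do I set up the webhook?
//
//   Assistant: You can register it via...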
/**
 * Read spec.md and format it for the realtime voice model.
 * Truncates to avoid bloating the context window.
 * Returns null if spec doesn't exist or session ID isn't available.
 */
function getSpecForVoiceModel(workingDir, sessionId) {
    if (!sessionId)
        return null;
    const specContent = readSessionSpec(workingDir, sessionId);
    if (!specContent)
        return null;
    const MAX = 3000;
    if (specContent.length <= MAX)
        return specContent;
    const truncated = specContent.substring(0, MAX);
    const lastHeading = truncated.lastIndexOf('\n## ');
    if (lastHeading > MAX * 0.5) {
        return truncated.substring(0, lastHeading) + '\n\n[... truncated — call read_spec for full content]';
    }
    return truncated + '\n\n[... truncated]';
}
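// Derived note: when the last '## ' heading inside the 3000-char window sits past
// the halfway mark, the spec is clipped at that heading so the voice model sees
// whole sections; otherwise it falls back to a hard cut with a plain marker.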
/**
 * Load full session conversation history into the realtime model's ChatContext.
 * This gives the model persistent memory of what was discussed/researched,
 * enabling deeper follow-up conversations without re-delegating to ask_agent.
 *
 * NOTE: Gemini's Live API doesn't support updateChatCtx (crashes with code 1008).
 * For Gemini, the session resume context is already injected via generateReply({ userInput })
 * which becomes part of the conversation history as model turns.
 */
function loadSessionHistoryIntoChatCtx(agent, history, provider) {
    if (!agent || history.length === 0)
        return;
    // Skip for Gemini — updateChatCtx triggers unsupported operations on Gemini Live API
    if (provider === 'gemini') {
        console.log(`🧠 Skipping ChatCtx load for Gemini (${history.length} exchanges) — context injected via generateReply`);
        return;
    }
    try {
        const chatCtx = agent.chatCtx.copy();
        // Inject each conversation exchange as a proper chat message
        for (const exchange of history) {
            chatCtx.addMessage({
                role: exchange.role === 'user' ? 'user' : 'assistant',
                content: exchange.content,
            });
        }
        agent.updateChatCtx(chatCtx);
        console.log(`🧠 Loaded ${history.length} conversation exchanges into ChatCtx (${history.reduce((sum, e) => sum + e.content.length, 0)} chars)`);
    }
    catch (err) {
        console.log('⚠️ Failed to load session history into ChatCtx:', err);
    }
}
// Main function
async function main() {
    console.log('\n🤖 Osborn Voice AI Coding Assistant\n');
    // Validate environment
    const livekitUrl = process.env.LIVEKIT_URL;
    const apiKey = process.env.LIVEKIT_API_KEY;
    const apiSecret = process.env.LIVEKIT_API_SECRET;
    if (!livekitUrl || !apiKey || !apiSecret) {
        console.error('❌ Missing required environment variables:');
        if (!livekitUrl)
            console.error('   - LIVEKIT_URL');
        if (!apiKey)
            console.error('   - LIVEKIT_API_KEY');
        if (!apiSecret)
            console.error('   - LIVEKIT_API_SECRET');
        console.error('\nSet these in your .env file or environment.');
        process.exit(1);
    }
    // Parse CLI args
    const cliArgs = parseArgs();
    // Load configuration
    console.log('📁 Loading configuration...');
    const config = loadConfig();
    const mcpServers = getMcpServers(config);
    const enabledMcpNames = getEnabledMcpServerNames(config);
    if (enabledMcpNames.length > 0) {
        console.log(`🔌 Enabled MCP servers: ${enabledMcpNames.join(', ')}`);
    }
    const workingDir = config.workingDirectory || process.cwd();
    console.log(`📂 Working directory: ${workingDir}`);
    console.log(`🔬 Mode: RESEARCH`);
    // Determine voice mode
    const voiceMode = getVoiceMode(config);
    const realtimeConfig = getRealtimeConfig(config);
    const directConfig = getDirectConfig(config);
    if (voiceMode === 'realtime') {
        console.log(`🎙️ REALTIME MODE: ${realtimeConfig.provider} native speech-to-speech`);
        console.log(`   Voice: ${realtimeConfig.provider === 'openai' ? realtimeConfig.openaiVoice : realtimeConfig.geminiVoice}`);
    }
    else {
        console.log(`🎯 DIRECT MODE: ${directConfig.stt.provider} STT → Claude Agent SDK → ${directConfig.tts.provider} TTS`);
        console.log('   🔥 Full coding capabilities!');
    }
    // Determine room code
    const roomCode = cliArgs.roomCode || generateRoomCode();
    const roomName = `osborn-${roomCode}`;
    if (cliArgs.roomCode) {
        console.log(`🔗 Joining room: ${roomCode}`);
    }
    else {
        console.log(`\n✨ Created new room: ${roomCode}`);
        console.log(`\n📋 Share this with the frontend or run:`);
        console.log(`   Open: https://osborn.app?room=${roomCode}`);
        console.log(`   Or enter code "${roomCode}" in the frontend\n`);
    }
    // Start HTTP API server for frontend session browsing
    const apiPort = parseInt(process.env.OSBORN_API_PORT || '8741', 10);
    startApiServer(workingDir, apiPort);
    // ============================================================
    // Create Access Token for Agent
    // ============================================================
    console.log('🔑 Creating access token...');
    const token = new AccessToken(apiKey, apiSecret, {
        identity: 'osborn-agent',
        name: 'Osborn AI',
        metadata: JSON.stringify({ type: 'agent', version: '0.3.0' }),
    });
    token.addGrant({
        roomJoin: true,
        room: roomName,
        canPublish: true,
        canSubscribe: true,
        canPublishData: true,
    });
    const jwt = await token.toJwt();
    // ============================================================
    // Connect to Room
    // ============================================================
    console.log('📡 Connecting to LiveKit...');
    const room = new Room();
    room.setMaxListeners(50); // Prevent MaxListenersExceeded warnings on reconnect
    // Track state
    let currentSession = null;
    let currentAgent = null; // For updateChatCtx() context injection
    let currentLLM = null;
    let localParticipant = null;
    let agentState = 'initializing';
    let userState = 'listening'; // Track user speech state for queue safety
    let currentVoiceMode = voiceMode; // Track active voice mode for data handlers
    let currentProvider = realtimeConfig.provider; // Track active realtime provider
    // Task deduplication guard - prevents Gemini re-execution loops
    let lastTaskRequest = '';
    let lastTaskTime = 0;
    // Fast brain (ask_haiku) in-flight tracking — prevents ask_agent double-calling
    let haikuInFlight = null;
    // Background research state - tracks async ask_agent execution
    let activeResearch = null;
    // No manual queuing — the Claude SDK handles sequential queries internally
    // ============================================================
    // Unified Voice Injection Queue
    // ============================================================
    // ALL system injections (research updates, completions, notifications, errors)
    // go through this queue. Never call generateReply directly for injections.
    // The queue only drains when the voice model is confirmed 'listening'.
    // After draining, the model transitions to thinking/speaking, and the queue
    // naturally pauses until the next 'listening' state.
    const voiceQueue = [];
    let isProcessingQueue = false;
    function queueVoiceInjection(instructions) {
        voiceQueue.push(instructions);
        console.log(`📥 Voice queue: +1 (total: ${voiceQueue.length}): ${instructions.substring(0, 80)}...`);
        processVoiceQueue();
    }
    function processVoiceQueue() {
        if (voiceQueue.length === 0)
            return;
        if (!currentSession)
            return;
        if (isProcessingQueue) {
            console.log(`⏸️ Voice queue: already processing, ${voiceQueue.length} items waiting`);
            return;
        }
        if (agentState !== 'listening') {
            console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (model: ${agentState})`);
            return; // Will be called again when agent_state_changed → 'listening'
        }
        // Don't inject while user is speaking — server-side VAD will auto-create a response
        if (userState === 'speaking') {
            console.log(`⏸️ Voice queue: ${voiceQueue.length} items waiting (user speaking)`);
            return;
        }
        isProcessingQueue = true;
        // Safety timeout: if agent_state_changed never fires (e.g. Gemini state machine hang),
        // clear the guard after 30s so the queue isn't permanently stuck
        setTimeout(() => {
            if (isProcessingQueue) {
                console.log('⚠️ Voice queue: isProcessingQueue stuck for 30s, clearing');
                isProcessingQueue = false;
                if (voiceQueue.length > 0 && agentState === 'listening') {
                    processVoiceQueue();
                }
            }
        }, 30000);
        // Batch ALL queued items into one generateReply call
        const items = voiceQueue.splice(0);
        const batchedInstruction = items.length === 1
            ? items[0]
            : items.join('\n\n---\n\n');
        console.log(`📡 Voice queue: processing ${items.length} batched items (${batchedInstruction.length} chars)`);
        try {
            // Skip interrupt for Gemini — disrupts Gemini's state machine, causing it to
            // never transition back to 'listening' (hangs in speaking state indefinitely)
            if (currentProvider !== 'gemini') {
                currentSession.interrupt();
            }
            currentSession.generateReply({
                instructions: batchedInstruction,
                toolChoice: 'none',
            });
            // Model transitions to thinking/speaking after this call.
            // When it returns to 'listening', agent_state_changed triggers processVoiceQueue() again.
            // Also inject into chatCtx as persistent context so the model remembers across turns
            injectIntoChatCtx(batchedInstruction);
        }
        catch (err) {
            console.log('⚠️ Voice queue generateReply failed, dropping items:', err);
            // Do NOT re-queue — re-queuing causes infinite retry cascades
            // The frontend still has the updates via claude_output events
            isProcessingQueue = false;
        }
        // isProcessingQueue is cleared when agent_state_changed fires
    }
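    // Illustrative call (the pattern mirrors the error path further down in this file):
    // the text is spoken only once agentState is 'listening' and the user isn't talking.
    //   queueVoiceInjection('[NOTIFICATION] Build finished. Tell the user briefly. Do NOT call any tools.');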
    // Inject content into the agent's ChatContext as persistent memory
    // This ensures the realtime model can reference prior research in follow-up questions
    // NOTE: Gemini doesn't support updateChatCtx (crashes with "Operation not implemented" code 1008).
    // For Gemini, generateReply({ instructions }) already injects as model turns, so context persists naturally.
    function injectIntoChatCtx(content) {
        if (!currentAgent)
            return;
        // Skip for Gemini — updateChatCtx triggers unsupported operations on Gemini Live API
        if (currentVoiceMode === 'realtime' && currentProvider === 'gemini')
            return;
        try {
            const chatCtx = currentAgent.chatCtx.copy();
            chatCtx.addMessage({
                role: 'assistant',
                content: content,
            });
            currentAgent.updateChatCtx(chatCtx);
            console.log(`🧠 ChatCtx updated (+${content.length} chars persistent context)`);
        }
        catch (err) {
            console.log('⚠️ ChatCtx injection failed:', err);
        }
    }
    // Extract recent voice conversation turns from the realtime LLM's in-memory ChatContext.
    // Replaces the internal conversationHistory array in fast-brain.ts.
    function getChatHistory(maxTurns = 20) {
        if (!currentAgent)
            return [];
        try {
            const items = currentAgent.chatCtx.items;
            const turns = [];
            for (const item of items) {
                if (item.type !== 'message')
                    continue;
                const msg = item;
                if (msg.role !== 'user' && msg.role !== 'assistant')
                    continue;
                const text = msg.textContent ?? '';
                if (!text.trim())
                    continue;
                turns.push({ role: msg.role, text: text.trim() });
            }
            return turns.slice(-maxTurns);
        }
        catch (err) {
            console.log('⚠️ getChatHistory: failed to read chatCtx:', err);
            return [];
        }
    }
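    // Return shape (derived from the loop above, example values illustrative):
    //   [{ role: 'user', text: 'how big is the repo?' }, { role: 'assistant', text: 'About...' }]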
    // Research event batching — debounce rapid-fire tool events into a single voice queue entry
    let researchBatchTimer = null;
    function scheduleResearchBatch() {
        if (researchBatchTimer)
            return; // Already scheduled
        researchBatchTimer = setTimeout(() => {
            researchBatchTimer = null;
            if (!activeResearch || activeResearch.pendingUpdates.length === 0)
                return;
            const updates = activeResearch.pendingUpdates.splice(0);
            const batchText = updates.slice(-10).join('. ');
            console.log(`📡 [research] Batching ${updates.length} events: ${batchText.substring(0, 80)}...`);
            // Send to frontend for visibility
            sendToFrontend({
                type: 'claude_output',
                text: `[Research Progress] ${batchText}`,
                isStreaming: true,
                agentRole: 'research-progress',
            });
            // COMMENTED OUT — voice narration disabled, research progress goes to frontend logs only
            // // queueVoiceInjection(getResearchUpdateInjection(batchText))
            // Route through fast brain for contextual voice updates (capped at 3 per task)
            if (activeResearch.voiceUpdateCount < 3) {
                const voiceSid = currentLLM?.sessionId;
                if (voiceSid) {
                    contextualizeResearchUpdate(workingDir, voiceSid, lastTaskRequest || '', updates, activeResearch.researchLog)
                        .then(update => {
                            if (update && update !== 'NOTHING' && activeResearch) {
                                activeResearch.voiceUpdateCount++;
                                queueVoiceInjection(getResearchUpdateInjection(update));
                            }
                        })
                        .catch(() => { }); // Silent fail — updates are optional
                }
            }
        }, 8000); // 8s debounce: reduces voice queue flooding during research
    }
    // Proactive conversational loop — keeps conversation alive during research
    let proactiveTimer = null;
    let proactivePromptHistory = [];
    const PROACTIVE_INTERVAL = 15000; // 15 seconds (offset from 8s batch timer)
    const MAX_PROACTIVE_PROMPTS = 4; // Cap per research task
    function startProactiveLoop(task, sessionId) {
        stopProactiveLoop();
        proactivePromptHistory = [];
        let proactiveCount = 0;
        proactiveTimer = setInterval(async () => {
            if (!activeResearch) {
                stopProactiveLoop();
                return;
            }
            if (proactiveCount >= MAX_PROACTIVE_PROMPTS)
                return;
            if (agentState !== 'listening' || userState === 'speaking')
                return;
            if (researchBatchTimer)
                return; // Don't collide with batch updates
            if (isProcessingQueue)
                return; // Don't collide with voice queue
            try {
                const prompt = await generateProactivePrompt(workingDir, sessionId, task, activeResearch.researchLog, proactivePromptHistory);
                if (prompt && prompt !== 'NOTHING') {
                    proactivePromptHistory.push(prompt);
                    proactiveCount++;
                    queueVoiceInjection(`[PROACTIVE CONTEXT] ${prompt}. Say this naturally to the user. Do NOT call any tools.`);
                }
            }
            catch { } // Silent fail — proactive prompts are optional
        }, PROACTIVE_INTERVAL);
    }
    function stopProactiveLoop() {
        if (proactiveTimer) {
            clearInterval(proactiveTimer);
            proactiveTimer = null;
        }
        proactivePromptHistory = [];
    }
    // Helper to send data to frontend (with size limit handling)
    const MAX_MESSAGE_SIZE = 60000;
    async function sendToFrontend(data) {
        if (!localParticipant) {
            console.log('⚠️ sendToFrontend: no localParticipant!');
            return;
        }
        try {
            const encoder = new TextEncoder();
            let jsonData = JSON.stringify(data);
            // If message is too large, truncate the text content
            if (jsonData.length > MAX_MESSAGE_SIZE) {
                const truncatedData = { ...data };
                if (truncatedData.text && typeof truncatedData.text === 'string') {
                    const overhead = JSON.stringify({ ...truncatedData, text: '' }).length;
                    const maxTextLength = MAX_MESSAGE_SIZE - overhead - 100;
                    truncatedData.text = truncatedData.text.substring(0, maxTextLength) + '\n\n[Message truncated due to size limit]';
                    jsonData = JSON.stringify(truncatedData);
                    console.log(`⚠️ Message truncated from ${data.text?.length} to ${truncatedData.text.length} chars`);
                }
            }
            const payload = encoder.encode(jsonData);
            await localParticipant.publishData(payload, {
                reliable: true,
                topic: 'osborn-updates',
            });
            console.log(`📤 Sent to frontend: ${data.type} (${payload.length} bytes)`);
        }
        catch (err) {
            console.error('❌ sendToFrontend error:', err);
        }
    }
    // Helper: announce via voice - uses voice queue for realtime, say() for direct
    async function announceViaVoice(text) {
        if (!currentSession)
            return;
        if (currentVoiceMode === 'realtime') {
            queueVoiceInjection(getNotificationInjection(text));
        }
        else {
            try {
                await currentSession.say(text);
            }
            catch (err) {
                console.log('⚠️ Voice announcement failed:', err);
            }
        }
    }
    // Create DIRECT session (STT + Claude Agent SDK + TTS)
    async function createDirectSession(resumeSessionId) {
        console.log('🎯 Creating direct session...');
        const stt = createSTT({ provider: 'deepgram' });
        const tts = createTTS({ provider: 'deepgram', voice: 'aura-asteria-en' });
        const vad = await createVAD();
        // Create Claude LLM wrapper in research mode
        const directLLM = createClaudeLLM({
            workingDirectory: workingDir,
            mcpServers,
            resumeSessionId,
        });
        currentLLM = directLLM;
        // For resumed sessions, eagerly create workspace (we know the real ID)
        if (resumeSessionId) {
            const workspace = ensureSessionWorkspace(workingDir, resumeSessionId);
            console.log(`📁 Session workspace (resumed): ${workspace}`);
        }
        // For new sessions, create workspace when SDK assigns real session ID
        directLLM.events.once('session_id', ({ sessionId }) => {
            const workspace = ensureSessionWorkspace(workingDir, sessionId);
            console.log(`📁 Session workspace created: ${workspace}`);
        });
        // Wire up MCP server changes to frontend
        directLLM.events.on('mcp_servers_changed', (data) => {
            console.log(`🔌 MCP servers changed: ${data.enabledKeys.join(', ') || 'none'}`);
            sendToFrontend({
                type: 'mcp_servers_changed',
                enabledKeys: data.enabledKeys,
                mcpServers: getMcpServerStatusList(config),
            });
        });
        // Wire up events from the Claude SDK wrapper to frontend
        directLLM.events.on('tool_use', (data) => {
            console.log(`🔧 Claude: ${data.name}`);
            sendToFrontend({ type: 'tool_use', tool: data.name, agentRole: 'direct' });
        });
        directLLM.events.on('tool_result', (data) => {
            console.log(`✅ Done: ${data.name}`);
            sendToFrontend({ type: 'tool_use', tool: data.name, status: 'completed', agentRole: 'direct' });
            // Detect research artifact writes (session workspace or legacy research dir)
            if ((data.name === 'Write' || data.name === 'Edit') && data.input?.file_path) {
                const fp = data.input.file_path;
                if (fp.includes('.osborn/sessions/') || fp.includes('.osborn/research/')) {
                    sendToFrontend({
                        type: 'research_artifact_updated',
                        filePath: fp,
                        fileName: fp.split('/').pop(),
                    });
                }
            }
        });
        // Wire up Claude text output - RAW text goes to frontend for chat bubbles
        directLLM.events.on('assistant_text', (data) => {
            console.log(`💬 Claude text: ${data.text?.substring(0, 60)}...`);
            sendToFrontend({
                type: 'claude_output',
                text: data.text,
                isStreaming: true,
                agentRole: 'direct',
            });
        });
        // Wire up Claude final result - RAW result goes to frontend
        directLLM.events.on('assistant_result', (data) => {
            console.log(`📋 Claude result: ${data.text?.substring(0, 60)}...`);
            sendToFrontend({
                type: 'claude_output',
                text: data.text,
                isStreaming: false,
                isFinal: true,
                agentRole: 'direct',
            });
        });
        // Wire up permission requests - sends to frontend for user approval
        directLLM.events.on('permission_request', (data) => {
            console.log(`⚠️ Permission needed: ${data.toolName}`);
            const toolName = data.toolName;
            const input = data.input || {};
            // Build descriptive message based on tool type
            let description = `I need permission to use ${toolName}.`;
            if (toolName === 'Bash' && input.command) {
                const cmd = String(input.command).substring(0, 60);
                description = `I want to run the command: ${cmd}${String(input.command).length > 60 ? '...' : ''}`;
            }
            else if (toolName === 'Write' && input.file_path) {
                description = `I want to create or overwrite the file: ${input.file_path}`;
            }
            else if (toolName === 'Edit' && input.file_path) {
                description = `I want to edit the file: ${input.file_path}`;
            }
            else if (toolName === 'WebFetch' && input.url) {
                description = `I want to fetch content from: ${input.url}`;
            }
            sendToFrontend({
                type: 'permission_request',
                toolName: data.toolName,
                input: data.input,
                description,
                agentRole: 'direct',
            });
            // Speak the descriptive request so user knows to respond
            if (currentSession) {
                const ttsMessage = `${description} Say yes, no, or always.`;
                currentSession.say?.(ttsMessage).catch(() => { });
            }
        });
        // Wire up session resume failure - notify frontend when SDK creates new session instead
        directLLM.events.on('session_resume_failed', (data) => {
            console.error(`❌ Session resume failed: ${data.requestedSessionId} → ${data.actualSessionId}`);
            sendToFrontend({
                type: 'session_resume_failed',
                requestedSessionId: data.requestedSessionId,
                actualSessionId: data.actualSessionId,
            });
        });
        // Wire up file checkpoint capture - track restore points for file rewind
        directLLM.events.on('checkpoint_captured', (data) => {
            console.log(`📍 Checkpoint: ${data.checkpointId.substring(0, 8)}...`);
            sendToFrontend({
                type: 'checkpoint_captured',
                checkpointId: data.checkpointId,
            });
        });
        // Create the Agent with instructions, STT, LLM, TTS
        const agent = new voice.Agent({
            instructions: DIRECT_MODE_PROMPT,
            stt,
            llm: directLLM,
            tts,
            vad,
            turnDetection: 'vad',
        });
        // Create the session (no longer passes STT/LLM/TTS here)
        const session = new voice.AgentSession({
            turnDetection: 'vad',
        });
        return { session, agent };
    }
    // ============================================================
    // REALTIME MODE - OpenAI/Gemini native speech-to-speech
    // ============================================================
    // Claude handler for realtime mode tool execution
    let realtimeClaudeHandler = null;
    // Create REALTIME session (OpenAI/Gemini native speech-to-speech)
    async function createRealtimeSession(sessionRealtimeConfig, resumeSessionId) {
        const rtConfig = sessionRealtimeConfig || realtimeConfig;
        console.log(`🎯 Creating realtime session (${rtConfig.provider})...`);
        // Create Claude LLM for tool execution (research tasks)
        realtimeClaudeHandler = createClaudeLLM({
            workingDirectory: workingDir,
            mcpServers,
            resumeSessionId,
        });
        currentLLM = realtimeClaudeHandler;
        // For resumed sessions, eagerly create workspace (we know the real ID)
        if (resumeSessionId) {
            const workspace = ensureSessionWorkspace(workingDir, resumeSessionId);
            console.log(`📁 Session workspace (resumed): ${workspace}`);
        }
        // For new sessions, create workspace when SDK assigns real session ID
        realtimeClaudeHandler.events.once('session_id', ({ sessionId }) => {
            const workspace = ensureSessionWorkspace(workingDir, sessionId);
            console.log(`📁 Session workspace created: ${workspace}`);
        });
        // Wire up MCP server changes to frontend
        realtimeClaudeHandler.events.on('mcp_servers_changed', (data) => {
            console.log(`🔌 MCP servers changed: ${data.enabledKeys.join(', ') || 'none'}`);
            sendToFrontend({
                type: 'mcp_servers_changed',
                enabledKeys: data.enabledKeys,
                mcpServers: getMcpServerStatusList(config),
            });
        });
        // Wire up Claude events to frontend
        realtimeClaudeHandler.events.on('tool_use', (data) => {
            console.log(`🔧 Claude: ${data.name}`);
            sendToFrontend({ type: 'tool_use', tool: data.name, agentRole: 'realtime' });
        });
        realtimeClaudeHandler.events.on('tool_result', (data) => {
            console.log(`✅ Done: ${data.name}`);
            sendToFrontend({ type: 'tool_use', tool: data.name, status: 'completed', agentRole: 'realtime' });
            // Detect research artifact writes (session workspace or legacy research dir)
            if ((data.name === 'Write' || data.name === 'Edit') && data.input?.file_path) {
                const fp = data.input.file_path;
                if (fp.includes('.osborn/sessions/') || fp.includes('.osborn/research/')) {
                    sendToFrontend({
                        type: 'research_artifact_updated',
                        filePath: fp,
                        fileName: fp.split('/').pop(),
                    });
                }
            }
        });
        realtimeClaudeHandler.events.on('assistant_result', (data) => {
            console.log(`📋 Claude result: ${data.text?.substring(0, 60)}...`);
            sendToFrontend({
                type: 'claude_output',
                text: data.text,
                isStreaming: false,
                isFinal: true,
                agentRole: 'realtime',
            });
        });
        // Stream Claude's research text to frontend as progress updates
        // Skips during active research to avoid duplication with per-task onText handler
        realtimeClaudeHandler.events.on('assistant_text', (data) => {
            if (data.text && data.text.trim()) {
                if (activeResearch)
                    return;
                sendToFrontend({
                    type: 'claude_output',
                    text: data.text,
                    isStreaming: true,
                    agentRole: 'realtime-agent',
                });
            }
        });
        realtimeClaudeHandler.events.on('permission_request', (data) => {
            console.log(`⚠️ Permission needed: ${data.toolName}`);
            const toolName = data.toolName;
            const input = data.input || {};
            // Build descriptive message based on tool type
            let description = `I need permission to use ${toolName}.`;
            if (toolName === 'Bash' && input.command) {
                const cmd = String(input.command).substring(0, 60);
                description = `I want to run the command: ${cmd}${String(input.command).length > 60 ? '...' : ''}`;
            }
            else if (toolName === 'Write' && input.file_path) {
                description = `I want to create or overwrite the file: ${input.file_path}`;
            }
            else if (toolName === 'Edit' && input.file_path) {
                description = `I want to edit the file: ${input.file_path}`;
            }
            else if (toolName === 'WebFetch' && input.url) {
                description = `I want to fetch content from: ${input.url}`;
            }
            sendToFrontend({
                type: 'permission_request',
                toolName: data.toolName,
                input: data.input,
                description,
                agentRole: 'realtime',
            });
        });
        // Wire up session resume failure for realtime mode
        realtimeClaudeHandler.events.on('session_resume_failed', (data) => {
            console.error(`❌ Session resume failed: ${data.requestedSessionId} → ${data.actualSessionId}`);
            sendToFrontend({
                type: 'session_resume_failed',
                requestedSessionId: data.requestedSessionId,
                actualSessionId: data.actualSessionId,
            });
        });
        // Wire up file checkpoint capture for realtime mode
        realtimeClaudeHandler.events.on('checkpoint_captured', (data) => {
            console.log(`📍 Checkpoint: ${data.checkpointId.substring(0, 8)}...`);
            sendToFrontend({
                type: 'checkpoint_captured',
                checkpointId: data.checkpointId,
            });
        });
        // Extract priority content from research results — preserves URLs, code blocks, and key details
        function extractPriorityContent(result, maxChars = 4000) {
            if (result.length <= maxChars)
                return result;
            // Extract URLs (preserve for voice relay)
            const urlRegex = /https?:\/\/[^\s\)\"\'>\]]+/g;
            const urls = [...new Set(result.match(urlRegex) || [])];
            // Extract code blocks (first 2, up to 400 chars each)
            const codeBlockRegex = /```[\s\S]*?```/g;
            const codeBlocks = [];
            let match;
            while ((match = codeBlockRegex.exec(result)) !== null && codeBlocks.length < 2) {
                const block = match[0].length > 400 ? match[0].substring(0, 397) + '```' : match[0];
                codeBlocks.push(block);
            }
            // Build sections
            const sections = [];
            // Take the first ~2500 chars of narrative (intro + main findings)
            const narrativeEnd = Math.min(result.length, 2500);
            const narrativeTruncated = result.substring(0, narrativeEnd);
            const lastPeriod = narrativeTruncated.lastIndexOf('.');
            const narrative = lastPeriod > narrativeEnd * 0.6
                ? narrativeTruncated.substring(0, lastPeriod + 1)
                : narrativeTruncated;
            sections.push(narrative);
            // Append conclusion (last ~500 chars) if result is long enough
            if (result.length > 3000) {
                const tail = result.substring(result.length - 500);
                const firstPeriod = tail.indexOf('.');
                const conclusion = firstPeriod > 0 ? tail.substring(firstPeriod + 1).trim() : tail.trim();
                if (conclusion.length > 50) {
                    sections.push(`\n\n[CONCLUSION]\n${conclusion}`);
                }
            }
            // Append code blocks if not already in the narrative
            if (codeBlocks.length > 0) {
                const codeSection = codeBlocks.filter(cb => !narrative.includes(cb));
                if (codeSection.length > 0) {
                    sections.push(`\n\n[CODE EXAMPLES]\n${codeSection.join('\n\n')}`);
                }
            }
            // Append URLs if not already in the narrative
            const newUrls = urls.filter(u => !narrative.includes(u));
            if (newUrls.length > 0) {
                sections.push(`\n\n[LINKS]\n${newUrls.slice(0, 5).join('\n')}`);
            }
            let assembled = sections.join('');
            // Final safety truncation if assembled exceeds maxChars
            if (assembled.length > maxChars) {
                const truncated = assembled.substring(0, maxChars);
                const lp = truncated.lastIndexOf('.');
                assembled = lp > maxChars * 0.7 ? truncated.substring(0, lp + 1) : truncated + '...';
            }
            return assembled;
        }
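        // Illustrative behavior (assumed input): for a 10,000-char result with two code
        // fences and six URLs, this keeps ~2,500 chars of narrative cut at a sentence
        // boundary, a [CONCLUSION] tail, the first two fences (capped at 400 chars each)
        // under [CODE EXAMPLES], at most five unseen URLs under [LINKS], then re-truncates
        // to 4,000 chars if the assembly overshoots.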
|
|
898
|
+
// Extracted research execution — called by ask_agent, SDK handles queuing internally
|
|
899
|
+
function executeResearch(task) {
|
|
900
|
+
sendToFrontend({ type: 'system', text: `Executing: ${task}` });
|
|
901
|
+
// Fire-and-forget: write user question to spec.md BEFORE agent starts
|
|
902
|
+
const questionSid = currentLLM?.sessionId || resumeSessionId;
|
|
903
|
+
if (questionSid) {
|
|
904
|
+
writeQuestionToSpec(workingDir, questionSid, task).catch(err => console.error('❌ writeQuestionToSpec failed:', err));
|
|
905
|
+
}
|
|
906
|
+
// Clean up previous research listeners to avoid duplicate event handlers
|
|
907
|
+
if (activeResearch) {
|
|
908
|
+
activeResearch.cleanup();
|
|
909
|
+
if (researchBatchTimer) {
|
|
910
|
+
clearTimeout(researchBatchTimer);
|
|
911
|
+
researchBatchTimer = null;
|
|
912
|
+
}
|
|
913
|
+
}
|
|
914
|
+
// Set up research log batching — events push to queue for state-driven injection
|
|
915
|
+
const researchLog = [];
|
|
916
|
+
const pendingUpdates = [];
|
|
917
|
+
const onToolUse = (data) => {
|
|
918
|
+
const input = data.input || {};
|
|
919
|
+
let entry;
|
|
920
|
+
if (data.name === 'Read' && input.file_path) {
|
|
921
|
+
const fileName = input.file_path.split('/').pop() || input.file_path;
|
|
922
|
+
entry = `Reading ${fileName}`;
|
|
923
|
+
}
|
|
924
|
+
else if (data.name === 'Bash' && input.command) {
|
|
925
|
+
const cmd = input.command.substring(0, 80);
|
|
926
|
+
entry = `Running: ${cmd}`;
|
|
927
|
+
}
|
|
928
|
+
else if (data.name === 'Glob' && input.pattern) {
|
|
929
|
+
entry = `Searching for files matching ${input.pattern}`;
|
|
930
|
+
}
|
|
931
|
+
else if (data.name === 'Grep' && input.pattern) {
|
|
932
|
+
entry = `Searching for "${input.pattern}" in files`;
|
|
933
|
+
}
|
|
934
|
+
else if (data.name === 'WebSearch' && input.query) {
|
|
935
|
+
entry = `Searching the web for "${input.query}"`;
|
|
936
|
+
}
|
|
937
|
+
else if (data.name === 'WebFetch' && input.url) {
|
|
938
|
+
const hostname = input.url.replace(/https?:\/\//, '').split('/')[0];
|
|
939
|
+
entry = `Fetching content from ${hostname}`;
|
|
940
|
+
}
|
|
941
|
+
else if (data.name === 'Write' && input.file_path) {
|
|
942
|
+
const fileName = input.file_path.split('/').pop() || input.file_path;
|
|
943
|
+
entry = `Writing ${fileName}`;
|
|
944
|
+
}
|
|
945
|
+
else if (data.name === 'Edit' && input.file_path) {
|
|
946
|
+
const fileName = input.file_path.split('/').pop() || input.file_path;
|
|
947
|
+
entry = `Editing ${fileName}`;
|
|
948
|
+
}
|
|
949
|
+
else if (data.name.startsWith('mcp__')) {
|
|
950
|
+
const parts = data.name.split('__');
|
|
951
|
+
const serverName = parts[1] || 'external';
|
|
952
|
+
const toolAction = parts.slice(2).join(' ') || 'tool';
|
|
953
|
+
entry = `Using ${serverName}: ${toolAction}`;
|
|
954
|
+
}
|
|
955
|
+
else {
|
|
956
|
+
entry = `Using ${data.name}`;
|
|
957
|
+
}
|
|
958
|
+
researchLog.push(entry);
|
|
959
|
+
pendingUpdates.push(entry);
|
|
960
|
+
scheduleResearchBatch();
|
|
961
|
+
};
|
|
962
|
+
const ANSWER_CHECK_THRESHOLD = 300; // chars — only check substantial outputs
|
|
963
|
+
const onToolResult = (data) => {
|
|
964
|
+
// Only log to researchLog for the final summary — don't push to pendingUpdates
|
|
965
|
+
// This prevents redundant "Reading config.ts. Read done." voice updates
|
|
966
|
+
researchLog.push(`${data.name} completed`);
|
|
967
|
+
// Fire-and-forget: check if substantial tool results answer any spec questions
|
|
968
|
+
// Note: PostToolUse emits { name, input, response } — use data.response (not data.result)
|
|
969
|
+
const resultText = typeof data.response === 'string' ? data.response : JSON.stringify(data.response || '');
|
|
970
|
+
if (resultText.length > ANSWER_CHECK_THRESHOLD) {
|
|
971
|
+
const sid = currentLLM?.sessionId || resumeSessionId;
|
|
972
|
+
if (sid)
|
|
973
|
+
checkOutputAgainstQuestions(workingDir, sid, resultText, 'tool_result').catch(() => { });
|
|
974
|
+
}
|
|
975
|
+
// When AskUserQuestion completes, the user's answer is a decision — track it in spec
|
|
976
|
+
if (data.name === 'AskUserQuestion' && data.response) {
|
|
977
|
+
const sid = currentLLM?.sessionId || resumeSessionId;
|
|
978
|
+
if (sid) {
|
|
979
|
+
const questionText = JSON.stringify(data.input?.questions || data.input || {});
|
|
980
|
+
const answerText = typeof data.response === 'string' ? data.response : JSON.stringify(data.response);
|
|
981
|
+
const specUpdate = `User answered a clarifying question during research.\nQuestion: ${questionText}\nAnswer: ${answerText}\nRecord this as a user decision in spec.md.`;
|
|
982
|
+
askHaiku(workingDir, sid, specUpdate).catch(err => console.error('❌ Failed to record AskUserQuestion answer in spec:', err));
|
|
983
|
+
console.log(`📝 AskUserQuestion answer forwarded to fast brain for spec tracking`);
|
|
984
|
+
}
|
|
985
|
+
}
|
|
986
|
+
};
|
|
987
|
+
const onText = (data) => {
|
|
988
|
+
if (data.text?.trim()) {
|
|
989
|
+
const text = data.text.trim();
|
|
990
|
+
const preview = text.substring(0, 150);
|
|
991
|
+
const firstSentence = preview.match(/^[^.!?\n]+[.!?]/)?.[0] || preview;
|
|
992
|
+
researchLog.push(firstSentence);
|
|
993
|
+
pendingUpdates.push(firstSentence);
|
|
994
|
+
scheduleResearchBatch();
|
|
995
|
+
// Fire-and-forget: check if substantial agent reasoning answers any spec questions
|
|
996
|
+
if (text.length > ANSWER_CHECK_THRESHOLD) {
|
|
997
|
+
const sid = currentLLM?.sessionId || resumeSessionId;
|
|
998
|
+
if (sid)
|
|
999
|
+
checkOutputAgainstQuestions(workingDir, sid, text, 'assistant_text').catch(() => { });
|
|
1000
|
+
}
|
|
1001
|
+
}
|
|
1002
|
+
};
|
|
1003
|
+
realtimeClaudeHandler.events.on('tool_use', onToolUse);
|
|
1004
|
+
realtimeClaudeHandler.events.on('tool_result', onToolResult);
|
|
1005
|
+
realtimeClaudeHandler.events.on('assistant_text', onText);
|
|
1006
|
+
const cleanupListeners = () => {
|
|
1007
|
+
realtimeClaudeHandler?.events.off('tool_use', onToolUse);
|
|
1008
|
+
realtimeClaudeHandler?.events.off('tool_result', onToolResult);
|
|
1009
|
+
realtimeClaudeHandler?.events.off('assistant_text', onText);
|
|
1010
|
+
};
|
|
1011
|
+
// Track active research — updates drain when model enters 'listening' state
|
|
1012
|
+
activeResearch = {
|
|
1013
|
+
researchLog,
|
|
1014
|
+
pendingUpdates,
|
|
1015
|
+
cleanup: cleanupListeners,
|
|
1016
|
+
voiceUpdateCount: 0,
|
|
1017
|
+
};
|
|
1018
|
+
// Start proactive conversational loop
|
|
1019
|
+
const proactiveSid = currentLLM?.sessionId || resumeSessionId;
|
|
1020
|
+
if (proactiveSid) {
|
|
1021
|
+
startProactiveLoop(task, proactiveSid);
|
|
1022
|
+
}
|
|
1023
|
+
// Run research in the background (non-blocking)
|
|
1024
|
+
            const researchPromise = (async () => {
                const stream = realtimeClaudeHandler.chat({
                    chatCtx: {
                        items: [{ type: 'message', role: 'user', content: [task] }],
                    },
                });
                let result = '';
                for await (const chunk of stream) {
                    if (chunk.delta?.content) {
                        result += chunk.delta.content;
                    }
                }
                return result;
            })();
            // Handle completion asynchronously
            researchPromise.then(async (result) => {
                console.log(`✅ [realtime] Research complete (${result.length} chars)`);
                // Clean up
                cleanupListeners();
                // Send raw result to frontend as a log entry (not assistant_response — that's reserved
                // for the voice model's spoken response, avoiding duplication in chat)
                await sendToFrontend({ type: 'claude_output', text: result, isStreaming: false, agentRole: 'research-result' });
                const resultPreview = result.length > 150
                    ? result.substring(0, 150) + '...'
                    : result;
                await sendToFrontend({ type: 'task_completed', task, resultPreview });
                // Build enhanced return with research log
                const logSummary = researchLog.length > 0
                    ? `\n\n[RESEARCH LOG]\n${researchLog.slice(0, 25).join('\n')}`
                    : '';
                // Extract priority content — preserves URLs, code blocks, and key details (4000 char limit)
                const resultForVoice = extractPriorityContent(result);
                const fullResult = (resultForVoice + logSummary) || 'Research completed successfully.';
                // Clear active research and timers before injecting final results
                if (researchBatchTimer) {
                    clearTimeout(researchBatchTimer);
                    researchBatchTimer = null;
                }
                stopProactiveLoop();
                activeResearch = null;
                // Send final results to frontend for visibility
                await sendToFrontend({
                    type: 'claude_output',
                    text: `[Research Complete] Injecting findings into voice model (${fullResult.length} chars)`,
                    isStreaming: false,
                    agentRole: 'research-progress',
                });
                // Route through fast brain for context augmentation before voice injection
                // Fast brain adds spec context but does NOT summarize — passes details through verbatim
                const voiceSid = currentLLM?.sessionId || resumeSessionId;
                console.log(`📡 [realtime] Augmenting results via fast brain (${fullResult.length} chars, agentState: ${agentState})`);
                if (voiceSid) {
                    augmentResearchResult(workingDir, voiceSid, task, fullResult)
                        .then(augmented => {
                            queueVoiceInjection(getResearchCompleteInjection(task, augmented));
                        })
                        .catch(() => {
                            // Fallback: use result directly if fast brain fails
                            queueVoiceInjection(getResearchCompleteInjection(task, fullResult));
                        });
                }
                else {
                    queueVoiceInjection(getResearchCompleteInjection(task, fullResult));
                }
                // Inject FULL untruncated result into ChatCtx so voice model can answer
                // follow-up questions ("tell me more", "what were those links?") from memory
                injectIntoChatCtx(`[FULL RESEARCH DETAILS for "${task}"]\n${result}`);
                // Fire-and-forget JSONL-based refinement pass via fast brain
                // Reads FULL untruncated data from JSONL — no content buffer, no truncation
                const postResearchSessionId = currentLLM?.sessionId || resumeSessionId;
                if (postResearchSessionId) {
                    updateSpecFromJSONL(workingDir, postResearchSessionId, task, researchLog)
                        .then(updateResult => {
                            if (!updateResult)
                                return;
                            // Notify frontend about spec.md update
                            if (updateResult.spec) {
                                const specPath = `${workingDir}/.osborn/sessions/${postResearchSessionId}/spec.md`;
                                sendToFrontend({
                                    type: 'research_artifact_updated',
                                    filePath: specPath,
                                    fileName: 'spec.md',
                                });
                                const truncated = getSpecForVoiceModel(workingDir, postResearchSessionId);
                                if (truncated) {
                                    injectIntoChatCtx(`[UPDATED SESSION SPEC]\n${truncated}`);
                                    console.log(`📋 Re-injected spec.md into ChatCtx after fast brain update (${truncated.length} chars)`);
                                }
                            }
                            // Notify frontend about each library file written by the fast brain
                            for (const libFile of updateResult.libraryFiles) {
                                const libPath = `${workingDir}/.osborn/sessions/${postResearchSessionId}/library/${libFile}`;
                                sendToFrontend({
                                    type: 'research_artifact_updated',
                                    filePath: libPath,
                                    fileName: libFile,
                                });
                            }
                        });
                }
            }).catch(async (err) => {
                console.error(`❌ [realtime] Research failed:`, err);
                // Clean up
                cleanupListeners();
                if (researchBatchTimer) {
                    clearTimeout(researchBatchTimer);
                    researchBatchTimer = null;
                }
                stopProactiveLoop();
                activeResearch = null;
                // Queue error notification — will be spoken when model is available
                queueVoiceInjection(`[NOTIFICATION] The research task encountered an error: ${err.message}. Let the user know briefly and ask if they want to try again. Do NOT call any tools.`);
            });
            // Return immediately to unblock the voice model
            return 'Research started. I\'ll relay findings as they come in — you can keep talking to the user while I work.';
        }
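        // The call above returns immediately; spoken updates flow through queueVoiceInjection()
        // and processVoiceQueue(), which are defined earlier in this file and drain when the
        // model re-enters the 'listening' state. As a rough illustration only (names suffixed
        // "Example" are hypothetical, and the real helpers also coordinate with agentState
        // and userState), such a queue can be as small as this:
        const voiceQueueExample = [];
        let drainingExample = false;
        function queueVoiceInjectionExample(text) {
            voiceQueueExample.push(text);
        }
        async function processVoiceQueueExample(sess) {
            if (drainingExample)
                return; // re-entrancy guard, mirroring isProcessingQueue
            drainingExample = true;
            try {
                while (voiceQueueExample.length > 0) {
                    const next = voiceQueueExample.shift();
                    // generateReply() speaks the injected text through the realtime model
                    await sess.generateReply({ instructions: next });
                }
            }
            finally {
                drainingExample = false;
            }
        }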
        // Create tools for the realtime voice LLM
        const askAgentTool = llm.tool({
            description: `Delegate a task to your backend agent (Claude), which has full research, analysis, reasoning, and coding capabilities.

Use for:
- Researching topics, technologies, concepts, or ideas in depth
- Fetching and analyzing web pages, articles, blog posts, YouTube transcripts
- Reading and summarizing documentation, papers, or reference materials
- Exploring and analyzing codebases, configs, architecture
- Comparing options, tools, approaches — with tradeoffs and recommendations
- Running bash commands, testing implementations
- Using MCP tools (GitHub, YouTube, and other external tools)
- Saving findings to the session library and updating the spec
- Any question requiring research, analysis, verification, or deeper reasoning

Reformulate the user's spoken request into a clear, specific task.
The more context you include (topic, constraints, what they want to learn), the better the results.
If the user wants specific details (examples, URLs, comparisons, step-by-step breakdown), mention that in your request.`,
            parameters: z.object({
                request: z.string().describe('The task or question to delegate to the agent'),
            }),
            execute: async ({ request: task }) => {
                console.log(`\n🔨 [realtime] Task: "${task}"`);
                // Guard: if ask_haiku is currently handling a similar question, skip ask_agent
                // This prevents the double-calling pattern where Gemini fires both in rapid succession
                if (haikuInFlight && (Date.now() - haikuInFlight.time) < 8000) {
                    console.log(`⏭️ Skipping ask_agent — ask_haiku is already handling: "${haikuInFlight.question.substring(0, 60)}"`);
                    return 'The fast brain is already looking into this. Wait for its answer first.';
                }
                // Deduplication guard: prevent re-execution of same task within 10s
                const now = Date.now();
                if (task === lastTaskRequest && (now - lastTaskTime) < 10000) {
                    console.log('⏭️ Skipping duplicate task (within 10s window)');
                    return 'This task was just completed. The results were already relayed.';
                }
                lastTaskRequest = task;
                lastTaskTime = now;
                return executeResearch(task);
            },
        });
        const respondPermissionTool = llm.tool({
            description: `Respond to a permission request. Call after hearing user's response.`,
            parameters: z.object({
                response: z.enum(['allow', 'deny', 'always_allow']),
            }),
            execute: async ({ response }) => {
                if (!realtimeClaudeHandler?.hasPendingPermission()) {
                    return 'No pending permission.';
                }
                const pending = realtimeClaudeHandler.getPendingPermission();
                const allow = response === 'allow' || response === 'always_allow';
                realtimeClaudeHandler.respondToPermission(allow);
                await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
                return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
            },
        });
        const readSpecTool = llm.tool({
            description: `Read the session spec (spec.md) — shared state between you and your backend agent.
Use when: checking decisions, reading open questions to ask the user, understanding architecture/context, seeing what research has been saved. Updated by your backend agent during research.`,
            parameters: z.object({}),
            execute: async () => {
                const sessionId = currentLLM?.sessionId || resumeSessionId;
                if (!sessionId)
                    return 'No session spec yet — session is still initializing.';
                const specContent = readSessionSpec(workingDir, sessionId);
                if (!specContent)
                    return 'Spec is empty — no research done yet.';
                const libraryFiles = listLibraryFiles(workingDir, sessionId);
                const libSection = libraryFiles.length > 0
                    ? `\n\n[LIBRARY FILES: ${libraryFiles.join(', ')}]`
                    : '';
                const MAX = 4000;
                const content = specContent.length > MAX
                    ? specContent.substring(0, MAX) + '\n\n[... truncated]'
                    : specContent;
                return content + libSection;
            },
        });
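        // read_spec caps the spec at 4000 chars before handing it to the voice model.
        // The getSpecForVoiceModel() helper used later in this file presumably applies
        // the same kind of cap; a minimal sketch of that idea (hypothetical name,
        // reusing the real readSessionSpec import from config.js):
        function getSpecForVoiceModelExample(dir, sid, max = 4000) {
            try {
                const raw = readSessionSpec(dir, sid);
                if (!raw)
                    return null;
                return raw.length > max ? raw.substring(0, max) + '\n\n[... truncated]' : raw;
            }
            catch {
                return null;
            }
        }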
        const askHaikuTool = llm.tool({
            description: `Ask your fast brain — a quick knowledge assistant with access to session files and web search (~2 seconds).

Use for:
- Questions answerable from the session spec or research library (much faster than ask_agent)
- Quick web lookups for simple factual questions (definitions, current versions, basic how-to)
- Recording user decisions: "User decided: [decision]. Update the spec."
- Recording user preferences: "User prefers: [preference]. Update the spec."
- Checking what research has been done on a topic
- Reading specific library files for details

Do NOT use for: deep research, code analysis, multi-file codebase exploration, complex investigations → use ask_agent.
If the fast brain responds with NEEDS_DEEPER_RESEARCH, tell the user you need to look deeper, then call ask_agent with the context it provides.`,
            parameters: z.object({
                question: z.string().describe('The question to ask or instruction to execute'),
            }),
            execute: async ({ question }) => {
                const sessionId = currentLLM?.sessionId || resumeSessionId;
                if (!sessionId)
                    return 'Session not ready yet. Try ask_agent instead.';
                console.log(`🧠 [fast brain] Question: "${question.substring(0, 80)}..."`);
                // Track in-flight state to prevent ask_agent double-calling
                haikuInFlight = { question, time: Date.now() };
                // Build live research context if the agent is actively researching
                // This is a READ of the existing researchLog array — safe, no race conditions
                let researchContext;
                if (activeResearch && activeResearch.researchLog.length > 0) {
                    const recentLog = activeResearch.researchLog.slice(-15);
                    researchContext = `Research topic: "${lastTaskRequest || 'unknown'}"\nSteps completed (${activeResearch.researchLog.length} total, showing last ${recentLog.length}):\n${recentLog.join('\n')}`;
                }
                try {
                    const chatHistory = getChatHistory(20);
                    const answer = await askHaiku(workingDir, sessionId, question, researchContext, chatHistory);
                    haikuInFlight = null; // Clear in-flight state
                    console.log(`🧠 [fast brain] Answer (${answer.length} chars)`);
                    // Notify frontend if the fast brain likely wrote to spec.md
                    // (fast brain writes bypass the SDK tool system, so no tool_result event fires)
                    if (answer.includes('Written: spec.md') || question.toLowerCase().includes('update the spec') || question.toLowerCase().includes('user decided') || question.toLowerCase().includes('user prefers')) {
                        const specPath = `${workingDir}/.osborn/sessions/${sessionId}/spec.md`;
                        sendToFrontend({
                            type: 'research_artifact_updated',
                            filePath: specPath,
                            fileName: 'spec.md',
                        });
                    }
                    // If research is active and this was a user decision/direction,
                    // also queue it for the agent SDK so it picks up the context
                    // when its queue reaches the next query
                    if (activeResearch && (question.toLowerCase().includes('user decided') ||
                        question.toLowerCase().includes('user prefers') ||
                        question.toLowerCase().includes('update the spec') ||
                        question.toLowerCase().includes('also check') ||
                        question.toLowerCase().includes('focus on') ||
                        question.toLowerCase().includes('redirect'))) {
                        console.log(`📨 [fast brain] Passing user direction to agent SDK queue: "${question.substring(0, 60)}..."`);
                        // Queue as a lightweight context update — agent reads spec.md
                        // at the start of its next query and will see the updated direction
                        executeResearch(`[USER DIRECTION during active research] ${question}. The user's spec.md has been updated with this. Acknowledge briefly and incorporate into your current research context.`);
                    }
                    return answer;
                }
                catch (err) {
                    haikuInFlight = null; // Clear in-flight state on error
                    console.error('❌ Fast brain failed:', err);
                    return 'Fast brain lookup failed. Try ask_agent for a deeper search.';
                }
            },
        });
        const generateDocumentTool = llm.tool({
            description: `Generate a visual document (comparison table, Mermaid diagram, structured analysis, summary) from research findings. Saved to the session library as a markdown file.

Use when the user asks for:
- "Compare X and Y" → type: 'comparison' (markdown table with features, pros, cons)
- "Draw a diagram" / "Show the architecture" / "Map out the flow" → type: 'diagram' (Mermaid flowchart/sequence/architecture)
- "Analyze the tradeoffs" / "Break down the options" → type: 'analysis' (structured pros/cons, decision matrix)
- "Summarize what we found" / "Give me an overview document" → type: 'summary' (organized findings with key takeaways)

For actual images (photos, illustrations, screenshots), use ask_agent instead — this tool generates text-based visual documents only.`,
            parameters: z.object({
                request: z.string().describe('What to generate — be specific about the topic and what aspects to cover'),
                type: z.enum(['comparison', 'diagram', 'analysis', 'summary']).describe('Document type'),
            }),
            execute: async ({ request, type }) => {
                const sid = currentLLM?.sessionId || resumeSessionId;
                if (!sid)
                    return 'Session not ready yet.';
                console.log(`📊 [generate_document] Type: ${type}, Request: "${request.substring(0, 60)}..."`);
                try {
                    const result = await generateVisualDocument(workingDir, sid, request, type);
                    if (!result)
                        return 'Could not generate document — not enough research context available.';
                    const fullPath = `${workingDir}/.osborn/sessions/${sid}/library/${result.fileName}`;
                    sendToFrontend({
                        type: 'research_artifact_updated',
                        filePath: fullPath,
                        fileName: result.fileName,
                    });
                    return `Generated: ${result.fileName} (${result.content.length} chars) — saved to session library. The document contains a ${type} with the requested information.`;
                }
                catch (err) {
                    console.error('❌ Document generation failed:', err);
                    return 'Document generation failed. Try asking the research agent for a more detailed analysis.';
                }
            },
        });
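        // The user-direction check inside ask_haiku above is a chain of lowercase keyword
        // tests. The same test could be written once as a small predicate; this helper is
        // a sketch only (hypothetical name, not wired into the tool):
        function looksLikeUserDirectionExample(question) {
            const q = question.toLowerCase();
            return ['user decided', 'user prefers', 'update the spec', 'also check', 'focus on', 'redirect']
                .some(marker => q.includes(marker));
        }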
        // Instructions for realtime voice LLM
        const realtimeInstructions = getRealtimeInstructions(workingDir);
        // Create realtime model
        const realtimeModel = createRealtimeModelFromConfig(rtConfig, realtimeInstructions);
        // Create the Agent with realtime model and tools
        const agent = new voice.Agent({
            instructions: realtimeInstructions,
            llm: realtimeModel,
            tools: {
                ask_agent: askAgentTool,
                ask_haiku: askHaikuTool,
                read_spec: readSpecTool,
                generate_document: generateDocumentTool,
                respond_permission: respondPermissionTool,
            },
        });
        // Create the session
        const session = new voice.AgentSession({});
        return { session, agent };
    }
    // ============================================================
    // Room Event Handlers
    // ============================================================
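    // sendToFrontend() (defined earlier in this file) is the agent-to-UI channel used by
    // every handler below. A minimal sketch of how such a sender can sit on the LiveKit
    // data channel; the name, topic, and options here are assumptions, not the real
    // helper's exact shape:
    async function sendToFrontendExample(msg) {
        if (!localParticipant)
            return;
        const bytes = new TextEncoder().encode(JSON.stringify(msg));
        await localParticipant.publishData(bytes, { reliable: true, topic: 'agent-events' });
    }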
    room.on(RoomEvent.Connected, () => {
        console.log('✅ Connected to room:', roomName);
        localParticipant = room.localParticipant;
    });
    room.on(RoomEvent.Disconnected, () => {
        console.log('👋 Disconnected from room');
        // Clean up active research and voice queue
        voiceQueue.length = 0;
        isProcessingQueue = false;
        if (researchBatchTimer) {
            clearTimeout(researchBatchTimer);
            researchBatchTimer = null;
        }
        stopProactiveLoop();
        if (activeResearch) {
            activeResearch.cleanup();
            activeResearch = null;
        }
        currentSession = null;
        currentAgent = null;
        currentLLM = null;
        clearFastBrainHistory();
    });
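    // The ParticipantConnected handler below reads voiceArch/provider/sessionId from
    // participant.metadata, which the frontend sets when minting its access token.
    // A sketch of that mint with livekit-server-sdk (AccessToken is imported at the
    // top of this file; identity and field values here are hypothetical):
    async function mintFrontendTokenExample(apiKey, apiSecret, targetRoom) {
        const at = new AccessToken(apiKey, apiSecret, {
            identity: 'frontend-user',
            metadata: JSON.stringify({ voiceArch: 'realtime', provider: 'gemini' }),
        });
        at.addGrant({ room: targetRoom, roomJoin: true, canPublish: true, canSubscribe: true });
        return await at.toJwt();
    }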
    room.on(RoomEvent.ParticipantConnected, async (participant) => {
        console.log(`\n👤 User joined: ${participant.identity}`);
        // Clean up any existing session before creating a new one
        voiceQueue.length = 0;
        isProcessingQueue = false;
        if (researchBatchTimer) {
            clearTimeout(researchBatchTimer);
            researchBatchTimer = null;
        }
        stopProactiveLoop();
        clearFastBrainHistory();
        if (activeResearch) {
            activeResearch.cleanup();
            activeResearch = null;
        }
        if (currentSession) {
            console.log('🧹 Cleaning up previous session...');
            try {
                await currentSession.close();
            }
            catch { }
            try {
                currentSession.removeAllListeners();
            }
            catch { }
            currentSession = null;
            currentAgent = null;
            currentLLM = null;
        }
        // Extract voice architecture, provider, and sessionId from participant metadata (sent by frontend)
        // This overrides the config file setting for per-session flexibility
        let sessionVoiceMode = voiceMode; // Default to config
        let sessionRealtimeProvider = realtimeConfig.provider; // Default to config
        let preSelectedSessionId = null;
        try {
            const metadata = JSON.parse(participant.metadata || '{}');
            console.log(`📋 Participant metadata:`, metadata);
            if (metadata.voiceArch === 'realtime' || metadata.voiceArch === 'direct') {
                sessionVoiceMode = metadata.voiceArch;
                console.log(`🎙️ Using voice mode from frontend: ${sessionVoiceMode}`);
            }
            else if (metadata.voiceArch) {
                console.log(`⚠️ Unknown voiceArch "${metadata.voiceArch}", using config: ${voiceMode}`);
            }
            // Read provider selection from frontend (openai or gemini)
            if (metadata.provider === 'openai' || metadata.provider === 'gemini') {
                sessionRealtimeProvider = metadata.provider;
                console.log(`🎙️ Using provider from frontend: ${sessionRealtimeProvider}`);
            }
            // Read pre-selected session ID from frontend (session browser selection)
            if (metadata.sessionId && typeof metadata.sessionId === 'string' && metadata.sessionId.length > 0) {
                preSelectedSessionId = metadata.sessionId;
                console.log(`📂 Pre-selected session from frontend: ${preSelectedSessionId}`);
            }
        }
        catch (err) {
            console.log('⚠️ Could not parse participant metadata, using config voiceMode:', voiceMode);
        }
        // Sync to outer scope so DataReceived handler can use it
        currentVoiceMode = sessionVoiceMode;
        currentProvider = sessionRealtimeProvider;
        // Resume session ID — only set when resuming an existing session
        const resumeSessionId = preSelectedSessionId || undefined;
        if (resumeSessionId) {
            console.log(`🆔 Resuming session: ${resumeSessionId}`);
        }
        else {
            console.log(`🆔 New session (ID assigned by SDK)`);
        }
        // Create session based on voice mode (from frontend or config)
        let session;
        let agent;
        if (sessionVoiceMode === 'realtime') {
            // Override the config provider with the frontend's selection
            const sessionRealtimeConfig = { ...realtimeConfig, provider: sessionRealtimeProvider };
            console.log(`🎙️ REALTIME MODE: ${sessionRealtimeConfig.provider} native speech-to-speech`);
            const result = await createRealtimeSession(sessionRealtimeConfig, resumeSessionId);
            session = result.session;
            agent = result.agent;
        }
        else {
            console.log(`🎯 DIRECT MODE: Claude Agent SDK with full coding capabilities`);
            const result = await createDirectSession(resumeSessionId);
            session = result.session;
            agent = result.agent;
        }
        currentSession = session;
        currentAgent = agent; // Store for updateChatCtx() context injection
        // ============================================================
        // Session event wiring — extracted into function for auto-recovery
        // ============================================================
        let lastRecoveryTime = 0;
        const MIN_RECOVERY_INTERVAL = 10000; // 10 seconds between recovery attempts
        function wireSessionEvents(sess, agt) {
            // Transcript dedup state (reset per wiring)
            let lastSentUserTranscript = '';
            let lastSentAgentTranscript = '';
            function sendUserTranscript(transcript, source) {
                if (!transcript || transcript.length < 3)
                    return;
                const normalized = transcript.trim().replace(/\s+/g, ' ');
                if (normalized === lastSentUserTranscript)
                    return;
                if (normalized === '<noise>' || normalized.toLowerCase() === 'thank you')
                    return;
                console.log(`📝 User (${source}): "${transcript.substring(0, 60)}..."`);
                sendToFrontend({ type: 'user_transcript', text: transcript });
                lastSentUserTranscript = normalized;
            }
            function sendAgentTranscript(text, source) {
                if (!text || text.length < 3)
                    return;
                const normalized = text.trim().replace(/\s+/g, ' ');
                if (normalized === lastSentAgentTranscript)
                    return;
                console.log(`💬 Agent (${source}): "${text.substring(0, 60)}..."`);
                sendToFrontend({ type: 'assistant_response', text });
                lastSentAgentTranscript = normalized;
            }
            // PRIMARY: conversation_item_added is the authoritative source
            sess.on('conversation_item_added', (ev) => {
                let text = '';
                if (Array.isArray(ev.item?.content)) {
                    text = typeof ev.item.content[0] === 'string'
                        ? ev.item.content.join('\n')
                        : ev.item.content.map((c) => c.text).filter(Boolean).join('\n');
                }
                else if (typeof ev.item?.content === 'string') {
                    text = ev.item.content;
                }
                else if (ev.item?.text) {
                    text = ev.item.text;
                }
                if (ev.item?.role === 'user' && text) {
                    sendUserTranscript(text, 'conv_item');
                }
                else if (ev.item?.role === 'assistant' && text) {
                    sendAgentTranscript(text, 'conv_item');
                }
            });
            // FALLBACK: user_speech_committed
            sess.on('user_speech_committed', (ev) => {
                const transcript = ev.transcript || ev.text || '';
                sendUserTranscript(transcript, 'committed');
            });
            // Agent state tracking
            sess.on('agent_state_changed', (ev) => {
                agentState = ev.newState;
                // Clear processing guard when model transitions to any new state
                isProcessingQueue = false;
                console.log(`🤖 State: ${ev.newState}`);
                sendToFrontend({ type: 'agent_state', state: ev.newState });
                // When the model becomes available (listening), process any queued voice injections
                if (ev.newState === 'listening' && voiceQueue.length > 0) {
                    setTimeout(() => processVoiceQueue(), 500); // 500ms to let model settle
                }
            });
            // User state tracking — prevents queue from colliding with server-side VAD
            sess.on('user_state_changed', (ev) => {
                userState = ev.newState;
                console.log(`👤 User state: ${ev.newState}`);
            });
            // FALLBACK: playout_completed
            sess.on('playout_completed', (ev) => {
                const message = ev.message || ev.text || ev.content;
                if (message && message.length > 0) {
                    sendAgentTranscript(message, 'playout');
                }
            });
            // Error handler
            sess.on('error', (ev) => {
                const msg = ev.error?.message || String(ev.error);
                // OpenAI race: voice queue collided with server-side VAD auto-response
                if (msg.includes('conversation_already_has_active_response') || msg.includes('active_response')) {
                    console.log('⚠️ OpenAI active response collision — queue will retry on next listening state');
                    return;
                }
                console.error('❌ Session error:', ev.error);
            });
            // Close handler with auto-recovery for Gemini 1008 crashes
            sess.on('close', async (ev) => {
                console.log('🚪 Session closed:', ev.reason);
                // Auto-recover from crashes in realtime mode
                if (ev.reason === 'error' && currentVoiceMode === 'realtime') {
                    const now = Date.now();
                    if (now - lastRecoveryTime < MIN_RECOVERY_INTERVAL) {
                        console.log('⚠️ Recovery too frequent — skipping to prevent loop');
                        sendToFrontend({ type: 'agent_state', state: 'error' });
                        return;
                    }
                    lastRecoveryTime = now;
                    console.log('🔄 Auto-recovering from session crash...');
                    // Clean up dead session
                    try {
                        sess.removeAllListeners();
                    }
                    catch { }
                    currentSession = null;
                    currentAgent = null;
                    // Clear voice queue — stale injections from the crashed session
                    voiceQueue.length = 0;
                    isProcessingQueue = false;
                    if (researchBatchTimer) {
                        clearTimeout(researchBatchTimer);
                        researchBatchTimer = null;
                    }
                    stopProactiveLoop();
                    if (activeResearch) {
                        activeResearch.cleanup();
                        activeResearch = null;
                    }
                    try {
                        const recoveryConfig = { ...realtimeConfig, provider: currentProvider };
                        // Reuse existing session ID for workspace continuity during recovery
                        // Prefer real SDK session ID, fall back to original resume ID
                        const recoverySessionId = currentLLM?.sessionId || resumeSessionId;
                        const result = await createRealtimeSession(recoveryConfig, recoverySessionId);
                        const newSession = result.session;
                        const newAgent = result.agent;
                        currentSession = newSession;
                        currentAgent = newAgent;
                        // Re-wire event listeners on the new session
                        wireSessionEvents(newSession, newAgent);
                        await newSession.start({ agent: newAgent, room });
                        // Sync state
                        agentState = 'listening';
                        sendToFrontend({ type: 'agent_state', state: 'listening' });
                        // Resume Claude session if one was active
                        if (currentLLM?.sessionId) {
                            currentLLM.setContinueSession(true);
                        }
                        // Inject conversation context into the recovered session
                        const recoveredSessionId = currentLLM?.sessionId || recoverySessionId;
                        if (recoveredSessionId) {
                            try {
                                const summary = await getSessionSummary(recoveredSessionId, workingDir);
                                const conversationHistory = await getConversationHistory(recoveredSessionId, workingDir, 30);
                                if (summary && conversationHistory.length > 0) {
                                    const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
                                    queueVoiceInjection(`[SESSION RECOVERED] The voice session crashed and was auto-recovered. Here's the conversation context from before the crash:\n${contextBriefing}\n\nBriefly tell the user the connection was interrupted and you still have context from the conversation. Ask if they can hear you and what they'd like to continue with. Do NOT call any tools.`);
                                    console.log('📋 Injected conversation context into recovered session');
                                }
                                else {
                                    queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
                                }
                            }
                            catch (err) {
                                console.log('⚠️ Failed to load conversation context for recovery:', err);
                                queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
                            }
                        }
                        else {
                            // No session ID — generic notification
                            queueVoiceInjection('[NOTIFICATION] The voice session was briefly interrupted but has been recovered. Ask the user if they can hear you and continue where you left off. Do NOT call any tools.');
                        }
                        console.log('✅ Auto-recovery complete');
                    }
                    catch (err) {
                        console.error('❌ Auto-recovery failed:', err);
                        sendToFrontend({ type: 'agent_state', state: 'error' });
                    }
                }
            });
        }
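        // buildContextBriefing() (defined elsewhere in this file) condenses the stored
        // summary plus recent turns into the [SESSION RECOVERED]/[SESSION RESUMED]
        // prompts used in these handlers. A plausible minimal shape, for illustration
        // only (hypothetical name; the history item fields are assumed):
        function buildContextBriefingExample(summary, history, provider) {
            const recent = history.slice(-10)
                .map(turn => `${turn.role}: ${String(turn.text || '').substring(0, 200)}`)
                .join('\n');
            return `Messages so far: ${summary.messageCount}\nRecent turns:\n${recent}\n(provider: ${provider})`;
        }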
        // Wire events on the initial session
        wireSessionEvents(session, agent);
        // Start voice session
        console.log('🎬 Starting voice session...');
        try {
            await session.start({ agent, room });
            console.log('✅ Voice session started!');
            console.log('🎤 Ready - speak to begin!\n');
            // Workspace is created later in the session_id event handler (when SDK assigns real ID)
            // Send ready signal with persistent retry
            console.log('💓 Sending agent_ready signal...');
            let readySent = false;
            const provider = sessionVoiceMode === 'realtime' ? realtimeConfig.provider : 'claude';
            // Fetch full session list for startup session browser
            const allSessions = await listSessions(workingDir);
            const recentSessionId = allSessions.length > 0 ? allSessions[0].sessionId : null;
            const hasRecentSession = allSessions.length > 0;
            // Prepare sessions for frontend (up to 50)
            const sessionsForFrontend = allSessions.slice(0, 50).map(s => ({
                sessionId: s.sessionId,
                timestamp: s.timestamp.toISOString(),
                lastMessage: s.lastMessage,
                messageCount: s.messageCount,
            }));
            const sendReady = async () => {
                if (readySent)
                    return;
                await sendToFrontend({
                    type: 'agent_ready',
                    provider,
                    voiceMode: sessionVoiceMode,
                    hasRecentSession,
                    recentSessionId,
                    sessions: sessionsForFrontend,
                    preSelectedSessionId,
                    mcpServers: getMcpServerStatusList(config),
                    enabledMcpServers: enabledMcpNames,
                });
            };
            const readyInterval = setInterval(sendReady, 2000);
            await sendReady();
            setTimeout(() => {
                clearInterval(readyInterval);
                console.log('✅ agent_ready retries complete');
            }, 20000);
            // Stop agent_ready retries on user speech
            session.on('input_speech_started', () => {
                readySent = true;
                clearInterval(readyInterval);
            });
            // Greet user via TTS (delayed if resume prompt will be shown)
            // For realtime mode: use generateReply() since there's no standalone TTS
            // For direct mode: use say() which goes through the configured TTS
            const greetViaVoice = async (text) => {
                if (sessionVoiceMode === 'realtime') {
                    // Realtime models handle their own speech generation
                    await session.generateReply({ userInput: text });
                }
                else {
                    await session.say(text);
                }
            };
            if (preSelectedSessionId && sessionExists(preSelectedSessionId, workingDir)) {
                // User pre-selected a session from the session browser — auto-resume immediately
                console.log(`📂 Auto-resuming pre-selected session: ${preSelectedSessionId}`);
                if (currentLLM) {
                    currentLLM.setResumeSessionId(preSelectedSessionId);
                    console.log(`🔄 Session resume configured: ${preSelectedSessionId}`);
                    // Fetch context and greet with it
                    const summary = await getSessionSummary(preSelectedSessionId, workingDir);
                    const conversationHistory = await getConversationHistory(preSelectedSessionId, workingDir, 30);
                    await sendToFrontend({
                        type: 'session_resume_set',
                        sessionId: preSelectedSessionId,
                        success: true,
                    });
                    // Send existing workspace artifacts to frontend (session-scoped)
                    const preArtifacts = listWorkspaceArtifacts(workingDir, preSelectedSessionId);
                    if (preArtifacts.length > 0) {
                        console.log(`📁 Sending ${preArtifacts.length} workspace artifacts to frontend`);
                        await sendToFrontend({
                            type: 'session_artifacts',
                            sessionId: preSelectedSessionId,
                            artifacts: preArtifacts.map(a => ({
                                filePath: a.filePath,
                                fileName: a.fileName,
                                type: a.type,
                                updatedAt: a.updatedAt,
                            }))
                        });
                    }
                    // Load full session history into realtime model's context
                    if (summary) {
                        loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
                        const specContent = getSpecForVoiceModel(workingDir, preSelectedSessionId);
                        const specSection = specContent
                            ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
                            : '';
                        try {
                            if (sessionVoiceMode === 'realtime') {
                                const contextPrompt = `[SESSION RESUMED] The user chose to continue a previous research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
                                await session.generateReply({ instructions: contextPrompt });
                            }
                            else {
                                await session.say("Welcome back! Ready to continue our previous conversation.");
                            }
                        }
                        catch (err) {
                            console.log('⚠️ Pre-selected session greeting failed:', err);
                        }
                    }
                }
            }
            else if (!preSelectedSessionId && hasRecentSession) {
                // No pre-selected session but sessions exist — defer greeting for session gate
                console.log('⏳ Deferring greeting until session gate is completed');
            }
            else {
                // No sessions at all (or new session chosen) — greet as new user
                try {
                    console.log('👋 Sending greeting...');
                    await greetViaVoice("The user just connected for the first time. Briefly greet them as Osborn and ask what they're working on.");
                    console.log('✅ Greeting sent');
                }
                catch (err) {
                    console.log('⚠️ Greeting failed:', err);
                }
            }
        }
        catch (err) {
            console.error('❌ Failed to start session:', err);
        }
    });
    room.on(RoomEvent.ParticipantDisconnected, (participant) => {
        console.log(`👋 User left: ${participant.identity}`);
        if (currentSession) {
            currentSession.removeAllListeners();
            currentSession = null;
            currentLLM = null;
        }
        console.log('⏳ Waiting for new user...\n');
    });
    room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
        if (topic !== 'user-input')
            return;
        try {
            const data = JSON.parse(new TextDecoder().decode(payload));
            console.log('📨 Data:', data.type);
            if (data.type === 'permission_response') {
                // Handle permission response for direct mode
                if (currentLLM && currentLLM.hasPendingPermission?.()) {
                    const allow = data.response === 'allow' || data.response === 'always_allow';
                    currentLLM.respondToPermission(allow);
                    console.log(`✅ Permission: ${data.response}`);
                }
            }
            else if (data.type === 'user_text' && currentSession) {
                console.log(`📝 Text: "${data.content}"`);
                // Skip interrupt for Gemini — disrupts state machine (hangs in speaking state)
                if (currentProvider !== 'gemini') {
                    currentSession.interrupt();
                }
                await currentSession.generateReply({ userInput: data.content });
            }
            // ============================================================
            // SESSION MANAGEMENT HANDLERS
            // ============================================================
            else if (data.type === 'list_sessions') {
                // List available sessions for this project
                console.log('📋 Listing available sessions...');
                try {
                    // Clean up orphaned metadata entries before listing
                    await cleanupOrphanedMetadata(workingDir);
                    const sessions = await listSessions(workingDir);
                    await sendToFrontend({
                        type: 'sessions_list',
                        sessions: sessions.map(s => ({
                            sessionId: s.sessionId,
                            timestamp: s.timestamp.toISOString(),
                            lastMessage: s.lastMessage,
                            messageCount: s.messageCount,
                        })),
                        count: sessions.length,
                    });
                }
                catch (err) {
                    console.error('Failed to list sessions:', err);
                    await sendToFrontend({
                        type: 'sessions_list',
                        sessions: [],
                        count: 0,
                        error: 'Failed to list sessions',
                    });
                }
            }
            else if (data.type === 'resume_session' && currentLLM) {
                // Lightweight: set resume ID and send artifacts to frontend only
                // Context injection (generateReply) happens in session_selected handler
                // to avoid double generateReply calls that cause timeouts
                const sessionId = data.sessionId;
                if (sessionId && sessionExists(sessionId, workingDir)) {
                    currentLLM.setResumeSessionId(sessionId);
                    console.log(`🔄 Will resume session: ${sessionId}`);
                    await sendToFrontend({
                        type: 'session_resume_set',
                        sessionId,
                        success: true,
                    });
                    // Send existing session artifacts to frontend (session-scoped)
                    const artifacts = listWorkspaceArtifacts(workingDir, sessionId);
                    if (artifacts.length > 0) {
                        console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
                        await sendToFrontend({
                            type: 'session_artifacts',
                            sessionId,
                            artifacts: artifacts.map(a => ({
                                filePath: a.filePath,
                                fileName: a.fileName,
                                type: a.type,
                                updatedAt: a.updatedAt,
                            }))
                        });
                    }
                }
                else {
                    console.error(`❌ Session not found: ${sessionId}`);
                    await sendToFrontend({
                        type: 'session_resume_set',
                        sessionId,
                        success: false,
                        error: 'Session not found',
                    });
                }
            }
            else if (data.type === 'continue_session' && currentLLM) {
                const recentId = await getMostRecentSessionId(workingDir);
                if (recentId) {
                    currentLLM.setResumeSessionId(recentId);
                    console.log(`🔄 Continuing most recent session: ${recentId}`);
                    const summary = await getSessionSummary(recentId, workingDir);
                    const conversationHistory = await getConversationHistory(recentId, workingDir, 30);
                    await sendToFrontend({
                        type: 'session_resume_set',
                        sessionId: recentId,
                        success: true,
                    });
                    // Send existing session artifacts to frontend (session-scoped)
                    const artifacts = listWorkspaceArtifacts(workingDir, recentId);
                    if (artifacts.length > 0) {
                        console.log(`📁 Sending ${artifacts.length} session artifacts to frontend`);
                        await sendToFrontend({
                            type: 'session_artifacts',
                            sessionId: recentId,
                            artifacts: artifacts.map(a => ({
                                filePath: a.filePath,
                                fileName: a.fileName,
                                type: a.type,
                                updatedAt: a.updatedAt,
                            }))
                        });
                    }
                    if (currentSession && summary) {
                        loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
                        const specContent = getSpecForVoiceModel(workingDir, recentId);
                        const specSection = specContent
                            ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
                            : '';
                        console.log('📋 Injecting session context into voice agent...');
                        try {
                            if (currentVoiceMode === 'realtime') {
                                const contextPrompt = `[SESSION RESUMED] The user chose to continue their most recent research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
                                await currentSession.generateReply({ instructions: contextPrompt });
                            }
                            else {
                                await currentSession.say("Continuing where we left off.");
                            }
                        }
                        catch (err) {
                            console.log('⚠️ Context injection failed:', err);
                        }
                    }
                }
                else {
                    console.log('📋 No previous sessions found - starting fresh');
                    await sendToFrontend({
                        type: 'session_resume_set',
                        sessionId: null,
                        success: false,
                        error: 'No previous sessions found',
                    });
                }
            }
            else if (data.type === 'switch_session' && currentLLM) {
                // Switch to a different session mid-conversation
                const sessionId = data.sessionId;
                if (sessionId && sessionExists(sessionId, workingDir)) {
                    // Step 1: Get FULL context summary with conversation history
                    const summary = await getSessionSummary(sessionId, workingDir);
                    const conversationHistory = await getConversationHistory(sessionId, workingDir, 30);
                    // Step 2: Reset LLM state and configure for new session
                    currentLLM.resetForSessionSwitch();
                    currentLLM.setResumeSessionId(sessionId);
                    clearFastBrainHistory();
                    console.log(`🔄 Switched to session: ${sessionId}`);
                    // Step 3: Send full context to frontend (including conversation history)
                    await sendToFrontend({
                        type: 'session_switched',
                        sessionId,
                        success: true,
                        summary,
                        conversationHistory,
                    });
                    // Step 3.5: Send existing session artifacts to frontend (session-scoped)
                    const switchArtifacts = listWorkspaceArtifacts(workingDir, sessionId);
                    if (switchArtifacts.length > 0) {
                        console.log(`📁 Sending ${switchArtifacts.length} session artifacts to frontend`);
                        await sendToFrontend({
                            type: 'session_artifacts',
                            sessionId,
                            artifacts: switchArtifacts.map(a => ({
                                filePath: a.filePath,
                                fileName: a.fileName,
                                type: a.type,
                                updatedAt: a.updatedAt,
                            }))
                        });
                    }
                    // Step 4: Voice agent acknowledges context
                    if (currentSession && summary) {
                        loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
                        try {
                            if (currentVoiceMode === 'realtime') {
                                const contextPrompt = `[SESSION SWITCHED] The user switched to a different research session. Here's the context:\n${contextBriefing}\n\nBriefly acknowledge the switch and summarize what was being worked on.`;
                                await currentSession.generateReply({ instructions: contextPrompt });
                            }
                            else {
                                const acknowledgment = summary.lastMessages.length > 0
                                    ? `I've switched to your previous session. You were working on: ${summary.lastMessages[summary.lastMessages.length - 1]?.substring(0, 100)}`
                                    : `Switched to previous session with ${summary.messageCount} messages. What would you like to continue with?`;
                                await currentSession.say(acknowledgment);
                            }
                        }
                        catch (err) {
                            console.log('⚠️ Switch acknowledgment failed:', err);
                        }
                    }
                }
                else {
                    await sendToFrontend({
                        type: 'session_switched',
                        sessionId,
                        success: false,
                        error: 'Session not found',
                    });
                }
            }
            else if (data.type === 'get_current_session' && currentLLM) {
                // Get current session ID
                await sendToFrontend({
                    type: 'current_session',
                    sessionId: currentLLM.sessionId,
                    isResumingSession: currentLLM.isResumingSession,
                });
            }
            else if (data.type === 'get_session_artifacts') {
                const sessionId = data.sessionId;
                if (sessionId) {
                    const artifacts = listWorkspaceArtifacts(workingDir, sessionId);
                    console.log(`📁 Sending ${artifacts.length} session artifacts for ${sessionId.substring(0, 8)}`);
                    await sendToFrontend({
                        type: 'session_artifacts',
                        sessionId,
                        artifacts: artifacts.map(a => ({
                            filePath: a.filePath,
                            fileName: a.fileName,
                            type: a.type,
                            updatedAt: a.updatedAt,
                        }))
                    });
                }
            }
            // ============================================================
            // SESSION GATE HANDLER (initial session selection before voice)
            // ============================================================
            else if (data.type === 'get_plan_file') {
                const filePath = data.filePath;
                if (filePath && filePath.includes('.claude/plans/')) {
                    try {
                        const fs = await import('fs');
                        const content = fs.readFileSync(filePath, 'utf-8');
                        await sendToFrontend({ type: 'plan_file_content', filePath, content, fileName: filePath.split('/').pop() });
                    }
                    catch (err) {
                        await sendToFrontend({ type: 'plan_file_content', filePath, content: '', error: err.message });
                    }
                }
            }
            else if (data.type === 'get_research_artifact') {
                const filePath = data.filePath;
                if (filePath && (filePath.includes('.osborn/sessions/') || filePath.includes('.osborn/research/'))) {
                    try {
                        const fs = await import('fs');
                        const fileName = filePath.split('/').pop() || '';
                        const ext = fileName.split('.').pop()?.toLowerCase() || '';
                        const isImage = ['png', 'jpg', 'jpeg', 'gif', 'webp'].includes(ext);
                        if (isImage) {
                            const base64 = fs.readFileSync(filePath, 'base64');
                            await sendToFrontend({ type: 'research_artifact_content', filePath, content: base64, fileName, isImage: true, mimeType: `image/${ext}` });
                        }
                        else {
                            const content = fs.readFileSync(filePath, 'utf-8');
                            await sendToFrontend({ type: 'research_artifact_content', filePath, content, fileName, isImage: false });
                        }
                    }
                    catch (err) {
                        await sendToFrontend({ type: 'research_artifact_content', filePath, content: '', error: err.message });
                    }
                }
            }
            // ============================================================
            // MCP SERVER TOGGLE HANDLERS
            // ============================================================
            else if (data.type === 'mcp_toggle' && currentLLM) {
                const serverKey = data.serverKey;
                const enabled = data.enabled;
                console.log(`🔌 MCP toggle: ${serverKey} → ${enabled ? 'ON' : 'OFF'}`);
                if (enabled) {
                    try {
                        // Check if this is a Smithery HTTP server — use proxy to bypass SDK bug
                        const catalogEntry = MCP_CATALOG.find(e => e.serverKey === serverKey);
                        const isSmitheryServer = catalogEntry?.url && isSmitheryUrl(catalogEntry.url);
                        if (isSmitheryServer && catalogEntry?.url) {
                            // Smithery cloud server: use in-process proxy (bypasses SDK HTTP bug #18296)
                            const parsed = parseSmitheryUrl(catalogEntry.url);
                            if (parsed) {
                                const proxyConfig = await createSmitheryProxy({
                                    name: serverKey,
                                    namespace: parsed.namespace,
                                    connectionId: parsed.connectionId,
                                });
                                currentLLM.enableMcpServer(serverKey, proxyConfig);
                                await announceViaVoice(`${serverKey} tools enabled.`);
                            }
                            else {
                                throw new Error(`Could not parse Smithery URL: ${catalogEntry.url}`);
                            }
                        }
                        else {
                            // Non-Smithery server: use standard config (stdio or direct http)
                            const serverConfigs = buildMcpServersForKeys(config, [serverKey]);
                            const serverConfig = serverConfigs[serverKey];
                            if (serverConfig) {
                                currentLLM.enableMcpServer(serverKey, serverConfig);
                                await announceViaVoice(`${serverKey} tools enabled.`);
                            }
                            else {
                                throw new Error('Server configuration not found');
                            }
                        }
                    }
                    catch (err) {
                        const errorMsg = err instanceof SmitheryAuthorizationError
                            ? `OAuth required: ${err.authorizationUrl}`
                            : err.message;
                        console.error(`❌ MCP toggle failed for ${serverKey}: ${errorMsg}`);
                        await sendToFrontend({
                            type: 'mcp_toggle_result',
                            serverKey,
                            success: false,
                            error: errorMsg,
                        });
                    }
                }
                else {
                    await destroySmitheryProxy(serverKey); // Clean up proxy if exists
                    currentLLM.disableMcpServer(serverKey);
                    await announceViaVoice(`${serverKey} tools disabled.`);
                }
                // Send updated status back
                await sendToFrontend({
                    type: 'mcp_toggle_result',
                    serverKey,
                    enabled,
                    success: true,
                    mcpServers: getMcpServerStatusList(config),
                    enabledKeys: currentLLM.getEnabledMcpServerKeys(),
                });
            }
            else if (data.type === 'get_mcp_status') {
                // Frontend requesting current MCP status
                const statusList = getMcpServerStatusList(config);
                const enabledKeys = currentLLM?.getEnabledMcpServerKeys() || [];
                // Merge runtime enabled state into status list
                const mergedStatus = statusList.map(s => ({
                    ...s,
                    enabled: enabledKeys.includes(s.serverKey),
                }));
                await sendToFrontend({
                    type: 'mcp_status',
                    mcpServers: mergedStatus,
                    enabledKeys,
                });
            }
            else if (data.type === 'session_selected') {
                const sessionId = data.sessionId;
                console.log(`🚪 Session gate completed: ${sessionId ? `resume ${sessionId}` : 'fresh start'}`);
                if (sessionId && currentLLM && sessionExists(sessionId, workingDir)) {
                    // Resume the selected session
                    currentLLM.setResumeSessionId(sessionId);
                    console.log(`🔄 Resuming session: ${sessionId}`);
                    // Fetch context and greet with it
                    const summary = await getSessionSummary(sessionId, workingDir);
                    const conversationHistory = await getConversationHistory(sessionId, workingDir, 30);
                    await sendToFrontend({
                        type: 'session_resume_set',
                        sessionId,
                        success: true,
                    });
                    // Send existing session artifacts to frontend (session-scoped)
                    const gateArtifacts = listWorkspaceArtifacts(workingDir, sessionId);
                    if (gateArtifacts.length > 0) {
                        console.log(`📁 Sending ${gateArtifacts.length} session artifacts to frontend`);
                        await sendToFrontend({
                            type: 'session_artifacts',
                            sessionId,
                            artifacts: gateArtifacts.map(a => ({
                                filePath: a.filePath,
                                fileName: a.fileName,
                                type: a.type,
                                updatedAt: a.updatedAt,
                            }))
                        });
                    }
                    // Load full session history and greet with context
                    if (currentSession && summary) {
                        loadSessionHistoryIntoChatCtx(currentAgent, conversationHistory, currentProvider);
                        const contextBriefing = buildContextBriefing(summary, conversationHistory, currentProvider);
                        const specContent = getSpecForVoiceModel(workingDir, sessionId);
                        const specSection = specContent
                            ? `\n\n=== SESSION SPEC ===\n${specContent}\n=== END SPEC ===\nCheck "Open Questions" — if any are unanswered, ask the user about them.`
                            : '';
                        try {
                            if (currentVoiceMode === 'realtime') {
                                const contextPrompt = `[SESSION RESUMED] The user chose to continue a previous research session. Here's the context:\n${contextBriefing}${specSection}\n\nBriefly acknowledge the previous session. If there are open questions in the spec, ask the most important one. Otherwise ask what they'd like to continue with.`;
                                await currentSession.generateReply({ instructions: contextPrompt });
                            }
                            else {
                                await currentSession.say("Welcome back! Ready to continue our previous conversation.");
                            }
                        }
                        catch (err) {
                            console.log('⚠️ Session gate greeting failed:', err);
                        }
                    }
                }
                else {
                    // Fresh start - just greet normally
                    console.log('🆕 Starting fresh session');
                    if (currentSession) {
                        try {
                            if (currentVoiceMode === 'realtime') {
                                await currentSession.generateReply({ userInput: "The user just connected and chose to start a fresh session. Briefly greet them as Osborn and ask what they're working on." });
                            }
                            else {
                                await currentSession.say("Hey! I'm Osborn. What are you working on?");
                            }
                        }
                        catch (err) {
                            console.log('⚠️ Fresh session greeting failed:', err);
                        }
                    }
                }
            }
        }
        catch { }
    });
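    // Everything the DataReceived handler above consumes arrives on the 'user-input'
    // topic as UTF-8 JSON. A sketch of the sending side, written with the same
    // @livekit/rtc-node API this file already imports (hypothetical helper; the real
    // frontend presumably does the equivalent with livekit-client in the browser):
    async function publishUserInputExample(fromRoom, message) {
        // e.g. message = { type: 'user_text', content: 'summarize the spec' }
        const bytes = new TextEncoder().encode(JSON.stringify(message));
        await fromRoom.localParticipant.publishData(bytes, { reliable: true, topic: 'user-input' });
    }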
    // ============================================================
    // Connect to Room
    // ============================================================
    try {
        await room.connect(livekitUrl, jwt, {
            autoSubscribe: true,
            dynacast: true,
        });
        localParticipant = room.localParticipant;
        console.log('✅ Connected to room:', roomName);
        console.log('\n⏳ Waiting for user to connect...');
        console.log(`   Room: ${roomCode}\n`);
        // Keep process alive
        await new Promise(() => { });
    }
    catch (err) {
        console.error('❌ Failed to connect:', err);
        process.exit(1);
    }
}
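// The mcp_toggle handler in main() passes plain config objects to enableMcpServer().
// For a local stdio MCP server, such a config commonly looks like the sketch below;
// the command and package name are hypothetical, and the exact fields accepted
// depend on the handler in claude-llm.js:
const exampleStdioMcpServerConfig = {
    command: 'npx',
    args: ['-y', 'some-mcp-server-package'],
    env: {},
};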
// Run
main().catch(console.error);