osborn 0.5.5 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.js CHANGED
@@ -12,14 +12,17 @@ setMaxListeners(50);
12
12
  import { createServer } from 'http';
13
13
  import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync } from 'node:fs';
14
14
  import { join } from 'node:path';
15
+ import { createPatch } from 'diff';
15
16
  import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts } from './config.js';
16
17
  import { createSTT, createTTS, createRealtimeModelFromConfig, DIRECT_MODE_STT, DIRECT_MODE_TTS } from './voice-io.js';
17
18
  import { createClaudeLLM } from './claude-llm.js';
18
19
  import { clearPipelineFastBrainSession, prewarmBM25Index } from './pipeline-fastbrain.js';
20
+ import { ensureClaudeAuth } from './claude-auth.js';
19
21
  import { createSmitheryProxy, destroySmitheryProxy, parseSmitheryUrl, isSmitheryUrl, SmitheryAuthorizationError } from './smithery-proxy.js';
20
22
  import { askHaiku, askFastBrain, updateSpecFromJSONL, processResearchCompletion, handleResearchBatch, prepareBriefingScript, prepareRecoveryScript, writeQuestionToSpec, checkOutputAgainstQuestions, generateProactivePrompt, clearFastBrainSession } from './fast-brain.js';
21
23
  import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getScriptInjection, getProactiveInjection, getNotificationInjection } from './prompts.js';
22
24
  import { MCP_CATALOG } from './config.js';
25
+ import { getRecallClient } from './recall-client.js';
23
26
  import { llm } from '@livekit/agents';
24
27
  import { z } from 'zod';
25
28
  // ============================================================
@@ -131,11 +134,13 @@ process.on('uncaughtException', (error) => {
131
134
  // ============================================================
132
135
  // HTTP API SERVER - Exposes session data to cloud-deployed frontend
133
136
  // ============================================================
137
+ // Module-level room code so the HTTP server can expose it via GET /room-code
138
+ let currentRoomCode = null;
134
139
  function startApiServer(workingDir, port) {
135
140
  const server = createServer(async (req, res) => {
136
141
  // CORS headers for cloud frontend
137
142
  res.setHeader('Access-Control-Allow-Origin', '*');
138
- res.setHeader('Access-Control-Allow-Methods', 'GET, OPTIONS');
143
+ res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
139
144
  res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
140
145
  if (req.method === 'OPTIONS') {
141
146
  res.writeHead(204);
@@ -171,12 +176,52 @@ function startApiServer(workingDir, port) {
171
176
  res.end(JSON.stringify({ status: 'ok', workingDir }));
172
177
  return;
173
178
  }
179
+ // POST /webhook/recall — Recall.ai real-time transcript webhooks
180
+ if (req.method === 'POST' && url.pathname === '/webhook/recall') {
181
+ // Respond 200 immediately (before the body is read) — never block here, or delivery of subsequent webhooks is delayed
182
+ res.writeHead(200, { 'Content-Type': 'application/json' });
183
+ res.end('{"ok":true}');
184
+ let body = '';
185
+ req.on('data', (chunk) => { body += chunk.toString(); });
186
+ req.on('end', () => {
187
+ try {
188
+ const payload = JSON.parse(body);
189
+ const recall = getRecallClient();
190
+ if (recall)
191
+ recall.handleWebhook(payload);
192
+ }
193
+ catch (e) {
194
+ console.error('Recall webhook parse error:', e);
195
+ }
196
+ });
197
+ return;
198
+ }
199
+ // GET /meeting-output — Output Media webpage for Recall.ai bot audio
200
+ if (req.method === 'GET' && url.pathname === '/meeting-output') {
201
+ const htmlPath = join(process.cwd(), 'src', 'meeting-output.html');
202
+ try {
203
+ const html = readFileSync(htmlPath, 'utf-8');
204
+ res.writeHead(200, { 'Content-Type': 'text/html' });
205
+ res.end(html);
206
+ }
207
+ catch {
208
+ res.writeHead(404, { 'Content-Type': 'text/plain' });
209
+ res.end('meeting-output.html not found');
210
+ }
211
+ return;
212
+ }
213
+ if (req.method === 'GET' && url.pathname === '/room-code') {
214
+ res.writeHead(200, { 'Content-Type': 'application/json' });
215
+ res.end(JSON.stringify({ roomCode: currentRoomCode }));
216
+ return;
217
+ }
174
218
  res.writeHead(404, { 'Content-Type': 'application/json' });
175
219
  res.end(JSON.stringify({ error: 'Not found' }));
176
220
  });
177
- server.listen(port, () => {
178
- console.log(`🌐 API server listening on http://localhost:${port}`);
179
- console.log(` Sessions: http://localhost:${port}/sessions`);
221
+ const host = process.env.HOST || '0.0.0.0';
222
+ server.listen(port, host, () => {
223
+ console.log(`🌐 API server listening on http://${host}:${port}`);
224
+ console.log(` Sessions: http://${host}:${port}/sessions`);
180
225
  });
181
226
  server.on('error', (err) => {
182
227
  if (err.code === 'EADDRINUSE') {
@@ -287,6 +332,7 @@ async function main() {
287
332
  }
288
333
  // Determine room code
289
334
  const roomCode = cliArgs.roomCode || generateRoomCode();
335
+ currentRoomCode = roomCode;
290
336
  const roomName = `osborn-${roomCode}`;
291
337
  if (cliArgs.roomCode) {
292
338
  console.log(`🔗 Joining room: ${roomCode}`);
@@ -330,12 +376,16 @@ async function main() {
330
376
  let currentLLM = null;
331
377
  let localParticipant = null;
332
378
  let agentState = 'initializing';
379
+ // Session-level always-allow list: file paths the user answered "always allow" for; later Write/Edit/MultiEdit requests on these paths are approved without prompting
380
+ let sessionAlwaysAllowPaths = new Set();
333
381
  let userState = 'listening'; // Track user speech state for queue safety
334
382
  let currentVoiceMode = voiceMode; // Track active voice mode for data handlers
335
383
  let currentProvider = realtimeConfig.provider; // Track active realtime provider
336
384
  // Track the active resume session ID across scopes (ParticipantConnected + DataReceived)
337
385
  // Updated by resume_session, session_selected, continue_session, switch_session handlers
338
386
  let currentResumeSessionId;
387
+ // Claude auth code submission handler (set during OAuth flow, cleared after)
388
+ let pendingAuthSubmitCode = null;
339
389
  // Task deduplication guard - prevents Gemini re-execution loops
340
390
  let lastTaskRequest = '';
341
391
  let lastTaskTime = 0;
@@ -348,6 +398,31 @@ async function main() {
348
398
  let lastCompletedResearch = null;
349
399
  // No manual queuing — the Claude SDK handles sequential queries internally
350
400
  // ============================================================
401
+ // Recall.ai — Meeting Transcript Routing
402
+ // ============================================================
403
+ const recall = getRecallClient();
404
+ if (recall) {
405
+ console.log('🎥 Recall.ai client initialized (RECALL_API_KEY present)');
406
+ recall.on('transcript', ({ botId, speaker, text }) => {
407
+ console.log(`📝 Meeting transcript [${speaker}]: ${text}`);
408
+ // Route meeting transcripts to Claude as user text with speaker attribution
409
+ if (currentLLM && currentSession) {
410
+ const meetingText = `[Meeting — ${speaker}]: ${text}`;
411
+ // Use the same pipeline as user_text data channel messages
412
+ try {
413
+ if (currentVoiceMode === 'pipeline' || currentVoiceMode === 'direct') {
414
+ const chatCtx = new llm.ChatContext();
415
+ chatCtx.addMessage({ role: 'user', content: meetingText });
416
+ currentLLM.chat({ chatCtx });
417
+ }
418
+ }
419
+ catch (err) {
420
+ console.error('❌ Failed to route meeting transcript:', err);
421
+ }
422
+ }
423
+ });
424
+ }
425
+ // ============================================================
351
426
  // Interruption Tracking (Content Ledger)
352
427
  // ============================================================
353
428
  // When user interrupts TTS, LiveKit truncates chatCtx to what was spoken.
@@ -697,6 +772,8 @@ async function main() {
697
772
  skipTTSQueue: true,
698
773
  });
699
774
  currentLLM = directLLM;
775
+ // Reset the session always-allow list for each new direct session
776
+ sessionAlwaysAllowPaths = new Set();
700
777
  // For resumed sessions, eagerly create workspace (we know the real ID)
701
778
  if (resumeSessionId) {
702
779
  const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
@@ -770,6 +847,15 @@ async function main() {
770
847
  console.log(`⚠️ Permission needed: ${data.toolName}`);
771
848
  const toolName = data.toolName;
772
849
  const input = data.input || {};
850
+ // Check session always-allow list before showing dialog
851
+ if (toolName === 'Write' || toolName === 'Edit' || toolName === 'MultiEdit') {
852
+ const filePath = String(input?.file_path || '');
853
+ if (filePath && sessionAlwaysAllowPaths.has(filePath)) {
854
+ console.log(`✅ Session always-allow: ${filePath}`);
855
+ directLLM.respondToPermission(true);
856
+ return;
857
+ }
858
+ }
773
859
  // Build descriptive message based on tool type
774
860
  let description = `I need permission to use ${toolName}.`;
775
861
  if (toolName === 'Bash' && input.command) {
@@ -785,17 +871,76 @@ async function main() {
785
871
  else if (toolName === 'WebFetch' && input.url) {
786
872
  description = `I want to fetch content from: ${input.url}`;
787
873
  }
874
+ // Generate diff for Write/Edit/MultiEdit tools
875
+ let diffString;
876
+ if (toolName === 'Write' || toolName === 'Edit' || toolName === 'MultiEdit') {
877
+ const diffStart = performance.now();
878
+ try {
879
+ const filePath = String(input?.file_path || '');
880
+ let beforeContent = '';
881
+ const readStart = performance.now();
882
+ try {
883
+ beforeContent = readFileSync(filePath, 'utf-8');
884
+ }
885
+ catch {
886
+ beforeContent = ''; // new file
887
+ }
888
+ const readMs = (performance.now() - readStart).toFixed(2);
889
+ console.log(`⏱️ diff read: ${readMs}ms (${beforeContent.length} chars, ${filePath.split('/').pop()})`);
890
+ let afterContent = beforeContent;
891
+ if (toolName === 'Write') {
892
+ afterContent = String(input?.content || '');
893
+ }
894
+ else if (toolName === 'Edit') {
895
+ const oldStr = String(input?.old_string || '');
896
+ const newStr = String(input?.new_string || '');
897
+ const replaceAll = Boolean(input?.replace_all);
898
+ if (replaceAll) {
899
+ afterContent = beforeContent.split(oldStr).join(newStr);
900
+ }
901
+ else {
902
+ afterContent = beforeContent.replace(oldStr, newStr);
903
+ }
904
+ }
905
+ else if (toolName === 'MultiEdit') {
906
+ afterContent = beforeContent;
907
+ const edits = Array.isArray(input?.edits) ? input.edits : [];
908
+ for (const edit of edits) {
909
+ if (edit.replace_all) {
910
+ afterContent = afterContent.split(edit.old_string).join(edit.new_string);
911
+ }
912
+ else {
913
+ afterContent = afterContent.replace(edit.old_string, edit.new_string);
914
+ }
915
+ }
916
+ }
917
+ const patchStart = performance.now();
918
+ const fileName = filePath.split('/').pop() || filePath;
919
+ diffString = createPatch(fileName, beforeContent, afterContent, '', '', { context: 4 });
920
+ const patchMs = (performance.now() - patchStart).toFixed(2);
921
+ const totalMs = (performance.now() - diffStart).toFixed(2);
922
+ console.log(`⏱️ diff patch: ${patchMs}ms | total: ${totalMs}ms (before: ${beforeContent.length} chars, after: ${afterContent.length} chars, diff: ${diffString.length} chars)`);
923
+ }
924
+ catch (e) {
925
+ const totalMs = (performance.now() - diffStart).toFixed(2);
926
+ console.log(`⏱️ diff failed after ${totalMs}ms:`, e);
927
+ // diff generation failed — proceed without diff
928
+ diffString = undefined;
929
+ }
930
+ }
931
+ console.log(`🔍 perm payload: diff=${diffString ? `✅ ${diffString.length} chars` : '❌ NONE'} toolName=${toolName}`);
788
932
  sendToFrontend({
789
933
  type: 'permission_request',
790
934
  toolName: data.toolName,
791
935
  input: data.input,
792
936
  description,
793
937
  agentRole: 'direct',
938
+ diff: diffString,
794
939
  });
795
940
  // Speak the descriptive request so user knows to respond
796
941
  if (currentSession) {
797
942
  const ttsMessage = `${description} Say yes, no, or always.`;
798
- currentSession.say?.(ttsMessage).catch(() => { });
943
+ currentSession.say?.(ttsMessage);
799
944
  }
800
945
  });
801
946
  // Wire up TTS say — bypass LiveKit's BufferedTokenStream, speak directly via session.say()
@@ -872,6 +1017,13 @@ async function main() {
872
1017
  const session = new voice.AgentSession({
873
1018
  turnDetection: 'stt',
874
1019
  preemptiveGeneration: false, // Only fire LLM on final committed transcript, not partial preemptives
1020
+ turnHandling: {
1021
+ endpointing: {
1022
+ mode: 'fixed',
1023
+ minDelay: 500, // Wait 500ms after STT commits before generating reply
1024
+ maxDelay: 2000, // Force end-of-turn after 2s to prevent hangs
1025
+ },
1026
+ },
875
1027
  });
876
1028
  return { session, agent };
877
1029
  }
@@ -1528,6 +1680,26 @@ async function main() {
1528
1680
  else {
1529
1681
  console.log(`🆔 New session (ID assigned by SDK)`);
1530
1682
  }
1683
+ // Ensure Claude is authenticated before creating voice session
1684
+ // In cloud deployments (Fly.io), this triggers OAuth flow on first boot:
1685
+ // captures login URL → sends to frontend → user clicks → gets code → pastes in frontend → auth completes
1686
+ try {
1687
+ const authResult = await ensureClaudeAuth((type, payload) => {
1688
+ sendToFrontend({ type, ...payload });
1689
+ });
1690
+ // If auth flow is running, store the submitCode handler for the DataReceived handler
1691
+ if (authResult.submitCode && authResult.done) {
1692
+ pendingAuthSubmitCode = authResult.submitCode;
1693
+ await authResult.done;
1694
+ pendingAuthSubmitCode = null;
1695
+ }
1696
+ }
1697
+ catch (err) {
1698
+ console.error('❌ Claude authentication failed:', err?.message);
1699
+ sendToFrontend({ type: 'claude_auth_error', message: err?.message || 'Authentication failed' });
1700
+ pendingAuthSubmitCode = null;
1701
+ // Continue anyway — the agent SDK will use ANTHROPIC_API_KEY if available
1702
+ }
1531
1703
  // Create session based on voice mode (from frontend or config)
1532
1704
  let session;
1533
1705
  let agent;
@@ -2083,10 +2255,20 @@ async function main() {
2083
2255
  try {
2084
2256
  const data = JSON.parse(new TextDecoder().decode(payload));
2085
2257
  console.log('📨 Data:', data.type);
2086
- if (data.type === 'permission_response') {
2258
+ if (data.type === 'claude_auth_code' && pendingAuthSubmitCode) {
2259
+ console.log('🔑 Received auth code from frontend');
2260
+ sendToFrontend({ type: 'claude_auth_submitting', message: 'Submitting code to Claude CLI...' });
2261
+ pendingAuthSubmitCode(data.code);
2262
+ }
2263
+ else if (data.type === 'permission_response') {
2087
2264
  // Handle permission response for direct mode
2088
2265
  if (currentLLM && currentLLM.hasPendingPermission?.()) {
2089
2266
  const allow = data.response === 'allow' || data.response === 'always_allow';
2267
+ // Track always_allow paths for this session so future requests auto-approve
2268
+ if (data.response === 'always_allow' && data.filePath) {
2269
+ sessionAlwaysAllowPaths.add(String(data.filePath));
2270
+ console.log(`🔒 Always-allow added for session: ${data.filePath}`);
2271
+ }
2090
2272
  currentLLM.respondToPermission(allow);
2091
2273
  console.log(`✅ Permission: ${data.response}`);
2092
2274
  }
@@ -2467,6 +2649,45 @@ async function main() {
2467
2649
  }
2468
2650
  }
2469
2651
  }
2652
+ else if (data.type === 'join_meeting') {
2653
+ const meetingUrl = data.url;
2654
+ if (meetingUrl) {
2655
+ const recallJoin = getRecallClient();
2656
+ if (!recallJoin) {
2657
+ await sendToFrontend({ type: 'meeting_error', message: 'Recall.ai not configured — set RECALL_API_KEY in .env' });
2658
+ }
2659
+ else {
2660
+ try {
2661
+ const webhookBase = process.env.FLY_APP_NAME
2662
+ ? `https://${process.env.FLY_APP_NAME}.fly.dev`
2663
+ : `http://localhost:${apiPort}`;
2664
+ await sendToFrontend({ type: 'meeting_joining', message: 'Osborn is joining your meeting...' });
2665
+ const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase);
2666
+ const sessionId = currentLLM?.sessionId || currentResumeSessionId || 'default';
2667
+ recallJoin.registerBot(botId, sessionId);
2668
+ await sendToFrontend({ type: 'meeting_joined', botId, message: 'Osborn has joined the meeting' });
2669
+ }
2670
+ catch (err) {
2671
+ console.error('❌ Recall.ai join error:', err);
2672
+ await sendToFrontend({ type: 'meeting_error', message: err.message });
2673
+ }
2674
+ }
2675
+ }
2676
+ }
2677
+ else if (data.type === 'leave_meeting') {
2678
+ const botId = data.botId;
2679
+ const recallLeave = getRecallClient();
2680
+ if (recallLeave && botId) {
2681
+ try {
2682
+ await recallLeave.leaveMeeting(botId);
2683
+ await sendToFrontend({ type: 'meeting_left', botId });
2684
+ }
2685
+ catch (err) {
2686
+ console.error('❌ Recall.ai leave error:', err);
2687
+ await sendToFrontend({ type: 'meeting_error', message: err.message });
2688
+ }
2689
+ }
2690
+ }
2470
2691
  else if (data.type === 'session_selected') {
2471
2692
  const sessionId = data.sessionId;
2472
2693
  console.log(`🚪 Session gate completed: ${sessionId ? `resume ${sessionId}` : 'fresh start'}`);
@@ -39,7 +39,11 @@ export class PipelineDirectLLM extends llm.LLM {
39
39
  // Proxy all methods
40
40
  setResumeSessionId(id) { this.#claudeLLM.setResumeSessionId(id); }
41
41
  setContinueSession(e) { this.#claudeLLM.setContinueSession(e); }
42
- resetForSessionSwitch() { this.#claudeLLM.resetForSessionSwitch(); }
42
+ resetForSessionSwitch() {
43
+ this.stopIndexWatcher();
44
+ this.#indexBuilding = false;
45
+ this.#claudeLLM.resetForSessionSwitch();
46
+ }
43
47
  respondToPermission(allow, msg) { this.#claudeLLM.respondToPermission(allow, msg); }
44
48
  hasPendingPermission() { return this.#claudeLLM.hasPendingPermission(); }
45
49
  getPendingPermission() { return this.#claudeLLM.getPendingPermission(); }
@@ -97,10 +101,11 @@ export class PipelineDirectLLM extends llm.LLM {
97
101
  ``,
98
102
  `User's message: "${userText}"`,
99
103
  ``,
100
- `Handle naturally:`,
101
- `- If it's a quick side question, answer it then continue where you left off (restart sub-agents if needed)`,
102
- `- If they want to change direction, follow their lead`,
103
- `- Don't repeat what was already spoken unless it makes sense to clarify`,
104
+ `RESPOND with speech first, then act:`,
105
+ `- ALWAYS reply with at least one spoken sentence before doing any tool calls`,
106
+ `- If it's a quick side question, answer it then continue where you left off`,
107
+ `- If they want to change direction, acknowledge and follow their lead`,
108
+ `- Clarify when asked to or the question requires going over what you just said`,
104
109
  `- Reference unspoken content naturally if relevant`,
105
110
  ].join('\n');
106
111
  // Modify the last user message in chatCtx
@@ -70,11 +70,13 @@ function createSearchTool(sessionId, workingDir, sessionBaseDir, agentControl) {
70
70
  name: 'emergency_stop',
71
71
  description: [
72
72
  'Kill and restart the main agent with new instructions.',
73
- 'ONLY call this when BOTH conditions are met:',
74
- ' 1. The agent is performing a DESTRUCTIVE or ALTERING action (write, edit, delete, overwrite, install, deploy, push, drop, remove, modify files/data).',
75
- ' 2. The user signals they want it stopped (high intent: "stop", "don\'t", "cancel that", "wait no", "not that").',
76
- 'NEVER call for: research, reading, exploring, searching, fetching, or conversation.',
77
- 'Priority: how destructive/unrecoverable the action is > how strongly the user signals.',
73
+ 'Call when the user clearly wants the agent to STOP what a DESTRUCTIVE or ALTERING action:',
74
+ ' - Destructive actions: write, edit, delete, install, deploy, push, modify files/data',
75
+ ' - Wrong direction: agent is doing something the user didn\'t ask for or explicitly rejects',
76
+ 'User signals: "stop", "don\'t", "cancel", "wait no", "not that", "no no no", "I said stop".',
77
+ 'NEVER call for: research, reading, exploring, searching, fetching, or casual conversation, questions about what the agent is doing, or research the user initiated.',
78
+ 'When in doubt about whether to stop: check get_recent first to see what the agent is actually doing. ',
79
+ 'Priority: how destructive/unrecoverable the action is > how strongly the user signals.'
78
80
  ].join(' '),
79
81
  parameters: {
80
82
  type: 'OBJECT',
@@ -120,7 +122,7 @@ function createSearchTool(sessionId, workingDir, sessionBaseDir, agentControl) {
120
122
  // Kill the destructive process and restart with new instructions
121
123
  agentControl.abort();
122
124
  const restartPrompt = [
123
- `[EMERGENCY STOP] A destructive action was stopped by the user.`,
125
+ `[EMERGENCY STOP] The user stopped your previous action.`,
124
126
  ``,
125
127
  `Reason: ${reason}`,
126
128
  ``,
@@ -130,7 +132,11 @@ function createSearchTool(sessionId, workingDir, sessionBaseDir, agentControl) {
130
132
  `What was happening before the stop:`,
131
133
  recentActivity.substring(0, 2000),
132
134
  ``,
133
- `Review any changes already made. The user wants to change course.`,
135
+ `RESPOND IMMEDIATELY with speech:`,
136
+ `1. Acknowledge what you were doing and that you've stopped`,
137
+ `2. If the user gave a new direction, confirm what you'll do instead`,
138
+ `3. If unclear, ask what they'd like to do next`,
139
+ `Do NOT silently do tool calls — speak first.`,
134
140
  ].join('\n');
135
141
  agentControl.sendPrompt(restartPrompt);
136
142
  results.push({ functionResponse: { name: 'emergency_stop', response: { result: `Agent stopped and restarted. Reason: ${reason}` } } });