npm - osborn - Versions diffs - 0.5.5 → 0.8.0 - Mend

osborn 0.5.5 → 0.8.0

This diff compares the contents of two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in their respective public registries.

Files changed (21)

package/.claude/skills/playwright-browser/SKILL.md +15 -0
package/.claude/skills/shadcn/SKILL.md +232 -0
package/.claude/skills/shadcn/image.png +0 -0
package/.dockerignore +13 -0
package/Dockerfile +103 -0
package/deploy.sh +70 -0
package/dist/claude-auth.d.ts +60 -0
package/dist/claude-auth.js +334 -0
package/dist/claude-llm.d.ts +22 -1
package/dist/claude-llm.js +392 -115
package/dist/fast-brain.js +2 -2
package/dist/index.js +227 -6
package/dist/pipeline-direct-llm.js +10 -5
package/dist/pipeline-fastbrain.js +13 -7
package/dist/prompts.js +141 -67
package/dist/recall-client.d.ts +33 -0
package/dist/recall-client.js +101 -0
package/dist/voice-io.d.ts +6 -2
package/dist/voice-io.js +17 -4
package/fly.toml +30 -0
package/package.json +7 -5

package/dist/index.js CHANGED Viewed

@@ -12,14 +12,17 @@ setMaxListeners(50);
 import { createServer } from 'http';
 import { existsSync, readdirSync, readFileSync, mkdirSync, writeFileSync } from 'node:fs';
 import { join } from 'node:path';
+import { createPatch } from 'diff';
 import { loadConfig, getMcpServers, getEnabledMcpServerNames, getVoiceMode, getRealtimeConfig, getDirectConfig, listSessions, getMostRecentSessionId, sessionExists, cleanupOrphanedMetadata, getSessionSummary, getConversationHistory, ensureSessionWorkspace, getMcpServerStatusList, buildMcpServersForKeys, listWorkspaceArtifacts } from './config.js';
 import { createSTT, createTTS, createRealtimeModelFromConfig, DIRECT_MODE_STT, DIRECT_MODE_TTS } from './voice-io.js';
 import { createClaudeLLM } from './claude-llm.js';
 import { clearPipelineFastBrainSession, prewarmBM25Index } from './pipeline-fastbrain.js';
+import { ensureClaudeAuth } from './claude-auth.js';
 import { createSmitheryProxy, destroySmitheryProxy, parseSmitheryUrl, isSmitheryUrl, SmitheryAuthorizationError } from './smithery-proxy.js';
 import { askHaiku, askFastBrain, updateSpecFromJSONL, processResearchCompletion, handleResearchBatch, prepareBriefingScript, prepareRecoveryScript, writeQuestionToSpec, checkOutputAgainstQuestions, generateProactivePrompt, clearFastBrainSession } from './fast-brain.js';
 import { DIRECT_MODE_PROMPT, getRealtimeInstructions, getScriptInjection, getProactiveInjection, getNotificationInjection } from './prompts.js';
 import { MCP_CATALOG } from './config.js';
+import { getRecallClient } from './recall-client.js';
 import { llm } from '@livekit/agents';
 import { z } from 'zod';
 // ============================================================
@@ -131,11 +134,13 @@ process.on('uncaughtException', (error) => {
 // ============================================================
 // HTTP API SERVER - Exposes session data to cloud-deployed frontend
 // ============================================================
+// Module-level room code so the HTTP server can expose it via GET /room-code
+let currentRoomCode = null;
 function startApiServer(workingDir, port) {
     const server = createServer(async (req, res) => {
         // CORS headers for cloud frontend
         res.setHeader('Access-Control-Allow-Origin', '*');
-        res.setHeader('Access-Control-Allow-Methods', 'GET, OPTIONS');
+        res.setHeader('Access-Control-Allow-Methods', 'GET, POST, OPTIONS');
         res.setHeader('Access-Control-Allow-Headers', 'Content-Type');
         if (req.method === 'OPTIONS') {
             res.writeHead(204);
@@ -171,12 +176,52 @@ function startApiServer(workingDir, port) {
             res.end(JSON.stringify({ status: 'ok', workingDir }));
             return;
         }
+        // POST /webhook/recall — Recall.ai real-time transcript webhooks
+        if (req.method === 'POST' && url.pathname === '/webhook/recall') {
+            // Respond 200 immediately — never block or Node delays next webhooks
+            res.writeHead(200, { 'Content-Type': 'application/json' });
+            res.end('{"ok":true}');
+            let body = '';
+            req.on('data', (chunk) => { body += chunk.toString(); });
+            req.on('end', () => {
+                try {
+                    const payload = JSON.parse(body);
+                    const recall = getRecallClient();
+                    if (recall)
+                        recall.handleWebhook(payload);
+                }
+                catch (e) {
+                    console.error('Recall webhook parse error:', e);
+                }
+            });
+            return;
+        }
+        // GET /meeting-output — Output Media webpage for Recall.ai bot audio
+        if (req.method === 'GET' && url.pathname === '/meeting-output') {
+            const htmlPath = join(process.cwd(), 'src', 'meeting-output.html');
+            try {
+                const html = readFileSync(htmlPath, 'utf-8');
+                res.writeHead(200, { 'Content-Type': 'text/html' });
+                res.end(html);
+            }
+            catch {
+                res.writeHead(404, { 'Content-Type': 'text/plain' });
+                res.end('meeting-output.html not found');
+            }
+            return;
+        }
+        if (req.method === 'GET' && url.pathname === '/room-code') {
+            res.writeHead(200, { 'Content-Type': 'application/json' });
+            res.end(JSON.stringify({ roomCode: currentRoomCode }));
+            return;
+        }
         res.writeHead(404, { 'Content-Type': 'application/json' });
         res.end(JSON.stringify({ error: 'Not found' }));
     });
-    server.listen(port, () => {
-        console.log(`🌐 API server listening on http://localhost:${port}`);
-        console.log(`   Sessions: http://localhost:${port}/sessions`);
+    const host = process.env.HOST || '0.0.0.0';
+    server.listen(port, host, () => {
+        console.log(`🌐 API server listening on http://${host}:${port}`);
+        console.log(`   Sessions: http://${host}:${port}/sessions`);
     });
     server.on('error', (err) => {
         if (err.code === 'EADDRINUSE') {
@@ -287,6 +332,7 @@ async function main() {
     }
     // Determine room code
     const roomCode = cliArgs.roomCode || generateRoomCode();
+    currentRoomCode = roomCode;
     const roomName = `osborn-${roomCode}`;
     if (cliArgs.roomCode) {
         console.log(`🔗 Joining room: ${roomCode}`);
@@ -330,12 +376,16 @@ async function main() {
     let currentLLM = null;
     let localParticipant = null;
     let agentState = 'initializing';
+    // Session-level always-allow list: paths the user has approved for this session without prompting
+    let sessionAlwaysAllowPaths = new Set();
     let userState = 'listening'; // Track user speech state for queue safety
     let currentVoiceMode = voiceMode; // Track active voice mode for data handlers
     let currentProvider = realtimeConfig.provider; // Track active realtime provider
     // Track the active resume session ID across scopes (ParticipantConnected + DataReceived)
     // Updated by resume_session, session_selected, continue_session, switch_session handlers
     let currentResumeSessionId;
+    // Claude auth code submission handler (set during OAuth flow, cleared after)
+    let pendingAuthSubmitCode = null;
     // Task deduplication guard - prevents Gemini re-execution loops
     let lastTaskRequest = '';
     let lastTaskTime = 0;
@@ -348,6 +398,31 @@ async function main() {
     let lastCompletedResearch = null;
     // No manual queuing — the Claude SDK handles sequential queries internally
     // ============================================================
+    // Recall.ai — Meeting Transcript Routing
+    // ============================================================
+    const recall = getRecallClient();
+    if (recall) {
+        console.log('🎥 Recall.ai client initialized (RECALL_API_KEY present)');
+        recall.on('transcript', ({ botId, speaker, text }) => {
+            console.log(`📝 Meeting transcript [${speaker}]: ${text}`);
+            // Route meeting transcripts to Claude as user text with speaker attribution
+            if (currentLLM && currentSession) {
+                const meetingText = `[Meeting — ${speaker}]: ${text}`;
+                // Use the same pipeline as user_text data channel messages
+                try {
+                    if (currentVoiceMode === 'pipeline' || currentVoiceMode === 'direct') {
+                        const chatCtx = new llm.ChatContext();
+                        chatCtx.addMessage({ role: 'user', content: meetingText });
+                        currentLLM.chat({ chatCtx });
+                    }
+                }
+                catch (err) {
+                    console.error('❌ Failed to route meeting transcript:', err);
+                }
+            }
+        });
+    }
+    // ============================================================
     // Interruption Tracking (Content Ledger)
     // ============================================================
     // When user interrupts TTS, LiveKit truncates chatCtx to what was spoken.
@@ -697,6 +772,8 @@ async function main() {
             skipTTSQueue: true,
         });
         currentLLM = directLLM;
+        // Reset the session always-allow list for each new direct session
+        sessionAlwaysAllowPaths = new Set();
         // For resumed sessions, eagerly create workspace (we know the real ID)
         if (resumeSessionId) {
             const workspace = ensureSessionWorkspace(sessionBaseDir, resumeSessionId);
@@ -770,6 +847,15 @@ async function main() {
             console.log(`⚠️ Permission needed: ${data.toolName}`);
             const toolName = data.toolName;
             const input = data.input || {};
+            // Check session always-allow list before showing dialog
+            if (toolName === 'Write' || toolName === 'Edit' || toolName === 'MultiEdit') {
+                const filePath = String(input?.file_path || '');
+                if (filePath && sessionAlwaysAllowPaths.has(filePath)) {
+                    console.log(`✅ Session always-allow: ${filePath}`);
+                    directLLM.respondToPermission(true);
+                    return;
+                }
+            }
             // Build descriptive message based on tool type
             let description = `I need permission to use ${toolName}.`;
             if (toolName === 'Bash' && input.command) {
@@ -785,17 +871,76 @@ async function main() {
             else if (toolName === 'WebFetch' && input.url) {
                 description = `I want to fetch content from: ${input.url}`;
             }
+            // Generate diff for Write/Edit/MultiEdit tools
+            let diffString;
+            if (toolName === 'Write' || toolName === 'Edit' || toolName === 'MultiEdit') {
+                const diffStart = performance.now();
+                try {
+                    const filePath = String(input?.file_path || '');
+                    let beforeContent = '';
+                    const readStart = performance.now();
+                    try {
+                        beforeContent = readFileSync(filePath, 'utf-8');
+                    }
+                    catch {
+                        beforeContent = ''; // new file
+                    }
+                    const readMs = (performance.now() - readStart).toFixed(2);
+                    console.log(`⏱️ diff read: ${readMs}ms (${beforeContent.length} chars, ${filePath.split('/').pop()})`);
+                    let afterContent = beforeContent;
+                    if (toolName === 'Write') {
+                        afterContent = String(input?.content || '');
+                    }
+                    else if (toolName === 'Edit') {
+                        const oldStr = String(input?.old_string || '');
+                        const newStr = String(input?.new_string || '');
+                        const replaceAll = Boolean(input?.replace_all);
+                        if (replaceAll) {
+                            afterContent = beforeContent.split(oldStr).join(newStr);
+                        }
+                        else {
+                            afterContent = beforeContent.replace(oldStr, newStr);
+                        }
+                    }
+                    else if (toolName === 'MultiEdit') {
+                        afterContent = beforeContent;
+                        const edits = Array.isArray(input?.edits) ? input.edits : [];
+                        for (const edit of edits) {
+                            if (edit.replace_all) {
+                                afterContent = afterContent.split(edit.old_string).join(edit.new_string);
+                            }
+                            else {
+                                afterContent = afterContent.replace(edit.old_string, edit.new_string);
+                            }
+                        }
+                    }
+                    const patchStart = performance.now();
+                    const fileName = filePath.split('/').pop() || filePath;
+                    diffString = createPatch(fileName, beforeContent, afterContent, '', '', { context: 4 });
+                    const patchMs = (performance.now() - patchStart).toFixed(2);
+                    const totalMs = (performance.now() - diffStart).toFixed(2);
+                    console.log(`⏱️ diff patch: ${patchMs}ms | total: ${totalMs}ms (before: ${beforeContent.length} chars, after: ${afterContent.length} chars, diff: ${diffString.length} chars)`);
+                }
+                catch (e) {
+                    const totalMs = (performance.now() - diffStart).toFixed(2);
+                    console.log(`⏱️ diff failed after ${totalMs}ms:`, e);
+                    // diff generation failed — proceed without diff
+                    diffString = undefined;
+                }
+            }
+            console.log(`🔍 perm payload: diff=${diffString ? `✅ ${diffString.length} chars` : '❌ NONE'} toolName=${toolName}`);
             sendToFrontend({
                 type: 'permission_request',
                 toolName: data.toolName,
                 input: data.input,
                 description,
                 agentRole: 'direct',
+                diff: diffString,
             });
             // Speak the descriptive request so user knows to respond
             if (currentSession) {
                 const ttsMessage = `${description} Say yes, no, or always.`;
-                currentSession.say?.(ttsMessage).catch(() => { });
+                currentSession.say?.(ttsMessage);
             }
         });
         // Wire up TTS say — bypass LiveKit's BufferedTokenStream, speak directly via session.say()
@@ -872,6 +1017,13 @@ async function main() {
         const session = new voice.AgentSession({
             turnDetection: 'stt',
             preemptiveGeneration: false, // Only fire LLM on final committed transcript, not partial preemptives
+            turnHandling: {
+                endpointing: {
+                    mode: 'fixed',
+                    minDelay: 500, // Wait 500ms after STT commits before generating reply
+                    maxDelay: 2000, // Force end-of-turn after 2s to prevent hangs
+                },
+            },
         });
         return { session, agent };
     }
@@ -1528,6 +1680,26 @@ async function main() {
         else {
             console.log(`🆔 New session (ID assigned by SDK)`);
         }
+        // Ensure Claude is authenticated before creating voice session
+        // In cloud deployments (Fly.io), this triggers OAuth flow on first boot:
+        // captures login URL → sends to frontend → user clicks → gets code → pastes in frontend → auth completes
+        try {
+            const authResult = await ensureClaudeAuth((type, payload) => {
+                sendToFrontend({ type, ...payload });
+            });
+            // If auth flow is running, store the submitCode handler for the DataReceived handler
+            if (authResult.submitCode && authResult.done) {
+                pendingAuthSubmitCode = authResult.submitCode;
+                await authResult.done;
+                pendingAuthSubmitCode = null;
+            }
+        }
+        catch (err) {
+            console.error('❌ Claude authentication failed:', err?.message);
+            sendToFrontend({ type: 'claude_auth_error', message: err?.message || 'Authentication failed' });
+            pendingAuthSubmitCode = null;
+            // Continue anyway — the agent SDK will use ANTHROPIC_API_KEY if available
+        }
         // Create session based on voice mode (from frontend or config)
         let session;
         let agent;
@@ -2083,10 +2255,20 @@ async function main() {
         try {
             const data = JSON.parse(new TextDecoder().decode(payload));
             console.log('📨 Data:', data.type);
-            if (data.type === 'permission_response') {
+            if (data.type === 'claude_auth_code' && pendingAuthSubmitCode) {
+                console.log('🔑 Received auth code from frontend');
+                sendToFrontend({ type: 'claude_auth_submitting', message: 'Submitting code to Claude CLI...' });
+                pendingAuthSubmitCode(data.code);
+            }
+            else if (data.type === 'permission_response') {
                 // Handle permission response for direct mode
                 if (currentLLM && currentLLM.hasPendingPermission?.()) {
                     const allow = data.response === 'allow' || data.response === 'always_allow';
+                    // Track always_allow paths for this session so future requests auto-approve
+                    if (data.response === 'always_allow' && data.filePath) {
+                        sessionAlwaysAllowPaths.add(String(data.filePath));
+                        console.log(`🔒 Always-allow added for session: ${data.filePath}`);
+                    }
                     currentLLM.respondToPermission(allow);
                     console.log(`✅ Permission: ${data.response}`);
                 }
@@ -2467,6 +2649,45 @@ async function main() {
                     }
                 }
             }
+            else if (data.type === 'join_meeting') {
+                const meetingUrl = data.url;
+                if (meetingUrl) {
+                    const recallJoin = getRecallClient();
+                    if (!recallJoin) {
+                        await sendToFrontend({ type: 'meeting_error', message: 'Recall.ai not configured — set RECALL_API_KEY in .env' });
+                    }
+                    else {
+                        try {
+                            const webhookBase = process.env.FLY_APP_NAME
+                                ? `https://${process.env.FLY_APP_NAME}.fly.dev`
+                                : `http://localhost:${apiPort}`;
+                            await sendToFrontend({ type: 'meeting_joining', message: 'Osborn is joining your meeting...' });
+                            const botId = await recallJoin.joinMeeting(meetingUrl, webhookBase);
+                            const sessionId = currentLLM?.sessionId || currentResumeSessionId || 'default';
+                            recallJoin.registerBot(botId, sessionId);
+                            await sendToFrontend({ type: 'meeting_joined', botId, message: 'Osborn has joined the meeting' });
+                        }
+                        catch (err) {
+                            console.error('❌ Recall.ai join error:', err);
+                            await sendToFrontend({ type: 'meeting_error', message: err.message });
+                        }
+                    }
+                }
+            }
+            else if (data.type === 'leave_meeting') {
+                const botId = data.botId;
+                const recallLeave = getRecallClient();
+                if (recallLeave && botId) {
+                    try {
+                        await recallLeave.leaveMeeting(botId);
+                        await sendToFrontend({ type: 'meeting_left', botId });
+                    }
+                    catch (err) {
+                        console.error('❌ Recall.ai leave error:', err);
+                        await sendToFrontend({ type: 'meeting_error', message: err.message });
+                    }
+                }
+            }
             else if (data.type === 'session_selected') {
                 const sessionId = data.sessionId;
                 console.log(`🚪 Session gate completed: ${sessionId ? `resume ${sessionId}` : 'fresh start'}`);

package/dist/pipeline-direct-llm.js CHANGED Viewed

@@ -39,7 +39,11 @@ export class PipelineDirectLLM extends llm.LLM {
     // Proxy all methods
     setResumeSessionId(id) { this.#claudeLLM.setResumeSessionId(id); }
     setContinueSession(e) { this.#claudeLLM.setContinueSession(e); }
-    resetForSessionSwitch() { this.#claudeLLM.resetForSessionSwitch(); }
+    resetForSessionSwitch() {
+        this.stopIndexWatcher();
+        this.#indexBuilding = false;
+        this.#claudeLLM.resetForSessionSwitch();
+    }
     respondToPermission(allow, msg) { this.#claudeLLM.respondToPermission(allow, msg); }
     hasPendingPermission() { return this.#claudeLLM.hasPendingPermission(); }
     getPendingPermission() { return this.#claudeLLM.getPendingPermission(); }
@@ -97,10 +101,11 @@ export class PipelineDirectLLM extends llm.LLM {
                 ``,
                 `User's message: "${userText}"`,
                 ``,
-                `Handle naturally:`,
-                `- If it's a quick side question, answer it then continue where you left off (restart sub-agents if needed)`,
-                `- If they want to change direction, follow their lead`,
-                `- Don't repeat what was already spoken unless it makes sense to clarify`,
+                `RESPOND with speech first, then act:`,
+                `- ALWAYS reply with at least one spoken sentence before doing any tool calls`,
+                `- If it's a quick side question, answer it then continue where you left off`,
+                `- If they want to change direction, acknowledge and follow their lead`,
+                `- Clarify when asked to or the question requires going over what you just said`,
                 `- Reference unspoken content naturally if relevant`,
             ].join('\n');
             // Modify the last user message in chatCtx

package/dist/pipeline-fastbrain.js CHANGED Viewed

@@ -70,11 +70,13 @@ function createSearchTool(sessionId, workingDir, sessionBaseDir, agentControl) {
                             name: 'emergency_stop',
                             description: [
                                 'Kill and restart the main agent with new instructions.',
-                                'ONLY call this when BOTH conditions are met:',
-                                '  1. The agent is performing a DESTRUCTIVE or ALTERING action (write, edit, delete, overwrite, install, deploy, push, drop, remove, modify files/data).',
-                                '  2. The user signals they want it stopped (high intent: "stop", "don\'t", "cancel that", "wait no", "not that").',
-                                'NEVER call for: research, reading, exploring, searching, fetching, or conversation.',
-                                'Priority: how destructive/unrecoverable the action is > how strongly the user signals.',
+                                'Call when the user clearly wants the agent to STOP what a  DESTRUCTIVE or ALTERING action:',
+                                '  - Destructive actions: write, edit, delete, install, deploy, push, modify files/data',
+                                '  - Wrong direction: agent is doing something the user didn\'t ask for or explicitly rejects',
+                                'User signals: "stop", "don\'t", "cancel", "wait no", "not that", "no no no", "I said stop".',
+                                'NEVER call for: research, reading, exploring, searching, fetching, or casual conversation, questions about what the agent is doing, or research the user initiated.',
+                                'When in doubt about whether to stop: check get_recent first to see what the agent is actually doing. ',
+                                'Priority: how destructive/unrecoverable the action is > how strongly the user signals.'
                             ].join(' '),
                             parameters: {
                                 type: 'OBJECT',
@@ -120,7 +122,7 @@ function createSearchTool(sessionId, workingDir, sessionBaseDir, agentControl) {
                     // Kill the destructive process and restart with new instructions
                     agentControl.abort();
                     const restartPrompt = [
-                        `[EMERGENCY STOP] A destructive action was stopped by the user.`,
+                        `[EMERGENCY STOP] The user stopped your previous action.`,
                         ``,
                         `Reason: ${reason}`,
                         ``,
@@ -130,7 +132,11 @@ function createSearchTool(sessionId, workingDir, sessionBaseDir, agentControl) {
                         `What was happening before the stop:`,
                         recentActivity.substring(0, 2000),
                         ``,
-                        `Review any changes already made. The user wants to change course.`,
+                        `RESPOND IMMEDIATELY with speech:`,
+                        `1. Acknowledge what you were doing and that you've stopped`,
+                        `2. If the user gave a new direction, confirm what you'll do instead`,
+                        `3. If unclear, ask what they'd like to do next`,
+                        `Do NOT silently do tool calls — speak first.`,
                     ].join('\n');
                     agentControl.sendPrompt(restartPrompt);
                     results.push({ functionResponse: { name: 'emergency_stop', response: { result: `Agent stopped and restarted. Reason: ${reason}` } } });