npm - osborn - Versions diffs - 0.1.2 → 0.1.6 - Mend

osborn 0.1.2 → 0.1.6

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (5)

package/dist/claude-handler.d.ts CHANGED Viewed

@@ -3,9 +3,10 @@ import { EventEmitter } from 'events';
 interface ClaudeHandlerOptions {
     workingDirectory?: string;
     allowedTools?: string[];
-    permissionMode?: 'default' | 'acceptEdits' | 'bypassPermissions';
+    permissionMode?: 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan';
     mcpServers?: Record<string, McpServerConfig>;
     requireAllPermissions?: boolean;
+    agentRole?: 'plan' | 'execute';
 }
 export type { McpServerConfig };
 export interface PermissionRequestEvent {
@@ -32,7 +33,18 @@ export declare class ClaudeHandler extends EventEmitter {
     private toolStartTimes;
     private alwaysAllowedTools;
     private static readonly ALL_TOOLS;
+    private static readonly PLAN_TOOLS;
+    private static readonly EXECUTE_TOOLS;
+    private agentRole;
     constructor(options?: ClaudeHandlerOptions);
+    /**
+     * Get the agent's role
+     */
+    getRole(): 'plan' | 'execute';
+    /**
+     * Check if this is a plan-mode agent
+     */
+    isPlanMode(): boolean;
     /**
      * Generate human-readable description for a tool call
      */

package/dist/claude-handler.js CHANGED Viewed

@@ -54,22 +54,57 @@ export class ClaudeHandler extends EventEmitter {
         // LSP (Language Server Protocol)
         'LSP',
     ];
+    // Plan mode tools - read-only, research, context gathering
+    static PLAN_TOOLS = [
+        'Read', // View file contents
+        'Glob', // File pattern matching
+        'Grep', // Content searching
+        'Bash', // Read-only bash (ls, git status, git log, etc.)
+        'Task', // Research agents
+        'WebFetch', // Web content analysis
+        'WebSearch', // Internet searching
+        'LSP', // Code intelligence (go to definition, references)
+    ];
+    // Execute mode tools - full access
+    static EXECUTE_TOOLS = ClaudeHandler.ALL_TOOLS;
+    agentRole;
     constructor(options = {}) {
         super();
+        // Set agent role
+        this.agentRole = options.agentRole || (options.permissionMode === 'plan' ? 'plan' : 'execute');
+        // For plan mode, restrict to read-only tools
+        const isPlanMode = options.permissionMode === 'plan';
+        const defaultTools = isPlanMode ? ClaudeHandler.PLAN_TOOLS : ClaudeHandler.ALL_TOOLS;
         this.options = {
             workingDirectory: options.workingDirectory || process.cwd(),
-            allowedTools: options.allowedTools || ClaudeHandler.ALL_TOOLS,
-            permissionMode: options.permissionMode || 'default',
+            allowedTools: options.allowedTools || defaultTools,
+            // Plan mode uses 'default' permission mode but with restricted tools
+            permissionMode: isPlanMode ? 'default' : (options.permissionMode || 'default'),
             mcpServers: options.mcpServers,
-            // By default, require permission for ALL tools
-            requireAllPermissions: options.requireAllPermissions ?? true,
+            // Plan mode doesn't require permissions (read-only is safe)
+            // Execute mode requires permissions for safety
+            requireAllPermissions: isPlanMode ? false : (options.requireAllPermissions ?? true),
         };
+        const roleEmoji = this.agentRole === 'plan' ? '📋' : '🔨';
+        console.log(`${roleEmoji} Agent role: ${this.agentRole.toUpperCase()}`);
         console.log(`🔧 Allowed tools: ${this.options.allowedTools?.join(', ')}`);
-        console.log(`🔐 Require all permissions: ${this.options.requireAllPermissions}`);
+        console.log(`🔐 Require permissions: ${this.options.requireAllPermissions}`);
         if (this.options.mcpServers) {
             console.log(`🔌 MCP servers: ${Object.keys(this.options.mcpServers).join(', ')}`);
         }
     }
+    /**
+     * Get the agent's role
+     */
+    getRole() {
+        return this.agentRole;
+    }
+    /**
+     * Check if this is a plan-mode agent
+     */
+    isPlanMode() {
+        return this.agentRole === 'plan';
+    }
     /**
      * Generate human-readable description for a tool call
      */

package/dist/index.d.ts CHANGED Viewed

@@ -1,3 +1 @@
 import 'dotenv/config';
-declare const _default: import("@livekit/agents").Agent;
-export default _default;

package/dist/index.js CHANGED Viewed

@@ -1,337 +1,449 @@
-import { ServerOptions, cli, defineAgent, llm, voice } from '@livekit/agents';
+import { llm, voice, initializeLogger } from '@livekit/agents';
 import * as openai from '@livekit/agents-plugin-openai';
 import * as google from '@livekit/agents-plugin-google';
+import { Room, RoomEvent } from '@livekit/rtc-node';
+import { AccessToken } from 'livekit-server-sdk';
 import { z } from 'zod';
-import { fileURLToPath } from 'url';
 import 'dotenv/config';
+// Initialize logger before anything else
+initializeLogger({ pretty: true, level: 'info' });
 import { ClaudeHandler } from './claude-handler.js';
 import { CodexHandler } from './codex-handler.js';
 import { loadConfig, getMcpServers, getEnabledMcpServerNames } from './config.js';
-// Parse CLI arguments for room code
+// Generate a short, user-friendly room code
+function generateRoomCode() {
+    const chars = 'abcdefghjkmnpqrstuvwxyz23456789';
+    let code = '';
+    for (let i = 0; i < 6; i++) {
+        code += chars[Math.floor(Math.random() * chars.length)];
+    }
+    return code;
+}
+// Parse CLI arguments
 function parseArgs() {
     const args = process.argv.slice(2);
     let roomCode;
+    let provider;
     for (let i = 0; i < args.length; i++) {
         if (args[i] === '--room' && args[i + 1]) {
             roomCode = args[i + 1];
         }
+        if (args[i] === '--provider' && args[i + 1]) {
+            provider = args[i + 1];
+        }
+        // Short code detection (e.g., `npm run dev abc123`)
+        if (!args[i].startsWith('-') && args[i].length >= 4 && args[i].length <= 10 &&
+            !['dev', 'start'].includes(args[i])) {
+            roomCode = args[i];
+        }
     }
-    return { roomCode };
+    return { roomCode, provider };
 }
-const cliArgs = parseArgs();
-if (cliArgs.roomCode) {
-    console.log(`🔗 Room code provided: ${cliArgs.roomCode}`);
-}
-// Global error handlers to catch silent failures
-process.on('unhandledRejection', (reason, promise) => {
+// Global error handlers
+process.on('unhandledRejection', (reason) => {
     console.error('❌ Unhandled Rejection:', reason);
 });
 process.on('uncaughtException', (error) => {
     console.error('❌ Uncaught Exception:', error);
 });
-// Default provider (can be overridden by participant metadata)
-const DEFAULT_PROVIDER = process.env.LLM_PROVIDER || 'openai';
-// Debug mode
-const DEBUG = process.env.DEBUG_LIVEKIT === 'true';
-if (DEBUG) {
-    console.log('🐛 Debug logging enabled');
-}
-console.log(`🤖 Default LLM Provider: ${DEFAULT_PROVIDER}`);
-// Load configuration from ~/.osborn/config.yaml
-console.log('📁 Loading configuration...');
-const config = loadConfig();
-const mcpServers = getMcpServers(config);
-const enabledMcpNames = getEnabledMcpServerNames(config);
-if (enabledMcpNames.length > 0) {
-    console.log(`🔌 Enabled MCP servers: ${enabledMcpNames.join(', ')}`);
-}
-// Pre-initialize Claude handler at module load (before any connections)
-console.log('🔥 Pre-initializing Claude Code...');
-const workingDir = config.workingDirectory || process.cwd();
-const claude = new ClaudeHandler({
-    workingDirectory: workingDir,
-    permissionMode: 'default', // Ask for permission on dangerous tools (Bash, Write, Edit)
-    mcpServers: Object.keys(mcpServers).length > 0 ? mcpServers : undefined,
-});
-console.log(`📂 Working directory: ${workingDir}`);
-// Listen for permission requests from Claude
-claude.on('permission_request', (req) => {
-    console.log(`\n⚠️ PERMISSION REQUIRED ⚠️`);
-    console.log(`🔧 Tool: ${req.toolName}`);
-    console.log(`📝 Action: ${req.description}`);
-    console.log(`⏳ Waiting for user response (say: allow, deny, or always allow)...`);
-    // Send to frontend for UI display
-    sendToFrontend({
-        type: 'permission_request',
-        toolName: req.toolName,
-        description: req.description,
+// Main function
+async function main() {
+    console.log('\n🤖 Osborn Voice AI Coding Assistant\n');
+    // Validate environment
+    const livekitUrl = process.env.LIVEKIT_URL;
+    const apiKey = process.env.LIVEKIT_API_KEY;
+    const apiSecret = process.env.LIVEKIT_API_SECRET;
+    if (!livekitUrl || !apiKey || !apiSecret) {
+        console.error('❌ Missing required environment variables:');
+        if (!livekitUrl)
+            console.error('   - LIVEKIT_URL');
+        if (!apiKey)
+            console.error('   - LIVEKIT_API_KEY');
+        if (!apiSecret)
+            console.error('   - LIVEKIT_API_SECRET');
+        console.error('\nSet these in your .env file or environment.');
+        process.exit(1);
+    }
+    // Parse CLI args
+    const cliArgs = parseArgs();
+    // Load configuration
+    console.log('📁 Loading configuration...');
+    const config = loadConfig();
+    const mcpServers = getMcpServers(config);
+    const enabledMcpNames = getEnabledMcpServerNames(config);
+    if (enabledMcpNames.length > 0) {
+        console.log(`🔌 Enabled MCP servers: ${enabledMcpNames.join(', ')}`);
+    }
+    const workingDir = config.workingDirectory || process.cwd();
+    console.log(`📂 Working directory: ${workingDir}`);
+    // Determine room code
+    const roomCode = cliArgs.roomCode || generateRoomCode();
+    const roomName = `osborn-${roomCode}`;
+    if (cliArgs.roomCode) {
+        console.log(`🔗 Joining room: ${roomCode}`);
+    }
+    else {
+        console.log(`\n✨ Created new room: ${roomCode}`);
+        console.log(`\n📋 Share this with the frontend or run:`);
+        console.log(`   Open: https://osborn.app?room=${roomCode}`);
+        console.log(`   Or enter code "${roomCode}" in the frontend\n`);
+    }
+    // Default provider
+    const defaultProvider = cliArgs.provider || process.env.LLM_PROVIDER || 'openai';
+    console.log(`🎯 Default voice provider: ${defaultProvider}`);
+    // ============================================================
+    // Initialize Claude Agents (Dual Architecture)
+    // ============================================================
+    console.log('\n🔥 Initializing Claude agents...');
+    // Plan Agent - Read-only, research
+    const planAgent = {
+        id: 1,
+        role: 'plan',
+        handler: new ClaudeHandler({
+            workingDirectory: workingDir,
+            permissionMode: 'plan',
+            agentRole: 'plan',
+            mcpServers: Object.keys(mcpServers).length > 0 ? mcpServers : undefined,
+        }),
+        busy: false,
+        currentTask: null,
+        context: [],
+    };
+    // Execute Agent - Full access
+    const executeAgent = {
+        id: 2,
+        role: 'execute',
+        handler: new ClaudeHandler({
+            workingDirectory: workingDir,
+            permissionMode: 'default',
+            agentRole: 'execute',
+            mcpServers: Object.keys(mcpServers).length > 0 ? mcpServers : undefined,
+        }),
+        busy: false,
+        currentTask: null,
+        context: [],
+    };
+    const agentPool = [planAgent, executeAgent];
+    // Smart routing
+    function routeTask(task) {
+        const taskLower = task.toLowerCase();
+        const executeKeywords = [
+            'create', 'make', 'build', 'implement', 'add', 'write',
+            'fix', 'update', 'change', 'modify', 'edit', 'refactor',
+            'delete', 'remove', 'run', 'execute', 'install', 'deploy',
+            'commit', 'push', 'test', 'debug', 'start', 'stop',
+        ];
+        for (const keyword of executeKeywords) {
+            if (taskLower.includes(keyword)) {
+                if (executeAgent.busy && !planAgent.busy) {
+                    return planAgent;
+                }
+                return executeAgent;
+            }
+        }
+        return planAgent.busy ? executeAgent : planAgent;
+    }
+    // ============================================================
+    // Create Access Token for Agent
+    // ============================================================
+    console.log('🔑 Creating access token...');
+    const token = new AccessToken(apiKey, apiSecret, {
+        identity: 'osborn-agent',
+        name: 'Osborn AI',
+        metadata: JSON.stringify({ type: 'agent', version: '0.1.5' }),
     });
-});
-// Pre-warm Claude immediately on server start
-claude.run('Respond with just: ready')
-    .then(() => console.log('✅ Claude pre-warmed and ready!'))
-    .catch((err) => console.log('⚠️ Pre-warm failed:', err.message));
-// Track job context and session for data channel
-let jobContext = null;
-let currentSession = null;
-// Track the current coding handler (can be Claude or Codex)
-let currentCodingAgent = 'claude';
-let codexHandler = null;
-// Helper to cleanup previous session before starting new one
-async function cleanupSession() {
-    if (currentSession) {
-        console.log('🧹 Cleaning up previous session...');
+    token.addGrant({
+        roomJoin: true,
+        room: roomName,
+        canPublish: true,
+        canSubscribe: true,
+        canPublishData: true,
+    });
+    const jwt = await token.toJwt();
+    // ============================================================
+    // Connect to Room Directly
+    // ============================================================
+    console.log('📡 Connecting to LiveKit...');
+    const room = new Room();
+    // Track state
+    let currentSession = null;
+    let currentProvider = defaultProvider;
+    let currentCodingAgent = 'claude';
+    let codexHandler = null;
+    let localParticipant = null;
+    let agentState = 'initializing';
+    // Speech queue
+    const speechQueue = [];
+    let isSpeaking = false;
+    // Helper to send data to frontend
+    async function sendToFrontend(data) {
+        if (!localParticipant) {
+            console.log('⚠️ sendToFrontend: no localParticipant!');
+            return;
+        }
         try {
-            currentSession.removeAllListeners();
-            // Close session gracefully if method exists
-            if (typeof currentSession.close === 'function') {
-                await currentSession.close();
-            }
+            const encoder = new TextEncoder();
+            const payload = encoder.encode(JSON.stringify(data));
+            await localParticipant.publishData(payload, {
+                reliable: true,
+                topic: 'osborn-updates',
+            });
+            console.log(`📤 Sent to frontend: ${data.type}`);
         }
         catch (err) {
-            console.log('⚠️ Session cleanup error (non-fatal):', err.message);
+            console.error('❌ sendToFrontend error:', err);
         }
-        currentSession = null;
-    }
-}
-// Helper to send data to frontend
-async function sendToFrontend(data) {
-    if (!jobContext)
-        return;
-    try {
-        const encoder = new TextEncoder();
-        const payload = encoder.encode(JSON.stringify(data));
-        await jobContext.room.localParticipant?.publishData(payload, {
-            reliable: true,
-            topic: 'osborn-updates',
-        });
-    }
-    catch (err) {
-        // Ignore send errors
     }
-}
-// Define the run_code tool (works with both Claude and Codex)
-const runCodeTool = llm.tool({
-    description: `Execute coding tasks using the coding agent. Use for:
-- Files: read, write, create, edit, list, search
-- Directories: current directory, list contents
-- Code: fix bugs, refactor, explain, review
-- Terminal: run commands, install packages, git
-- Project: analyze codebase, make changes
-- Web: search the web for information`,
-    parameters: z.object({
-        task: z.string().describe('The coding task to execute'),
-    }),
-    execute: async ({ task }) => {
-        const agentName = currentCodingAgent === 'claude' ? 'Claude Code' : 'OpenAI Codex';
-        console.log(`\n🔨 ${agentName}: "${task}"`);
-        await sendToFrontend({ type: 'system', text: `Working on: ${task}` });
-        try {
-            let result;
-            if (currentCodingAgent === 'codex' && codexHandler) {
-                result = await codexHandler.run(task);
+    // Process speech queue
+    async function processSpeechQueue() {
+        if (isSpeaking || speechQueue.length === 0 || !currentSession)
+            return;
+        if (agentState !== 'listening')
+            return;
+        if (currentProvider === 'gemini') {
+            // Gemini doesn't support generateReply
+            while (speechQueue.length > 0) {
+                console.log(`🔊 [Would say] ${speechQueue.shift()}`);
             }
-            else {
-                result = await claude.run(task);
-            }
-            console.log(`✅ Done: ${result.length} chars`);
-            await sendToFrontend({ type: 'assistant_response', text: result });
-            return result;
+            return;
         }
-        catch (err) {
-            console.error('❌ Error:', err);
-            return `Error: ${err.message}`;
+        isSpeaking = true;
+        const message = speechQueue.shift();
+        try {
+            await Promise.race([
+                currentSession.generateReply({ userInput: message }),
+                new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 5000))
+            ]);
         }
-    },
-});
-// Define the permission response tool
-const respondPermissionTool = llm.tool({
-    description: `Respond to a pending permission request from Claude Code.
-Use this ONLY when there is a pending permission request.
-Call this after hearing the user's response to a permission prompt.`,
-    parameters: z.object({
-        response: z.enum(['allow', 'deny', 'always_allow']).describe('The user response: "allow" for one-time approval, "deny" to reject, "always_allow" to permanently allow this tool type'),
-    }),
-    execute: async ({ response }) => {
-        if (!claude.hasPendingPermission()) {
-            return 'No pending permission request.';
+        catch {
+            // Ignore speech errors
+        }
+        finally {
+            isSpeaking = false;
+            if (speechQueue.length > 0) {
+                setTimeout(processSpeechQueue, 500);
+            }
         }
-        const pending = claude.getPendingPermission();
-        claude.respondToPermission(response);
-        await sendToFrontend({
-            type: 'permission_response',
-            response,
-            toolName: pending?.toolName
+    }
+    // Setup agent event handlers
+    agentPool.forEach(slot => {
+        slot.handler.on('permission_request', (req) => {
+            console.log(`\n⚠️ [${slot.role}] PERMISSION: ${req.toolName}`);
+            sendToFrontend({
+                type: 'permission_request',
+                toolName: req.toolName,
+                description: req.description,
+                agentId: slot.id,
+            });
+            speechQueue.push(`[Tell user] I need permission to ${req.description}. Say yes, no, or always allow.`);
+            processSpeechQueue();
         });
-        return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
-    },
-});
-// Agent instructions - dynamically includes available tools
-const OSBORN_INSTRUCTIONS = `You are Osborn, a voice-enabled AI assistant with coding superpowers.
-Keep responses under 50 words. Sound natural and human.
+        slot.handler.on('tool_use', (tool) => {
+            console.log(`🔧 [${slot.role}] Using: ${tool.name}`);
+        });
+        slot.handler.on('error', (err) => {
+            console.error(`❌ [${slot.role}] Error:`, err);
+        });
+    });
+    // Define tools for voice LLM
+    const runCodeTool = llm.tool({
+        description: `Execute ANY coding task by delegating to Claude agents. YOU MUST USE THIS for:
+- Reading files ("read package.json", "show me the code")
+- Writing/editing files ("fix this bug", "add a function")
+- Running commands ("run npm test", "git status")
+- Searching code ("find where X is defined")
+- Explaining code ("what does this function do")
-AVAILABLE CAPABILITIES via run_code tool:
-- Read, Write, Edit, MultiEdit files
-- Glob (find files by pattern), Grep (search content)
-- Bash (run terminal commands)
-- WebSearch (search the web), WebFetch (fetch URLs)
-- NotebookEdit (edit Jupyter notebooks)
-- Task (delegate complex tasks), TodoWrite (track tasks)
-- LSP (code intelligence - go to definition, find references)
+You DON'T need permission to use this - it routes to the right agent automatically.
+Plan Agent = reading/research. Execute Agent = writing (will ask user for permission).`,
+        parameters: z.object({
+            task: z.string().describe('The coding task to execute'),
+        }),
+        execute: async ({ task }) => {
+            const slot = routeTask(task);
+            console.log(`\n🔨 [${slot.role}] Task: "${task}"`);
+            await sendToFrontend({ type: 'system', text: `${slot.role} agent: ${task}` });
+            slot.busy = true;
+            slot.currentTask = task;
+            sharedContext.currentFocus = task.substring(0, 50);
+            try {
+                let result;
+                if (currentCodingAgent === 'codex' && codexHandler) {
+                    result = await codexHandler.run(task);
+                }
+                else {
+                    const contextPrefix = slot.context.length > 0
+                        ? `Context: ${slot.context.slice(-3).join(' | ')}\n\nTask: `
+                        : '';
+                    result = await slot.handler.run(contextPrefix + task);
+                }
+                slot.context.push(`${task.substring(0, 50)} → Done`);
+                if (slot.context.length > 10)
+                    slot.context.shift();
+                // Update shared context
+                sharedContext.addAction(`${slot.role}: ${task.substring(0, 30)}`);
+                // Extract file references from result
+                const fileMatches = result.match(/(?:\/[\w\-\.\/]+|src\/[\w\-\.\/]+|\.\/[\w\-\.\/]+)/g);
+                if (fileMatches) {
+                    fileMatches.slice(0, 3).forEach(f => sharedContext.addFile(f));
+                }
+                console.log(`✅ [${slot.role}] Done`);
+                await sendToFrontend({ type: 'assistant_response', text: result });
+                // Return a concise summary for the voice LLM
+                const summary = result.length > 500
+                    ? result.substring(0, 500) + '... [truncated for voice]'
+                    : result;
+                return summary;
+            }
+            catch (err) {
+                return `Error: ${err.message}`;
+            }
+            finally {
+                slot.busy = false;
+                slot.currentTask = null;
+            }
+        },
+    });
+    const respondPermissionTool = llm.tool({
+        description: `Respond to a permission request. Call after hearing user's response.`,
+        parameters: z.object({
+            response: z.enum(['allow', 'deny', 'always_allow']),
+        }),
+        execute: async ({ response }) => {
+            const slot = agentPool.find(s => s.handler.hasPendingPermission());
+            if (!slot)
+                return 'No pending permission.';
+            const pending = slot.handler.getPendingPermission();
+            slot.handler.respondToPermission(response);
+            await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
+            return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
+        },
+    });
+    // Shared context that both voice and coding agents contribute to
+    const sharedContext = {
+        recentActions: [],
+        discoveredFiles: [],
+        currentFocus: null,
+        addAction(action) {
+            this.recentActions.push(action);
+            if (this.recentActions.length > 5)
+                this.recentActions.shift();
+        },
+        addFile(file) {
+            if (!this.discoveredFiles.includes(file)) {
+                this.discoveredFiles.push(file);
+                if (this.discoveredFiles.length > 10)
+                    this.discoveredFiles.shift();
+            }
+        },
+        getContextSummary() {
+            const parts = [];
+            if (this.currentFocus)
+                parts.push(`Focus: ${this.currentFocus}`);
+            if (this.recentActions.length)
+                parts.push(`Recent: ${this.recentActions.slice(-3).join(', ')}`);
+            if (this.discoveredFiles.length)
+                parts.push(`Files: ${this.discoveredFiles.slice(-5).join(', ')}`);
+            return parts.join(' | ');
+        }
+    };
+    // Dynamic instructions with working directory context
+    const getInstructions = () => `You are Osborn, a voice AI coding assistant.
-WHEN TO USE run_code:
-- File operations (read, write, create, edit, list, find)
-- Code tasks (fix, refactor, explain, review, debug)
-- Terminal commands (run, install, test, build, git)
-- Web searches (look up documentation, APIs, errors)
-- Project analysis (understand codebase, find patterns)
+WORKING DIRECTORY: ${workingDir}
-WHEN TO RESPOND DIRECTLY:
-- Greetings and small talk
-- General knowledge questions
-- Clarifying what the user wants
+STYLE: Keep responses SHORT (under 70 words). Sound natural. Say "Got it" when given a task.
-PERMISSION HANDLING:
-When the coding agent needs permission, you MUST:
-1. Tell the user: "[Agent] wants to [action]. Allow, deny, or always allow?"
-2. When they respond, call respond_permission with their choice
+CAPABILITIES (via run_code tool):
+- Read/write/edit files, search codebase
+- Run terminal commands (npm, git, etc)
+- Fix bugs, refactor, explain code
+- Search web/docs for solutions
-Be conversational and helpful. Ask follow-up questions when needed.`;
-// Voice assistant with tools
-class OsbornAssistant extends voice.Agent {
-    constructor() {
-        super({
-            instructions: OSBORN_INSTRUCTIONS,
-            tools: {
-                run_code: runCodeTool,
-                respond_permission: respondPermissionTool,
-            },
-        });
-    }
-}
-// Create the appropriate model based on provider
-function createModel(provider) {
-    if (provider === 'gemini') {
-        console.log('📱 Using Gemini Live API');
-        console.log('🔑 GOOGLE_API_KEY:', process.env.GOOGLE_API_KEY ? 'set' : 'NOT SET');
-        // From official docs: https://docs.livekit.io/agents/models/realtime/plugins/gemini/
-        // Package v1.0.31 uses google.beta.realtime (not google.realtime yet)
-        const model = new google.beta.realtime.RealtimeModel({
-            // model: 'gemini-2.5-flash-native-audio-preview-12-2025', // From official docs
-            model: 'gemini-3.5-flash-latest', // From official docs
-            voice: 'Puck',
-            instructions: OSBORN_INSTRUCTIONS,
-        });
-        // console.log('✅ Gemini model created with gemini-2.5-flash-native-audio-preview-12-2025')
-        console.log('✅ Gemini model created with gemini-3.5-flash-latest');
-        return model;
-    }
-    else {
-        console.log('📱 Using OpenAI Realtime API');
-        console.log('🔑 OPENAI_API_KEY:', process.env.OPENAI_API_KEY ? 'set' : 'NOT SET');
-        const model = new openai.realtime.RealtimeModel({
-            voice: 'alloy',
-        });
-        console.log('✅ OpenAI model created');
-        return model;
-    }
-}
-// Helper to get provider from participant metadata
-function getProviderFromParticipant(metadata) {
-    if (!metadata)
-        return DEFAULT_PROVIDER;
-    try {
-        const data = JSON.parse(metadata);
-        return data.provider || DEFAULT_PROVIDER;
-    }
-    catch {
-        return DEFAULT_PROVIDER;
-    }
-}
-// Helper to get coding agent from participant metadata
-function getCodingAgentFromParticipant(metadata) {
-    if (!metadata)
-        return 'claude';
-    try {
-        const data = JSON.parse(metadata);
-        return data.codingAgent || 'claude';
+TWO AGENTS AVAILABLE:
+- Plan Agent: Research, explore, read files (fast, no permissions needed)
+- Execute Agent: Write code, make changes (asks permission for writes)
+${sharedContext.getContextSummary() ? `CONTEXT: ${sharedContext.getContextSummary()}` : ''}
+PERMISSIONS: When you hear permission request, tell user what needs permission and ask "allow, deny, or always allow?" Then call respond_permission.`;
+    const INSTRUCTIONS = getInstructions();
+    // Voice agent class
+    class OsbornVoiceAgent extends voice.Agent {
+        constructor() {
+            super({
+                instructions: INSTRUCTIONS,
+                tools: {
+                    run_code: runCodeTool,
+                    respond_permission: respondPermissionTool,
+                },
+            });
+        }
     }
-    catch {
-        return 'claude';
+    // Create voice model
+    function createModel(provider) {
+        if (provider === 'gemini') {
+            console.log('📱 Using Gemini Live API');
+            return new google.beta.realtime.RealtimeModel({
+                model: 'gemini-2.5-flash-native-audio-preview-12-2025',
+                voice: 'Puck',
+                instructions: INSTRUCTIONS,
+            });
+        }
+        else {
+            console.log('📱 Using OpenAI Realtime API');
+            return new openai.realtime.RealtimeModel({
+                voice: 'alloy',
+            });
+        }
     }
-}
-export default defineAgent({
-    entry: async (ctx) => {
-        console.log('🚀 Agent starting for room:', ctx.room.name);
-        // If room code was provided via CLI, validate room name
-        if (cliArgs.roomCode) {
-            const expectedRoom = `osborn-${cliArgs.roomCode}`;
-            if (ctx.room.name !== expectedRoom) {
-                console.log(`⏭️ Skipping room ${ctx.room.name} (waiting for ${expectedRoom})`);
-                return; // Don't handle this room
+    // ============================================================
+    // Room Event Handlers
+    // ============================================================
+    room.on(RoomEvent.Connected, () => {
+        console.log('✅ Connected to room:', roomName);
+        localParticipant = room.localParticipant;
+    });
+    room.on(RoomEvent.Disconnected, () => {
+        console.log('👋 Disconnected from room');
+        currentSession = null;
+    });
+    room.on(RoomEvent.ParticipantConnected, async (participant) => {
+        console.log(`\n👤 User joined: ${participant.identity}`);
+        // Get provider from participant metadata
+        let provider = defaultProvider;
+        let codingAgent = 'claude';
+        if (participant.metadata) {
+            try {
+                const meta = JSON.parse(participant.metadata);
+                provider = meta.provider || defaultProvider;
+                codingAgent = meta.codingAgent || 'claude';
             }
-            console.log(`✅ Room matches expected: ${expectedRoom}`);
+            catch { }
         }
-        jobContext = ctx;
-        // Claude verbose logging
-        claude.on('tool_use', (tool) => {
-            console.log(`\n🔧 Claude Tool Started: ${tool.name}`);
-            if (tool.input) {
-                const inputStr = JSON.stringify(tool.input).substring(0, 200);
-                console.log(`   Input: ${inputStr}${inputStr.length >= 200 ? '...' : ''}`);
-            }
-        });
-        claude.on('tool_result', (result) => {
-            console.log(`✅ Claude Tool Completed: ${result.name || 'unknown'}`);
-        });
-        claude.on('text', (text) => {
-            if (text.length > 0) {
-                console.log(`💬 Claude says: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`);
-            }
-        });
-        claude.on('error', (err) => {
-            console.error(`❌ Claude Error:`, err);
-        });
-        // Connect FIRST so we can wait for participants
-        console.log('📡 Connecting to room...');
-        await ctx.connect();
-        console.log('✅ Connected to room');
-        // Wait for a participant to join using LiveKit's built-in method
-        console.log('⏳ Waiting for participant...');
-        const participant = await ctx.waitForParticipant();
-        console.log('👤 Participant joined:', participant.identity);
-        console.log('📋 Participant metadata:', participant.metadata);
-        const provider = getProviderFromParticipant(participant.metadata);
-        const codingAgent = getCodingAgentFromParticipant(participant.metadata);
-        console.log(`🎯 User selected provider: ${provider}`);
-        console.log(`🔧 User selected coding agent: ${codingAgent}`);
-        // Set the current coding agent and initialize if needed
+        currentProvider = provider;
         currentCodingAgent = codingAgent;
+        console.log(`🎯 Provider: ${provider}, Agent: ${codingAgent}`);
         if (codingAgent === 'codex') {
-            console.log('🔧 Initializing Codex handler...');
-            codexHandler = new CodexHandler({
-                workingDirectory: workingDir,
-            });
-            console.log('✅ Codex handler ready');
+            codexHandler = new CodexHandler({ workingDirectory: workingDir });
         }
-        // Create model based on user's choice
+        // Create voice session
         const model = createModel(provider);
-        // Clean up any previous session before creating new one
-        await cleanupSession();
-        const session = new voice.AgentSession({
-            llm: model,
-        });
+        const session = new voice.AgentSession({ llm: model });
         currentSession = session;
-        // Add session event listeners for debugging
-        // Using string literals as AgentSessionEventTypes is not directly exported
-        session.on('user_state_changed', (ev) => {
-            console.log(`👤 User state: ${ev.oldState} → ${ev.newState}`);
-        });
+        // Session events
         session.on('agent_state_changed', (ev) => {
-            console.log(`🤖 Agent state: ${ev.oldState} → ${ev.newState}`);
+            agentState = ev.newState;
+            console.log(`🤖 State: ${ev.newState}`);
+            if (ev.newState === 'listening' && speechQueue.length > 0) {
+                processSpeechQueue();
+            }
         });
         session.on('user_input_transcribed', (ev) => {
-            console.log(`📝 Transcribed: "${ev.transcript}" (final: ${ev.isFinal})`);
+            console.log(`📝 User: "${ev.transcript}"`);
+        });
+        session.on('user_state_changed', (ev) => {
+            console.log(`👤 User state: ${ev.oldState} → ${ev.newState}`);
         });
         session.on('error', (ev) => {
             console.error('❌ Session error:', ev.error);
@@ -339,77 +451,108 @@ export default defineAgent({
         session.on('close', (ev) => {
             console.log('🚪 Session closed:', ev.reason);
         });
-        ctx.room.on('trackSubscribed', (track, publication, p) => {
-            console.log(`📥 Track subscribed: ${track.kind} from ${p.identity}`);
-        });
-        ctx.room.on('participantDisconnected', async (p) => {
-            console.log(`👋 Participant disconnected: ${p.identity}`);
-            // Clean up session when user disconnects to prepare for next connection
-            await cleanupSession();
-        });
-        // Listen for data channel messages from frontend
-        ctx.room.on('dataReceived', async (payload, participant, kind, topic) => {
-            if (topic === 'user-input') {
+        // Start voice session
+        console.log('🎬 Starting voice session...');
+        const agent = new OsbornVoiceAgent();
+        try {
+            await session.start({
+                agent,
+                room,
+            });
+            console.log('✅ Voice session started!');
+            console.log('🎤 Ready - speak to begin!\n');
+            // Send ready signal with persistent retry (frontend might not be subscribed yet)
+            console.log('💓 Sending agent_ready signal...');
+            let readySent = false;
+            const sendReady = async () => {
+                if (readySent)
+                    return;
+                await sendToFrontend({ type: 'agent_ready', provider, codingAgent });
+            };
+            // Keep sending every 2 seconds for 20 seconds total
+            const readyInterval = setInterval(sendReady, 2000);
+            await sendReady();
+            setTimeout(() => {
+                clearInterval(readyInterval);
+                console.log('✅ agent_ready retries complete');
+            }, 20000);
+            // Mark as sent when user first speaks (no need to keep sending)
+            session.on('input_speech_started', () => {
+                readySent = true;
+                clearInterval(readyInterval);
+            });
+            console.log('✅ agent_ready sent (with retries scheduled)');
+            // Greet user (OpenAI only)
+            if (provider !== 'gemini') {
                 try {
-                    const data = JSON.parse(new TextDecoder().decode(payload));
-                    console.log(`📨 Received from frontend:`, data);
-                    if (data.type === 'permission_response') {
-                        // Handle permission response from UI
-                        if (claude.hasPendingPermission()) {
-                            claude.respondToPermission(data.response);
-                            console.log(`✅ Permission ${data.response} from UI`);
-                        }
-                    }
-                    else if (data.type === 'user_text') {
-                        // Handle text input from frontend
-                        console.log(`📝 Text input: "${data.content}"`);
-                        // Inject text into the session as user input
-                        if (currentSession) {
-                            try {
-                                // Interrupt any current speech first
-                                currentSession.interrupt();
-                                // Generate a reply to the text input
-                                await currentSession.generateReply({
-                                    userInput: data.content,
-                                });
-                                console.log(`✅ Injected text to session`);
-                            }
-                            catch (err) {
-                                console.error(`❌ Failed to inject text:`, err);
-                            }
-                        }
-                    }
+                    await session.generateReply({
+                        userInput: '[Greet the user: "Hey, I\'m Osborn. What are you working on?"]'
+                    });
                 }
-                catch (e) {
-                    // Not JSON, ignore
+                catch {
+                    console.log('⚠️ Greeting skipped');
                 }
             }
+        }
+        catch (err) {
+            console.error('❌ Failed to start session:', err);
+        }
+    });
+    room.on(RoomEvent.ParticipantDisconnected, (participant) => {
+        console.log(`👋 User left: ${participant.identity}`);
+        if (currentSession) {
+            currentSession.removeAllListeners();
+            currentSession = null;
+        }
+        console.log('⏳ Waiting for new user...\n');
+    });
+    room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
+        if (topic !== 'user-input')
+            return;
+        try {
+            const data = JSON.parse(new TextDecoder().decode(payload));
+            console.log('📨 Data:', data.type);
+            if (data.type === 'permission_response') {
+                const slot = agentPool.find(s => s.handler.hasPendingPermission());
+                if (slot) {
+                    slot.handler.respondToPermission(data.response);
+                    console.log(`✅ Permission: ${data.response}`);
+                }
+            }
+            else if (data.type === 'user_text' && currentSession) {
+                console.log(`📝 Text: "${data.content}"`);
+                currentSession.interrupt();
+                await currentSession.generateReply({ userInput: data.content });
+            }
+        }
+        catch { }
+    });
+    // ============================================================
+    // Connect to Room
+    // ============================================================
+    try {
+        await room.connect(livekitUrl, jwt, {
+            autoSubscribe: true,
+            dynacast: true,
         });
-        // Create the agent
-        const agent = new OsbornAssistant();
-        // Start session
-        console.log('🎬 Starting voice session...');
-        const startTime = Date.now();
-        await session.start({
-            agent,
-            room: ctx.room,
-        });
-        console.log(`✅ Session started in ${Date.now() - startTime}ms with ${provider.toUpperCase()} + Claude tools`);
-        console.log('🎤 Ready for voice input! Speak to start.');
-    },
-});
-// Configure server options
-const serverOptions = {
-    agent: fileURLToPath(import.meta.url),
-};
-// If room code is provided, filter to only handle that room
-if (cliArgs.roomCode) {
-    const targetRoom = `osborn-${cliArgs.roomCode}`;
-    console.log(`🎯 Filtering for room: ${targetRoom}`);
-    // The agent will be dispatched to rooms matching this pattern
-    serverOptions.workerOptions = {
-    // Note: Room filtering is handled by LiveKit dispatch
-    // For local development, we validate the room in the entry function
-    };
+        // Set localParticipant immediately after connection
+        localParticipant = room.localParticipant;
+        console.log('✅ Connected to room:', roomName);
+        console.log('\n⏳ Waiting for user to connect...');
+        console.log(`   Room: ${roomCode}\n`);
+        // Warm up agents in background
+        console.log('🔥 Warming up agents...');
+        Promise.all([
+            planAgent.handler.run('ready').then(() => console.log('✅ Plan agent ready')),
+            executeAgent.handler.run('ready').then(() => console.log('✅ Execute agent ready')),
+        ]).catch(() => { });
+        // Keep process alive
+        await new Promise(() => { });
+    }
+    catch (err) {
+        console.error('❌ Failed to connect:', err);
+        process.exit(1);
+    }
 }
-cli.runApp(new ServerOptions(serverOptions));
+// Run
+main().catch(console.error);

package/package.json CHANGED Viewed

@@ -1,15 +1,16 @@
 {
   "name": "osborn",
-  "version": "0.1.2",
+  "version": "0.1.6",
   "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
   "type": "module",
   "bin": {
     "osborn": "./bin/cli.js"
   },
   "scripts": {
-    "dev": "tsx src/index.ts dev",
-    "start": "tsx src/index.ts start",
-    "build": "tsc"
+    "dev": "tsx src/index.ts",
+    "start": "tsx src/index.ts",
+    "build": "tsc",
+    "room": "tsx src/index.ts --room"
   },
   "keywords": [
     "voice",
@@ -30,8 +31,10 @@
     "@livekit/agents": "^1.0.0",
     "@livekit/agents-plugin-google": "^1.0.0",
     "@livekit/agents-plugin-openai": "^1.0.0",
+    "@livekit/rtc-node": "^0.13.22",
     "@openai/codex-sdk": "^0.77.0",
     "dotenv": "^16.4.0",
+    "livekit-server-sdk": "^2.15.0",
     "tsx": "^4.0.0",
     "yaml": "^2.3.0",
     "zod": "^3.23.0"