osborn 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/.env.example ADDED
@@ -0,0 +1,14 @@
1
+ # LLM Provider: 'openai' or 'gemini'
2
+ LLM_PROVIDER=openai
3
+
4
+ # LiveKit
5
+ LIVEKIT_URL=wss://your-project.livekit.cloud
6
+ LIVEKIT_API_KEY=your-api-key
7
+ LIVEKIT_API_SECRET=your-api-secret
8
+
9
+ # OpenAI (for Realtime API)
10
+ OPENAI_API_KEY=sk-...
11
+
12
+ # Google AI (for Gemini Live - FREE during preview!)
13
+ # Get your key at: https://aistudio.google.com/apikey
14
+ GOOGLE_API_KEY=AIzaSy...
package/bin/cli.js ADDED
@@ -0,0 +1,70 @@
1
+ #!/usr/bin/env node
2
+
3
+ import { spawn } from 'child_process'
4
+ import { fileURLToPath } from 'url'
5
+ import { dirname, join } from 'path'
6
+
7
+ const __filename = fileURLToPath(import.meta.url)
8
+ const __dirname = dirname(__filename)
9
+
10
+ // Pass all args to the actual agent
11
+ const args = process.argv.slice(2)
12
+
13
+ // Check for help
14
+ if (args.includes('--help') || args.includes('-h')) {
15
+ console.log(`
16
+ Osborn Agent - Voice AI Coding Assistant
17
+
18
+ Usage:
19
+ osborn-agent --room <code> Connect to a specific room
20
+ osborn-agent dev Run in development mode
21
+ osborn-agent start Run in production mode
22
+
23
+ Options:
24
+ --room <code> Room code from the Osborn web interface
25
+ --help, -h Show this help message
26
+
27
+ Environment Variables:
28
+ LIVEKIT_URL LiveKit server URL
29
+ LIVEKIT_API_KEY LiveKit API key
30
+ LIVEKIT_API_SECRET LiveKit API secret
31
+ OPENAI_API_KEY OpenAI API key (for voice)
32
+ ANTHROPIC_API_KEY Anthropic API key (for Claude Code)
33
+ GOOGLE_API_KEY Google API key (for Gemini)
34
+
35
+ Config File:
36
+ ~/.osborn/config.yaml MCP servers and settings
37
+
38
+ Example:
39
+ osborn-agent --room abc123
40
+ `)
41
+ process.exit(0)
42
+ }
43
+
44
+ // Run the agent using tsx
45
+ const agentPath = join(__dirname, '..', 'src', 'index.ts')
46
+ const tsxPath = join(__dirname, '..', 'node_modules', '.bin', 'tsx')
47
+
48
+ // Determine mode (default to 'dev' if no mode specified)
49
+ let mode = 'dev'
50
+ if (args.includes('start')) {
51
+ mode = 'start'
52
+ args.splice(args.indexOf('start'), 1)
53
+ } else if (args.includes('dev')) {
54
+ args.splice(args.indexOf('dev'), 1)
55
+ }
56
+
57
// Launch the agent through the locally installed tsx binary. The child shares
// this process's stdio and environment and runs from the package root.
const child = spawn(tsxPath, [agentPath, mode, ...args], {
  stdio: 'inherit',
  cwd: join(__dirname, '..'),
  env: process.env,
})

// 'error' fires when the child could not be spawned at all (e.g. tsx missing).
child.on('error', (err) => {
  console.error('Failed to start agent:', err.message)
  process.exit(1)
})

// Mirror the child's outcome. When the child is killed by a signal, `code` is
// null and `signal` is set; that must not be reported as a successful exit.
child.on('exit', (code, signal) => {
  if (signal) {
    console.error(`Agent terminated by signal ${signal}`)
    process.exit(1)
  }
  process.exit(code ?? 0)
})
@@ -0,0 +1,78 @@
1
+ import { type McpServerConfig } from '@anthropic-ai/claude-agent-sdk';
2
+ import { EventEmitter } from 'events';
3
+ interface ClaudeHandlerOptions {
4
+ workingDirectory?: string;
5
+ allowedTools?: string[];
6
+ permissionMode?: 'default' | 'acceptEdits' | 'bypassPermissions';
7
+ mcpServers?: Record<string, McpServerConfig>;
8
+ }
9
+ export type { McpServerConfig };
10
+ export interface PermissionRequestEvent {
11
+ toolName: string;
12
+ description: string;
13
+ toolInput: any;
14
+ toolUseId: string;
15
+ }
16
/**
 * The user's answer to a permission prompt: approve once ('allow'), reject
 * ('deny'), or approve this tool for all later uses ('always_allow').
 */
export type PermissionResponse = 'allow' | 'deny' | 'always_allow';
/**
 * Claude Handler using the official Claude Agent SDK
 *
 * Features:
 * - Session persistence (reuses same session for context)
 * - Built-in tools (Read, Write, Edit, Bash, Glob, Grep, WebSearch, WebFetch)
 * - Permission interception for voice approval
 * - Hooks for observability
 */
export declare class ClaudeHandler extends EventEmitter {
    private options;
    private abortController;
    private sessionId;
    private pendingPermission;
    private dangerousTools;
    private alwaysAllowedTools;
    private static readonly ALL_TOOLS;
    constructor(options?: ClaudeHandlerOptions);
    /**
     * Run one prompt through the agent and resolve with the collected text.
     */
    run(prompt: string): Promise<string>;
    /**
     * Request permission from user via event emission
     * Returns a promise that resolves when user responds with allow/deny/always_allow
     */
    private requestPermission;
    /**
     * Respond to a pending permission request
     */
    respondToPermission(response: PermissionResponse): void;
    /**
     * Grant permission for pending request (shorthand)
     */
    grantPermission(): void;
    /**
     * Deny permission for pending request (shorthand)
     */
    denyPermission(): void;
    /**
     * Always allow this tool type (shorthand)
     */
    alwaysAllowPermission(): void;
    /**
     * Check if there's a pending permission request
     */
    hasPendingPermission(): boolean;
    /**
     * Get current pending permission info (for displaying to user)
     */
    getPendingPermission(): PermissionRequestEvent | null;
    /**
     * Get list of always-allowed tools
     */
    getAlwaysAllowedTools(): string[];
    /**
     * Reset always-allowed tools
     */
    resetAlwaysAllowed(): void;
    /** Session id captured from the SDK, or null before the first run. */
    getSessionId(): string | null;
    /** Forget the stored session id so the next run starts a new session. */
    clearSession(): void;
    private handleMessage;
    /** Abort the run that is currently in progress, if any. */
    stop(): void;
}
78
+ export declare function askClaude(prompt: string, cwd?: string): Promise<string>;
@@ -0,0 +1,320 @@
1
+ import { query } from '@anthropic-ai/claude-agent-sdk';
2
+ import { EventEmitter } from 'events';
3
+ export class ClaudeHandler extends EventEmitter {
4
+ options;
5
+ abortController = null;
6
+ sessionId = null;
7
+ pendingPermission = null;
8
+ // Tools that require permission
9
+ dangerousTools = ['Bash', 'Write', 'Edit'];
10
+ // Tools the user has permanently approved
11
+ alwaysAllowedTools = new Set();
12
+ // All available Claude Agent SDK tools
13
+ static ALL_TOOLS = [
14
+ // File operations
15
+ 'Read', 'Write', 'Edit', 'MultiEdit',
16
+ // Search
17
+ 'Glob', 'Grep',
18
+ // Terminal
19
+ 'Bash',
20
+ // Web
21
+ 'WebSearch', 'WebFetch',
22
+ // Notebooks
23
+ 'NotebookEdit',
24
+ // Task management
25
+ 'Task', 'TodoWrite',
26
+ // LSP (Language Server Protocol)
27
+ 'LSP',
28
+ ];
29
+ constructor(options = {}) {
30
+ super();
31
+ this.options = {
32
+ workingDirectory: options.workingDirectory || process.cwd(),
33
+ allowedTools: options.allowedTools || ClaudeHandler.ALL_TOOLS,
34
+ permissionMode: options.permissionMode || 'default',
35
+ mcpServers: options.mcpServers,
36
+ };
37
+ console.log(`🔧 Allowed tools: ${this.options.allowedTools?.join(', ')}`);
38
+ if (this.options.mcpServers) {
39
+ console.log(`🔌 MCP servers: ${Object.keys(this.options.mcpServers).join(', ')}`);
40
+ }
41
+ }
42
+ async run(prompt) {
43
+ this.abortController = new AbortController();
44
+ let fullResponse = '';
45
+ try {
46
+ // Log session state
47
+ if (this.sessionId) {
48
+ console.log(`🔄 RESUMING session: ${this.sessionId}`);
49
+ }
50
+ else {
51
+ console.log('🆕 STARTING new session');
52
+ }
53
+ console.log(`📁 CWD: ${this.options.workingDirectory}`);
54
+ console.log(`🔑 Mode: ${this.options.permissionMode}`);
55
+ // Build SDK options with session resume
56
+ const sdkOptions = {
57
+ allowedTools: this.options.allowedTools,
58
+ cwd: this.options.workingDirectory,
59
+ permissionMode: this.options.permissionMode,
60
+ abortController: this.abortController || undefined,
61
+ // CRITICAL: Resume existing session for context continuity
62
+ ...(this.sessionId && { resume: this.sessionId }),
63
+ // MCP servers configuration
64
+ ...(this.options.mcpServers && { mcpServers: this.options.mcpServers }),
65
+ hooks: {
66
+ PreToolUse: [{
67
+ matcher: '.*',
68
+ hooks: [async (input, toolUseId) => {
69
+ const toolName = input?.tool_name || 'unknown';
70
+ const toolInput = input?.tool_input || {};
71
+ console.log(`🔧 Tool: ${toolName}`);
72
+ this.emit('tool_use', { name: toolName, input: toolInput });
73
+ // Check if this tool needs permission
74
+ if (this.dangerousTools.includes(toolName) && this.options.permissionMode === 'default') {
75
+ // Skip if user has permanently approved this tool
76
+ if (this.alwaysAllowedTools.has(toolName)) {
77
+ console.log(`✅ Auto-approved (always allow): ${toolName}`);
78
+ }
79
+ else {
80
+ console.log(`⚠️ Permission required for: ${toolName}`);
81
+ // Emit permission request and wait for approval
82
+ const response = await this.requestPermission(toolName, toolInput, toolUseId || 'unknown');
83
+ if (response === 'deny') {
84
+ console.log(`❌ Permission denied for: ${toolName}`);
85
+ return {
86
+ decision: 'block',
87
+ reason: 'User denied permission for this operation'
88
+ };
89
+ }
90
+ if (response === 'always_allow') {
91
+ this.alwaysAllowedTools.add(toolName);
92
+ console.log(`✅ Permission granted (always allow): ${toolName}`);
93
+ }
94
+ else {
95
+ console.log(`✅ Permission granted for: ${toolName}`);
96
+ }
97
+ }
98
+ }
99
+ return {}; // Allow tool to proceed
100
+ }]
101
+ }],
102
+ PostToolUse: [{
103
+ matcher: '.*',
104
+ hooks: [async (input) => {
105
+ const toolName = input?.tool_name || 'unknown';
106
+ console.log(`✅ Completed: ${toolName}`);
107
+ this.emit('tool_result', { name: toolName });
108
+ return {};
109
+ }]
110
+ }]
111
+ }
112
+ };
113
+ // Run the query
114
+ for await (const message of query({
115
+ prompt,
116
+ options: sdkOptions,
117
+ })) {
118
+ this.handleMessage(message);
119
+ // Capture session ID from init
120
+ if (message.type === 'system' && message.subtype === 'init') {
121
+ const newSessionId = message.session_id;
122
+ if (newSessionId) {
123
+ if (!this.sessionId) {
124
+ this.sessionId = newSessionId;
125
+ console.log(`📋 Session CREATED: ${this.sessionId}`);
126
+ }
127
+ else if (this.sessionId === newSessionId) {
128
+ console.log(`📋 Session CONTINUED: ${this.sessionId}`);
129
+ }
130
+ else {
131
+ // Session ID changed - SDK created new session despite resume
132
+ console.log(`⚠️ Session CHANGED: ${this.sessionId} → ${newSessionId}`);
133
+ this.sessionId = newSessionId;
134
+ }
135
+ }
136
+ }
137
+ // Collect text from assistant
138
+ if (message.type === 'assistant' && message.message?.content) {
139
+ for (const block of message.message.content) {
140
+ if (block.type === 'text') {
141
+ fullResponse += block.text;
142
+ }
143
+ }
144
+ }
145
+ // Final result
146
+ if (message.type === 'result') {
147
+ console.log(`📋 Result: ${message.subtype}`);
148
+ if (message.result && !fullResponse) {
149
+ fullResponse = message.result;
150
+ }
151
+ }
152
+ }
153
+ console.log(`✅ Done. Length: ${fullResponse.length}`);
154
+ return fullResponse || 'Task completed.';
155
+ }
156
+ catch (error) {
157
+ if (error.name === 'AbortError') {
158
+ this.emit('aborted');
159
+ return 'Task was cancelled.';
160
+ }
161
+ console.error('❌ SDK error:', error);
162
+ this.emit('error', error);
163
+ // Don't clear session on error - might be transient
164
+ throw error;
165
+ }
166
+ }
167
+ /**
168
+ * Request permission from user via event emission
169
+ * Returns a promise that resolves when user responds with allow/deny/always_allow
170
+ */
171
+ requestPermission(toolName, toolInput, toolUseId) {
172
+ return new Promise((resolve) => {
173
+ // Format the permission request message
174
+ let description = '';
175
+ if (toolName === 'Bash') {
176
+ description = `Run command: ${toolInput.command || 'unknown command'}`;
177
+ }
178
+ else if (toolName === 'Write') {
179
+ description = `Create file: ${toolInput.file_path || 'unknown file'}`;
180
+ }
181
+ else if (toolName === 'Edit') {
182
+ description = `Edit file: ${toolInput.file_path || 'unknown file'}`;
183
+ }
184
+ this.pendingPermission = { toolName, toolInput, toolUseId, resolve: resolve };
185
+ // Emit event for voice handler to pick up
186
+ this.emit('permission_request', {
187
+ toolName,
188
+ description,
189
+ toolInput,
190
+ toolUseId,
191
+ });
192
+ // No auto-approve - wait for user response (with 2 minute timeout for safety)
193
+ setTimeout(() => {
194
+ if (this.pendingPermission?.toolUseId === toolUseId) {
195
+ console.log(`⏰ Permission timeout - denying for safety: ${toolName}`);
196
+ this.pendingPermission = null;
197
+ resolve('deny');
198
+ }
199
+ }, 120000); // 2 minute timeout
200
+ });
201
+ }
202
+ /**
203
+ * Respond to a pending permission request
204
+ */
205
+ respondToPermission(response) {
206
+ if (this.pendingPermission) {
207
+ console.log(`📋 Permission response: ${response}`);
208
+ this.pendingPermission.resolve(response);
209
+ this.pendingPermission = null;
210
+ }
211
+ }
212
+ /**
213
+ * Grant permission for pending request (shorthand)
214
+ */
215
+ grantPermission() {
216
+ this.respondToPermission('allow');
217
+ }
218
+ /**
219
+ * Deny permission for pending request (shorthand)
220
+ */
221
+ denyPermission() {
222
+ this.respondToPermission('deny');
223
+ }
224
+ /**
225
+ * Always allow this tool type (shorthand)
226
+ */
227
+ alwaysAllowPermission() {
228
+ this.respondToPermission('always_allow');
229
+ }
230
+ /**
231
+ * Check if there's a pending permission request
232
+ */
233
+ hasPendingPermission() {
234
+ return this.pendingPermission !== null;
235
+ }
236
+ /**
237
+ * Get current pending permission info (for displaying to user)
238
+ */
239
+ getPendingPermission() {
240
+ if (!this.pendingPermission)
241
+ return null;
242
+ const { toolName, toolInput, toolUseId } = this.pendingPermission;
243
+ let description = '';
244
+ if (toolName === 'Bash') {
245
+ description = `Run command: ${toolInput.command || 'unknown command'}`;
246
+ }
247
+ else if (toolName === 'Write') {
248
+ description = `Create file: ${toolInput.file_path || 'unknown file'}`;
249
+ }
250
+ else if (toolName === 'Edit') {
251
+ description = `Edit file: ${toolInput.file_path || 'unknown file'}`;
252
+ }
253
+ return { toolName, description, toolInput, toolUseId };
254
+ }
255
+ /**
256
+ * Get list of always-allowed tools
257
+ */
258
+ getAlwaysAllowedTools() {
259
+ return Array.from(this.alwaysAllowedTools);
260
+ }
261
+ /**
262
+ * Reset always-allowed tools
263
+ */
264
+ resetAlwaysAllowed() {
265
+ this.alwaysAllowedTools.clear();
266
+ console.log('🔄 Reset always-allowed tools');
267
+ }
268
+ getSessionId() {
269
+ return this.sessionId;
270
+ }
271
+ clearSession() {
272
+ this.sessionId = null;
273
+ console.log('🗑️ Session cleared');
274
+ }
275
+ handleMessage(message) {
276
+ switch (message.type) {
277
+ case 'assistant':
278
+ if (message.message?.content) {
279
+ for (const block of message.message.content) {
280
+ if (block.type === 'text') {
281
+ this.emit('text', block.text);
282
+ }
283
+ else if (block.type === 'tool_use') {
284
+ this.emit('tool_use', {
285
+ id: block.id,
286
+ name: block.name,
287
+ input: block.input,
288
+ });
289
+ }
290
+ }
291
+ }
292
+ break;
293
+ case 'tool_result':
294
+ this.emit('tool_result', {
295
+ id: message.tool_use_id,
296
+ content: message.content,
297
+ });
298
+ break;
299
+ case 'system':
300
+ this.emit('system', message);
301
+ break;
302
+ case 'result':
303
+ this.emit('result', message);
304
+ break;
305
+ default:
306
+ this.emit('message', message);
307
+ }
308
+ }
309
+ stop() {
310
+ if (this.abortController) {
311
+ this.abortController.abort();
312
+ this.abortController = null;
313
+ }
314
+ }
315
+ }
316
+ // Quick utility for one-off queries
317
/**
 * One-shot helper: create a throwaway ClaudeHandler rooted at `cwd` and run a
 * single prompt through it.
 *
 * @param prompt The prompt to run.
 * @param cwd Optional working directory; the handler falls back to the
 *   process cwd when it is omitted.
 * @returns The handler's response text.
 */
export async function askClaude(prompt, cwd) {
    return new ClaudeHandler({ workingDirectory: cwd }).run(prompt);
}
@@ -0,0 +1,38 @@
1
+ import { EventEmitter } from 'events';
2
/** Sandbox levels accepted by CodexHandlerOptions. */
type SandboxMode = 'workspace-write' | 'danger-full-access';
/** Approval policies accepted by CodexHandlerOptions. */
type ApprovalPolicy = 'on-failure' | 'on-request' | 'never';
/** Construction options for CodexHandler; every field is optional. */
interface CodexHandlerOptions {
    /** Directory the Codex thread works in. */
    workingDirectory?: string;
    /** Passed to the Codex thread as `skipGitRepoCheck`. */
    skipGitRepoCheck?: boolean;
    /** Requested sandbox level. */
    sandboxMode?: SandboxMode;
    /** Requested approval policy. */
    approvalPolicy?: ApprovalPolicy;
}
10
+ /**
11
+ * Codex Handler using the official OpenAI Codex SDK
12
+ *
13
+ * Features:
14
+ * - Thread persistence (reuses same thread for context)
15
+ * - Built-in coding tools (file operations, terminal commands)
16
+ * - Hooks for observability
17
+ *
18
+ * Requirements:
19
+ * - Codex CLI must be installed (npm install -g @openai/codex or use npx)
20
+ * - Must be authenticated via: codex login (ChatGPT) or OPENAI_API_KEY env var
21
+ *
22
+ * Sandbox/Approval modes are configured via:
23
+ * - ~/.codex/config.yaml for global settings
24
+ * - CLI flags: --sandbox, --approval-policy
25
+ *
26
+ * Note: The SDK spawns the CLI binary, so authentication must be done beforehand.
27
+ */
28
+ export declare class CodexHandler extends EventEmitter {
29
+ private codex;
30
+ private thread;
31
+ private options;
32
+ constructor(options?: CodexHandlerOptions);
33
+ run(prompt: string): Promise<string>;
34
+ getThreadId(): string | null;
35
+ clearThread(): void;
36
+ }
37
+ export declare function askCodex(prompt: string, cwd?: string): Promise<string>;
38
+ export {};
@@ -0,0 +1,83 @@
1
+ import { Codex } from '@openai/codex-sdk';
2
+ import { EventEmitter } from 'events';
3
+ /**
4
+ * Codex Handler using the official OpenAI Codex SDK
5
+ *
6
+ * Features:
7
+ * - Thread persistence (reuses same thread for context)
8
+ * - Built-in coding tools (file operations, terminal commands)
9
+ * - Hooks for observability
10
+ *
11
+ * Requirements:
12
+ * - Codex CLI must be installed (npm install -g @openai/codex or use npx)
13
+ * - Must be authenticated via: codex login (ChatGPT) or OPENAI_API_KEY env var
14
+ *
15
+ * Sandbox/Approval modes are configured via:
16
+ * - ~/.codex/config.yaml for global settings
17
+ * - CLI flags: --sandbox, --approval-policy
18
+ *
19
+ * Note: The SDK spawns the CLI binary, so authentication must be done beforehand.
20
+ */
21
+ export class CodexHandler extends EventEmitter {
22
+ codex;
23
+ thread = null;
24
+ options;
25
+ constructor(options = {}) {
26
+ super();
27
+ this.options = {
28
+ workingDirectory: options.workingDirectory || process.cwd(),
29
+ skipGitRepoCheck: options.skipGitRepoCheck ?? true,
30
+ };
31
+ // Initialize Codex (inherits process env by default)
32
+ this.codex = new Codex();
33
+ }
34
+ async run(prompt) {
35
+ try {
36
+ // Create or reuse thread
37
+ if (!this.thread) {
38
+ console.log('🆕 Starting new Codex thread');
39
+ this.thread = this.codex.startThread({
40
+ workingDirectory: this.options.workingDirectory,
41
+ skipGitRepoCheck: this.options.skipGitRepoCheck,
42
+ });
43
+ }
44
+ else {
45
+ console.log('🔄 Continuing Codex thread');
46
+ }
47
+ console.log(`📁 CWD: ${this.options.workingDirectory}`);
48
+ this.emit('thinking', prompt);
49
+ // Run the prompt and get the Turn result
50
+ const turn = await this.thread.run(prompt);
51
+ // Extract the final response text
52
+ const result = turn.finalResponse || '';
53
+ this.emit('result', result);
54
+ console.log(`✅ Codex done. Length: ${result.length}`);
55
+ // Log tool usage if any
56
+ if (turn.items && turn.items.length > 0) {
57
+ for (const item of turn.items) {
58
+ this.emit('tool_use', item);
59
+ }
60
+ }
61
+ return result || 'Task completed.';
62
+ }
63
+ catch (error) {
64
+ console.error('❌ Codex SDK error:', error);
65
+ this.emit('error', error);
66
+ throw error;
67
+ }
68
+ }
69
+ getThreadId() {
70
+ // Codex threads have IDs but they may not be directly exposed
71
+ // Check the thread object for an id property
72
+ return this.thread?.id || null;
73
+ }
74
+ clearThread() {
75
+ this.thread = null;
76
+ console.log('🗑️ Codex thread cleared');
77
+ }
78
+ }
79
+ // Quick utility for one-off queries
80
/**
 * One-shot helper: create a throwaway CodexHandler rooted at `cwd` and run a
 * single prompt through it.
 *
 * @param prompt The prompt to run.
 * @param cwd Optional working directory; the handler falls back to the
 *   process cwd when it is omitted.
 * @returns The handler's response text.
 */
export async function askCodex(prompt, cwd) {
    return new CodexHandler({ workingDirectory: cwd }).run(prompt);
}
@@ -0,0 +1,15 @@
1
/** Result of classifying one user utterance. */
interface AcknowledgmentResult {
    /** Short reply to give the user straight away. */
    acknowledgment: string;
    /** True when the request was classified as a coding task. */
    isCodingTask: boolean;
    /** Classified intent of the utterance. */
    intent: 'code' | 'chat' | 'question' | 'permission';
}
/**
 * Fast LLM for immediate acknowledgment and intent classification
 * Target: < 200ms response time
 */
export declare function getAcknowledgment(userInput: string): Promise<AcknowledgmentResult>;
/**
 * Summarize Claude's response for voice output
 */
export declare function summarizeForVoice(claudeResponse: string): Promise<string>;
15
+ export {};
@@ -0,0 +1,81 @@
1
+ import Groq from 'groq-sdk';
2
+ const groq = new Groq({
3
+ apiKey: process.env.GROQ_API_KEY,
4
+ });
5
+ /**
6
+ * Fast LLM for immediate acknowledgment and intent classification
7
+ * Target: < 200ms response time
8
+ */
9
+ export async function getAcknowledgment(userInput) {
10
+ const response = await groq.chat.completions.create({
11
+ model: 'llama-3.3-70b-versatile', // Fast model
12
+ messages: [
13
+ {
14
+ role: 'system',
15
+ content: `You are Osborn, a voice-enabled coding assistant. Classify user requests:
16
+
17
+ CODING TASKS (isCodingTask: true) - Anything involving:
18
+ - Files: read, write, create, edit, list, find, search
19
+ - Directories: what directory, current folder, list files, project structure
20
+ - Code: fix bugs, refactor, explain code, run tests
21
+ - Terminal: run commands, install packages, git operations
22
+ - Project: what's in this project, show me files, analyze codebase
23
+
24
+ NOT CODING (isCodingTask: false):
25
+ - General chat: hi, how are you, tell me a joke
26
+ - Off-topic: weather, news, recipes
27
+ - Permission responses: yes, no, approve, deny
28
+
29
+ Respond in JSON:
30
+ {
31
+ "acknowledgment": "brief friendly response",
32
+ "isCodingTask": true/false,
33
+ "intent": "code|chat|question|permission"
34
+ }
35
+
36
+ Examples:
37
+ - "What directory are we in?" → {"acknowledgment": "Let me check", "isCodingTask": true, "intent": "code"}
38
+ - "What files are in this project?" → {"acknowledgment": "I'll list them", "isCodingTask": true, "intent": "code"}
39
+ - "Read package.json" → {"acknowledgment": "On it", "isCodingTask": true, "intent": "code"}
40
+ - "Fix the bug" → {"acknowledgment": "Let me look", "isCodingTask": true, "intent": "code"}
41
+ - "Hello" → {"acknowledgment": "Hi there!", "isCodingTask": false, "intent": "chat"}
42
+ - "Yes, do it" → {"acknowledgment": "Got it", "isCodingTask": false, "intent": "permission"}`,
43
+ },
44
+ {
45
+ role: 'user',
46
+ content: userInput,
47
+ },
48
+ ],
49
+ temperature: 0.3,
50
+ max_tokens: 150,
51
+ response_format: { type: 'json_object' },
52
+ });
53
+ const content = response.choices[0]?.message?.content || '{}';
54
+ return JSON.parse(content);
55
+ }
56
+ /**
57
+ * Summarize Claude's response for voice output
58
+ */
59
+ export async function summarizeForVoice(claudeResponse) {
60
+ if (claudeResponse.length < 200) {
61
+ return claudeResponse;
62
+ }
63
+ const response = await groq.chat.completions.create({
64
+ model: 'llama-3.3-70b-versatile',
65
+ messages: [
66
+ {
67
+ role: 'system',
68
+ content: `Summarize this coding assistant response for voice output.
69
+ Keep it under 2 sentences. Focus on what was done and the result.
70
+ Don't include code - just describe what happened.`,
71
+ },
72
+ {
73
+ role: 'user',
74
+ content: claudeResponse,
75
+ },
76
+ ],
77
+ temperature: 0.3,
78
+ max_tokens: 100,
79
+ });
80
+ return response.choices[0]?.message?.content || claudeResponse;
81
+ }
@@ -0,0 +1,3 @@
1
+ import 'dotenv/config';
2
+ declare const _default: import("@livekit/agents").Agent;
3
+ export default _default;
package/dist/index.js ADDED
@@ -0,0 +1,356 @@
1
+ import { ServerOptions, cli, defineAgent, llm, voice } from '@livekit/agents';
2
+ import * as openai from '@livekit/agents-plugin-openai';
3
+ import * as google from '@livekit/agents-plugin-google';
4
+ import { z } from 'zod';
5
+ import { fileURLToPath } from 'url';
6
+ import 'dotenv/config';
7
+ import { ClaudeHandler } from './claude-handler.js';
8
+ import { CodexHandler } from './codex-handler.js';
9
// Global error handlers to catch silent failures.
// Both handlers only log; neither exits nor rethrows.
// The handler logs the rejection reason and ignores the `promise` argument.
process.on('unhandledRejection', (reason, promise) => {
    console.error('❌ Unhandled Rejection:', reason);
});
// NOTE(review): Node's documentation advises against resuming normal
// operation after 'uncaughtException' - confirm that logging without
// exiting is intended here.
process.on('uncaughtException', (error) => {
    console.error('❌ Uncaught Exception:', error);
});
16
+ // Default provider (can be overridden by participant metadata)
17
+ const DEFAULT_PROVIDER = process.env.LLM_PROVIDER || 'openai';
18
+ // Debug mode
19
+ const DEBUG = process.env.DEBUG_LIVEKIT === 'true';
20
+ if (DEBUG) {
21
+ console.log('🐛 Debug logging enabled');
22
+ }
23
+ console.log(`🤖 Default LLM Provider: ${DEFAULT_PROVIDER}`);
24
+ // Example MCP server configurations (uncomment to enable)
25
+ const MCP_SERVERS = {
26
+ // GitHub integration
27
+ // 'github': {
28
+ // command: 'npx',
29
+ // args: ['@modelcontextprotocol/server-github'],
30
+ // env: { GITHUB_TOKEN: process.env.GITHUB_TOKEN || '' }
31
+ // },
32
+ // Filesystem with specific allowed paths
33
+ // 'filesystem': {
34
+ // command: 'npx',
35
+ // args: ['@modelcontextprotocol/server-filesystem'],
36
+ // env: { ALLOWED_PATHS: '/Users/newupgrade/Desktop/Developer' }
37
+ // },
38
+ };
39
+ // Pre-initialize Claude handler at module load (before any connections)
40
+ console.log('🔥 Pre-initializing Claude Code...');
41
const claude = new ClaudeHandler({
    // Work in the directory named by OSBORN_WORKING_DIRECTORY when set,
    // otherwise in the directory the agent process was started from. The
    // previous value was an absolute path on the original author's machine
    // and does not exist on any other installation.
    workingDirectory: process.env.OSBORN_WORKING_DIRECTORY || process.cwd(),
    permissionMode: 'default', // Ask for permission on dangerous tools (Bash, Write, Edit)
    // Uncomment to enable MCP servers:
    // mcpServers: MCP_SERVERS,
});
47
+ // Listen for permission requests from Claude
48
+ claude.on('permission_request', (req) => {
49
+ console.log(`\n⚠️ PERMISSION REQUIRED ⚠️`);
50
+ console.log(`🔧 Tool: ${req.toolName}`);
51
+ console.log(`📝 Action: ${req.description}`);
52
+ console.log(`⏳ Waiting for user response (say: allow, deny, or always allow)...`);
53
+ // Send to frontend for UI display
54
+ sendToFrontend({
55
+ type: 'permission_request',
56
+ toolName: req.toolName,
57
+ description: req.description,
58
+ });
59
+ });
60
+ // Pre-warm Claude immediately on server start
61
+ claude.run('Respond with just: ready')
62
+ .then(() => console.log('✅ Claude pre-warmed and ready!'))
63
+ .catch((err) => console.log('⚠️ Pre-warm failed:', err.message));
64
+ // Track job context and session for data channel
65
+ let jobContext = null;
66
+ let currentSession = null;
67
+ // Track the current coding handler (can be Claude or Codex)
68
+ let currentCodingAgent = 'claude';
69
+ let codexHandler = null;
70
+ // Helper to send data to frontend
71
+ async function sendToFrontend(data) {
72
+ if (!jobContext)
73
+ return;
74
+ try {
75
+ const encoder = new TextEncoder();
76
+ const payload = encoder.encode(JSON.stringify(data));
77
+ await jobContext.room.localParticipant?.publishData(payload, {
78
+ reliable: true,
79
+ topic: 'osborn-updates',
80
+ });
81
+ }
82
+ catch (err) {
83
+ // Ignore send errors
84
+ }
85
+ }
86
// Define the run_code tool (works with both Claude and Codex).
// The task is routed to the Codex handler only when Codex is selected AND a
// Codex handler exists; in every other case it goes to Claude. The logged
// agent name is derived from that same decision so it always names the
// handler that actually runs. Errors are returned to the voice model as text
// ("Error: ...") instead of being thrown.
const runCodeTool = llm.tool({
    description: `Execute coding tasks using the coding agent. Use for:
- Files: read, write, create, edit, list, search
- Directories: current directory, list contents
- Code: fix bugs, refactor, explain, review
- Terminal: run commands, install packages, git
- Project: analyze codebase, make changes
- Web: search the web for information`,
    parameters: z.object({
        task: z.string().describe('The coding task to execute'),
    }),
    execute: async ({ task }) => {
        const useCodex = currentCodingAgent === 'codex' && Boolean(codexHandler);
        const agentName = useCodex ? 'OpenAI Codex' : 'Claude Code';
        console.log(`\n🔨 ${agentName}: "${task}"`);
        await sendToFrontend({ type: 'system', text: `Working on: ${task}` });
        try {
            const result = useCodex
                ? await codexHandler.run(task)
                : await claude.run(task);
            console.log(`✅ Done: ${result.length} chars`);
            await sendToFrontend({ type: 'assistant_response', text: result });
            return result;
        }
        catch (err) {
            console.error('❌ Error:', err);
            // A non-Error throw has no .message; avoid returning "Error: undefined".
            const reason = err instanceof Error ? err.message : String(err);
            return `Error: ${reason}`;
        }
    },
});
120
// Tool the voice model calls to relay the user's spoken answer
// (allow / deny / always_allow) to the Claude handler's pending permission
// request. Returns a short status sentence for the voice model.
const respondPermissionTool = llm.tool({
    description: `Respond to a pending permission request from Claude Code.
Use this ONLY when there is a pending permission request.
Call this after hearing the user's response to a permission prompt.`,
    parameters: z.object({
        response: z.enum(['allow', 'deny', 'always_allow']).describe('The user response: "allow" for one-time approval, "deny" to reject, "always_allow" to permanently allow this tool type'),
    }),
    execute: async ({ response }) => {
        const nothingPending = !claude.hasPendingPermission();
        if (nothingPending) {
            return 'No pending permission request.';
        }
        // Read the request details before answering; answering clears them.
        const request = claude.getPendingPermission();
        const toolName = request?.toolName;
        claude.respondToPermission(response);
        await sendToFrontend({ type: 'permission_response', response, toolName });
        return `Permission ${response} for ${toolName || 'tool'}.`;
    },
});
142
+ // Agent instructions - dynamically includes available tools
143
+ const OSBORN_INSTRUCTIONS = `You are Osborn, a voice-enabled AI assistant with coding superpowers.
144
+ Keep responses under 50 words. Sound natural and human.
145
+
146
+ AVAILABLE CAPABILITIES via run_code tool:
147
+ - Read, Write, Edit, MultiEdit files
148
+ - Glob (find files by pattern), Grep (search content)
149
+ - Bash (run terminal commands)
150
+ - WebSearch (search the web), WebFetch (fetch URLs)
151
+ - NotebookEdit (edit Jupyter notebooks)
152
+ - Task (delegate complex tasks), TodoWrite (track tasks)
153
+ - LSP (code intelligence - go to definition, find references)
154
+
155
+ WHEN TO USE run_code:
156
+ - File operations (read, write, create, edit, list, find)
157
+ - Code tasks (fix, refactor, explain, review, debug)
158
+ - Terminal commands (run, install, test, build, git)
159
+ - Web searches (look up documentation, APIs, errors)
160
+ - Project analysis (understand codebase, find patterns)
161
+
162
+ WHEN TO RESPOND DIRECTLY:
163
+ - Greetings and small talk
164
+ - General knowledge questions
165
+ - Clarifying what the user wants
166
+
167
+ PERMISSION HANDLING:
168
+ When the coding agent needs permission, you MUST:
169
+ 1. Tell the user: "[Agent] wants to [action]. Allow, deny, or always allow?"
170
+ 2. When they respond, call respond_permission with their choice
171
+
172
+ Be conversational and helpful. Ask follow-up questions when needed.`;
173
// Voice assistant with tools
/**
 * The voice agent: uses OSBORN_INSTRUCTIONS as its instructions and exposes
 * two tools to the model, `run_code` and `respond_permission`.
 */
class OsbornAssistant extends voice.Agent {
    constructor() {
        super({
            instructions: OSBORN_INSTRUCTIONS,
            // The keys are the tool names the model sees.
            tools: {
                run_code: runCodeTool,
                respond_permission: respondPermissionTool,
            },
        });
    }
}
185
/**
 * Create the realtime voice model for the requested provider.
 *
 * `'gemini'` selects the Gemini Live model; every other value selects the
 * OpenAI Realtime model. Whether the matching API key environment variable
 * is present is logged, but never the key itself.
 *
 * @param provider Provider name chosen by the user.
 * @returns The constructed realtime model instance.
 */
function createModel(provider) {
    // Report only the presence of a secret.
    const keyStatus = (value) => (value ? 'set' : 'NOT SET');

    if (provider !== 'gemini') {
        console.log('📱 Using OpenAI Realtime API');
        console.log('🔑 OPENAI_API_KEY:', keyStatus(process.env.OPENAI_API_KEY));
        const openaiModel = new openai.realtime.RealtimeModel({ voice: 'alloy' });
        console.log('✅ OpenAI model created');
        return openaiModel;
    }

    console.log('📱 Using Gemini Live API');
    console.log('🔑 GOOGLE_API_KEY:', keyStatus(process.env.GOOGLE_API_KEY));
    // From official docs: https://docs.livekit.io/agents/models/realtime/plugins/gemini/
    // Package v1.0.31 uses google.beta.realtime (not google.realtime yet)
    const geminiOptions = {
        model: 'gemini-2.5-flash-native-audio-preview-12-2025', // From official docs
        voice: 'Puck',
        instructions: OSBORN_INSTRUCTIONS,
    };
    const geminiModel = new google.beta.realtime.RealtimeModel(geminiOptions);
    console.log('✅ Gemini model created with gemini-2.5-flash-native-audio-preview-12-2025');
    return geminiModel;
}
210
+ // Helper to get provider from participant metadata
211
+ function getProviderFromParticipant(metadata) {
212
+ if (!metadata)
213
+ return DEFAULT_PROVIDER;
214
+ try {
215
+ const data = JSON.parse(metadata);
216
+ return data.provider || DEFAULT_PROVIDER;
217
+ }
218
+ catch {
219
+ return DEFAULT_PROVIDER;
220
+ }
221
+ }
222
/**
 * Read the coding agent selected by a participant from its metadata.
 *
 * The metadata is expected to be a JSON string with an optional
 * `codingAgent` field. `'claude'` is returned when the metadata is missing,
 * is not valid JSON, or does not carry a non-empty string `codingAgent`.
 *
 * @param metadata Raw participant metadata (JSON string), possibly empty.
 * @returns The coding agent name, or `'claude'` as a fallback.
 */
function getCodingAgentFromParticipant(metadata) {
    const fallbackAgent = 'claude';
    if (!metadata)
        return fallbackAgent;
    try {
        const data = JSON.parse(metadata);
        const agentName = data.codingAgent;
        // Metadata is client-controlled: ignore anything that is not a
        // non-empty string instead of passing it on as the agent name.
        if (typeof agentName === 'string' && agentName.length > 0) {
            return agentName;
        }
        return fallbackAgent;
    }
    catch {
        // Malformed JSON, or JSON that parsed to null.
        return fallbackAgent;
    }
}
234
export default defineAgent({
    /**
     * Job entry point, invoked with the job context for a room.
     *
     * Sequence:
     *  1. Attach verbose logging to the Claude handler.
     *  2. Connect to the room and wait for a participant.
     *  3. Read the provider / coding agent choices from participant metadata.
     *  4. Create the realtime model and the voice session.
     *  5. Wire session, room and data-channel listeners.
     *  6. Start the session with the Osborn voice agent.
     *
     * @param ctx Job context providing the room and connection helpers.
     */
    entry: async (ctx) => {
        console.log('🚀 Agent starting for room:', ctx.room.name);
        jobContext = ctx;
        // Claude verbose logging
        claude.on('tool_use', (tool) => {
            console.log(`\n🔧 Claude Tool Started: ${tool.name}`);
            if (tool.input) {
                // Compare against the full serialized length so the ellipsis
                // is only shown when the preview was actually truncated.
                const serializedInput = JSON.stringify(tool.input) || '';
                const preview = serializedInput.substring(0, 200);
                console.log(` Input: ${preview}${serializedInput.length > 200 ? '...' : ''}`);
            }
        });
        claude.on('tool_result', (result) => {
            console.log(`✅ Claude Tool Completed: ${result.name || 'unknown'}`);
        });
        claude.on('text', (text) => {
            if (text.length > 0) {
                console.log(`💬 Claude says: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`);
            }
        });
        claude.on('error', (err) => {
            console.error(`❌ Claude Error:`, err);
        });
        // Connect FIRST so we can wait for participants
        console.log('📡 Connecting to room...');
        await ctx.connect();
        console.log('✅ Connected to room');
        // Wait for a participant to join using LiveKit's built-in method
        console.log('⏳ Waiting for participant...');
        const participant = await ctx.waitForParticipant();
        console.log('👤 Participant joined:', participant.identity);
        console.log('📋 Participant metadata:', participant.metadata);
        const provider = getProviderFromParticipant(participant.metadata);
        const codingAgent = getCodingAgentFromParticipant(participant.metadata);
        console.log(`🎯 User selected provider: ${provider}`);
        console.log(`🔧 User selected coding agent: ${codingAgent}`);
        // Set the current coding agent and initialize if needed
        currentCodingAgent = codingAgent;
        if (codingAgent === 'codex') {
            console.log('🔧 Initializing Codex handler...');
            codexHandler = new CodexHandler({
                // Operate on the directory the agent was launched from rather
                // than a machine-specific absolute path.
                workingDirectory: process.cwd(),
            });
            console.log('✅ Codex handler ready');
        }
        // Create model based on user's choice
        const model = createModel(provider);
        const session = new voice.AgentSession({
            llm: model,
        });
        currentSession = session;
        // Add session event listeners for debugging
        // Using string literals as AgentSessionEventTypes is not directly exported
        session.on('user_state_changed', (ev) => {
            console.log(`👤 User state: ${ev.oldState} → ${ev.newState}`);
        });
        session.on('agent_state_changed', (ev) => {
            console.log(`🤖 Agent state: ${ev.oldState} → ${ev.newState}`);
        });
        session.on('user_input_transcribed', (ev) => {
            console.log(`📝 Transcribed: "${ev.transcript}" (final: ${ev.isFinal})`);
        });
        session.on('error', (ev) => {
            console.error('❌ Session error:', ev.error);
        });
        session.on('close', (ev) => {
            console.log('🚪 Session closed:', ev.reason);
        });
        ctx.room.on('trackSubscribed', (track, publication, p) => {
            console.log(`📥 Track subscribed: ${track.kind} from ${p.identity}`);
        });
        // Listen for data channel messages from frontend
        ctx.room.on('dataReceived', async (payload, _sender, _kind, topic) => {
            if (topic !== 'user-input') {
                return;
            }
            // Parse in isolation so that only "payload is not JSON" is
            // ignored; errors raised while handling the message are reported.
            let data;
            try {
                data = JSON.parse(new TextDecoder().decode(payload));
            }
            catch {
                // Not JSON, ignore
                return;
            }
            console.log(`📨 Received from frontend:`, data);
            if (!data || typeof data !== 'object') {
                // Valid JSON but not a message object (null, number, ...).
                return;
            }
            try {
                if (data.type === 'permission_response') {
                    // Handle permission response from UI
                    if (claude.hasPendingPermission()) {
                        claude.respondToPermission(data.response);
                        console.log(`✅ Permission ${data.response} from UI`);
                    }
                }
                else if (data.type === 'user_text') {
                    // Handle text input from frontend
                    console.log(`📝 Text input: "${data.content}"`);
                    // Inject text into the session as user input
                    if (currentSession) {
                        try {
                            // Interrupt any current speech first
                            currentSession.interrupt();
                            // Generate a reply to the text input
                            await currentSession.generateReply({
                                userInput: data.content,
                            });
                            console.log(`✅ Injected text to session`);
                        }
                        catch (err) {
                            console.error(`❌ Failed to inject text:`, err);
                        }
                    }
                }
            }
            catch (err) {
                // This handler is async: log instead of letting the failure
                // become an unhandled rejection or vanish silently.
                console.error('❌ Failed to handle frontend message:', err);
            }
        });
        // Create the agent
        const agent = new OsbornAssistant();
        // Start session
        console.log('🎬 Starting voice session...');
        const startTime = Date.now();
        await session.start({
            agent,
            room: ctx.room,
        });
        console.log(`✅ Session started in ${Date.now() - startTime}ms with ${provider.toUpperCase()} + Claude tools`);
        console.log('🎤 Ready for voice input! Speak to start.');
    },
});
356
// Launch the agents CLI, passing this module's own file path as the agent.
const agentModulePath = fileURLToPath(import.meta.url);
cli.runApp(new ServerOptions({ agent: agentModulePath }));
package/package.json ADDED
@@ -0,0 +1,46 @@
1
+ {
2
+ "name": "osborn",
3
+ "version": "0.1.0",
4
+ "description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
5
+ "type": "module",
6
+ "bin": {
7
+ "osborn": "./bin/cli.js"
8
+ },
9
+ "scripts": {
10
+ "dev": "tsx src/index.ts dev",
11
+ "start": "tsx src/index.ts start",
12
+ "build": "tsc"
13
+ },
14
+ "keywords": [
15
+ "voice",
16
+ "ai",
17
+ "coding",
18
+ "assistant",
19
+ "claude",
20
+ "livekit"
21
+ ],
22
+ "repository": {
23
+ "type": "git",
24
+ "url": "https://github.com/Erriccc/osborn.git"
25
+ },
26
+ "author": "Osborn Ojure",
27
+ "license": "MIT",
28
+ "dependencies": {
29
+ "@anthropic-ai/claude-agent-sdk": "^0.1.74",
30
+ "@livekit/agents": "^1.0.0",
31
+ "@livekit/agents-plugin-google": "^1.0.0",
32
+ "@livekit/agents-plugin-openai": "^1.0.0",
33
+ "@openai/codex-sdk": "^0.77.0",
34
+ "dotenv": "^16.4.0",
35
+ "tsx": "^4.0.0",
36
+ "yaml": "^2.3.0",
37
+ "zod": "^3.23.0"
38
+ },
39
+ "devDependencies": {
40
+ "@types/node": "^20.0.0",
41
+ "typescript": "^5.0.0"
42
+ },
43
+ "engines": {
44
+ "node": ">=18.0.0"
45
+ }
46
+ }