osborn 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.env.example +14 -0
- package/bin/cli.js +70 -0
- package/dist/claude-handler.d.ts +78 -0
- package/dist/claude-handler.js +320 -0
- package/dist/codex-handler.d.ts +38 -0
- package/dist/codex-handler.js +83 -0
- package/dist/fast-llm.d.ts +15 -0
- package/dist/fast-llm.js +81 -0
- package/dist/index.d.ts +3 -0
- package/dist/index.js +356 -0
- package/package.json +46 -0
package/.env.example
ADDED
|
@@ -0,0 +1,14 @@
|
|
|
1
|
+
# LLM Provider: 'openai' or 'gemini'
|
|
2
|
+
LLM_PROVIDER=openai
|
|
3
|
+
|
|
4
|
+
# LiveKit
|
|
5
|
+
LIVEKIT_URL=wss://your-project.livekit.cloud
|
|
6
|
+
LIVEKIT_API_KEY=your-api-key
|
|
7
|
+
LIVEKIT_API_SECRET=your-api-secret
|
|
8
|
+
|
|
9
|
+
# OpenAI (for Realtime API)
|
|
10
|
+
OPENAI_API_KEY=sk-...
|
|
11
|
+
|
|
12
|
+
# Google AI (for Gemini Live - FREE during preview!)
|
|
13
|
+
# Get your key at: https://aistudio.google.com/apikey
|
|
14
|
+
GOOGLE_API_KEY=AIzaSy...
|
package/bin/cli.js
ADDED
|
@@ -0,0 +1,70 @@
|
|
|
1
|
+
#!/usr/bin/env node
|
|
2
|
+
|
|
3
|
+
import { spawn } from 'child_process'
import { existsSync } from 'fs'
import { dirname, join } from 'path'
import { fileURLToPath } from 'url'
|
|
6
|
+
|
|
7
|
+
const __filename = fileURLToPath(import.meta.url)
|
|
8
|
+
const __dirname = dirname(__filename)
|
|
9
|
+
|
|
10
|
+
// Everything after `node <script>` is forwarded to the agent process.
const args = process.argv.slice(2)

// Print the usage text and stop when either help flag is present.
const helpRequested = ['--help', '-h'].some((flag) => args.includes(flag))
if (helpRequested) {
  const usage = `
Osborn Agent - Voice AI Coding Assistant

Usage:
osborn-agent --room <code> Connect to a specific room
osborn-agent dev Run in development mode
osborn-agent start Run in production mode

Options:
--room <code> Room code from the Osborn web interface
--help, -h Show this help message

Environment Variables:
LIVEKIT_URL LiveKit server URL
LIVEKIT_API_KEY LiveKit API key
LIVEKIT_API_SECRET LiveKit API secret
OPENAI_API_KEY OpenAI API key (for voice)
ANTHROPIC_API_KEY Anthropic API key (for Claude Code)
GOOGLE_API_KEY Google API key (for Gemini)

Config File:
~/.osborn/config.yaml MCP servers and settings

Example:
osborn-agent --room abc123
`
  console.log(usage)
  process.exit(0)
}
|
|
43
|
+
|
|
44
|
+
// Locate the agent entry point.
// A source checkout has src/index.ts and a package-local tsx binary, so it is
// run through tsx exactly as before. A registry install ships only the
// compiled dist/ output, so in that case run dist/index.js with the current
// Node executable instead of failing on a missing src/ or tsx.
const packageRoot = join(__dirname, '..')
const agentPath = join(packageRoot, 'src', 'index.ts')
const tsxPath = join(packageRoot, 'node_modules', '.bin', 'tsx')
const distPath = join(packageRoot, 'dist', 'index.js')
const runFromSource = existsSync(agentPath) && existsSync(tsxPath)
const command = runFromSource ? tsxPath : process.execPath
const entryPoint = runFromSource ? agentPath : distPath

// Determine mode (default to 'dev' if no mode specified).
// The mode keyword is removed from the forwarded args because it is passed
// explicitly as the first argument to the agent.
let mode = 'dev'
if (args.includes('start')) {
  mode = 'start'
  args.splice(args.indexOf('start'), 1)
} else if (args.includes('dev')) {
  args.splice(args.indexOf('dev'), 1)
}

const child = spawn(command, [entryPoint, mode, ...args], {
  stdio: 'inherit',
  cwd: packageRoot,
  env: process.env,
})

// Spawn failures (e.g. the executable does not exist) surface here.
child.on('error', (err) => {
  console.error('Failed to start agent:', err.message)
  process.exit(1)
})

// Mirror the child's exit status. When the child is killed by a signal its
// exit code is null; report that as a failure rather than as success.
child.on('exit', (code, signal) => {
  if (signal) {
    console.error(`Agent terminated by signal ${signal}`)
    process.exit(1)
  }
  process.exit(code ?? 0)
})
|
package/dist/claude-handler.d.ts
ADDED
|
@@ -0,0 +1,78 @@
|
|
|
1
|
+
import { type McpServerConfig } from '@anthropic-ai/claude-agent-sdk';
|
|
2
|
+
import { EventEmitter } from 'events';
|
|
3
|
+
interface ClaudeHandlerOptions {
|
|
4
|
+
workingDirectory?: string;
|
|
5
|
+
allowedTools?: string[];
|
|
6
|
+
permissionMode?: 'default' | 'acceptEdits' | 'bypassPermissions';
|
|
7
|
+
mcpServers?: Record<string, McpServerConfig>;
|
|
8
|
+
}
|
|
9
|
+
export type { McpServerConfig };
|
|
10
|
+
export interface PermissionRequestEvent {
|
|
11
|
+
toolName: string;
|
|
12
|
+
description: string;
|
|
13
|
+
toolInput: any;
|
|
14
|
+
toolUseId: string;
|
|
15
|
+
}
|
|
16
|
+
/**
|
|
17
|
+
* Claude Handler using the official Claude Agent SDK
|
|
18
|
+
*
|
|
19
|
+
* Features:
|
|
20
|
+
* - Session persistence (reuses same session for context)
|
|
21
|
+
* - Built-in tools (Read, Write, Edit, Bash, Glob, Grep, WebSearch, WebFetch)
|
|
22
|
+
* - Permission interception for voice approval
|
|
23
|
+
* - Hooks for observability
|
|
24
|
+
*/
|
|
25
|
+
export type PermissionResponse = 'allow' | 'deny' | 'always_allow';
|
|
26
|
+
export declare class ClaudeHandler extends EventEmitter {
|
|
27
|
+
private options;
|
|
28
|
+
private abortController;
|
|
29
|
+
private sessionId;
|
|
30
|
+
private pendingPermission;
|
|
31
|
+
private dangerousTools;
|
|
32
|
+
private alwaysAllowedTools;
|
|
33
|
+
private static readonly ALL_TOOLS;
|
|
34
|
+
constructor(options?: ClaudeHandlerOptions);
|
|
35
|
+
run(prompt: string): Promise<string>;
|
|
36
|
+
/**
|
|
37
|
+
* Request permission from user via event emission
|
|
38
|
+
* Returns a promise that resolves when user responds with allow/deny/always_allow
|
|
39
|
+
*/
|
|
40
|
+
private requestPermission;
|
|
41
|
+
/**
|
|
42
|
+
* Respond to a pending permission request
|
|
43
|
+
*/
|
|
44
|
+
respondToPermission(response: PermissionResponse): void;
|
|
45
|
+
/**
|
|
46
|
+
* Grant permission for pending request (shorthand)
|
|
47
|
+
*/
|
|
48
|
+
grantPermission(): void;
|
|
49
|
+
/**
|
|
50
|
+
* Deny permission for pending request (shorthand)
|
|
51
|
+
*/
|
|
52
|
+
denyPermission(): void;
|
|
53
|
+
/**
|
|
54
|
+
* Always allow this tool type (shorthand)
|
|
55
|
+
*/
|
|
56
|
+
alwaysAllowPermission(): void;
|
|
57
|
+
/**
|
|
58
|
+
* Check if there's a pending permission request
|
|
59
|
+
*/
|
|
60
|
+
hasPendingPermission(): boolean;
|
|
61
|
+
/**
|
|
62
|
+
* Get current pending permission info (for displaying to user)
|
|
63
|
+
*/
|
|
64
|
+
getPendingPermission(): PermissionRequestEvent | null;
|
|
65
|
+
/**
|
|
66
|
+
* Get list of always-allowed tools
|
|
67
|
+
*/
|
|
68
|
+
getAlwaysAllowedTools(): string[];
|
|
69
|
+
/**
|
|
70
|
+
* Reset always-allowed tools
|
|
71
|
+
*/
|
|
72
|
+
resetAlwaysAllowed(): void;
|
|
73
|
+
getSessionId(): string | null;
|
|
74
|
+
clearSession(): void;
|
|
75
|
+
private handleMessage;
|
|
76
|
+
stop(): void;
|
|
77
|
+
}
|
|
78
|
+
export declare function askClaude(prompt: string, cwd?: string): Promise<string>;
|
package/dist/claude-handler.js
ADDED
|
@@ -0,0 +1,320 @@
|
|
|
1
|
+
import { query } from '@anthropic-ai/claude-agent-sdk';
|
|
2
|
+
import { EventEmitter } from 'events';
|
|
3
|
+
export class ClaudeHandler extends EventEmitter {
|
|
4
|
+
options;
|
|
5
|
+
abortController = null;
|
|
6
|
+
sessionId = null;
|
|
7
|
+
pendingPermission = null;
|
|
8
|
+
// Tools that require permission
|
|
9
|
+
dangerousTools = ['Bash', 'Write', 'Edit'];
|
|
10
|
+
// Tools the user has permanently approved
|
|
11
|
+
alwaysAllowedTools = new Set();
|
|
12
|
+
// All available Claude Agent SDK tools
|
|
13
|
+
static ALL_TOOLS = [
|
|
14
|
+
// File operations
|
|
15
|
+
'Read', 'Write', 'Edit', 'MultiEdit',
|
|
16
|
+
// Search
|
|
17
|
+
'Glob', 'Grep',
|
|
18
|
+
// Terminal
|
|
19
|
+
'Bash',
|
|
20
|
+
// Web
|
|
21
|
+
'WebSearch', 'WebFetch',
|
|
22
|
+
// Notebooks
|
|
23
|
+
'NotebookEdit',
|
|
24
|
+
// Task management
|
|
25
|
+
'Task', 'TodoWrite',
|
|
26
|
+
// LSP (Language Server Protocol)
|
|
27
|
+
'LSP',
|
|
28
|
+
];
|
|
29
|
+
constructor(options = {}) {
|
|
30
|
+
super();
|
|
31
|
+
this.options = {
|
|
32
|
+
workingDirectory: options.workingDirectory || process.cwd(),
|
|
33
|
+
allowedTools: options.allowedTools || ClaudeHandler.ALL_TOOLS,
|
|
34
|
+
permissionMode: options.permissionMode || 'default',
|
|
35
|
+
mcpServers: options.mcpServers,
|
|
36
|
+
};
|
|
37
|
+
console.log(`🔧 Allowed tools: ${this.options.allowedTools?.join(', ')}`);
|
|
38
|
+
if (this.options.mcpServers) {
|
|
39
|
+
console.log(`🔌 MCP servers: ${Object.keys(this.options.mcpServers).join(', ')}`);
|
|
40
|
+
}
|
|
41
|
+
}
|
|
42
|
+
async run(prompt) {
|
|
43
|
+
this.abortController = new AbortController();
|
|
44
|
+
let fullResponse = '';
|
|
45
|
+
try {
|
|
46
|
+
// Log session state
|
|
47
|
+
if (this.sessionId) {
|
|
48
|
+
console.log(`🔄 RESUMING session: ${this.sessionId}`);
|
|
49
|
+
}
|
|
50
|
+
else {
|
|
51
|
+
console.log('🆕 STARTING new session');
|
|
52
|
+
}
|
|
53
|
+
console.log(`📁 CWD: ${this.options.workingDirectory}`);
|
|
54
|
+
console.log(`🔑 Mode: ${this.options.permissionMode}`);
|
|
55
|
+
// Build SDK options with session resume
|
|
56
|
+
const sdkOptions = {
|
|
57
|
+
allowedTools: this.options.allowedTools,
|
|
58
|
+
cwd: this.options.workingDirectory,
|
|
59
|
+
permissionMode: this.options.permissionMode,
|
|
60
|
+
abortController: this.abortController || undefined,
|
|
61
|
+
// CRITICAL: Resume existing session for context continuity
|
|
62
|
+
...(this.sessionId && { resume: this.sessionId }),
|
|
63
|
+
// MCP servers configuration
|
|
64
|
+
...(this.options.mcpServers && { mcpServers: this.options.mcpServers }),
|
|
65
|
+
hooks: {
|
|
66
|
+
PreToolUse: [{
|
|
67
|
+
matcher: '.*',
|
|
68
|
+
hooks: [async (input, toolUseId) => {
|
|
69
|
+
const toolName = input?.tool_name || 'unknown';
|
|
70
|
+
const toolInput = input?.tool_input || {};
|
|
71
|
+
console.log(`🔧 Tool: ${toolName}`);
|
|
72
|
+
this.emit('tool_use', { name: toolName, input: toolInput });
|
|
73
|
+
// Check if this tool needs permission
|
|
74
|
+
if (this.dangerousTools.includes(toolName) && this.options.permissionMode === 'default') {
|
|
75
|
+
// Skip if user has permanently approved this tool
|
|
76
|
+
if (this.alwaysAllowedTools.has(toolName)) {
|
|
77
|
+
console.log(`✅ Auto-approved (always allow): ${toolName}`);
|
|
78
|
+
}
|
|
79
|
+
else {
|
|
80
|
+
console.log(`⚠️ Permission required for: ${toolName}`);
|
|
81
|
+
// Emit permission request and wait for approval
|
|
82
|
+
const response = await this.requestPermission(toolName, toolInput, toolUseId || 'unknown');
|
|
83
|
+
if (response === 'deny') {
|
|
84
|
+
console.log(`❌ Permission denied for: ${toolName}`);
|
|
85
|
+
return {
|
|
86
|
+
decision: 'block',
|
|
87
|
+
reason: 'User denied permission for this operation'
|
|
88
|
+
};
|
|
89
|
+
}
|
|
90
|
+
if (response === 'always_allow') {
|
|
91
|
+
this.alwaysAllowedTools.add(toolName);
|
|
92
|
+
console.log(`✅ Permission granted (always allow): ${toolName}`);
|
|
93
|
+
}
|
|
94
|
+
else {
|
|
95
|
+
console.log(`✅ Permission granted for: ${toolName}`);
|
|
96
|
+
}
|
|
97
|
+
}
|
|
98
|
+
}
|
|
99
|
+
return {}; // Allow tool to proceed
|
|
100
|
+
}]
|
|
101
|
+
}],
|
|
102
|
+
PostToolUse: [{
|
|
103
|
+
matcher: '.*',
|
|
104
|
+
hooks: [async (input) => {
|
|
105
|
+
const toolName = input?.tool_name || 'unknown';
|
|
106
|
+
console.log(`✅ Completed: ${toolName}`);
|
|
107
|
+
this.emit('tool_result', { name: toolName });
|
|
108
|
+
return {};
|
|
109
|
+
}]
|
|
110
|
+
}]
|
|
111
|
+
}
|
|
112
|
+
};
|
|
113
|
+
// Run the query
|
|
114
|
+
for await (const message of query({
|
|
115
|
+
prompt,
|
|
116
|
+
options: sdkOptions,
|
|
117
|
+
})) {
|
|
118
|
+
this.handleMessage(message);
|
|
119
|
+
// Capture session ID from init
|
|
120
|
+
if (message.type === 'system' && message.subtype === 'init') {
|
|
121
|
+
const newSessionId = message.session_id;
|
|
122
|
+
if (newSessionId) {
|
|
123
|
+
if (!this.sessionId) {
|
|
124
|
+
this.sessionId = newSessionId;
|
|
125
|
+
console.log(`📋 Session CREATED: ${this.sessionId}`);
|
|
126
|
+
}
|
|
127
|
+
else if (this.sessionId === newSessionId) {
|
|
128
|
+
console.log(`📋 Session CONTINUED: ${this.sessionId}`);
|
|
129
|
+
}
|
|
130
|
+
else {
|
|
131
|
+
// Session ID changed - SDK created new session despite resume
|
|
132
|
+
console.log(`⚠️ Session CHANGED: ${this.sessionId} → ${newSessionId}`);
|
|
133
|
+
this.sessionId = newSessionId;
|
|
134
|
+
}
|
|
135
|
+
}
|
|
136
|
+
}
|
|
137
|
+
// Collect text from assistant
|
|
138
|
+
if (message.type === 'assistant' && message.message?.content) {
|
|
139
|
+
for (const block of message.message.content) {
|
|
140
|
+
if (block.type === 'text') {
|
|
141
|
+
fullResponse += block.text;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
}
|
|
145
|
+
// Final result
|
|
146
|
+
if (message.type === 'result') {
|
|
147
|
+
console.log(`📋 Result: ${message.subtype}`);
|
|
148
|
+
if (message.result && !fullResponse) {
|
|
149
|
+
fullResponse = message.result;
|
|
150
|
+
}
|
|
151
|
+
}
|
|
152
|
+
}
|
|
153
|
+
console.log(`✅ Done. Length: ${fullResponse.length}`);
|
|
154
|
+
return fullResponse || 'Task completed.';
|
|
155
|
+
}
|
|
156
|
+
catch (error) {
|
|
157
|
+
if (error.name === 'AbortError') {
|
|
158
|
+
this.emit('aborted');
|
|
159
|
+
return 'Task was cancelled.';
|
|
160
|
+
}
|
|
161
|
+
console.error('❌ SDK error:', error);
|
|
162
|
+
this.emit('error', error);
|
|
163
|
+
// Don't clear session on error - might be transient
|
|
164
|
+
throw error;
|
|
165
|
+
}
|
|
166
|
+
}
|
|
167
|
+
/**
|
|
168
|
+
* Request permission from user via event emission
|
|
169
|
+
* Returns a promise that resolves when user responds with allow/deny/always_allow
|
|
170
|
+
*/
|
|
171
|
+
requestPermission(toolName, toolInput, toolUseId) {
|
|
172
|
+
return new Promise((resolve) => {
|
|
173
|
+
// Format the permission request message
|
|
174
|
+
let description = '';
|
|
175
|
+
if (toolName === 'Bash') {
|
|
176
|
+
description = `Run command: ${toolInput.command || 'unknown command'}`;
|
|
177
|
+
}
|
|
178
|
+
else if (toolName === 'Write') {
|
|
179
|
+
description = `Create file: ${toolInput.file_path || 'unknown file'}`;
|
|
180
|
+
}
|
|
181
|
+
else if (toolName === 'Edit') {
|
|
182
|
+
description = `Edit file: ${toolInput.file_path || 'unknown file'}`;
|
|
183
|
+
}
|
|
184
|
+
this.pendingPermission = { toolName, toolInput, toolUseId, resolve: resolve };
|
|
185
|
+
// Emit event for voice handler to pick up
|
|
186
|
+
this.emit('permission_request', {
|
|
187
|
+
toolName,
|
|
188
|
+
description,
|
|
189
|
+
toolInput,
|
|
190
|
+
toolUseId,
|
|
191
|
+
});
|
|
192
|
+
// No auto-approve - wait for user response (with 2 minute timeout for safety)
|
|
193
|
+
setTimeout(() => {
|
|
194
|
+
if (this.pendingPermission?.toolUseId === toolUseId) {
|
|
195
|
+
console.log(`⏰ Permission timeout - denying for safety: ${toolName}`);
|
|
196
|
+
this.pendingPermission = null;
|
|
197
|
+
resolve('deny');
|
|
198
|
+
}
|
|
199
|
+
}, 120000); // 2 minute timeout
|
|
200
|
+
});
|
|
201
|
+
}
|
|
202
|
+
/**
|
|
203
|
+
* Respond to a pending permission request
|
|
204
|
+
*/
|
|
205
|
+
respondToPermission(response) {
|
|
206
|
+
if (this.pendingPermission) {
|
|
207
|
+
console.log(`📋 Permission response: ${response}`);
|
|
208
|
+
this.pendingPermission.resolve(response);
|
|
209
|
+
this.pendingPermission = null;
|
|
210
|
+
}
|
|
211
|
+
}
|
|
212
|
+
/**
|
|
213
|
+
* Grant permission for pending request (shorthand)
|
|
214
|
+
*/
|
|
215
|
+
grantPermission() {
|
|
216
|
+
this.respondToPermission('allow');
|
|
217
|
+
}
|
|
218
|
+
/**
|
|
219
|
+
* Deny permission for pending request (shorthand)
|
|
220
|
+
*/
|
|
221
|
+
denyPermission() {
|
|
222
|
+
this.respondToPermission('deny');
|
|
223
|
+
}
|
|
224
|
+
/**
|
|
225
|
+
* Always allow this tool type (shorthand)
|
|
226
|
+
*/
|
|
227
|
+
alwaysAllowPermission() {
|
|
228
|
+
this.respondToPermission('always_allow');
|
|
229
|
+
}
|
|
230
|
+
/**
|
|
231
|
+
* Check if there's a pending permission request
|
|
232
|
+
*/
|
|
233
|
+
hasPendingPermission() {
|
|
234
|
+
return this.pendingPermission !== null;
|
|
235
|
+
}
|
|
236
|
+
/**
|
|
237
|
+
* Get current pending permission info (for displaying to user)
|
|
238
|
+
*/
|
|
239
|
+
getPendingPermission() {
|
|
240
|
+
if (!this.pendingPermission)
|
|
241
|
+
return null;
|
|
242
|
+
const { toolName, toolInput, toolUseId } = this.pendingPermission;
|
|
243
|
+
let description = '';
|
|
244
|
+
if (toolName === 'Bash') {
|
|
245
|
+
description = `Run command: ${toolInput.command || 'unknown command'}`;
|
|
246
|
+
}
|
|
247
|
+
else if (toolName === 'Write') {
|
|
248
|
+
description = `Create file: ${toolInput.file_path || 'unknown file'}`;
|
|
249
|
+
}
|
|
250
|
+
else if (toolName === 'Edit') {
|
|
251
|
+
description = `Edit file: ${toolInput.file_path || 'unknown file'}`;
|
|
252
|
+
}
|
|
253
|
+
return { toolName, description, toolInput, toolUseId };
|
|
254
|
+
}
|
|
255
|
+
/**
|
|
256
|
+
* Get list of always-allowed tools
|
|
257
|
+
*/
|
|
258
|
+
getAlwaysAllowedTools() {
|
|
259
|
+
return Array.from(this.alwaysAllowedTools);
|
|
260
|
+
}
|
|
261
|
+
/**
|
|
262
|
+
* Reset always-allowed tools
|
|
263
|
+
*/
|
|
264
|
+
resetAlwaysAllowed() {
|
|
265
|
+
this.alwaysAllowedTools.clear();
|
|
266
|
+
console.log('🔄 Reset always-allowed tools');
|
|
267
|
+
}
|
|
268
|
+
getSessionId() {
|
|
269
|
+
return this.sessionId;
|
|
270
|
+
}
|
|
271
|
+
clearSession() {
|
|
272
|
+
this.sessionId = null;
|
|
273
|
+
console.log('🗑️ Session cleared');
|
|
274
|
+
}
|
|
275
|
+
handleMessage(message) {
|
|
276
|
+
switch (message.type) {
|
|
277
|
+
case 'assistant':
|
|
278
|
+
if (message.message?.content) {
|
|
279
|
+
for (const block of message.message.content) {
|
|
280
|
+
if (block.type === 'text') {
|
|
281
|
+
this.emit('text', block.text);
|
|
282
|
+
}
|
|
283
|
+
else if (block.type === 'tool_use') {
|
|
284
|
+
this.emit('tool_use', {
|
|
285
|
+
id: block.id,
|
|
286
|
+
name: block.name,
|
|
287
|
+
input: block.input,
|
|
288
|
+
});
|
|
289
|
+
}
|
|
290
|
+
}
|
|
291
|
+
}
|
|
292
|
+
break;
|
|
293
|
+
case 'tool_result':
|
|
294
|
+
this.emit('tool_result', {
|
|
295
|
+
id: message.tool_use_id,
|
|
296
|
+
content: message.content,
|
|
297
|
+
});
|
|
298
|
+
break;
|
|
299
|
+
case 'system':
|
|
300
|
+
this.emit('system', message);
|
|
301
|
+
break;
|
|
302
|
+
case 'result':
|
|
303
|
+
this.emit('result', message);
|
|
304
|
+
break;
|
|
305
|
+
default:
|
|
306
|
+
this.emit('message', message);
|
|
307
|
+
}
|
|
308
|
+
}
|
|
309
|
+
stop() {
|
|
310
|
+
if (this.abortController) {
|
|
311
|
+
this.abortController.abort();
|
|
312
|
+
this.abortController = null;
|
|
313
|
+
}
|
|
314
|
+
}
|
|
315
|
+
}
|
|
316
|
+
/**
 * One-shot convenience wrapper: builds a throwaway ClaudeHandler rooted at
 * `cwd` and resolves with the result of running `prompt` on it.
 */
export async function askClaude(prompt, cwd) {
    return new ClaudeHandler({ workingDirectory: cwd }).run(prompt);
}
|
package/dist/codex-handler.d.ts
ADDED
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
import { EventEmitter } from 'events';
|
|
2
|
+
type SandboxMode = 'workspace-write' | 'danger-full-access';
|
|
3
|
+
type ApprovalPolicy = 'on-failure' | 'on-request' | 'never';
|
|
4
|
+
interface CodexHandlerOptions {
|
|
5
|
+
workingDirectory?: string;
|
|
6
|
+
skipGitRepoCheck?: boolean;
|
|
7
|
+
sandboxMode?: SandboxMode;
|
|
8
|
+
approvalPolicy?: ApprovalPolicy;
|
|
9
|
+
}
|
|
10
|
+
/**
|
|
11
|
+
* Codex Handler using the official OpenAI Codex SDK
|
|
12
|
+
*
|
|
13
|
+
* Features:
|
|
14
|
+
* - Thread persistence (reuses same thread for context)
|
|
15
|
+
* - Built-in coding tools (file operations, terminal commands)
|
|
16
|
+
* - Hooks for observability
|
|
17
|
+
*
|
|
18
|
+
* Requirements:
|
|
19
|
+
* - Codex CLI must be installed (npm install -g @openai/codex or use npx)
|
|
20
|
+
* - Must be authenticated via: codex login (ChatGPT) or OPENAI_API_KEY env var
|
|
21
|
+
*
|
|
22
|
+
* Sandbox/Approval modes are configured via:
|
|
23
|
+
* - ~/.codex/config.yaml for global settings
|
|
24
|
+
* - CLI flags: --sandbox, --approval-policy
|
|
25
|
+
*
|
|
26
|
+
* Note: The SDK spawns the CLI binary, so authentication must be done beforehand.
|
|
27
|
+
*/
|
|
28
|
+
export declare class CodexHandler extends EventEmitter {
|
|
29
|
+
private codex;
|
|
30
|
+
private thread;
|
|
31
|
+
private options;
|
|
32
|
+
constructor(options?: CodexHandlerOptions);
|
|
33
|
+
run(prompt: string): Promise<string>;
|
|
34
|
+
getThreadId(): string | null;
|
|
35
|
+
clearThread(): void;
|
|
36
|
+
}
|
|
37
|
+
export declare function askCodex(prompt: string, cwd?: string): Promise<string>;
|
|
38
|
+
export {};
|
package/dist/codex-handler.js
ADDED
|
@@ -0,0 +1,83 @@
|
|
|
1
|
+
import { Codex } from '@openai/codex-sdk';
|
|
2
|
+
import { EventEmitter } from 'events';
|
|
3
|
+
/**
|
|
4
|
+
* Codex Handler using the official OpenAI Codex SDK
|
|
5
|
+
*
|
|
6
|
+
* Features:
|
|
7
|
+
* - Thread persistence (reuses same thread for context)
|
|
8
|
+
* - Built-in coding tools (file operations, terminal commands)
|
|
9
|
+
* - Hooks for observability
|
|
10
|
+
*
|
|
11
|
+
* Requirements:
|
|
12
|
+
* - Codex CLI must be installed (npm install -g @openai/codex or use npx)
|
|
13
|
+
* - Must be authenticated via: codex login (ChatGPT) or OPENAI_API_KEY env var
|
|
14
|
+
*
|
|
15
|
+
* Sandbox/Approval modes are configured via:
|
|
16
|
+
* - ~/.codex/config.yaml for global settings
|
|
17
|
+
* - CLI flags: --sandbox, --approval-policy
|
|
18
|
+
*
|
|
19
|
+
* Note: The SDK spawns the CLI binary, so authentication must be done beforehand.
|
|
20
|
+
*/
|
|
21
|
+
export class CodexHandler extends EventEmitter {
|
|
22
|
+
codex;
|
|
23
|
+
thread = null;
|
|
24
|
+
options;
|
|
25
|
+
constructor(options = {}) {
|
|
26
|
+
super();
|
|
27
|
+
this.options = {
|
|
28
|
+
workingDirectory: options.workingDirectory || process.cwd(),
|
|
29
|
+
skipGitRepoCheck: options.skipGitRepoCheck ?? true,
|
|
30
|
+
};
|
|
31
|
+
// Initialize Codex (inherits process env by default)
|
|
32
|
+
this.codex = new Codex();
|
|
33
|
+
}
|
|
34
|
+
async run(prompt) {
|
|
35
|
+
try {
|
|
36
|
+
// Create or reuse thread
|
|
37
|
+
if (!this.thread) {
|
|
38
|
+
console.log('🆕 Starting new Codex thread');
|
|
39
|
+
this.thread = this.codex.startThread({
|
|
40
|
+
workingDirectory: this.options.workingDirectory,
|
|
41
|
+
skipGitRepoCheck: this.options.skipGitRepoCheck,
|
|
42
|
+
});
|
|
43
|
+
}
|
|
44
|
+
else {
|
|
45
|
+
console.log('🔄 Continuing Codex thread');
|
|
46
|
+
}
|
|
47
|
+
console.log(`📁 CWD: ${this.options.workingDirectory}`);
|
|
48
|
+
this.emit('thinking', prompt);
|
|
49
|
+
// Run the prompt and get the Turn result
|
|
50
|
+
const turn = await this.thread.run(prompt);
|
|
51
|
+
// Extract the final response text
|
|
52
|
+
const result = turn.finalResponse || '';
|
|
53
|
+
this.emit('result', result);
|
|
54
|
+
console.log(`✅ Codex done. Length: ${result.length}`);
|
|
55
|
+
// Log tool usage if any
|
|
56
|
+
if (turn.items && turn.items.length > 0) {
|
|
57
|
+
for (const item of turn.items) {
|
|
58
|
+
this.emit('tool_use', item);
|
|
59
|
+
}
|
|
60
|
+
}
|
|
61
|
+
return result || 'Task completed.';
|
|
62
|
+
}
|
|
63
|
+
catch (error) {
|
|
64
|
+
console.error('❌ Codex SDK error:', error);
|
|
65
|
+
this.emit('error', error);
|
|
66
|
+
throw error;
|
|
67
|
+
}
|
|
68
|
+
}
|
|
69
|
+
getThreadId() {
|
|
70
|
+
// Codex threads have IDs but they may not be directly exposed
|
|
71
|
+
// Check the thread object for an id property
|
|
72
|
+
return this.thread?.id || null;
|
|
73
|
+
}
|
|
74
|
+
clearThread() {
|
|
75
|
+
this.thread = null;
|
|
76
|
+
console.log('🗑️ Codex thread cleared');
|
|
77
|
+
}
|
|
78
|
+
}
|
|
79
|
+
/**
 * One-shot convenience wrapper: builds a throwaway CodexHandler rooted at
 * `cwd` and resolves with the result of running `prompt` on it.
 */
export async function askCodex(prompt, cwd) {
    return new CodexHandler({ workingDirectory: cwd }).run(prompt);
}
|
package/dist/fast-llm.d.ts
ADDED
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
interface AcknowledgmentResult {
|
|
2
|
+
acknowledgment: string;
|
|
3
|
+
isCodingTask: boolean;
|
|
4
|
+
intent: 'code' | 'chat' | 'question' | 'permission';
|
|
5
|
+
}
|
|
6
|
+
/**
|
|
7
|
+
* Fast LLM for immediate acknowledgment and intent classification
|
|
8
|
+
* Target: < 200ms response time
|
|
9
|
+
*/
|
|
10
|
+
export declare function getAcknowledgment(userInput: string): Promise<AcknowledgmentResult>;
|
|
11
|
+
/**
|
|
12
|
+
* Summarize Claude's response for voice output
|
|
13
|
+
*/
|
|
14
|
+
export declare function summarizeForVoice(claudeResponse: string): Promise<string>;
|
|
15
|
+
export {};
|
package/dist/fast-llm.js
ADDED
|
@@ -0,0 +1,81 @@
|
|
|
1
|
+
import Groq from 'groq-sdk';
|
|
2
|
+
// Shared Groq client for this module; the key is read from GROQ_API_KEY when
// the module is loaded.
const groqApiKey = process.env.GROQ_API_KEY;
const groq = new Groq({ apiKey: groqApiKey });
|
|
5
|
+
/**
|
|
6
|
+
* Fast LLM for immediate acknowledgment and intent classification
|
|
7
|
+
* Target: < 200ms response time
|
|
8
|
+
*/
|
|
9
|
+
export async function getAcknowledgment(userInput) {
|
|
10
|
+
const response = await groq.chat.completions.create({
|
|
11
|
+
model: 'llama-3.3-70b-versatile', // Fast model
|
|
12
|
+
messages: [
|
|
13
|
+
{
|
|
14
|
+
role: 'system',
|
|
15
|
+
content: `You are Osborn, a voice-enabled coding assistant. Classify user requests:
|
|
16
|
+
|
|
17
|
+
CODING TASKS (isCodingTask: true) - Anything involving:
|
|
18
|
+
- Files: read, write, create, edit, list, find, search
|
|
19
|
+
- Directories: what directory, current folder, list files, project structure
|
|
20
|
+
- Code: fix bugs, refactor, explain code, run tests
|
|
21
|
+
- Terminal: run commands, install packages, git operations
|
|
22
|
+
- Project: what's in this project, show me files, analyze codebase
|
|
23
|
+
|
|
24
|
+
NOT CODING (isCodingTask: false):
|
|
25
|
+
- General chat: hi, how are you, tell me a joke
|
|
26
|
+
- Off-topic: weather, news, recipes
|
|
27
|
+
- Permission responses: yes, no, approve, deny
|
|
28
|
+
|
|
29
|
+
Respond in JSON:
|
|
30
|
+
{
|
|
31
|
+
"acknowledgment": "brief friendly response",
|
|
32
|
+
"isCodingTask": true/false,
|
|
33
|
+
"intent": "code|chat|question|permission"
|
|
34
|
+
}
|
|
35
|
+
|
|
36
|
+
Examples:
|
|
37
|
+
- "What directory are we in?" → {"acknowledgment": "Let me check", "isCodingTask": true, "intent": "code"}
|
|
38
|
+
- "What files are in this project?" → {"acknowledgment": "I'll list them", "isCodingTask": true, "intent": "code"}
|
|
39
|
+
- "Read package.json" → {"acknowledgment": "On it", "isCodingTask": true, "intent": "code"}
|
|
40
|
+
- "Fix the bug" → {"acknowledgment": "Let me look", "isCodingTask": true, "intent": "code"}
|
|
41
|
+
- "Hello" → {"acknowledgment": "Hi there!", "isCodingTask": false, "intent": "chat"}
|
|
42
|
+
- "Yes, do it" → {"acknowledgment": "Got it", "isCodingTask": false, "intent": "permission"}`,
|
|
43
|
+
},
|
|
44
|
+
{
|
|
45
|
+
role: 'user',
|
|
46
|
+
content: userInput,
|
|
47
|
+
},
|
|
48
|
+
],
|
|
49
|
+
temperature: 0.3,
|
|
50
|
+
max_tokens: 150,
|
|
51
|
+
response_format: { type: 'json_object' },
|
|
52
|
+
});
|
|
53
|
+
const content = response.choices[0]?.message?.content || '{}';
|
|
54
|
+
return JSON.parse(content);
|
|
55
|
+
}
|
|
56
|
+
/**
|
|
57
|
+
* Summarize Claude's response for voice output
|
|
58
|
+
*/
|
|
59
|
+
export async function summarizeForVoice(claudeResponse) {
|
|
60
|
+
if (claudeResponse.length < 200) {
|
|
61
|
+
return claudeResponse;
|
|
62
|
+
}
|
|
63
|
+
const response = await groq.chat.completions.create({
|
|
64
|
+
model: 'llama-3.3-70b-versatile',
|
|
65
|
+
messages: [
|
|
66
|
+
{
|
|
67
|
+
role: 'system',
|
|
68
|
+
content: `Summarize this coding assistant response for voice output.
|
|
69
|
+
Keep it under 2 sentences. Focus on what was done and the result.
|
|
70
|
+
Don't include code - just describe what happened.`,
|
|
71
|
+
},
|
|
72
|
+
{
|
|
73
|
+
role: 'user',
|
|
74
|
+
content: claudeResponse,
|
|
75
|
+
},
|
|
76
|
+
],
|
|
77
|
+
temperature: 0.3,
|
|
78
|
+
max_tokens: 100,
|
|
79
|
+
});
|
|
80
|
+
return response.choices[0]?.message?.content || claudeResponse;
|
|
81
|
+
}
|
package/dist/index.d.ts
ADDED
package/dist/index.js
ADDED
|
@@ -0,0 +1,356 @@
|
|
|
1
|
+
import { ServerOptions, cli, defineAgent, llm, voice } from '@livekit/agents';
|
|
2
|
+
import * as openai from '@livekit/agents-plugin-openai';
|
|
3
|
+
import * as google from '@livekit/agents-plugin-google';
|
|
4
|
+
import { z } from 'zod';
|
|
5
|
+
import { fileURLToPath } from 'url';
|
|
6
|
+
import 'dotenv/config';
|
|
7
|
+
import { ClaudeHandler } from './claude-handler.js';
|
|
8
|
+
import { CodexHandler } from './codex-handler.js';
|
|
9
|
+
// Process-wide safety nets: log failures that would otherwise go unnoticed.
const logUnhandledRejection = (reason) => {
    console.error('❌ Unhandled Rejection:', reason);
};
const logUncaughtException = (error) => {
    console.error('❌ Uncaught Exception:', error);
};
process.on('unhandledRejection', logUnhandledRejection);
process.on('uncaughtException', logUncaughtException);
// Provider used unless participant metadata overrides it; taken from
// LLM_PROVIDER, falling back to 'openai'.
const DEFAULT_PROVIDER = process.env.LLM_PROVIDER || 'openai';
// Debug logging is on only when DEBUG_LIVEKIT is exactly 'true'.
const DEBUG = process.env.DEBUG_LIVEKIT === 'true';
if (DEBUG) {
    console.log('🐛 Debug logging enabled');
}
console.log(`🤖 Default LLM Provider: ${DEFAULT_PROVIDER}`);
|
|
24
|
+
// Sample MCP server definitions. Every entry below is commented out, so this
// map is empty until one of them is enabled.
const MCP_SERVERS = {
    /*
    // GitHub integration
    'github': {
        command: 'npx',
        args: ['@modelcontextprotocol/server-github'],
        env: { GITHUB_TOKEN: process.env.GITHUB_TOKEN || '' }
    },
    // Filesystem with specific allowed paths
    'filesystem': {
        command: 'npx',
        args: ['@modelcontextprotocol/server-filesystem'],
        env: { ALLOWED_PATHS: '/path/to/allowed/directory' }
    },
    */
};
|
|
39
|
+
// Create the shared Claude handler at module load (before any connections).
console.log('🔥 Pre-initializing Claude Code...');
const claude = new ClaudeHandler({
    // Previously hard-coded to an absolute path on the author's machine, which
    // does not exist for anyone installing the package. Use the directory
    // named by OSBORN_WORKING_DIR when set, otherwise the process cwd.
    workingDirectory: process.env.OSBORN_WORKING_DIR || process.cwd(),
    permissionMode: 'default', // Ask for permission on dangerous tools (Bash, Write, Edit)
    // Uncomment to enable MCP servers:
    // mcpServers: MCP_SERVERS,
});
// Surface Claude's permission requests on the console and in the frontend.
claude.on('permission_request', (req) => {
    console.log(`\n⚠️ PERMISSION REQUIRED ⚠️`);
    console.log(`🔧 Tool: ${req.toolName}`);
    console.log(`📝 Action: ${req.description}`);
    console.log(`⏳ Waiting for user response (say: allow, deny, or always allow)...`);
    // Send to frontend for UI display
    sendToFrontend({
        type: 'permission_request',
        toolName: req.toolName,
        description: req.description,
    });
});
// Pre-warm Claude immediately on server start; a failure is only logged.
claude.run('Respond with just: ready')
    .then(() => console.log('✅ Claude pre-warmed and ready!'))
    .catch((err) => console.log('⚠️ Pre-warm failed:', err?.message ?? err));
|
|
64
|
+
// Track job context and session for data channel
|
|
65
|
+
let jobContext = null;
|
|
66
|
+
let currentSession = null;
|
|
67
|
+
// Track the current coding handler (can be Claude or Codex)
|
|
68
|
+
let currentCodingAgent = 'claude';
|
|
69
|
+
let codexHandler = null;
|
|
70
|
+
// Helper to send data to frontend
|
|
71
|
+
async function sendToFrontend(data) {
|
|
72
|
+
if (!jobContext)
|
|
73
|
+
return;
|
|
74
|
+
try {
|
|
75
|
+
const encoder = new TextEncoder();
|
|
76
|
+
const payload = encoder.encode(JSON.stringify(data));
|
|
77
|
+
await jobContext.room.localParticipant?.publishData(payload, {
|
|
78
|
+
reliable: true,
|
|
79
|
+
topic: 'osborn-updates',
|
|
80
|
+
});
|
|
81
|
+
}
|
|
82
|
+
catch (err) {
|
|
83
|
+
// Ignore send errors
|
|
84
|
+
}
|
|
85
|
+
}
|
|
86
|
+
// Define the run_code tool (works with both Claude and Codex).
const runCodeTool = llm.tool({
    description: `Execute coding tasks using the coding agent. Use for:
- Files: read, write, create, edit, list, search
- Directories: current directory, list contents
- Code: fix bugs, refactor, explain, review
- Terminal: run commands, install packages, git
- Project: analyze codebase, make changes
- Web: search the web for information`,
    parameters: z.object({
        task: z.string().describe('The coding task to execute'),
    }),
    /**
     * Run `task` on the active coding agent and mirror progress to the frontend.
     *
     * Never throws: a failure is logged, reported to the frontend, and returned
     * as an `Error: …` string.
     *
     * @param task - The coding task to execute.
     * @returns The agent's result, or an `Error: …` message on failure.
     */
    execute: async ({ task }) => {
        // Codex is used only when it is selected AND its handler exists; in every
        // other case Claude runs. Derive the label from the same decision so the
        // log cannot name an agent that is not the one executing.
        const useCodex = currentCodingAgent === 'codex' && Boolean(codexHandler);
        const agentName = useCodex ? 'OpenAI Codex' : 'Claude Code';
        console.log(`\n🔨 ${agentName}: "${task}"`);
        await sendToFrontend({ type: 'system', text: `Working on: ${task}` });
        try {
            const result = useCodex
                ? await codexHandler.run(task)
                : await claude.run(task);
            console.log(`✅ Done: ${result.length} chars`);
            await sendToFrontend({ type: 'assistant_response', text: result });
            return result;
        }
        catch (err) {
            console.error('❌ Error:', err);
            // The thrown value is not guaranteed to be an Error instance.
            const message = err instanceof Error ? err.message : String(err);
            // Tell the frontend as well, so "Working on: …" is not its last update.
            await sendToFrontend({ type: 'system', text: `Error: ${message}` });
            return `Error: ${message}`;
        }
    },
});
|
|
120
|
+
// Tool that forwards the user's spoken decision on a pending Claude permission request.
const respondPermissionTool = llm.tool({
    description: `Respond to a pending permission request from Claude Code.
Use this ONLY when there is a pending permission request.
Call this after hearing the user's response to a permission prompt.`,
    parameters: z.object({
        response: z
            .enum(['allow', 'deny', 'always_allow'])
            .describe('The user response: "allow" for one-time approval, "deny" to reject, "always_allow" to permanently allow this tool type'),
    }),
    /**
     * Apply `response` to the pending permission request, if there is one,
     * and notify the frontend of the decision.
     *
     * @param response - One of 'allow', 'deny' or 'always_allow'.
     * @returns A short status sentence for the voice model.
     */
    execute: async ({ response }) => {
        if (claude.hasPendingPermission()) {
            // Capture the request before answering it so its tool name can be reported.
            const request = claude.getPendingPermission();
            claude.respondToPermission(response);
            const update = {
                type: 'permission_response',
                response,
                toolName: request?.toolName
            };
            await sendToFrontend(update);
            return `Permission ${response} for ${request?.toolName || 'tool'}.`;
        }
        return 'No pending permission request.';
    },
});
|
|
142
|
+
// Agent instructions - static system prompt describing the capabilities exposed through the run_code tool
|
|
143
|
+
const OSBORN_INSTRUCTIONS = `You are Osborn, a voice-enabled AI assistant with coding superpowers.
|
|
144
|
+
Keep responses under 50 words. Sound natural and human.
|
|
145
|
+
|
|
146
|
+
AVAILABLE CAPABILITIES via run_code tool:
|
|
147
|
+
- Read, Write, Edit, MultiEdit files
|
|
148
|
+
- Glob (find files by pattern), Grep (search content)
|
|
149
|
+
- Bash (run terminal commands)
|
|
150
|
+
- WebSearch (search the web), WebFetch (fetch URLs)
|
|
151
|
+
- NotebookEdit (edit Jupyter notebooks)
|
|
152
|
+
- Task (delegate complex tasks), TodoWrite (track tasks)
|
|
153
|
+
- LSP (code intelligence - go to definition, find references)
|
|
154
|
+
|
|
155
|
+
WHEN TO USE run_code:
|
|
156
|
+
- File operations (read, write, create, edit, list, find)
|
|
157
|
+
- Code tasks (fix, refactor, explain, review, debug)
|
|
158
|
+
- Terminal commands (run, install, test, build, git)
|
|
159
|
+
- Web searches (look up documentation, APIs, errors)
|
|
160
|
+
- Project analysis (understand codebase, find patterns)
|
|
161
|
+
|
|
162
|
+
WHEN TO RESPOND DIRECTLY:
|
|
163
|
+
- Greetings and small talk
|
|
164
|
+
- General knowledge questions
|
|
165
|
+
- Clarifying what the user wants
|
|
166
|
+
|
|
167
|
+
PERMISSION HANDLING:
|
|
168
|
+
When the coding agent needs permission, you MUST:
|
|
169
|
+
1. Tell the user: "[Agent] wants to [action]. Allow, deny, or always allow?"
|
|
170
|
+
2. When they respond, call respond_permission with their choice
|
|
171
|
+
|
|
172
|
+
Be conversational and helpful. Ask follow-up questions when needed.`;
|
|
173
|
+
/**
 * Voice agent configured with the Osborn instructions and the two tools
 * defined in this module: `run_code` and `respond_permission`.
 */
class OsbornAssistant extends voice.Agent {
    constructor() {
        // Tool names are the identifiers the instructions refer to.
        const tools = {
            run_code: runCodeTool,
            respond_permission: respondPermissionTool,
        };
        super({ instructions: OSBORN_INSTRUCTIONS, tools });
    }
}
|
|
185
|
+
/**
 * Build the realtime model for the requested provider.
 *
 * @param provider - 'gemini' selects the Gemini Live model; any other value
 *   selects the OpenAI Realtime model.
 * @returns The constructed realtime model instance.
 */
function createModel(provider) {
    if (provider !== 'gemini') {
        console.log('📱 Using OpenAI Realtime API');
        // Log only whether the key is present, never its value.
        console.log('🔑 OPENAI_API_KEY:', process.env.OPENAI_API_KEY ? 'set' : 'NOT SET');
        const openaiModel = new openai.realtime.RealtimeModel({ voice: 'alloy' });
        console.log('✅ OpenAI model created');
        return openaiModel;
    }
    console.log('📱 Using Gemini Live API');
    const googleKeyState = process.env.GOOGLE_API_KEY ? 'set' : 'NOT SET';
    console.log('🔑 GOOGLE_API_KEY:', googleKeyState);
    // From official docs: https://docs.livekit.io/agents/models/realtime/plugins/gemini/
    // Package v1.0.31 uses google.beta.realtime (not google.realtime yet)
    const geminiModel = new google.beta.realtime.RealtimeModel({
        model: 'gemini-2.5-flash-native-audio-preview-12-2025', // From official docs
        voice: 'Puck',
        instructions: OSBORN_INSTRUCTIONS,
    });
    console.log('✅ Gemini model created with gemini-2.5-flash-native-audio-preview-12-2025');
    return geminiModel;
}
|
|
210
|
+
/**
 * Read the realtime LLM provider from a participant's metadata.
 *
 * @param metadata - Raw participant metadata; expected to be a JSON object
 *   string, but may be empty, missing or malformed.
 * @returns The `provider` field when it is a non-empty string, otherwise
 *   `DEFAULT_PROVIDER`.
 */
function getProviderFromParticipant(metadata) {
    if (!metadata)
        return DEFAULT_PROVIDER;
    try {
        const provider = JSON.parse(metadata)?.provider;
        // The caller upper-cases the provider for logging, so a non-string
        // value (number, object, …) must not be passed through.
        return typeof provider === 'string' && provider !== ''
            ? provider
            : DEFAULT_PROVIDER;
    }
    catch {
        // Malformed JSON: fall back to the default.
        return DEFAULT_PROVIDER;
    }
}
|
|
222
|
+
/**
 * Read the selected coding agent from a participant's metadata.
 *
 * @param metadata - Raw participant metadata; expected to be a JSON object
 *   string, but may be empty, missing or malformed.
 * @returns 'codex' when the metadata explicitly selects it, otherwise 'claude'.
 */
function getCodingAgentFromParticipant(metadata) {
    if (!metadata)
        return 'claude';
    try {
        const requested = JSON.parse(metadata)?.codingAgent;
        // Only the two agents this module can run are accepted. Any other value
        // would otherwise be stored as the current agent although Claude ends up
        // handling the work.
        return requested === 'codex' ? 'codex' : 'claude';
    }
    catch {
        // Malformed JSON: fall back to the default agent.
        return 'claude';
    }
}
|
|
234
|
+
export default defineAgent({
    /**
     * Job entry point.
     *
     * Connects to the room, waits for a participant, reads the provider and
     * coding agent from that participant's metadata, wires up logging and
     * data-channel handlers, and starts the voice session.
     *
     * @param ctx - Job context supplied by the agent framework; it is stored in
     *   `jobContext` so that `sendToFrontend` can publish to the room.
     */
    entry: async (ctx) => {
        console.log('🚀 Agent starting for room:', ctx.room.name);
        jobContext = ctx;
        // Claude verbose logging
        claude.on('tool_use', (tool) => {
            console.log(`\n🔧 Claude Tool Started: ${tool.name}`);
            if (tool.input) {
                const serialized = JSON.stringify(tool.input);
                // Add the ellipsis only when the preview was actually cut short.
                console.log(` Input: ${serialized.substring(0, 200)}${serialized.length > 200 ? '...' : ''}`);
            }
        });
        claude.on('tool_result', (result) => {
            console.log(`✅ Claude Tool Completed: ${result.name || 'unknown'}`);
        });
        claude.on('text', (text) => {
            if (text.length > 0) {
                console.log(`💬 Claude says: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`);
            }
        });
        claude.on('error', (err) => {
            console.error(`❌ Claude Error:`, err);
        });
        // Connect FIRST so we can wait for participants
        console.log('📡 Connecting to room...');
        await ctx.connect();
        console.log('✅ Connected to room');
        // Wait for a participant to join using LiveKit's built-in method
        console.log('⏳ Waiting for participant...');
        const participant = await ctx.waitForParticipant();
        console.log('👤 Participant joined:', participant.identity);
        console.log('📋 Participant metadata:', participant.metadata);
        const provider = getProviderFromParticipant(participant.metadata);
        const codingAgent = getCodingAgentFromParticipant(participant.metadata);
        console.log(`🎯 User selected provider: ${provider}`);
        console.log(`🔧 User selected coding agent: ${codingAgent}`);
        // Set the current coding agent and initialize if needed
        currentCodingAgent = codingAgent;
        if (codingAgent === 'codex') {
            console.log('🔧 Initializing Codex handler...');
            codexHandler = new CodexHandler({
                // Work in the directory the agent was launched from rather than an
                // absolute path that exists only on one developer's machine.
                workingDirectory: process.cwd(),
            });
            console.log('✅ Codex handler ready');
        }
        // Create model based on user's choice
        const model = createModel(provider);
        const session = new voice.AgentSession({
            llm: model,
        });
        currentSession = session;
        // Add session event listeners for debugging
        // Using string literals as AgentSessionEventTypes is not directly exported
        session.on('user_state_changed', (ev) => {
            console.log(`👤 User state: ${ev.oldState} → ${ev.newState}`);
        });
        session.on('agent_state_changed', (ev) => {
            console.log(`🤖 Agent state: ${ev.oldState} → ${ev.newState}`);
        });
        session.on('user_input_transcribed', (ev) => {
            console.log(`📝 Transcribed: "${ev.transcript}" (final: ${ev.isFinal})`);
        });
        session.on('error', (ev) => {
            console.error('❌ Session error:', ev.error);
        });
        session.on('close', (ev) => {
            console.log('🚪 Session closed:', ev.reason);
        });
        ctx.room.on('trackSubscribed', (track, publication, p) => {
            console.log(`📥 Track subscribed: ${track.kind} from ${p.identity}`);
        });
        // Values accepted by the respond_permission tool; UI input is held to the same set.
        const permissionResponses = ['allow', 'deny', 'always_allow'];
        // Listen for data channel messages from frontend
        ctx.room.on('dataReceived', async (payload, sender, kind, topic) => {
            if (topic !== 'user-input')
                return;
            let data;
            try {
                data = JSON.parse(new TextDecoder().decode(payload));
            }
            catch {
                // Not JSON, ignore
                return;
            }
            console.log(`📨 Received from frontend:`, data);
            try {
                if (data?.type === 'permission_response') {
                    // Handle permission response from UI. The payload comes from the
                    // network, so only known responses are forwarded to Claude.
                    if (!permissionResponses.includes(data.response)) {
                        console.warn(`⚠️ Ignoring invalid permission response from UI:`, data.response);
                    }
                    else if (claude.hasPendingPermission()) {
                        claude.respondToPermission(data.response);
                        console.log(`✅ Permission ${data.response} from UI`);
                    }
                }
                else if (data?.type === 'user_text') {
                    // Handle text input from frontend
                    console.log(`📝 Text input: "${data.content}"`);
                    // Inject text into the session as user input
                    if (currentSession) {
                        try {
                            // Interrupt any current speech first
                            currentSession.interrupt();
                            // Generate a reply to the text input
                            await currentSession.generateReply({
                                userInput: data.content,
                            });
                            console.log(`✅ Injected text to session`);
                        }
                        catch (err) {
                            console.error(`❌ Failed to inject text:`, err);
                        }
                    }
                }
            }
            catch (err) {
                // Handler failures are logged instead of being mistaken for bad JSON.
                console.error(`❌ Failed to handle frontend message:`, err);
            }
        });
        // Create the agent
        const agent = new OsbornAssistant();
        // Start session
        console.log('🎬 Starting voice session...');
        const startTime = Date.now();
        await session.start({
            agent,
            room: ctx.room,
        });
        // Name the coding agent that is actually active for this session.
        const toolsLabel = codingAgent === 'codex' ? 'Codex' : 'Claude';
        console.log(`✅ Session started in ${Date.now() - startTime}ms with ${String(provider).toUpperCase()} + ${toolsLabel} tools`);
        console.log('🎤 Ready for voice input! Speak to start.');
    },
});
// Launch the agent worker, registering this module file as the agent.
cli.runApp(new ServerOptions({ agent: fileURLToPath(import.meta.url) }));
|
package/package.json
ADDED
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "osborn",
|
|
3
|
+
"version": "0.1.0",
|
|
4
|
+
"description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"bin": {
|
|
7
|
+
"osborn": "./bin/cli.js"
|
|
8
|
+
},
|
|
9
|
+
"scripts": {
|
|
10
|
+
"dev": "tsx src/index.ts dev",
|
|
11
|
+
"start": "tsx src/index.ts start",
|
|
12
|
+
"build": "tsc"
|
|
13
|
+
},
|
|
14
|
+
"keywords": [
|
|
15
|
+
"voice",
|
|
16
|
+
"ai",
|
|
17
|
+
"coding",
|
|
18
|
+
"assistant",
|
|
19
|
+
"claude",
|
|
20
|
+
"livekit"
|
|
21
|
+
],
|
|
22
|
+
"repository": {
|
|
23
|
+
"type": "git",
|
|
24
|
+
"url": "https://github.com/Erriccc/osborn.git"
|
|
25
|
+
},
|
|
26
|
+
"author": "Osborn Ojure",
|
|
27
|
+
"license": "MIT",
|
|
28
|
+
"dependencies": {
|
|
29
|
+
"@anthropic-ai/claude-agent-sdk": "^0.1.74",
|
|
30
|
+
"@livekit/agents": "^1.0.0",
|
|
31
|
+
"@livekit/agents-plugin-google": "^1.0.0",
|
|
32
|
+
"@livekit/agents-plugin-openai": "^1.0.0",
|
|
33
|
+
"@openai/codex-sdk": "^0.77.0",
|
|
34
|
+
"dotenv": "^16.4.0",
|
|
35
|
+
"tsx": "^4.0.0",
|
|
36
|
+
"yaml": "^2.3.0",
|
|
37
|
+
"zod": "^3.23.0"
|
|
38
|
+
},
|
|
39
|
+
"devDependencies": {
|
|
40
|
+
"@types/node": "^20.0.0",
|
|
41
|
+
"typescript": "^5.0.0"
|
|
42
|
+
},
|
|
43
|
+
"engines": {
|
|
44
|
+
"node": ">=18.0.0"
|
|
45
|
+
}
|
|
46
|
+
}
|