osborn 0.1.2 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/claude-handler.d.ts +13 -1
- package/dist/claude-handler.js +40 -5
- package/dist/index.d.ts +0 -2
- package/dist/index.js +496 -353
- package/package.json +7 -4
package/dist/claude-handler.d.ts
CHANGED
|
@@ -3,9 +3,10 @@ import { EventEmitter } from 'events';
|
|
|
3
3
|
interface ClaudeHandlerOptions {
|
|
4
4
|
workingDirectory?: string;
|
|
5
5
|
allowedTools?: string[];
|
|
6
|
-
permissionMode?: 'default' | 'acceptEdits' | 'bypassPermissions';
|
|
6
|
+
permissionMode?: 'default' | 'acceptEdits' | 'bypassPermissions' | 'plan';
|
|
7
7
|
mcpServers?: Record<string, McpServerConfig>;
|
|
8
8
|
requireAllPermissions?: boolean;
|
|
9
|
+
agentRole?: 'plan' | 'execute';
|
|
9
10
|
}
|
|
10
11
|
export type { McpServerConfig };
|
|
11
12
|
export interface PermissionRequestEvent {
|
|
@@ -32,7 +33,18 @@ export declare class ClaudeHandler extends EventEmitter {
|
|
|
32
33
|
private toolStartTimes;
|
|
33
34
|
private alwaysAllowedTools;
|
|
34
35
|
private static readonly ALL_TOOLS;
|
|
36
|
+
private static readonly PLAN_TOOLS;
|
|
37
|
+
private static readonly EXECUTE_TOOLS;
|
|
38
|
+
private agentRole;
|
|
35
39
|
constructor(options?: ClaudeHandlerOptions);
|
|
40
|
+
/**
|
|
41
|
+
* Get the agent's role
|
|
42
|
+
*/
|
|
43
|
+
getRole(): 'plan' | 'execute';
|
|
44
|
+
/**
|
|
45
|
+
* Check if this is a plan-mode agent
|
|
46
|
+
*/
|
|
47
|
+
isPlanMode(): boolean;
|
|
36
48
|
/**
|
|
37
49
|
* Generate human-readable description for a tool call
|
|
38
50
|
*/
|
package/dist/claude-handler.js
CHANGED
|
@@ -54,22 +54,57 @@ export class ClaudeHandler extends EventEmitter {
|
|
|
54
54
|
// LSP (Language Server Protocol)
|
|
55
55
|
'LSP',
|
|
56
56
|
];
|
|
57
|
+
// Plan mode tools - read-only, research, context gathering
|
|
58
|
+
static PLAN_TOOLS = [
|
|
59
|
+
'Read', // View file contents
|
|
60
|
+
'Glob', // File pattern matching
|
|
61
|
+
'Grep', // Content searching
|
|
62
|
+
'Bash', // Read-only bash (ls, git status, git log, etc.)
|
|
63
|
+
'Task', // Research agents
|
|
64
|
+
'WebFetch', // Web content analysis
|
|
65
|
+
'WebSearch', // Internet searching
|
|
66
|
+
'LSP', // Code intelligence (go to definition, references)
|
|
67
|
+
];
|
|
68
|
+
// Execute mode tools - full access
|
|
69
|
+
static EXECUTE_TOOLS = ClaudeHandler.ALL_TOOLS;
|
|
70
|
+
agentRole;
|
|
57
71
|
constructor(options = {}) {
|
|
58
72
|
super();
|
|
73
|
+
// Set agent role
|
|
74
|
+
this.agentRole = options.agentRole || (options.permissionMode === 'plan' ? 'plan' : 'execute');
|
|
75
|
+
// For plan mode, restrict to read-only tools
|
|
76
|
+
const isPlanMode = options.permissionMode === 'plan';
|
|
77
|
+
const defaultTools = isPlanMode ? ClaudeHandler.PLAN_TOOLS : ClaudeHandler.ALL_TOOLS;
|
|
59
78
|
this.options = {
|
|
60
79
|
workingDirectory: options.workingDirectory || process.cwd(),
|
|
61
|
-
allowedTools: options.allowedTools ||
|
|
62
|
-
|
|
80
|
+
allowedTools: options.allowedTools || defaultTools,
|
|
81
|
+
// Plan mode uses 'default' permission mode but with restricted tools
|
|
82
|
+
permissionMode: isPlanMode ? 'default' : (options.permissionMode || 'default'),
|
|
63
83
|
mcpServers: options.mcpServers,
|
|
64
|
-
//
|
|
65
|
-
|
|
84
|
+
// Plan mode doesn't require permissions (read-only is safe)
|
|
85
|
+
// Execute mode requires permissions for safety
|
|
86
|
+
requireAllPermissions: isPlanMode ? false : (options.requireAllPermissions ?? true),
|
|
66
87
|
};
|
|
88
|
+
const roleEmoji = this.agentRole === 'plan' ? 'š' : 'šØ';
|
|
89
|
+
console.log(`${roleEmoji} Agent role: ${this.agentRole.toUpperCase()}`);
|
|
67
90
|
console.log(`š§ Allowed tools: ${this.options.allowedTools?.join(', ')}`);
|
|
68
|
-
console.log(`š Require
|
|
91
|
+
console.log(`š Require permissions: ${this.options.requireAllPermissions}`);
|
|
69
92
|
if (this.options.mcpServers) {
|
|
70
93
|
console.log(`š MCP servers: ${Object.keys(this.options.mcpServers).join(', ')}`);
|
|
71
94
|
}
|
|
72
95
|
}
|
|
96
|
+
/**
|
|
97
|
+
* Get the agent's role
|
|
98
|
+
*/
|
|
99
|
+
getRole() {
|
|
100
|
+
return this.agentRole;
|
|
101
|
+
}
|
|
102
|
+
/**
|
|
103
|
+
* Check if this is a plan-mode agent
|
|
104
|
+
*/
|
|
105
|
+
isPlanMode() {
|
|
106
|
+
return this.agentRole === 'plan';
|
|
107
|
+
}
|
|
73
108
|
/**
|
|
74
109
|
* Generate human-readable description for a tool call
|
|
75
110
|
*/
|
package/dist/index.d.ts
CHANGED
package/dist/index.js
CHANGED
|
@@ -1,337 +1,449 @@
|
|
|
1
|
-
import {
|
|
1
|
+
import { llm, voice, initializeLogger } from '@livekit/agents';
|
|
2
2
|
import * as openai from '@livekit/agents-plugin-openai';
|
|
3
3
|
import * as google from '@livekit/agents-plugin-google';
|
|
4
|
+
import { Room, RoomEvent } from '@livekit/rtc-node';
|
|
5
|
+
import { AccessToken } from 'livekit-server-sdk';
|
|
4
6
|
import { z } from 'zod';
|
|
5
|
-
import { fileURLToPath } from 'url';
|
|
6
7
|
import 'dotenv/config';
|
|
8
|
+
// Initialize logger before anything else
|
|
9
|
+
initializeLogger({ pretty: true, level: 'info' });
|
|
7
10
|
import { ClaudeHandler } from './claude-handler.js';
|
|
8
11
|
import { CodexHandler } from './codex-handler.js';
|
|
9
12
|
import { loadConfig, getMcpServers, getEnabledMcpServerNames } from './config.js';
|
|
10
|
-
//
|
|
13
|
+
// Generate a short, user-friendly room code
|
|
14
|
+
function generateRoomCode() {
|
|
15
|
+
const chars = 'abcdefghjkmnpqrstuvwxyz23456789';
|
|
16
|
+
let code = '';
|
|
17
|
+
for (let i = 0; i < 6; i++) {
|
|
18
|
+
code += chars[Math.floor(Math.random() * chars.length)];
|
|
19
|
+
}
|
|
20
|
+
return code;
|
|
21
|
+
}
|
|
22
|
+
// Parse CLI arguments
|
|
11
23
|
function parseArgs() {
|
|
12
24
|
const args = process.argv.slice(2);
|
|
13
25
|
let roomCode;
|
|
26
|
+
let provider;
|
|
14
27
|
for (let i = 0; i < args.length; i++) {
|
|
15
28
|
if (args[i] === '--room' && args[i + 1]) {
|
|
16
29
|
roomCode = args[i + 1];
|
|
17
30
|
}
|
|
31
|
+
if (args[i] === '--provider' && args[i + 1]) {
|
|
32
|
+
provider = args[i + 1];
|
|
33
|
+
}
|
|
34
|
+
// Short code detection (e.g., `npm run dev abc123`)
|
|
35
|
+
if (!args[i].startsWith('-') && args[i].length >= 4 && args[i].length <= 10 &&
|
|
36
|
+
!['dev', 'start'].includes(args[i])) {
|
|
37
|
+
roomCode = args[i];
|
|
38
|
+
}
|
|
18
39
|
}
|
|
19
|
-
return { roomCode };
|
|
40
|
+
return { roomCode, provider };
|
|
20
41
|
}
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
console.log(`š Room code provided: ${cliArgs.roomCode}`);
|
|
24
|
-
}
|
|
25
|
-
// Global error handlers to catch silent failures
|
|
26
|
-
process.on('unhandledRejection', (reason, promise) => {
|
|
42
|
+
// Global error handlers
|
|
43
|
+
process.on('unhandledRejection', (reason) => {
|
|
27
44
|
console.error('ā Unhandled Rejection:', reason);
|
|
28
45
|
});
|
|
29
46
|
process.on('uncaughtException', (error) => {
|
|
30
47
|
console.error('ā Uncaught Exception:', error);
|
|
31
48
|
});
|
|
32
|
-
//
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
if (
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
const
|
|
52
|
-
|
|
53
|
-
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
console.log(
|
|
62
|
-
|
|
63
|
-
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
|
|
67
|
-
|
|
49
|
+
// Main function
|
|
50
|
+
async function main() {
|
|
51
|
+
console.log('\nš¤ Osborn Voice AI Coding Assistant\n');
|
|
52
|
+
// Validate environment
|
|
53
|
+
const livekitUrl = process.env.LIVEKIT_URL;
|
|
54
|
+
const apiKey = process.env.LIVEKIT_API_KEY;
|
|
55
|
+
const apiSecret = process.env.LIVEKIT_API_SECRET;
|
|
56
|
+
if (!livekitUrl || !apiKey || !apiSecret) {
|
|
57
|
+
console.error('ā Missing required environment variables:');
|
|
58
|
+
if (!livekitUrl)
|
|
59
|
+
console.error(' - LIVEKIT_URL');
|
|
60
|
+
if (!apiKey)
|
|
61
|
+
console.error(' - LIVEKIT_API_KEY');
|
|
62
|
+
if (!apiSecret)
|
|
63
|
+
console.error(' - LIVEKIT_API_SECRET');
|
|
64
|
+
console.error('\nSet these in your .env file or environment.');
|
|
65
|
+
process.exit(1);
|
|
66
|
+
}
|
|
67
|
+
// Parse CLI args
|
|
68
|
+
const cliArgs = parseArgs();
|
|
69
|
+
// Load configuration
|
|
70
|
+
console.log('š Loading configuration...');
|
|
71
|
+
const config = loadConfig();
|
|
72
|
+
const mcpServers = getMcpServers(config);
|
|
73
|
+
const enabledMcpNames = getEnabledMcpServerNames(config);
|
|
74
|
+
if (enabledMcpNames.length > 0) {
|
|
75
|
+
console.log(`š Enabled MCP servers: ${enabledMcpNames.join(', ')}`);
|
|
76
|
+
}
|
|
77
|
+
const workingDir = config.workingDirectory || process.cwd();
|
|
78
|
+
console.log(`š Working directory: ${workingDir}`);
|
|
79
|
+
// Determine room code
|
|
80
|
+
const roomCode = cliArgs.roomCode || generateRoomCode();
|
|
81
|
+
const roomName = `osborn-${roomCode}`;
|
|
82
|
+
if (cliArgs.roomCode) {
|
|
83
|
+
console.log(`š Joining room: ${roomCode}`);
|
|
84
|
+
}
|
|
85
|
+
else {
|
|
86
|
+
console.log(`\n⨠Created new room: ${roomCode}`);
|
|
87
|
+
console.log(`\nš Share this with the frontend or run:`);
|
|
88
|
+
console.log(` Open: https://osborn.app?room=${roomCode}`);
|
|
89
|
+
console.log(` Or enter code "${roomCode}" in the frontend\n`);
|
|
90
|
+
}
|
|
91
|
+
// Default provider
|
|
92
|
+
const defaultProvider = cliArgs.provider || process.env.LLM_PROVIDER || 'openai';
|
|
93
|
+
console.log(`šÆ Default voice provider: ${defaultProvider}`);
|
|
94
|
+
// ============================================================
|
|
95
|
+
// Initialize Claude Agents (Dual Architecture)
|
|
96
|
+
// ============================================================
|
|
97
|
+
console.log('\nš„ Initializing Claude agents...');
|
|
98
|
+
// Plan Agent - Read-only, research
|
|
99
|
+
const planAgent = {
|
|
100
|
+
id: 1,
|
|
101
|
+
role: 'plan',
|
|
102
|
+
handler: new ClaudeHandler({
|
|
103
|
+
workingDirectory: workingDir,
|
|
104
|
+
permissionMode: 'plan',
|
|
105
|
+
agentRole: 'plan',
|
|
106
|
+
mcpServers: Object.keys(mcpServers).length > 0 ? mcpServers : undefined,
|
|
107
|
+
}),
|
|
108
|
+
busy: false,
|
|
109
|
+
currentTask: null,
|
|
110
|
+
context: [],
|
|
111
|
+
};
|
|
112
|
+
// Execute Agent - Full access
|
|
113
|
+
const executeAgent = {
|
|
114
|
+
id: 2,
|
|
115
|
+
role: 'execute',
|
|
116
|
+
handler: new ClaudeHandler({
|
|
117
|
+
workingDirectory: workingDir,
|
|
118
|
+
permissionMode: 'default',
|
|
119
|
+
agentRole: 'execute',
|
|
120
|
+
mcpServers: Object.keys(mcpServers).length > 0 ? mcpServers : undefined,
|
|
121
|
+
}),
|
|
122
|
+
busy: false,
|
|
123
|
+
currentTask: null,
|
|
124
|
+
context: [],
|
|
125
|
+
};
|
|
126
|
+
const agentPool = [planAgent, executeAgent];
|
|
127
|
+
// Smart routing
|
|
128
|
+
function routeTask(task) {
|
|
129
|
+
const taskLower = task.toLowerCase();
|
|
130
|
+
const executeKeywords = [
|
|
131
|
+
'create', 'make', 'build', 'implement', 'add', 'write',
|
|
132
|
+
'fix', 'update', 'change', 'modify', 'edit', 'refactor',
|
|
133
|
+
'delete', 'remove', 'run', 'execute', 'install', 'deploy',
|
|
134
|
+
'commit', 'push', 'test', 'debug', 'start', 'stop',
|
|
135
|
+
];
|
|
136
|
+
for (const keyword of executeKeywords) {
|
|
137
|
+
if (taskLower.includes(keyword)) {
|
|
138
|
+
if (executeAgent.busy && !planAgent.busy) {
|
|
139
|
+
return planAgent;
|
|
140
|
+
}
|
|
141
|
+
return executeAgent;
|
|
142
|
+
}
|
|
143
|
+
}
|
|
144
|
+
return planAgent.busy ? executeAgent : planAgent;
|
|
145
|
+
}
|
|
146
|
+
// ============================================================
|
|
147
|
+
// Create Access Token for Agent
|
|
148
|
+
// ============================================================
|
|
149
|
+
console.log('š Creating access token...');
|
|
150
|
+
const token = new AccessToken(apiKey, apiSecret, {
|
|
151
|
+
identity: 'osborn-agent',
|
|
152
|
+
name: 'Osborn AI',
|
|
153
|
+
metadata: JSON.stringify({ type: 'agent', version: '0.1.5' }),
|
|
68
154
|
});
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
73
|
-
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
//
|
|
78
|
-
|
|
79
|
-
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
155
|
+
token.addGrant({
|
|
156
|
+
roomJoin: true,
|
|
157
|
+
room: roomName,
|
|
158
|
+
canPublish: true,
|
|
159
|
+
canSubscribe: true,
|
|
160
|
+
canPublishData: true,
|
|
161
|
+
});
|
|
162
|
+
const jwt = await token.toJwt();
|
|
163
|
+
// ============================================================
|
|
164
|
+
// Connect to Room Directly
|
|
165
|
+
// ============================================================
|
|
166
|
+
console.log('š” Connecting to LiveKit...');
|
|
167
|
+
const room = new Room();
|
|
168
|
+
// Track state
|
|
169
|
+
let currentSession = null;
|
|
170
|
+
let currentProvider = defaultProvider;
|
|
171
|
+
let currentCodingAgent = 'claude';
|
|
172
|
+
let codexHandler = null;
|
|
173
|
+
let localParticipant = null;
|
|
174
|
+
let agentState = 'initializing';
|
|
175
|
+
// Speech queue
|
|
176
|
+
const speechQueue = [];
|
|
177
|
+
let isSpeaking = false;
|
|
178
|
+
// Helper to send data to frontend
|
|
179
|
+
async function sendToFrontend(data) {
|
|
180
|
+
if (!localParticipant) {
|
|
181
|
+
console.log('ā ļø sendToFrontend: no localParticipant!');
|
|
182
|
+
return;
|
|
183
|
+
}
|
|
84
184
|
try {
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
185
|
+
const encoder = new TextEncoder();
|
|
186
|
+
const payload = encoder.encode(JSON.stringify(data));
|
|
187
|
+
await localParticipant.publishData(payload, {
|
|
188
|
+
reliable: true,
|
|
189
|
+
topic: 'osborn-updates',
|
|
190
|
+
});
|
|
191
|
+
console.log(`š¤ Sent to frontend: ${data.type}`);
|
|
90
192
|
}
|
|
91
193
|
catch (err) {
|
|
92
|
-
console.
|
|
194
|
+
console.error('ā sendToFrontend error:', err);
|
|
93
195
|
}
|
|
94
|
-
currentSession = null;
|
|
95
|
-
}
|
|
96
|
-
}
|
|
97
|
-
// Helper to send data to frontend
|
|
98
|
-
async function sendToFrontend(data) {
|
|
99
|
-
if (!jobContext)
|
|
100
|
-
return;
|
|
101
|
-
try {
|
|
102
|
-
const encoder = new TextEncoder();
|
|
103
|
-
const payload = encoder.encode(JSON.stringify(data));
|
|
104
|
-
await jobContext.room.localParticipant?.publishData(payload, {
|
|
105
|
-
reliable: true,
|
|
106
|
-
topic: 'osborn-updates',
|
|
107
|
-
});
|
|
108
|
-
}
|
|
109
|
-
catch (err) {
|
|
110
|
-
// Ignore send errors
|
|
111
196
|
}
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
parameters: z.object({
|
|
123
|
-
task: z.string().describe('The coding task to execute'),
|
|
124
|
-
}),
|
|
125
|
-
execute: async ({ task }) => {
|
|
126
|
-
const agentName = currentCodingAgent === 'claude' ? 'Claude Code' : 'OpenAI Codex';
|
|
127
|
-
console.log(`\nšØ ${agentName}: "${task}"`);
|
|
128
|
-
await sendToFrontend({ type: 'system', text: `Working on: ${task}` });
|
|
129
|
-
try {
|
|
130
|
-
let result;
|
|
131
|
-
if (currentCodingAgent === 'codex' && codexHandler) {
|
|
132
|
-
result = await codexHandler.run(task);
|
|
197
|
+
// Process speech queue
|
|
198
|
+
async function processSpeechQueue() {
|
|
199
|
+
if (isSpeaking || speechQueue.length === 0 || !currentSession)
|
|
200
|
+
return;
|
|
201
|
+
if (agentState !== 'listening')
|
|
202
|
+
return;
|
|
203
|
+
if (currentProvider === 'gemini') {
|
|
204
|
+
// Gemini doesn't support generateReply
|
|
205
|
+
while (speechQueue.length > 0) {
|
|
206
|
+
console.log(`š [Would say] ${speechQueue.shift()}`);
|
|
133
207
|
}
|
|
134
|
-
|
|
135
|
-
result = await claude.run(task);
|
|
136
|
-
}
|
|
137
|
-
console.log(`✅ Done: ${result.length} chars`);
|
|
138
|
-
await sendToFrontend({ type: 'assistant_response', text: result });
|
|
139
|
-
return result;
|
|
208
|
+
return;
|
|
140
209
|
}
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
|
|
210
|
+
isSpeaking = true;
|
|
211
|
+
const message = speechQueue.shift();
|
|
212
|
+
try {
|
|
213
|
+
await Promise.race([
|
|
214
|
+
currentSession.generateReply({ userInput: message }),
|
|
215
|
+
new Promise((_, reject) => setTimeout(() => reject(new Error('timeout')), 5000))
|
|
216
|
+
]);
|
|
144
217
|
}
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
response: z.enum(['allow', 'deny', 'always_allow']).describe('The user response: "allow" for one-time approval, "deny" to reject, "always_allow" to permanently allow this tool type'),
|
|
154
|
-
}),
|
|
155
|
-
execute: async ({ response }) => {
|
|
156
|
-
if (!claude.hasPendingPermission()) {
|
|
157
|
-
return 'No pending permission request.';
|
|
218
|
+
catch {
|
|
219
|
+
// Ignore speech errors
|
|
220
|
+
}
|
|
221
|
+
finally {
|
|
222
|
+
isSpeaking = false;
|
|
223
|
+
if (speechQueue.length > 0) {
|
|
224
|
+
setTimeout(processSpeechQueue, 500);
|
|
225
|
+
}
|
|
158
226
|
}
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
227
|
+
}
|
|
228
|
+
// Setup agent event handlers
|
|
229
|
+
agentPool.forEach(slot => {
|
|
230
|
+
slot.handler.on('permission_request', (req) => {
|
|
231
|
+
console.log(`\nā ļø [${slot.role}] PERMISSION: ${req.toolName}`);
|
|
232
|
+
sendToFrontend({
|
|
233
|
+
type: 'permission_request',
|
|
234
|
+
toolName: req.toolName,
|
|
235
|
+
description: req.description,
|
|
236
|
+
agentId: slot.id,
|
|
237
|
+
});
|
|
238
|
+
speechQueue.push(`[Tell user] I need permission to ${req.description}. Say yes, no, or always allow.`);
|
|
239
|
+
processSpeechQueue();
|
|
165
240
|
});
|
|
166
|
-
|
|
167
|
-
|
|
168
|
-
});
|
|
169
|
-
|
|
170
|
-
|
|
171
|
-
|
|
241
|
+
slot.handler.on('tool_use', (tool) => {
|
|
242
|
+
console.log(`š§ [${slot.role}] Using: ${tool.name}`);
|
|
243
|
+
});
|
|
244
|
+
slot.handler.on('error', (err) => {
|
|
245
|
+
console.error(`ā [${slot.role}] Error:`, err);
|
|
246
|
+
});
|
|
247
|
+
});
|
|
248
|
+
// Define tools for voice LLM
|
|
249
|
+
const runCodeTool = llm.tool({
|
|
250
|
+
description: `Execute ANY coding task by delegating to Claude agents. YOU MUST USE THIS for:
|
|
251
|
+
- Reading files ("read package.json", "show me the code")
|
|
252
|
+
- Writing/editing files ("fix this bug", "add a function")
|
|
253
|
+
- Running commands ("run npm test", "git status")
|
|
254
|
+
- Searching code ("find where X is defined")
|
|
255
|
+
- Explaining code ("what does this function do")
|
|
172
256
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
257
|
+
You DON'T need permission to use this - it routes to the right agent automatically.
|
|
258
|
+
Plan Agent = reading/research. Execute Agent = writing (will ask user for permission).`,
|
|
259
|
+
parameters: z.object({
|
|
260
|
+
task: z.string().describe('The coding task to execute'),
|
|
261
|
+
}),
|
|
262
|
+
execute: async ({ task }) => {
|
|
263
|
+
const slot = routeTask(task);
|
|
264
|
+
console.log(`\nšØ [${slot.role}] Task: "${task}"`);
|
|
265
|
+
await sendToFrontend({ type: 'system', text: `${slot.role} agent: ${task}` });
|
|
266
|
+
slot.busy = true;
|
|
267
|
+
slot.currentTask = task;
|
|
268
|
+
sharedContext.currentFocus = task.substring(0, 50);
|
|
269
|
+
try {
|
|
270
|
+
let result;
|
|
271
|
+
if (currentCodingAgent === 'codex' && codexHandler) {
|
|
272
|
+
result = await codexHandler.run(task);
|
|
273
|
+
}
|
|
274
|
+
else {
|
|
275
|
+
const contextPrefix = slot.context.length > 0
|
|
276
|
+
? `Context: ${slot.context.slice(-3).join(' | ')}\n\nTask: `
|
|
277
|
+
: '';
|
|
278
|
+
result = await slot.handler.run(contextPrefix + task);
|
|
279
|
+
}
|
|
280
|
+
slot.context.push(`${task.substring(0, 50)} ā Done`);
|
|
281
|
+
if (slot.context.length > 10)
|
|
282
|
+
slot.context.shift();
|
|
283
|
+
// Update shared context
|
|
284
|
+
sharedContext.addAction(`${slot.role}: ${task.substring(0, 30)}`);
|
|
285
|
+
// Extract file references from result
|
|
286
|
+
const fileMatches = result.match(/(?:\/[\w\-\.\/]+|src\/[\w\-\.\/]+|\.\/[\w\-\.\/]+)/g);
|
|
287
|
+
if (fileMatches) {
|
|
288
|
+
fileMatches.slice(0, 3).forEach(f => sharedContext.addFile(f));
|
|
289
|
+
}
|
|
290
|
+
console.log(`✅ [${slot.role}] Done`);
|
|
291
|
+
await sendToFrontend({ type: 'assistant_response', text: result });
|
|
292
|
+
// Return a concise summary for the voice LLM
|
|
293
|
+
const summary = result.length > 500
|
|
294
|
+
? result.substring(0, 500) + '... [truncated for voice]'
|
|
295
|
+
: result;
|
|
296
|
+
return summary;
|
|
297
|
+
}
|
|
298
|
+
catch (err) {
|
|
299
|
+
return `Error: ${err.message}`;
|
|
300
|
+
}
|
|
301
|
+
finally {
|
|
302
|
+
slot.busy = false;
|
|
303
|
+
slot.currentTask = null;
|
|
304
|
+
}
|
|
305
|
+
},
|
|
306
|
+
});
|
|
307
|
+
const respondPermissionTool = llm.tool({
|
|
308
|
+
description: `Respond to a permission request. Call after hearing user's response.`,
|
|
309
|
+
parameters: z.object({
|
|
310
|
+
response: z.enum(['allow', 'deny', 'always_allow']),
|
|
311
|
+
}),
|
|
312
|
+
execute: async ({ response }) => {
|
|
313
|
+
const slot = agentPool.find(s => s.handler.hasPendingPermission());
|
|
314
|
+
if (!slot)
|
|
315
|
+
return 'No pending permission.';
|
|
316
|
+
const pending = slot.handler.getPendingPermission();
|
|
317
|
+
slot.handler.respondToPermission(response);
|
|
318
|
+
await sendToFrontend({ type: 'permission_response', response, toolName: pending?.toolName });
|
|
319
|
+
return `Permission ${response} for ${pending?.toolName || 'tool'}.`;
|
|
320
|
+
},
|
|
321
|
+
});
|
|
322
|
+
// Shared context that both voice and coding agents contribute to
|
|
323
|
+
const sharedContext = {
|
|
324
|
+
recentActions: [],
|
|
325
|
+
discoveredFiles: [],
|
|
326
|
+
currentFocus: null,
|
|
327
|
+
addAction(action) {
|
|
328
|
+
this.recentActions.push(action);
|
|
329
|
+
if (this.recentActions.length > 5)
|
|
330
|
+
this.recentActions.shift();
|
|
331
|
+
},
|
|
332
|
+
addFile(file) {
|
|
333
|
+
if (!this.discoveredFiles.includes(file)) {
|
|
334
|
+
this.discoveredFiles.push(file);
|
|
335
|
+
if (this.discoveredFiles.length > 10)
|
|
336
|
+
this.discoveredFiles.shift();
|
|
337
|
+
}
|
|
338
|
+
},
|
|
339
|
+
getContextSummary() {
|
|
340
|
+
const parts = [];
|
|
341
|
+
if (this.currentFocus)
|
|
342
|
+
parts.push(`Focus: ${this.currentFocus}`);
|
|
343
|
+
if (this.recentActions.length)
|
|
344
|
+
parts.push(`Recent: ${this.recentActions.slice(-3).join(', ')}`);
|
|
345
|
+
if (this.discoveredFiles.length)
|
|
346
|
+
parts.push(`Files: ${this.discoveredFiles.slice(-5).join(', ')}`);
|
|
347
|
+
return parts.join(' | ');
|
|
348
|
+
}
|
|
349
|
+
};
|
|
350
|
+
// Dynamic instructions with working directory context
|
|
351
|
+
const getInstructions = () => `You are Osborn, a voice AI coding assistant.
|
|
181
352
|
|
|
182
|
-
|
|
183
|
-
- File operations (read, write, create, edit, list, find)
|
|
184
|
-
- Code tasks (fix, refactor, explain, review, debug)
|
|
185
|
-
- Terminal commands (run, install, test, build, git)
|
|
186
|
-
- Web searches (look up documentation, APIs, errors)
|
|
187
|
-
- Project analysis (understand codebase, find patterns)
|
|
353
|
+
WORKING DIRECTORY: ${workingDir}
|
|
188
354
|
|
|
189
|
-
|
|
190
|
-
- Greetings and small talk
|
|
191
|
-
- General knowledge questions
|
|
192
|
-
- Clarifying what the user wants
|
|
355
|
+
STYLE: Keep responses SHORT (under 70 words). Sound natural. Say "Got it" when given a task.
|
|
193
356
|
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
357
|
+
CAPABILITIES (via run_code tool):
|
|
358
|
+
- Read/write/edit files, search codebase
|
|
359
|
+
- Run terminal commands (npm, git, etc)
|
|
360
|
+
- Fix bugs, refactor, explain code
|
|
361
|
+
- Search web/docs for solutions
|
|
198
362
|
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
// Package v1.0.31 uses google.beta.realtime (not google.realtime yet)
|
|
219
|
-
const model = new google.beta.realtime.RealtimeModel({
|
|
220
|
-
// model: 'gemini-2.5-flash-native-audio-preview-12-2025', // From official docs
|
|
221
|
-
model: 'gemini-3.5-flash-latest', // From official docs
|
|
222
|
-
voice: 'Puck',
|
|
223
|
-
instructions: OSBORN_INSTRUCTIONS,
|
|
224
|
-
});
|
|
225
|
-
// console.log('✅ Gemini model created with gemini-2.5-flash-native-audio-preview-12-2025')
|
|
226
|
-
console.log('✅ Gemini model created with gemini-3.5-flash-latest');
|
|
227
|
-
return model;
|
|
228
|
-
}
|
|
229
|
-
else {
|
|
230
|
-
console.log('š± Using OpenAI Realtime API');
|
|
231
|
-
console.log('š OPENAI_API_KEY:', process.env.OPENAI_API_KEY ? 'set' : 'NOT SET');
|
|
232
|
-
const model = new openai.realtime.RealtimeModel({
|
|
233
|
-
voice: 'alloy',
|
|
234
|
-
});
|
|
235
|
-
console.log('✅ OpenAI model created');
|
|
236
|
-
return model;
|
|
237
|
-
}
|
|
238
|
-
}
|
|
239
|
-
// Helper to get provider from participant metadata
|
|
240
|
-
function getProviderFromParticipant(metadata) {
|
|
241
|
-
if (!metadata)
|
|
242
|
-
return DEFAULT_PROVIDER;
|
|
243
|
-
try {
|
|
244
|
-
const data = JSON.parse(metadata);
|
|
245
|
-
return data.provider || DEFAULT_PROVIDER;
|
|
246
|
-
}
|
|
247
|
-
catch {
|
|
248
|
-
return DEFAULT_PROVIDER;
|
|
249
|
-
}
|
|
250
|
-
}
|
|
251
|
-
// Helper to get coding agent from participant metadata
|
|
252
|
-
function getCodingAgentFromParticipant(metadata) {
|
|
253
|
-
if (!metadata)
|
|
254
|
-
return 'claude';
|
|
255
|
-
try {
|
|
256
|
-
const data = JSON.parse(metadata);
|
|
257
|
-
return data.codingAgent || 'claude';
|
|
363
|
+
TWO AGENTS AVAILABLE:
|
|
364
|
+
- Plan Agent: Research, explore, read files (fast, no permissions needed)
|
|
365
|
+
- Execute Agent: Write code, make changes (asks permission for writes)
|
|
366
|
+
|
|
367
|
+
${sharedContext.getContextSummary() ? `CONTEXT: ${sharedContext.getContextSummary()}` : ''}
|
|
368
|
+
|
|
369
|
+
PERMISSIONS: When you hear permission request, tell user what needs permission and ask "allow, deny, or always allow?" Then call respond_permission.`;
|
|
370
|
+
const INSTRUCTIONS = getInstructions();
|
|
371
|
+
// Voice agent class
|
|
372
|
+
class OsbornVoiceAgent extends voice.Agent {
|
|
373
|
+
constructor() {
|
|
374
|
+
super({
|
|
375
|
+
instructions: INSTRUCTIONS,
|
|
376
|
+
tools: {
|
|
377
|
+
run_code: runCodeTool,
|
|
378
|
+
respond_permission: respondPermissionTool,
|
|
379
|
+
},
|
|
380
|
+
});
|
|
381
|
+
}
|
|
258
382
|
}
|
|
259
|
-
|
|
260
|
-
|
|
383
|
+
// Create voice model
|
|
384
|
+
function createModel(provider) {
|
|
385
|
+
if (provider === 'gemini') {
|
|
386
|
+
console.log('š± Using Gemini Live API');
|
|
387
|
+
return new google.beta.realtime.RealtimeModel({
|
|
388
|
+
model: 'gemini-2.5-flash-native-audio-preview-12-2025',
|
|
389
|
+
voice: 'Puck',
|
|
390
|
+
instructions: INSTRUCTIONS,
|
|
391
|
+
});
|
|
392
|
+
}
|
|
393
|
+
else {
|
|
394
|
+
console.log('š± Using OpenAI Realtime API');
|
|
395
|
+
return new openai.realtime.RealtimeModel({
|
|
396
|
+
voice: 'alloy',
|
|
397
|
+
});
|
|
398
|
+
}
|
|
261
399
|
}
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
265
|
-
|
|
266
|
-
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
271
|
-
|
|
400
|
+
// ============================================================
|
|
401
|
+
// Room Event Handlers
|
|
402
|
+
// ============================================================
|
|
403
|
+
room.on(RoomEvent.Connected, () => {
|
|
404
|
+
console.log('✅ Connected to room:', roomName);
|
|
405
|
+
localParticipant = room.localParticipant;
|
|
406
|
+
});
|
|
407
|
+
room.on(RoomEvent.Disconnected, () => {
|
|
408
|
+
console.log('š Disconnected from room');
|
|
409
|
+
currentSession = null;
|
|
410
|
+
});
|
|
411
|
+
room.on(RoomEvent.ParticipantConnected, async (participant) => {
|
|
412
|
+
console.log(`\nš¤ User joined: ${participant.identity}`);
|
|
413
|
+
// Get provider from participant metadata
|
|
414
|
+
let provider = defaultProvider;
|
|
415
|
+
let codingAgent = 'claude';
|
|
416
|
+
if (participant.metadata) {
|
|
417
|
+
try {
|
|
418
|
+
const meta = JSON.parse(participant.metadata);
|
|
419
|
+
provider = meta.provider || defaultProvider;
|
|
420
|
+
codingAgent = meta.codingAgent || 'claude';
|
|
272
421
|
}
|
|
273
|
-
|
|
422
|
+
catch { }
|
|
274
423
|
}
|
|
275
|
-
|
|
276
|
-
// Claude verbose logging
|
|
277
|
-
claude.on('tool_use', (tool) => {
|
|
278
|
-
console.log(`\nš§ Claude Tool Started: ${tool.name}`);
|
|
279
|
-
if (tool.input) {
|
|
280
|
-
const inputStr = JSON.stringify(tool.input).substring(0, 200);
|
|
281
|
-
console.log(` Input: ${inputStr}${inputStr.length >= 200 ? '...' : ''}`);
|
|
282
|
-
}
|
|
283
|
-
});
|
|
284
|
-
claude.on('tool_result', (result) => {
|
|
285
|
-
console.log(`✅ Claude Tool Completed: ${result.name || 'unknown'}`);
|
|
286
|
-
});
|
|
287
|
-
claude.on('text', (text) => {
|
|
288
|
-
if (text.length > 0) {
|
|
289
|
-
console.log(`š¬ Claude says: ${text.substring(0, 100)}${text.length > 100 ? '...' : ''}`);
|
|
290
|
-
}
|
|
291
|
-
});
|
|
292
|
-
claude.on('error', (err) => {
|
|
293
|
-
console.error(`ā Claude Error:`, err);
|
|
294
|
-
});
|
|
295
|
-
// Connect FIRST so we can wait for participants
|
|
296
|
-
console.log('š” Connecting to room...');
|
|
297
|
-
await ctx.connect();
|
|
298
|
-
console.log('✅ Connected to room');
|
|
299
|
-
// Wait for a participant to join using LiveKit's built-in method
|
|
300
|
-
console.log('ā³ Waiting for participant...');
|
|
301
|
-
const participant = await ctx.waitForParticipant();
|
|
302
|
-
console.log('š¤ Participant joined:', participant.identity);
|
|
303
|
-
console.log('š Participant metadata:', participant.metadata);
|
|
304
|
-
const provider = getProviderFromParticipant(participant.metadata);
|
|
305
|
-
const codingAgent = getCodingAgentFromParticipant(participant.metadata);
|
|
306
|
-
console.log(`šÆ User selected provider: ${provider}`);
|
|
307
|
-
console.log(`š§ User selected coding agent: ${codingAgent}`);
|
|
308
|
-
// Set the current coding agent and initialize if needed
|
|
424
|
+
currentProvider = provider;
|
|
309
425
|
currentCodingAgent = codingAgent;
|
|
426
|
+
console.log(`šÆ Provider: ${provider}, Agent: ${codingAgent}`);
|
|
310
427
|
if (codingAgent === 'codex') {
|
|
311
|
-
|
|
312
|
-
codexHandler = new CodexHandler({
|
|
313
|
-
workingDirectory: workingDir,
|
|
314
|
-
});
|
|
315
|
-
console.log('✅ Codex handler ready');
|
|
428
|
+
codexHandler = new CodexHandler({ workingDirectory: workingDir });
|
|
316
429
|
}
|
|
317
|
-
// Create
|
|
430
|
+
// Create voice session
|
|
318
431
|
const model = createModel(provider);
|
|
319
|
-
|
|
320
|
-
await cleanupSession();
|
|
321
|
-
const session = new voice.AgentSession({
|
|
322
|
-
llm: model,
|
|
323
|
-
});
|
|
432
|
+
const session = new voice.AgentSession({ llm: model });
|
|
324
433
|
currentSession = session;
|
|
325
|
-
//
|
|
326
|
-
// Using string literals as AgentSessionEventTypes is not directly exported
|
|
327
|
-
session.on('user_state_changed', (ev) => {
|
|
328
|
-
console.log(`š¤ User state: ${ev.oldState} ā ${ev.newState}`);
|
|
329
|
-
});
|
|
434
|
+
// Session events
|
|
330
435
|
session.on('agent_state_changed', (ev) => {
|
|
331
|
-
|
|
436
|
+
agentState = ev.newState;
|
|
437
|
+
console.log(`š¤ State: ${ev.newState}`);
|
|
438
|
+
if (ev.newState === 'listening' && speechQueue.length > 0) {
|
|
439
|
+
processSpeechQueue();
|
|
440
|
+
}
|
|
332
441
|
});
|
|
333
442
|
session.on('user_input_transcribed', (ev) => {
|
|
334
|
-
console.log(`š
|
|
443
|
+
console.log(`š User: "${ev.transcript}"`);
|
|
444
|
+
});
|
|
445
|
+
session.on('user_state_changed', (ev) => {
|
|
446
|
+
console.log(`š¤ User state: ${ev.oldState} ā ${ev.newState}`);
|
|
335
447
|
});
|
|
336
448
|
session.on('error', (ev) => {
|
|
337
449
|
console.error('ā Session error:', ev.error);
|
|
@@ -339,77 +451,108 @@ export default defineAgent({
|
|
|
339
451
|
session.on('close', (ev) => {
|
|
340
452
|
console.log('🚪 Session closed:', ev.reason);
|
|
341
453
|
});
|
|
342
|
-
|
|
343
|
-
|
|
344
|
-
|
|
345
|
-
|
|
346
|
-
|
|
347
|
-
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
351
|
-
|
|
352
|
-
|
|
454
|
+
// Start voice session
|
|
455
|
+
console.log('🎬 Starting voice session...');
|
|
456
|
+
const agent = new OsbornVoiceAgent();
|
|
457
|
+
try {
|
|
458
|
+
await session.start({
|
|
459
|
+
agent,
|
|
460
|
+
room,
|
|
461
|
+
});
|
|
462
|
+
console.log('✅ Voice session started!');
|
|
463
|
+
console.log('š¤ Ready - speak to begin!\n');
|
|
464
|
+
// Send ready signal with persistent retry (frontend might not be subscribed yet)
|
|
465
|
+
console.log('š Sending agent_ready signal...');
|
|
466
|
+
let readySent = false;
|
|
467
|
+
const sendReady = async () => {
|
|
468
|
+
if (readySent)
|
|
469
|
+
return;
|
|
470
|
+
await sendToFrontend({ type: 'agent_ready', provider, codingAgent });
|
|
471
|
+
};
|
|
472
|
+
// Keep sending every 2 seconds for 20 seconds total
|
|
473
|
+
const readyInterval = setInterval(sendReady, 2000);
|
|
474
|
+
await sendReady();
|
|
475
|
+
setTimeout(() => {
|
|
476
|
+
clearInterval(readyInterval);
|
|
477
|
+
console.log('✅ agent_ready retries complete');
|
|
478
|
+
}, 20000);
|
|
479
|
+
// Mark as sent when user first speaks (no need to keep sending)
|
|
480
|
+
session.on('input_speech_started', () => {
|
|
481
|
+
readySent = true;
|
|
482
|
+
clearInterval(readyInterval);
|
|
483
|
+
});
|
|
484
|
+
console.log('✅ agent_ready sent (with retries scheduled)');
|
|
485
|
+
// Greet user (OpenAI only)
|
|
486
|
+
if (provider !== 'gemini') {
|
|
353
487
|
try {
|
|
354
|
-
|
|
355
|
-
|
|
356
|
-
|
|
357
|
-
// Handle permission response from UI
|
|
358
|
-
if (claude.hasPendingPermission()) {
|
|
359
|
-
claude.respondToPermission(data.response);
|
|
360
|
-
console.log(`✅ Permission ${data.response} from UI`);
|
|
361
|
-
}
|
|
362
|
-
}
|
|
363
|
-
else if (data.type === 'user_text') {
|
|
364
|
-
// Handle text input from frontend
|
|
365
|
-
console.log(`š Text input: "${data.content}"`);
|
|
366
|
-
// Inject text into the session as user input
|
|
367
|
-
if (currentSession) {
|
|
368
|
-
try {
|
|
369
|
-
// Interrupt any current speech first
|
|
370
|
-
currentSession.interrupt();
|
|
371
|
-
// Generate a reply to the text input
|
|
372
|
-
await currentSession.generateReply({
|
|
373
|
-
userInput: data.content,
|
|
374
|
-
});
|
|
375
|
-
console.log(`✅ Injected text to session`);
|
|
376
|
-
}
|
|
377
|
-
catch (err) {
|
|
378
|
-
console.error(`ā Failed to inject text:`, err);
|
|
379
|
-
}
|
|
380
|
-
}
|
|
381
|
-
}
|
|
488
|
+
await session.generateReply({
|
|
489
|
+
userInput: '[Greet the user: "Hey, I\'m Osborn. What are you working on?"]'
|
|
490
|
+
});
|
|
382
491
|
}
|
|
383
|
-
catch
|
|
384
|
-
|
|
492
|
+
catch {
|
|
493
|
+
console.log('⚠️ Greeting skipped');
|
|
385
494
|
}
|
|
386
495
|
}
|
|
496
|
+
}
|
|
497
|
+
catch (err) {
|
|
498
|
+
console.error('ā Failed to start session:', err);
|
|
499
|
+
}
|
|
500
|
+
});
|
|
501
|
+
room.on(RoomEvent.ParticipantDisconnected, (participant) => {
|
|
502
|
+
console.log(`š User left: ${participant.identity}`);
|
|
503
|
+
if (currentSession) {
|
|
504
|
+
currentSession.removeAllListeners();
|
|
505
|
+
currentSession = null;
|
|
506
|
+
}
|
|
507
|
+
console.log('⏳ Waiting for new user...\n');
|
|
508
|
+
});
|
|
509
|
+
room.on(RoomEvent.DataReceived, async (payload, participant, kind, topic) => {
|
|
510
|
+
if (topic !== 'user-input')
|
|
511
|
+
return;
|
|
512
|
+
try {
|
|
513
|
+
const data = JSON.parse(new TextDecoder().decode(payload));
|
|
514
|
+
console.log('📨 Data:', data.type);
|
|
515
|
+
if (data.type === 'permission_response') {
|
|
516
|
+
const slot = agentPool.find(s => s.handler.hasPendingPermission());
|
|
517
|
+
if (slot) {
|
|
518
|
+
slot.handler.respondToPermission(data.response);
|
|
519
|
+
console.log(`✅ Permission: ${data.response}`);
|
|
520
|
+
}
|
|
521
|
+
}
|
|
522
|
+
else if (data.type === 'user_text' && currentSession) {
|
|
523
|
+
console.log(`š Text: "${data.content}"`);
|
|
524
|
+
currentSession.interrupt();
|
|
525
|
+
await currentSession.generateReply({ userInput: data.content });
|
|
526
|
+
}
|
|
527
|
+
}
|
|
528
|
+
catch { }
|
|
529
|
+
});
|
|
530
|
+
// ============================================================
|
|
531
|
+
// Connect to Room
|
|
532
|
+
// ============================================================
|
|
533
|
+
try {
|
|
534
|
+
await room.connect(livekitUrl, jwt, {
|
|
535
|
+
autoSubscribe: true,
|
|
536
|
+
dynacast: true,
|
|
387
537
|
});
|
|
388
|
-
//
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
console.log('
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
|
|
398
|
-
|
|
399
|
-
|
|
400
|
-
});
|
|
401
|
-
|
|
402
|
-
|
|
403
|
-
|
|
404
|
-
|
|
405
|
-
|
|
406
|
-
if (cliArgs.roomCode) {
|
|
407
|
-
const targetRoom = `osborn-${cliArgs.roomCode}`;
|
|
408
|
-
console.log(`🎯 Filtering for room: ${targetRoom}`);
|
|
409
|
-
// The agent will be dispatched to rooms matching this pattern
|
|
410
|
-
serverOptions.workerOptions = {
|
|
411
|
-
// Note: Room filtering is handled by LiveKit dispatch
|
|
412
|
-
// For local development, we validate the room in the entry function
|
|
413
|
-
};
|
|
538
|
+
// Set localParticipant immediately after connection
|
|
539
|
+
localParticipant = room.localParticipant;
|
|
540
|
+
console.log('✅ Connected to room:', roomName);
|
|
541
|
+
console.log('\n⏳ Waiting for user to connect...');
|
|
542
|
+
console.log(` Room: ${roomCode}\n`);
|
|
543
|
+
// Warm up agents in background
|
|
544
|
+
console.log('🔥 Warming up agents...');
|
|
545
|
+
Promise.all([
|
|
546
|
+
planAgent.handler.run('ready').then(() => console.log('✅ Plan agent ready')),
|
|
547
|
+
executeAgent.handler.run('ready').then(() => console.log('✅ Execute agent ready')),
|
|
548
|
+
]).catch(() => { });
|
|
549
|
+
// Keep process alive
|
|
550
|
+
await new Promise(() => { });
|
|
551
|
+
}
|
|
552
|
+
catch (err) {
|
|
553
|
+
console.error('ā Failed to connect:', err);
|
|
554
|
+
process.exit(1);
|
|
555
|
+
}
|
|
414
556
|
}
|
|
415
|
-
|
|
557
|
+
// Run
|
|
558
|
+
main().catch(console.error);
|
package/package.json
CHANGED
|
@@ -1,15 +1,16 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "osborn",
|
|
3
|
-
"version": "0.1.
|
|
3
|
+
"version": "0.1.6",
|
|
4
4
|
"description": "Voice AI coding assistant - local agent that connects to Osborn frontend",
|
|
5
5
|
"type": "module",
|
|
6
6
|
"bin": {
|
|
7
7
|
"osborn": "./bin/cli.js"
|
|
8
8
|
},
|
|
9
9
|
"scripts": {
|
|
10
|
-
"dev": "tsx src/index.ts
|
|
11
|
-
"start": "tsx src/index.ts
|
|
12
|
-
"build": "tsc"
|
|
10
|
+
"dev": "tsx src/index.ts",
|
|
11
|
+
"start": "tsx src/index.ts",
|
|
12
|
+
"build": "tsc",
|
|
13
|
+
"room": "tsx src/index.ts --room"
|
|
13
14
|
},
|
|
14
15
|
"keywords": [
|
|
15
16
|
"voice",
|
|
@@ -30,8 +31,10 @@
|
|
|
30
31
|
"@livekit/agents": "^1.0.0",
|
|
31
32
|
"@livekit/agents-plugin-google": "^1.0.0",
|
|
32
33
|
"@livekit/agents-plugin-openai": "^1.0.0",
|
|
34
|
+
"@livekit/rtc-node": "^0.13.22",
|
|
33
35
|
"@openai/codex-sdk": "^0.77.0",
|
|
34
36
|
"dotenv": "^16.4.0",
|
|
37
|
+
"livekit-server-sdk": "^2.15.0",
|
|
35
38
|
"tsx": "^4.0.0",
|
|
36
39
|
"yaml": "^2.3.0",
|
|
37
40
|
"zod": "^3.23.0"
|