@hanzo/dev 1.2.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/.eslintrc.js +25 -0
- package/dist/cli/dev.js +8202 -553
- package/jest.config.js +30 -0
- package/package.json +13 -1
- package/src/cli/dev.ts +456 -106
- package/src/lib/agent-loop.ts +552 -0
- package/src/lib/code-act-agent.ts +378 -0
- package/src/lib/config.ts +163 -0
- package/src/lib/editor.ts +368 -0
- package/src/lib/function-calling.ts +318 -0
- package/src/lib/mcp-client.ts +259 -0
- package/src/lib/peer-agent-network.ts +584 -0
- package/src/lib/unified-workspace.ts +435 -0
- package/tests/browser-integration.test.ts +242 -0
- package/tests/code-act-agent.test.ts +305 -0
- package/tests/editor.test.ts +223 -0
- package/tests/mcp-client.test.ts +238 -0
- package/tests/peer-agent-network.test.ts +340 -0
- package/tests/setup.ts +25 -0
- package/tests/swe-bench.test.ts +357 -0
- package/tsconfig.json +13 -15
|
@@ -0,0 +1,435 @@
|
|
|
1
|
+
import { EventEmitter } from 'events';
|
|
2
|
+
import { spawn, ChildProcess } from 'child_process';
|
|
3
|
+
import * as fs from 'fs';
|
|
4
|
+
import * as path from 'path';
|
|
5
|
+
import chalk from 'chalk';
|
|
6
|
+
import { FileEditor } from './editor';
|
|
7
|
+
import { CodeActAgent } from './code-act-agent';
|
|
8
|
+
import { FunctionCallingSystem } from './function-calling';
|
|
9
|
+
import { MCPClient, MCPSession } from './mcp-client';
|
|
10
|
+
|
|
11
|
+
/**
 * A single tab in the workspace.
 *
 * The workspace stores panes in a map keyed by `id`, but every lookup in this
 * file is done by `type`.
 */
export interface WorkspacePane {
  /** Map key for the pane; built as `<type>-<timestamp>` when the pane is created. */
  id: string;
  /** The role this pane plays in the workspace. */
  type:
    | 'shell'
    | 'editor'
    | 'browser'
    | 'planner'
    | 'output';
  /** Label rendered in the tab bar. */
  title: string;
  /** Text currently held by the pane; may be absent. */
  content?: string;
  /** Whether this pane is the one currently selected. */
  active: boolean;
}
|
|
18
|
+
|
|
19
|
+
/**
 * Book-keeping for one shell session owned by the workspace.
 */
export interface ShellSession {
  /** Key under which the session is stored. */
  id: string;
  /** Child process spawned for the session; killed when the workspace is cleaned up. */
  process: ChildProcess;
  /** Directory passed as `cwd` for commands run in this session. */
  cwd: string;
  /** Every command submitted to the session, oldest first. */
  history: string[];
  /** Output buffer for the session (initialised to an empty string). */
  output: string;
}
|
|
26
|
+
|
|
27
|
+
/**
 * In-memory model of the multi-pane CLI workspace (shell, editor, browser,
 * planner and output panes) plus the operations that feed those panes.
 *
 * Events emitted:
 * - `pane-changed` (type) after the active pane changes;
 * - `pane-updated` (type, content) after a pane's content is replaced or appended to;
 * - `pane-title-updated` (type, title) after a pane is retitled.
 */
export class UnifiedWorkspace extends EventEmitter {
  /** Maximum number of response characters shown in the browser pane. */
  private static readonly BROWSER_PREVIEW_CHARS = 500;

  /** Number of trailing content lines rendered for the active pane. */
  private static readonly MAX_VISIBLE_LINES = 20;

  /**
   * Keyword-selected plan templates, checked in order. Each step is paired
   * with a flag saying whether it may run in parallel.
   */
  private static readonly PLAN_TEMPLATES: ReadonlyArray<{
    keyword: string;
    steps: ReadonlyArray<readonly [string, boolean]>;
  }> = [
    {
      keyword: 'debug',
      steps: [
        ['Reproduce the issue', false],
        ['Analyze error logs', false],
        ['Identify root cause', false],
        ['Implement fix', false],
        ['Test the fix', false]
      ]
    },
    {
      keyword: 'feature',
      steps: [
        ['Analyze requirements', false],
        ['Design implementation', false],
        ['Write tests', true],
        ['Implement feature', true],
        ['Run tests', false],
        ['Update documentation', true]
      ]
    }
  ];

  /** Plan used when no template keyword matches. */
  private static readonly DEFAULT_PLAN: ReadonlyArray<readonly [string, boolean]> = [
    ['Analyze task', false],
    ['Execute task', false],
    ['Verify results', false]
  ];

  private panes: Map<string, WorkspacePane> = new Map();
  private shellSessions: Map<string, ShellSession> = new Map();
  private editor: FileEditor;
  private agent: CodeActAgent;
  private functionCalling: FunctionCallingSystem;
  private mcpClient: MCPClient;
  private activePane: string = '';
  private browserUrl: string = '';

  constructor() {
    super();
    this.editor = new FileEditor();
    this.agent = new CodeActAgent();
    this.functionCalling = new FunctionCallingSystem();
    this.mcpClient = new MCPClient();

    this.initializeDefaultPanes();
  }

  /** Creates one pane of each type and selects the shell pane. */
  private initializeDefaultPanes(): void {
    this.createPane('shell', 'Shell', '');
    this.createPane('editor', 'Editor', 'No file open');
    this.createPane('browser', 'Browser', 'Browser: Ready');
    this.createPane('planner', 'Planner', 'Task planner ready');
    this.createPane('output', 'Output', '');

    this.setActivePane('shell');
  }

  /** Registers a new, inactive pane under a `<type>-<timestamp>` id. */
  private createPane(type: WorkspacePane['type'], title: string, content: string): void {
    const id = `${type}-${Date.now()}`;
    this.panes.set(id, { id, type, title, content, active: false });
  }

  /**
   * Makes the pane of the given type the active one and emits `pane-changed`.
   *
   * If no pane of that type exists (the value can come straight from user
   * input), the call is a no-op: the current selection is kept and no event
   * is emitted. Previously such a call deactivated every pane while leaving
   * `activePane` pointing at the old one.
   */
  setActivePane(type: WorkspacePane['type']): void {
    const target = this.getPane(type);
    if (!target) return;

    for (const pane of this.panes.values()) {
      pane.active = pane === target;
    }
    this.activePane = target.id;
    this.emit('pane-changed', type);
  }

  // Shell operations

  /**
   * Runs `command` through the `run_command` tool, using the session's
   * tracked working directory, and mirrors the command and its
   * stdout/stderr into the shell pane. Failures are written to the shell
   * pane rather than thrown. Blank commands are ignored.
   */
  async executeShellCommand(command: string): Promise<void> {
    if (!this.getPane('shell')) return;
    if (command.trim() === '') return;

    const session = this.getOrCreateShellSession();
    session.history.push(command);
    this.appendToPane('shell', `\n$ ${command}\n`);

    try {
      const result = await this.functionCalling.callFunction({
        id: Date.now().toString(),
        name: 'run_command',
        arguments: { command, cwd: session.cwd }
      });

      if (result.result?.stdout) {
        this.appendToPane('shell', result.result.stdout);
      }
      if (result.result?.stderr) {
        this.appendToPane('shell', chalk.red(result.result.stderr));
      }

      this.trackDirectoryChange(session, command);
    } catch (error) {
      this.appendToPane('shell', chalk.red(`Error: ${error}`));
    }
  }

  /**
   * Updates `session.cwd` after a `cd <dir>` command.
   *
   * The directory is only adopted when it resolves to an existing directory,
   * so a failed `cd`, a compound command such as `cd foo && ls`, or a typo
   * cannot leave the session pointing at a path that does not exist.
   * One pair of surrounding quotes is stripped from the argument.
   */
  private trackDirectoryChange(session: ShellSession, command: string): void {
    if (!command.startsWith('cd ')) return;

    const rawTarget = command.substring(3).trim().replace(/^(['"])(.*)\1$/, '$2');
    const resolved = path.resolve(session.cwd, rawTarget);
    try {
      if (fs.statSync(resolved).isDirectory()) {
        session.cwd = resolved;
      }
    } catch {
      // Target does not exist or is not accessible: keep the current directory.
    }
  }

  /**
   * Returns the single `main` shell session, creating it on first use.
   *
   * NOTE(review): the spawned `bash` process is stored on the session and
   * killed in `cleanup()`, but nothing in this class writes to or reads from
   * it; commands run through the `run_command` tool instead.
   */
  private getOrCreateShellSession(): ShellSession {
    const sessionId = 'main';
    let session = this.shellSessions.get(sessionId);
    if (!session) {
      const cwd = process.cwd();
      const child = spawn('bash', [], { cwd });
      // Without a listener, a failed spawn (e.g. no `bash` on PATH) would
      // surface as an unhandled 'error' event and terminate the process.
      child.on('error', (error) => {
        this.logOutput(chalk.red(`Shell process error: ${error.message}`));
      });

      session = { id: sessionId, process: child, cwd, history: [], output: '' };
      this.shellSessions.set(sessionId, session);
    }
    return session;
  }

  // Editor operations

  /**
   * Loads `filePath` through the editor's `view` command. On success the
   * editor pane shows the content, is retitled with the file's base name and
   * becomes active; on failure the reason is written to the output pane.
   */
  async openFile(filePath: string): Promise<void> {
    const result = await this.editor.execute({
      command: 'view',
      path: filePath
    });

    if (result.success) {
      this.updatePane('editor', result.content || '');
      this.updatePaneTitle('editor', `Editor - ${path.basename(filePath)}`);
      this.setActivePane('editor');
    } else {
      this.logOutput(chalk.red(`Failed to open file: ${result.message}`));
    }
  }

  /**
   * Writes `content` to `filePath` and reports success in the output pane.
   * The returned promise rejects if the write fails.
   */
  async saveFile(filePath: string, content: string): Promise<void> {
    await fs.promises.writeFile(filePath, content);
    this.logOutput(chalk.green(`✓ Saved ${filePath}`));
  }

  // Browser operations

  /**
   * Records `url` as the current browser location, fetches it and shows the
   * beginning of the response body in the browser pane. Fetch failures are
   * shown in the browser pane rather than thrown.
   */
  async navigateBrowser(url: string): Promise<void> {
    this.browserUrl = url;
    this.updatePane('browser', `Browser: ${url}`);
    this.logOutput(`Navigated to ${url}`);

    // No headless browser is involved: the page is fetched as plain text.
    try {
      const response = await fetch(url);
      const text = await response.text();
      const limit = UnifiedWorkspace.BROWSER_PREVIEW_CHARS;
      // Only mark the preview as truncated when something was actually cut off.
      const preview = text.length > limit ? `${text.substring(0, limit)}...` : text;
      this.updatePane('browser', `URL: ${url}\n\n${preview}`);
    } catch (error) {
      this.updatePane('browser', `Failed to load ${url}: ${error}`);
    }
  }

  // Planner operations

  /**
   * Generates a plan for `description`, shows it in the planner pane and
   * makes that pane active. The rendered text starts with a `Task:` line,
   * which `executePlan()` later reads back.
   */
  async planTask(description: string): Promise<void> {
    this.updatePane('planner', `Planning: ${description}\n\nGenerating execution plan...`);
    this.setActivePane('planner');

    const plan = await this.generatePlan(description);

    const renderedSteps = plan.steps
      .map((step, i) => {
        const marker = plan.parallelizable[i] ? ' [can run in parallel]' : '';
        return `${i + 1}. ${step}${marker}\n`;
      })
      .join('');

    this.updatePane('planner', `Task: ${description}\n\nExecution Plan:\n${renderedSteps}`);
  }

  /**
   * Picks a canned plan from keywords in `description`: "debug" first, then
   * "feature", otherwise a generic three-step plan. Matching ignores case.
   *
   * @returns parallel arrays: `steps[i]` may run in parallel when
   *          `parallelizable[i]` is true.
   */
  private async generatePlan(description: string): Promise<{
    steps: string[];
    parallelizable: boolean[];
  }> {
    const haystack = description.toLowerCase();
    const match = UnifiedWorkspace.PLAN_TEMPLATES.find(t => haystack.includes(t.keyword));
    const chosen = match ? match.steps : UnifiedWorkspace.DEFAULT_PLAN;

    return {
      steps: chosen.map(([step]) => step),
      parallelizable: chosen.map(([, parallel]) => parallel)
    };
  }

  /**
   * Reads the task back from the planner pane's `Task:` line and hands it to
   * the agent. Does nothing when the planner pane holds no plan; an empty
   * task is reported in the output pane instead of being executed.
   */
  async executePlan(): Promise<void> {
    const plannerPane = this.getPane('planner');
    if (!plannerPane || !plannerPane.content) return;

    const taskLine = plannerPane.content.split('\n').find(l => l.startsWith('Task:'));
    if (!taskLine) return;

    const task = taskLine.substring('Task:'.length).trim();
    if (!task) {
      this.logOutput(chalk.red('No task to execute'));
      return;
    }

    this.appendToPane('output', chalk.cyan(`\nExecuting task: ${task}\n`));
    await this.agent.executeTask(task);
  }

  // Pane management

  /** Returns the first pane of the given type, if any. */
  private getPane(type: WorkspacePane['type']): WorkspacePane | undefined {
    for (const pane of this.panes.values()) {
      if (pane.type === type) return pane;
    }
    return undefined;
  }

  /** Replaces a pane's content and emits `pane-updated`. */
  private updatePane(type: WorkspacePane['type'], content: string): void {
    const pane = this.getPane(type);
    if (pane) {
      pane.content = content;
      this.emit('pane-updated', type, content);
    }
  }

  /** Appends to a pane's content and emits `pane-updated` with the full content. */
  private appendToPane(type: WorkspacePane['type'], content: string): void {
    const pane = this.getPane(type);
    if (pane) {
      pane.content = (pane.content || '') + content;
      this.emit('pane-updated', type, pane.content);
    }
  }

  /** Appends one line to the output pane, terminating it so entries do not run together. */
  private logOutput(message: string): void {
    this.appendToPane('output', `${message}\n`);
  }

  /** Changes a pane's title and emits `pane-title-updated`. */
  private updatePaneTitle(type: WorkspacePane['type'], title: string): void {
    const pane = this.getPane(type);
    if (pane) {
      pane.title = title;
      this.emit('pane-title-updated', type, title);
    }
  }

  /**
   * Clears the terminal and prints the header, the tab bar and the last
   * lines of the active pane (simplified rendering for the CLI).
   */
  displayWorkspace(): void {
    const top = '╔══════════════════════════════════════════════════════════════╗';
    const bottom = '╚══════════════════════════════════════════════════════════════╝';
    const title = '🚀 Hanzo Dev Workspace';
    // Centre the title between the side borders. `title.length` counts the
    // rocket as two UTF-16 units, which matches the two columns terminals
    // typically give it.
    const slack = Math.max(0, top.length - 2 - title.length);
    const leftPad = Math.floor(slack / 2);
    const titleRow = `║${' '.repeat(leftPad)}${title}${' '.repeat(slack - leftPad)}║`;

    console.clear();
    console.log(chalk.bold.cyan(top));
    console.log(chalk.bold.cyan(titleRow));
    console.log(chalk.bold.cyan(bottom));
    console.log();

    // Tab bar: the active pane is highlighted.
    const tabs: string[] = [];
    for (const pane of this.panes.values()) {
      const label = `[${pane.title}]`;
      tabs.push(pane.id === this.activePane ? chalk.bold.yellow(label) : chalk.gray(label));
    }
    console.log(tabs.join(' '));
    console.log(chalk.gray('─'.repeat(64)));

    // Body: only the tail of the active pane's content is shown.
    const current = this.panes.get(this.activePane);
    if (current?.content) {
      const tail = current.content.split('\n').slice(-UnifiedWorkspace.MAX_VISIBLE_LINES);
      console.log(tail.join('\n'));
    }

    console.log(chalk.gray('─'.repeat(64)));
  }

  /**
   * Kills every shell session's process and disconnects every MCP session.
   *
   * Killed sessions are forgotten so a later command starts a fresh one, and
   * a failing disconnect is reported without preventing the remaining
   * sessions from being disconnected.
   */
  async cleanup(): Promise<void> {
    for (const session of this.shellSessions.values()) {
      session.process.kill();
    }
    this.shellSessions.clear();

    for (const session of this.mcpClient.getAllSessions()) {
      try {
        await this.mcpClient.disconnect(session.id);
      } catch (error) {
        console.error(chalk.red(`Failed to disconnect MCP session ${session.id}: ${error}`));
      }
    }
  }
}
|
|
341
|
+
|
|
342
|
+
// Interactive workspace session
|
|
343
|
+
/**
 * Interactive read-eval loop around a {@link UnifiedWorkspace}: reads commands
 * from stdin, dispatches them to the workspace and redraws it after every
 * pane update.
 */
export class WorkspaceSession {
  private workspace: UnifiedWorkspace;
  private running: boolean = true;

  constructor() {
    this.workspace = new UnifiedWorkspace();
  }

  /**
   * Draws the workspace and starts prompting for commands.
   *
   * The returned promise resolves as soon as the first prompt has been
   * issued; the session itself keeps running through readline callbacks
   * until `exit`/`quit` is entered or stdin is closed.
   */
  async start(): Promise<void> {
    console.log(chalk.bold.cyan('\n🎯 Starting Unified Workspace...\n'));

    // Redraw on every pane update for as long as the session is running.
    const redraw = (): void => {
      if (this.running) {
        this.workspace.displayWorkspace();
      }
    };
    this.workspace.on('pane-updated', redraw);

    this.workspace.displayWorkspace();

    // Typed view of the module so `rl` and the question callback are not `any`.
    const readline = require('readline') as typeof import('readline');
    const rl = readline.createInterface({
      input: process.stdin,
      output: process.stdout
    });

    // Pane names accepted by `switch`; anything else is rejected up front.
    const paneTypes: ReadonlyArray<WorkspacePane['type']> = [
      'shell',
      'editor',
      'browser',
      'planner',
      'output'
    ];
    const isPaneType = (value: string): value is WorkspacePane['type'] =>
      (paneTypes as ReadonlyArray<string>).includes(value);

    // Runs at most once, whichever of `exit`/`quit` or stdin closing happens first.
    const shutdown = async (): Promise<void> => {
      if (!this.running) return;
      this.running = false;
      this.workspace.off('pane-updated', redraw);
      try {
        await this.workspace.cleanup();
      } catch (error) {
        console.log(chalk.red(`Error: ${error}`));
      }
    };

    // Ctrl+D (or any other close of stdin) must still release workspace
    // resources such as the spawned shell process.
    rl.on('close', () => {
      void shutdown();
    });

    console.log(chalk.gray('\nCommands: shell <cmd>, edit <file>, browse <url>, plan <task>, execute, switch <pane>, exit\n'));

    const prompt = (): void => {
      rl.question(chalk.green('workspace> '), async (input: string) => {
        if (!this.running) return;

        // Split at the first run of whitespace so extra spaces between the
        // command and its argument do not end up inside the argument.
        const line = input.trim();
        const gap = line.search(/\s/);
        const cmd = gap === -1 ? line : line.slice(0, gap);
        const arg = gap === -1 ? '' : line.slice(gap + 1).trim();

        try {
          switch (cmd) {
            case '':
              // Blank line: just prompt again.
              break;

            case 'shell':
            case 'sh':
              await this.workspace.executeShellCommand(arg);
              break;

            case 'edit':
            case 'e':
              await this.workspace.openFile(arg);
              break;

            case 'browse':
            case 'b':
              await this.workspace.navigateBrowser(arg);
              break;

            case 'plan':
            case 'p':
              await this.workspace.planTask(arg);
              break;

            case 'execute':
            case 'x':
              await this.workspace.executePlan();
              break;

            case 'switch':
            case 's':
              if (isPaneType(arg)) {
                this.workspace.setActivePane(arg);
                this.workspace.displayWorkspace();
              } else {
                console.log(chalk.red(`Unknown pane: ${arg} (expected one of: ${paneTypes.join(', ')})`));
              }
              break;

            case 'exit':
            case 'quit':
              await shutdown();
              rl.close();
              return;

            default:
              console.log(chalk.red(`Unknown command: ${cmd}`));
          }
        } catch (error) {
          console.log(chalk.red(`Error: ${error}`));
        }

        // `running` is false here if stdin was closed while the command ran.
        if (this.running) {
          prompt();
        }
      });
    };

    prompt();
  }
}
|
|
@@ -0,0 +1,242 @@
|
|
|
1
|
+
import { describe, test, expect, beforeEach, afterEach, jest } from '@jest/globals';
|
|
2
|
+
import { ConfigurableAgentLoop, LLMProvider } from '../src/lib/agent-loop';
|
|
3
|
+
import WebSocket from 'ws';
|
|
4
|
+
import * as http from 'http';
|
|
5
|
+
|
|
6
|
+
// Replace the `ws` module with Jest's automatic mock for this test file.
jest.mock('ws');

/**
 * Browser integration tests for ConfigurableAgentLoop: tool registration,
 * the individual browser actions, and browser actions driven by (mocked)
 * LLM tool calls.
 */
describe('Browser Integration', () => {
  // Captured once so a test that stubs `global.fetch` cannot leak the stub
  // into later tests; it is put back in `afterEach`.
  const originalFetch = global.fetch;

  let agentLoop: ConfigurableAgentLoop;
  // Never assigned by the current tests; kept so a test that starts a server
  // gets it closed automatically.
  let mockWebSocketServer: http.Server | undefined;
  let mockWebSocket: any;

  /** Names of all registered tools whose name starts with `browser_`. */
  const registeredBrowserToolNames = (): string[] =>
    (agentLoop as any).functionCalling
      .getAvailableTools()
      .filter((tool: any) => tool.name.startsWith('browser_'))
      .map((tool: any) => tool.name);

  beforeEach(() => {
    // Every `new WebSocket(...)` yields this inert stand-in.
    mockWebSocket = {
      on: jest.fn(),
      close: jest.fn(),
      send: jest.fn()
    };

    (WebSocket as jest.MockedClass<typeof WebSocket>).mockImplementation(() => mockWebSocket);

    // Agent loop with only the browser integration enabled.
    const provider: LLMProvider = {
      name: 'Test Provider',
      type: 'local',
      model: 'test-model',
      supportsTools: true,
      supportsStreaming: false
    };

    agentLoop = new ConfigurableAgentLoop({
      provider,
      maxIterations: 10,
      enableMCP: false,
      enableBrowser: true,
      enableSwarm: false,
      streamOutput: false,
      confirmActions: false
    });
  });

  afterEach(() => {
    jest.clearAllMocks();
    // `clearAllMocks` only resets call data; it does not undo the assignment
    // to `global.fetch`, so restore it explicitly.
    global.fetch = originalFetch;
    if (mockWebSocketServer) {
      mockWebSocketServer.close();
      mockWebSocketServer = undefined;
    }
  });

  describe('browser tool registration', () => {
    test('should detect and connect to browser extension', async () => {
      // Simulate a successful WebSocket connection: fire `open` shortly
      // after the handler is registered.
      mockWebSocket.on.mockImplementation((event: string, handler: () => void) => {
        if (event === 'open') {
          setTimeout(() => handler(), 10);
        }
      });

      // Pretend the extension is reachable.
      (agentLoop as any).checkBrowserExtension = jest.fn().mockResolvedValue(true);

      await agentLoop.initialize();

      const names = registeredBrowserToolNames();

      expect(names).toHaveLength(4);
      expect(names).toContain('browser_navigate');
      expect(names).toContain('browser_click');
      expect(names).toContain('browser_screenshot');
      expect(names).toContain('browser_fill');
    });

    test('should fall back to Hanzo Browser if extension not available', async () => {
      // Extension check fails...
      (agentLoop as any).checkBrowserExtension = jest.fn().mockResolvedValue(false);

      // ...but the browser check (done via fetch) succeeds.
      global.fetch = jest.fn().mockResolvedValue({ ok: true });

      await agentLoop.initialize();

      // Browser tools must still be registered.
      expect(registeredBrowserToolNames()).toHaveLength(4);
    });
  });

  describe('browser actions', () => {
    test('should navigate to URL', async () => {
      const result = await (agentLoop as any).browserNavigate('https://example.com');

      expect(result).toEqual({
        success: true,
        url: 'https://example.com'
      });
    });

    test('should click element', async () => {
      const result = await (agentLoop as any).browserClick('#submit-button');

      expect(result).toEqual({
        success: true,
        selector: '#submit-button'
      });
    });

    test('should take screenshot', async () => {
      const result = await (agentLoop as any).browserScreenshot(true);

      expect(result).toEqual({
        success: true,
        screenshot: 'base64_image_data'
      });
    });

    test('should fill form field', async () => {
      const result = await (agentLoop as any).browserFill('#email', 'test@example.com');

      expect(result).toEqual({
        success: true,
        selector: '#email',
        value: 'test@example.com'
      });
    });
  });

  describe('browser action execution via LLM', () => {
    test('should execute browser navigation through agent loop', async () => {
      const navigateCall = {
        id: 'call_1',
        name: 'browser_navigate',
        arguments: { url: 'https://example.com' }
      };

      // The LLM answers with a single browser navigation tool call.
      (agentLoop as any).callLLM = jest.fn().mockResolvedValue({
        role: 'assistant',
        content: 'I will navigate to the website.',
        toolCalls: [navigateCall]
      });

      // Tool execution succeeds.
      (agentLoop as any).functionCalling.callFunctions = jest.fn()
        .mockResolvedValue([{ success: true, url: 'https://example.com' }]);

      await agentLoop.initialize();
      await agentLoop.execute('Navigate to example.com');

      // The tool call must have been forwarded unchanged.
      expect((agentLoop as any).functionCalling.callFunctions).toHaveBeenCalledWith([{
        id: 'call_1',
        name: 'browser_navigate',
        arguments: { url: 'https://example.com' }
      }]);
    });

    test('should handle browser action errors', async () => {
      // The LLM asks to click an element...
      (agentLoop as any).callLLM = jest.fn().mockResolvedValue({
        role: 'assistant',
        content: 'I will click the button.',
        toolCalls: [{
          id: 'call_2',
          name: 'browser_click',
          arguments: { selector: '#missing-button' }
        }]
      });

      // ...and the tool execution rejects.
      (agentLoop as any).functionCalling.callFunctions = jest.fn()
        .mockRejectedValue(new Error('Element not found'));

      await agentLoop.initialize();

      // execute() must absorb the failure instead of rejecting.
      await expect(agentLoop.execute('Click the submit button')).resolves.not.toThrow();
    });
  });

  describe('browser-based evaluation scenarios', () => {
    test('should handle multi-step browser automation', async () => {
      // Scripted conversation: three turns with tool calls, then a final
      // turn without any.
      const responses = [
        {
          role: 'assistant',
          content: 'I will navigate to the login page.',
          toolCalls: [{
            id: 'nav_1',
            name: 'browser_navigate',
            arguments: { url: 'https://example.com/login' }
          }]
        },
        {
          role: 'assistant',
          content: 'I will fill in the login form.',
          toolCalls: [
            {
              id: 'fill_1',
              name: 'browser_fill',
              arguments: { selector: '#username', value: 'testuser' }
            },
            {
              id: 'fill_2',
              name: 'browser_fill',
              arguments: { selector: '#password', value: 'testpass' }
            }
          ]
        },
        {
          role: 'assistant',
          content: 'I will submit the form.',
          toolCalls: [{
            id: 'click_1',
            name: 'browser_click',
            arguments: { selector: '#submit' }
          }]
        },
        {
          role: 'assistant',
          content: 'Login completed successfully.',
          toolCalls: []
        }
      ];

      let callCount = 0;
      (agentLoop as any).callLLM = jest.fn().mockImplementation(() => {
        // Keep answering with the final (tool-free) turn once the script is
        // exhausted, rather than handing `undefined` to the agent loop.
        const index = Math.min(callCount++, responses.length - 1);
        return Promise.resolve(responses[index]);
      });

      (agentLoop as any).functionCalling.callFunctions = jest.fn()
        .mockResolvedValue([{ success: true }]);

      await agentLoop.initialize();
      await agentLoop.execute('Login to the website with username "testuser"');

      // One callFunctions invocation per turn that carried tool calls.
      expect((agentLoop as any).functionCalling.callFunctions).toHaveBeenCalledTimes(3);
    });
  });
});
|