@hanzo/dev 1.2.0 → 2.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,242 @@
import { describe, test, expect, beforeEach, afterEach, jest } from '@jest/globals';
import { ConfigurableAgentLoop, LLMProvider } from '../src/lib/agent-loop';
import WebSocket from 'ws';
import * as http from 'http';

// Auto-mock the `ws` module; beforeEach gives the constructor an implementation.
jest.mock('ws');

/**
 * Tests for the browser tooling exposed through ConfigurableAgentLoop.
 *
 * Each test gets a fresh agent loop built with `enableBrowser: true` and a
 * stubbed WebSocket constructor. Private members (`checkBrowserExtension`,
 * `callLLM`, `functionCalling`, `browser*`) are reached through `as any`
 * because the tests replace or invoke them directly.
 */
describe('Browser Integration', () => {
  let agentLoop: ConfigurableAgentLoop;
  // Never assigned by the tests below; afterEach closes it only if it was set.
  let mockWebSocketServer: http.Server;
  let mockWebSocket: any;
  // Snapshot of the global fetch so a stub installed by a test can be undone.
  let originalFetch: typeof global.fetch;

  beforeEach(() => {
    originalFetch = global.fetch;

    // Mock WebSocket connection
    mockWebSocket = {
      on: jest.fn(),
      close: jest.fn(),
      send: jest.fn()
    };

    (WebSocket as jest.MockedClass<typeof WebSocket>).mockImplementation(() => mockWebSocket);

    // Create agent loop with browser enabled
    const provider: LLMProvider = {
      name: 'Test Provider',
      type: 'local',
      model: 'test-model',
      supportsTools: true,
      supportsStreaming: false
    };

    agentLoop = new ConfigurableAgentLoop({
      provider,
      maxIterations: 10,
      enableMCP: false,
      enableBrowser: true,
      enableSwarm: false,
      streamOutput: false,
      confirmActions: false
    });
  });

  afterEach(() => {
    jest.clearAllMocks();

    // clearAllMocks only resets call history; it does not undo a direct
    // assignment to global.fetch, so put the original back explicitly.
    global.fetch = originalFetch;

    if (mockWebSocketServer) {
      mockWebSocketServer.close();
    }
  });

  describe('browser tool registration', () => {
    test('should detect and connect to browser extension', async () => {
      // Simulate successful WebSocket connection: fire the 'open' handler
      // shortly after it is registered.
      mockWebSocket.on.mockImplementation((event: string, handler: () => void) => {
        if (event === 'open') {
          setTimeout(() => handler(), 10);
        }
      });

      // Mock checkBrowserExtension to return true
      (agentLoop as any).checkBrowserExtension = jest.fn().mockResolvedValue(true);

      await agentLoop.initialize();

      // Verify browser tools were registered
      const tools = (agentLoop as any).functionCalling.getAvailableTools();
      const browserTools = tools.filter((t: any) => t.name.startsWith('browser_'));

      expect(browserTools).toHaveLength(4);
      expect(browserTools.map((t: any) => t.name)).toContain('browser_navigate');
      expect(browserTools.map((t: any) => t.name)).toContain('browser_click');
      expect(browserTools.map((t: any) => t.name)).toContain('browser_screenshot');
      expect(browserTools.map((t: any) => t.name)).toContain('browser_fill');
    });

    test('should fall back to Hanzo Browser if extension not available', async () => {
      // Mock extension check to fail
      (agentLoop as any).checkBrowserExtension = jest.fn().mockResolvedValue(false);

      // Mock browser check to succeed (restored in afterEach)
      global.fetch = jest.fn().mockResolvedValue({ ok: true });

      await agentLoop.initialize();

      // Verify browser tools were still registered
      const tools = (agentLoop as any).functionCalling.getAvailableTools();
      const browserTools = tools.filter((t: any) => t.name.startsWith('browser_'));

      expect(browserTools).toHaveLength(4);
    });
  });

  describe('browser actions', () => {
    test('should navigate to URL', async () => {
      const result = await (agentLoop as any).browserNavigate('https://example.com');

      expect(result).toEqual({
        success: true,
        url: 'https://example.com'
      });
    });

    test('should click element', async () => {
      const result = await (agentLoop as any).browserClick('#submit-button');

      expect(result).toEqual({
        success: true,
        selector: '#submit-button'
      });
    });

    test('should take screenshot', async () => {
      const result = await (agentLoop as any).browserScreenshot(true);

      expect(result).toEqual({
        success: true,
        screenshot: 'base64_image_data'
      });
    });

    test('should fill form field', async () => {
      const result = await (agentLoop as any).browserFill('#email', 'test@example.com');

      expect(result).toEqual({
        success: true,
        selector: '#email',
        value: 'test@example.com'
      });
    });
  });

  describe('browser action execution via LLM', () => {
    test('should execute browser navigation through agent loop', async () => {
      // Mock LLM to return browser navigation tool call
      (agentLoop as any).callLLM = jest.fn().mockResolvedValue({
        role: 'assistant',
        content: 'I will navigate to the website.',
        toolCalls: [{
          id: 'call_1',
          name: 'browser_navigate',
          arguments: { url: 'https://example.com' }
        }]
      });

      // Mock tool execution
      (agentLoop as any).functionCalling.callFunctions = jest.fn()
        .mockResolvedValue([{ success: true, url: 'https://example.com' }]);

      await agentLoop.initialize();
      await agentLoop.execute('Navigate to example.com');

      // Verify tool was called with exactly the tool call the LLM produced
      expect((agentLoop as any).functionCalling.callFunctions).toHaveBeenCalledWith([{
        id: 'call_1',
        name: 'browser_navigate',
        arguments: { url: 'https://example.com' }
      }]);
    });

    test('should handle browser action errors', async () => {
      // Mock LLM to return browser action
      (agentLoop as any).callLLM = jest.fn().mockResolvedValue({
        role: 'assistant',
        content: 'I will click the button.',
        toolCalls: [{
          id: 'call_2',
          name: 'browser_click',
          arguments: { selector: '#missing-button' }
        }]
      });

      // Mock tool execution to fail
      (agentLoop as any).functionCalling.callFunctions = jest.fn()
        .mockRejectedValue(new Error('Element not found'));

      await agentLoop.initialize();

      // Execute should handle the error gracefully (the promise must resolve)
      await expect(agentLoop.execute('Click the submit button')).resolves.not.toThrow();
    });
  });

  describe('browser-based evaluation scenarios', () => {
    test('should handle multi-step browser automation', async () => {
      // Scripted LLM turns: three turns carrying tool calls, then a final
      // turn with none.
      const responses = [
        {
          role: 'assistant',
          content: 'I will navigate to the login page.',
          toolCalls: [{
            id: 'nav_1',
            name: 'browser_navigate',
            arguments: { url: 'https://example.com/login' }
          }]
        },
        {
          role: 'assistant',
          content: 'I will fill in the login form.',
          toolCalls: [
            {
              id: 'fill_1',
              name: 'browser_fill',
              arguments: { selector: '#username', value: 'testuser' }
            },
            {
              id: 'fill_2',
              name: 'browser_fill',
              arguments: { selector: '#password', value: 'testpass' }
            }
          ]
        },
        {
          role: 'assistant',
          content: 'I will submit the form.',
          toolCalls: [{
            id: 'click_1',
            name: 'browser_click',
            arguments: { selector: '#submit' }
          }]
        },
        {
          role: 'assistant',
          content: 'Login completed successfully.',
          toolCalls: []
        }
      ];

      // Hand out the scripted responses in order, one per callLLM invocation.
      let callCount = 0;
      (agentLoop as any).callLLM = jest.fn().mockImplementation(() => {
        return Promise.resolve(responses[callCount++]);
      });

      (agentLoop as any).functionCalling.callFunctions = jest.fn()
        .mockResolvedValue([{ success: true }]);

      await agentLoop.initialize();
      await agentLoop.execute('Login to the website with username "testuser"');

      // Verify all browser actions were executed (one batch per tool-calling turn)
      expect((agentLoop as any).functionCalling.callFunctions).toHaveBeenCalledTimes(3);
    });
  });
});
@@ -0,0 +1,305 @@
import { describe, test, expect, beforeEach, jest } from '@jest/globals';
import { CodeActAgent, AgentState } from '../src/lib/code-act-agent';
import { FunctionCallingSystem } from '../src/lib/function-calling';

/**
 * Tests for CodeActAgent: state handling, planning, step execution,
 * parallel-step detection, retries, observations and end-to-end execution.
 *
 * The agent is constructed with a hand-built FunctionCallingSystem stub, so
 * every tool invocation is controlled through `mockFunctionCalling.callFunctions`.
 */
describe('CodeActAgent', () => {
  let agent: CodeActAgent;
  let mockFunctionCalling: jest.Mocked<FunctionCallingSystem>;

  beforeEach(() => {
    // Mock function calling system; rebuilt per test so queued
    // mock*Once values never carry over.
    mockFunctionCalling = {
      registerTool: jest.fn(),
      callFunctions: jest.fn(),
      getAvailableTools: jest.fn().mockReturnValue([
        { name: 'view_file', description: 'View file contents' },
        { name: 'str_replace', description: 'Replace string in file' },
        { name: 'run_command', description: 'Run shell command' }
      ]),
      getAllToolSchemas: jest.fn().mockReturnValue([])
    } as any;

    agent = new CodeActAgent('test-agent', mockFunctionCalling);
  });

  describe('state management', () => {
    test('should initialize with correct default state', () => {
      const state = agent.getState();
      expect(state.currentTask).toBe('');
      expect(state.plan).toEqual([]);
      expect(state.completedSteps).toEqual([]);
      expect(state.currentStep).toBe(0);
      expect(state.errors).toEqual([]);
      expect(state.observations).toEqual([]);
    });

    test('should update state correctly', () => {
      const newState: Partial<AgentState> = {
        currentTask: 'Fix bug in login',
        plan: ['Locate login file', 'Fix validation', 'Test changes'],
        currentStep: 1
      };

      agent.setState(newState);
      const state = agent.getState();

      expect(state.currentTask).toBe('Fix bug in login');
      expect(state.plan).toHaveLength(3);
      expect(state.currentStep).toBe(1);
    });
  });

  describe('planning', () => {
    test('should generate plan for task', async () => {
      const task = 'Add user authentication to the API';

      // No LLM response is stubbed here: the test only requires that
      // planning records the task and yields a non-empty plan.
      await agent.plan(task);

      const state = agent.getState();
      expect(state.currentTask).toBe(task);
      expect(state.plan.length).toBeGreaterThan(0);
    });

    test('should handle planning errors gracefully', async () => {
      const task = 'Invalid task that causes error';

      // Even with errors, planning should not throw
      await expect(agent.plan(task)).resolves.not.toThrow();

      const state = agent.getState();
      expect(state.currentTask).toBe(task);
    });
  });

  describe('task execution', () => {
    test('should execute single step', async () => {
      // Set up agent with a plan
      agent.setState({
        currentTask: 'Fix typo in README',
        plan: ['View README.md', 'Fix typo', 'Verify changes'],
        currentStep: 0
      });

      // Mock function calling for view_file
      mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
        success: true,
        content: '# README\n\nThis is a typpo in the readme.'
      }]);

      const result = await agent.executeStep();

      expect(result.completed).toBe(false);
      expect(result.action).toBe('View README.md');
      expect(mockFunctionCalling.callFunctions).toHaveBeenCalled();
    });

    test('should handle step execution errors', async () => {
      agent.setState({
        currentTask: 'Run failing command',
        plan: ['Execute broken command'],
        currentStep: 0
      });

      // Mock function calling to throw error
      mockFunctionCalling.callFunctions.mockRejectedValueOnce(
        new Error('Command not found')
      );

      const result = await agent.executeStep();

      expect(result.completed).toBe(false);
      expect(result.error).toBe('Command not found');

      const state = agent.getState();
      expect(state.errors).toHaveLength(1);
      expect(state.errors[0]).toContain('Command not found');
    });

    test('should mark task as completed when all steps done', async () => {
      agent.setState({
        currentTask: 'Simple task',
        plan: ['Step 1', 'Step 2'],
        currentStep: 1,
        completedSteps: ['Step 1']
      });

      // Mock successful execution
      mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
        success: true
      }]);

      const result = await agent.executeStep();

      expect(result.completed).toBe(true);
      expect(result.action).toBe('Step 2');

      const state = agent.getState();
      expect(state.completedSteps).toHaveLength(2);
    });
  });

  describe('parallel execution', () => {
    test('should identify parallelizable steps', () => {
      const plan = [
        'Download file A',
        'Download file B',
        'Process file A',
        'Process file B',
        'Merge results'
      ];

      const parallel = agent.identifyParallelSteps(plan);

      // Downloads can be parallel
      expect(parallel[0]).toEqual([0, 1]);
      // Processing depends on downloads
      expect(parallel[1]).toEqual([2]);
      expect(parallel[2]).toEqual([3]);
      // Merge depends on processing
      expect(parallel[3]).toEqual([4]);
    });

    test('should execute parallel steps concurrently', async () => {
      agent.setState({
        currentTask: 'Parallel downloads',
        plan: ['Download file1.txt', 'Download file2.txt', 'Merge files'],
        currentStep: 0
      });

      // Mock both downloads to succeed
      mockFunctionCalling.callFunctions
        .mockResolvedValueOnce([{ success: true, file: 'file1.txt' }])
        .mockResolvedValueOnce([{ success: true, file: 'file2.txt' }]);

      // Execute should handle parallel steps
      const result1 = await agent.executeStep();
      expect(result1.action).toContain('Download');

      // The agent should recognize these can be parallel, so one call may
      // advance past one or both download steps.
      const state = agent.getState();
      expect(state.currentStep).toBeLessThanOrEqual(2);
    });
  });

  describe('self-correction', () => {
    test('should retry failed steps with corrections', async () => {
      agent.setState({
        currentTask: 'Fix syntax error',
        plan: ['Edit file with error'],
        currentStep: 0
      });

      // First attempt fails
      mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
        success: false,
        error: 'Syntax error in edit'
      }]);

      // Agent should detect error and retry
      const result1 = await agent.executeStep();
      expect(result1.error).toBeDefined();

      // Second attempt with correction succeeds
      mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
        success: true
      }]);

      const result2 = await agent.executeStep();
      expect(result2.error).toBeUndefined();
      expect(result2.retryCount).toBeGreaterThan(0);
    });

    test('should give up after max retries', async () => {
      agent.setState({
        currentTask: 'Impossible task',
        plan: ['Do impossible thing'],
        currentStep: 0
      });

      // All attempts fail
      mockFunctionCalling.callFunctions.mockRejectedValue(
        new Error('Cannot do impossible thing')
      );

      // Drive the same step five times; only the final outcome is asserted.
      let lastResult;
      for (let i = 0; i < 5; i++) {
        lastResult = await agent.executeStep();
      }

      expect(lastResult!.error).toBeDefined();
      expect(lastResult!.aborted).toBe(true);
    });
  });

  describe('observation handling', () => {
    test('should collect and store observations', async () => {
      agent.setState({
        currentTask: 'Analyze codebase',
        plan: ['List files', 'Read main file'],
        currentStep: 0
      });

      // Mock file listing
      mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
        success: true,
        output: 'file1.js\nfile2.js\nindex.js'
      }]);

      await agent.executeStep();

      const state = agent.getState();
      expect(state.observations).toHaveLength(1);
      expect(state.observations[0]).toContain('file1.js');
    });

    test('should use observations for context', async () => {
      // Pre-populate observations
      agent.setState({
        currentTask: 'Fix bug',
        plan: ['Find bug location', 'Fix bug'],
        currentStep: 1,
        observations: ['Bug is in auth.js on line 42']
      });

      // The agent should use the observation context
      mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
        success: true,
        result: 'Fixed bug in auth.js'
      }]);

      const result = await agent.executeStep();
      expect(result.completed).toBe(true);
    });
  });

  describe('complete task execution', () => {
    test('should execute entire task from plan to completion', async () => {
      const task = 'Add logging to application';

      // Mock successful execution of all steps
      mockFunctionCalling.callFunctions
        .mockResolvedValueOnce([{ success: true }]) // Install logger
        .mockResolvedValueOnce([{ success: true }]) // Create logger config
        .mockResolvedValueOnce([{ success: true }]) // Add logging statements
        .mockResolvedValueOnce([{ success: true }]); // Test logging

      await agent.plan(task);
      const result = await agent.execute(task);

      expect(result.success).toBe(true);
      expect(result.completedSteps.length).toBeGreaterThan(0);
      expect(result.errors).toHaveLength(0);
    });
  });
});