@hanzo/dev 2.1.1 → 3.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +278 -279
- package/bin/dev.js +413 -0
- package/package.json +32 -61
- package/postinstall.js +513 -0
- package/scripts/preinstall.js +69 -0
- package/scripts/windows-cleanup.ps1 +31 -0
- package/.eslintrc.json +0 -24
- package/dist/cli/dev.js +0 -24746
- package/src/cli/dev.ts +0 -946
- package/src/lib/agent-loop.ts +0 -552
- package/src/lib/benchmark-runner.ts +0 -431
- package/src/lib/code-act-agent.ts +0 -378
- package/src/lib/config.ts +0 -163
- package/src/lib/editor.ts +0 -395
- package/src/lib/function-calling.ts +0 -318
- package/src/lib/mcp-client.ts +0 -259
- package/src/lib/peer-agent-network.ts +0 -584
- package/src/lib/swarm-runner.ts +0 -389
- package/src/lib/unified-workspace.ts +0 -435
- package/test-swarm/file1.js +0 -6
- package/test-swarm/file2.ts +0 -12
- package/test-swarm/file3.py +0 -15
- package/test-swarm/file4.md +0 -13
- package/test-swarm/file5.json +0 -12
- package/test-swarm-demo.sh +0 -22
- package/tests/browser-integration.test.ts +0 -242
- package/tests/code-act-agent.test.ts +0 -305
- package/tests/editor.test.ts +0 -223
- package/tests/fixtures/sample-code.js +0 -13
- package/tests/fixtures/sample-code.py +0 -28
- package/tests/fixtures/sample-code.ts +0 -22
- package/tests/mcp-client.test.ts +0 -238
- package/tests/peer-agent-network.test.ts +0 -340
- package/tests/swarm-runner.test.ts +0 -301
- package/tests/swe-bench.test.ts +0 -357
- package/tsconfig.cli.json +0 -25
- package/tsconfig.json +0 -35
- package/vitest.config.ts +0 -37
|
@@ -1,242 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect, beforeEach, afterEach, jest } from '@jest/globals';
|
|
2
|
-
import { ConfigurableAgentLoop, LLMProvider } from '../src/lib/agent-loop';
|
|
3
|
-
import WebSocket from 'ws';
|
|
4
|
-
import * as http from 'http';
|
|
5
|
-
|
|
6
|
-
// Mock WebSocket
|
|
7
|
-
jest.mock('ws');
|
|
8
|
-
|
|
9
|
-
describe('Browser Integration', () => {
|
|
10
|
-
let agentLoop: ConfigurableAgentLoop;
|
|
11
|
-
let mockWebSocketServer: http.Server;
|
|
12
|
-
let mockWebSocket: any;
|
|
13
|
-
|
|
14
|
-
beforeEach(() => {
|
|
15
|
-
// Mock WebSocket connection
|
|
16
|
-
mockWebSocket = {
|
|
17
|
-
on: jest.fn(),
|
|
18
|
-
close: jest.fn(),
|
|
19
|
-
send: jest.fn()
|
|
20
|
-
};
|
|
21
|
-
|
|
22
|
-
(WebSocket as jest.MockedClass<typeof WebSocket>).mockImplementation(() => mockWebSocket);
|
|
23
|
-
|
|
24
|
-
// Create agent loop with browser enabled
|
|
25
|
-
const provider: LLMProvider = {
|
|
26
|
-
name: 'Test Provider',
|
|
27
|
-
type: 'local',
|
|
28
|
-
model: 'test-model',
|
|
29
|
-
supportsTools: true,
|
|
30
|
-
supportsStreaming: false
|
|
31
|
-
};
|
|
32
|
-
|
|
33
|
-
agentLoop = new ConfigurableAgentLoop({
|
|
34
|
-
provider,
|
|
35
|
-
maxIterations: 10,
|
|
36
|
-
enableMCP: false,
|
|
37
|
-
enableBrowser: true,
|
|
38
|
-
enableSwarm: false,
|
|
39
|
-
streamOutput: false,
|
|
40
|
-
confirmActions: false
|
|
41
|
-
});
|
|
42
|
-
});
|
|
43
|
-
|
|
44
|
-
afterEach(() => {
|
|
45
|
-
jest.clearAllMocks();
|
|
46
|
-
if (mockWebSocketServer) {
|
|
47
|
-
mockWebSocketServer.close();
|
|
48
|
-
}
|
|
49
|
-
});
|
|
50
|
-
|
|
51
|
-
describe('browser tool registration', () => {
|
|
52
|
-
test('should detect and connect to browser extension', async () => {
|
|
53
|
-
// Simulate successful WebSocket connection
|
|
54
|
-
mockWebSocket.on.mockImplementation((event: string, handler: Function) => {
|
|
55
|
-
if (event === 'open') {
|
|
56
|
-
setTimeout(() => handler(), 10);
|
|
57
|
-
}
|
|
58
|
-
});
|
|
59
|
-
|
|
60
|
-
// Mock checkBrowserExtension to return true
|
|
61
|
-
(agentLoop as any).checkBrowserExtension = jest.fn().mockResolvedValue(true);
|
|
62
|
-
|
|
63
|
-
await agentLoop.initialize();
|
|
64
|
-
|
|
65
|
-
// Verify browser tools were registered
|
|
66
|
-
const tools = (agentLoop as any).functionCalling.getAvailableTools();
|
|
67
|
-
const browserTools = tools.filter((t: any) => t.name.startsWith('browser_'));
|
|
68
|
-
|
|
69
|
-
expect(browserTools).toHaveLength(4);
|
|
70
|
-
expect(browserTools.map((t: any) => t.name)).toContain('browser_navigate');
|
|
71
|
-
expect(browserTools.map((t: any) => t.name)).toContain('browser_click');
|
|
72
|
-
expect(browserTools.map((t: any) => t.name)).toContain('browser_screenshot');
|
|
73
|
-
expect(browserTools.map((t: any) => t.name)).toContain('browser_fill');
|
|
74
|
-
});
|
|
75
|
-
|
|
76
|
-
test('should fall back to Hanzo Browser if extension not available', async () => {
|
|
77
|
-
// Mock extension check to fail
|
|
78
|
-
(agentLoop as any).checkBrowserExtension = jest.fn().mockResolvedValue(false);
|
|
79
|
-
|
|
80
|
-
// Mock browser check to succeed
|
|
81
|
-
global.fetch = jest.fn().mockResolvedValue({ ok: true });
|
|
82
|
-
|
|
83
|
-
await agentLoop.initialize();
|
|
84
|
-
|
|
85
|
-
// Verify browser tools were still registered
|
|
86
|
-
const tools = (agentLoop as any).functionCalling.getAvailableTools();
|
|
87
|
-
const browserTools = tools.filter((t: any) => t.name.startsWith('browser_'));
|
|
88
|
-
|
|
89
|
-
expect(browserTools).toHaveLength(4);
|
|
90
|
-
});
|
|
91
|
-
});
|
|
92
|
-
|
|
93
|
-
describe('browser actions', () => {
|
|
94
|
-
test('should navigate to URL', async () => {
|
|
95
|
-
const result = await (agentLoop as any).browserNavigate('https://example.com');
|
|
96
|
-
|
|
97
|
-
expect(result).toEqual({
|
|
98
|
-
success: true,
|
|
99
|
-
url: 'https://example.com'
|
|
100
|
-
});
|
|
101
|
-
});
|
|
102
|
-
|
|
103
|
-
test('should click element', async () => {
|
|
104
|
-
const result = await (agentLoop as any).browserClick('#submit-button');
|
|
105
|
-
|
|
106
|
-
expect(result).toEqual({
|
|
107
|
-
success: true,
|
|
108
|
-
selector: '#submit-button'
|
|
109
|
-
});
|
|
110
|
-
});
|
|
111
|
-
|
|
112
|
-
test('should take screenshot', async () => {
|
|
113
|
-
const result = await (agentLoop as any).browserScreenshot(true);
|
|
114
|
-
|
|
115
|
-
expect(result).toEqual({
|
|
116
|
-
success: true,
|
|
117
|
-
screenshot: 'base64_image_data'
|
|
118
|
-
});
|
|
119
|
-
});
|
|
120
|
-
|
|
121
|
-
test('should fill form field', async () => {
|
|
122
|
-
const result = await (agentLoop as any).browserFill('#email', 'test@example.com');
|
|
123
|
-
|
|
124
|
-
expect(result).toEqual({
|
|
125
|
-
success: true,
|
|
126
|
-
selector: '#email',
|
|
127
|
-
value: 'test@example.com'
|
|
128
|
-
});
|
|
129
|
-
});
|
|
130
|
-
});
|
|
131
|
-
|
|
132
|
-
describe('browser action execution via LLM', () => {
|
|
133
|
-
test('should execute browser navigation through agent loop', async () => {
|
|
134
|
-
// Mock LLM to return browser navigation tool call
|
|
135
|
-
(agentLoop as any).callLLM = jest.fn().mockResolvedValue({
|
|
136
|
-
role: 'assistant',
|
|
137
|
-
content: 'I will navigate to the website.',
|
|
138
|
-
toolCalls: [{
|
|
139
|
-
id: 'call_1',
|
|
140
|
-
name: 'browser_navigate',
|
|
141
|
-
arguments: { url: 'https://example.com' }
|
|
142
|
-
}]
|
|
143
|
-
});
|
|
144
|
-
|
|
145
|
-
// Mock tool execution
|
|
146
|
-
(agentLoop as any).functionCalling.callFunctions = jest.fn()
|
|
147
|
-
.mockResolvedValue([{ success: true, url: 'https://example.com' }]);
|
|
148
|
-
|
|
149
|
-
await agentLoop.initialize();
|
|
150
|
-
await agentLoop.execute('Navigate to example.com');
|
|
151
|
-
|
|
152
|
-
// Verify tool was called
|
|
153
|
-
expect((agentLoop as any).functionCalling.callFunctions).toHaveBeenCalledWith([{
|
|
154
|
-
id: 'call_1',
|
|
155
|
-
name: 'browser_navigate',
|
|
156
|
-
arguments: { url: 'https://example.com' }
|
|
157
|
-
}]);
|
|
158
|
-
});
|
|
159
|
-
|
|
160
|
-
test('should handle browser action errors', async () => {
|
|
161
|
-
// Mock LLM to return browser action
|
|
162
|
-
(agentLoop as any).callLLM = jest.fn().mockResolvedValue({
|
|
163
|
-
role: 'assistant',
|
|
164
|
-
content: 'I will click the button.',
|
|
165
|
-
toolCalls: [{
|
|
166
|
-
id: 'call_2',
|
|
167
|
-
name: 'browser_click',
|
|
168
|
-
arguments: { selector: '#missing-button' }
|
|
169
|
-
}]
|
|
170
|
-
});
|
|
171
|
-
|
|
172
|
-
// Mock tool execution to fail
|
|
173
|
-
(agentLoop as any).functionCalling.callFunctions = jest.fn()
|
|
174
|
-
.mockRejectedValue(new Error('Element not found'));
|
|
175
|
-
|
|
176
|
-
await agentLoop.initialize();
|
|
177
|
-
|
|
178
|
-
// Execute should handle the error gracefully
|
|
179
|
-
await expect(agentLoop.execute('Click the submit button')).resolves.not.toThrow();
|
|
180
|
-
});
|
|
181
|
-
});
|
|
182
|
-
|
|
183
|
-
describe('browser-based evaluation scenarios', () => {
|
|
184
|
-
test('should handle multi-step browser automation', async () => {
|
|
185
|
-
const responses = [
|
|
186
|
-
{
|
|
187
|
-
role: 'assistant',
|
|
188
|
-
content: 'I will navigate to the login page.',
|
|
189
|
-
toolCalls: [{
|
|
190
|
-
id: 'nav_1',
|
|
191
|
-
name: 'browser_navigate',
|
|
192
|
-
arguments: { url: 'https://example.com/login' }
|
|
193
|
-
}]
|
|
194
|
-
},
|
|
195
|
-
{
|
|
196
|
-
role: 'assistant',
|
|
197
|
-
content: 'I will fill in the login form.',
|
|
198
|
-
toolCalls: [
|
|
199
|
-
{
|
|
200
|
-
id: 'fill_1',
|
|
201
|
-
name: 'browser_fill',
|
|
202
|
-
arguments: { selector: '#username', value: 'testuser' }
|
|
203
|
-
},
|
|
204
|
-
{
|
|
205
|
-
id: 'fill_2',
|
|
206
|
-
name: 'browser_fill',
|
|
207
|
-
arguments: { selector: '#password', value: 'testpass' }
|
|
208
|
-
}
|
|
209
|
-
]
|
|
210
|
-
},
|
|
211
|
-
{
|
|
212
|
-
role: 'assistant',
|
|
213
|
-
content: 'I will submit the form.',
|
|
214
|
-
toolCalls: [{
|
|
215
|
-
id: 'click_1',
|
|
216
|
-
name: 'browser_click',
|
|
217
|
-
arguments: { selector: '#submit' }
|
|
218
|
-
}]
|
|
219
|
-
},
|
|
220
|
-
{
|
|
221
|
-
role: 'assistant',
|
|
222
|
-
content: 'Login completed successfully.',
|
|
223
|
-
toolCalls: []
|
|
224
|
-
}
|
|
225
|
-
];
|
|
226
|
-
|
|
227
|
-
let callCount = 0;
|
|
228
|
-
(agentLoop as any).callLLM = jest.fn().mockImplementation(() => {
|
|
229
|
-
return Promise.resolve(responses[callCount++]);
|
|
230
|
-
});
|
|
231
|
-
|
|
232
|
-
(agentLoop as any).functionCalling.callFunctions = jest.fn()
|
|
233
|
-
.mockResolvedValue([{ success: true }]);
|
|
234
|
-
|
|
235
|
-
await agentLoop.initialize();
|
|
236
|
-
await agentLoop.execute('Login to the website with username "testuser"');
|
|
237
|
-
|
|
238
|
-
// Verify all browser actions were executed
|
|
239
|
-
expect((agentLoop as any).functionCalling.callFunctions).toHaveBeenCalledTimes(3);
|
|
240
|
-
});
|
|
241
|
-
});
|
|
242
|
-
});
|
|
@@ -1,305 +0,0 @@
|
|
|
1
|
-
import { describe, test, expect, beforeEach, jest } from '@jest/globals';
|
|
2
|
-
import { CodeActAgent, AgentState } from '../src/lib/code-act-agent';
|
|
3
|
-
import { FunctionCallingSystem } from '../src/lib/function-calling';
|
|
4
|
-
|
|
5
|
-
describe('CodeActAgent', () => {
|
|
6
|
-
let agent: CodeActAgent;
|
|
7
|
-
let mockFunctionCalling: jest.Mocked<FunctionCallingSystem>;
|
|
8
|
-
|
|
9
|
-
beforeEach(() => {
|
|
10
|
-
// Mock function calling system
|
|
11
|
-
mockFunctionCalling = {
|
|
12
|
-
registerTool: jest.fn(),
|
|
13
|
-
callFunctions: jest.fn(),
|
|
14
|
-
getAvailableTools: jest.fn().mockReturnValue([
|
|
15
|
-
{ name: 'view_file', description: 'View file contents' },
|
|
16
|
-
{ name: 'str_replace', description: 'Replace string in file' },
|
|
17
|
-
{ name: 'run_command', description: 'Run shell command' }
|
|
18
|
-
]),
|
|
19
|
-
getAllToolSchemas: jest.fn().mockReturnValue([])
|
|
20
|
-
} as any;
|
|
21
|
-
|
|
22
|
-
agent = new CodeActAgent('test-agent', mockFunctionCalling);
|
|
23
|
-
});
|
|
24
|
-
|
|
25
|
-
describe('state management', () => {
|
|
26
|
-
test('should initialize with correct default state', () => {
|
|
27
|
-
const state = agent.getState();
|
|
28
|
-
expect(state.currentTask).toBe('');
|
|
29
|
-
expect(state.plan).toEqual([]);
|
|
30
|
-
expect(state.completedSteps).toEqual([]);
|
|
31
|
-
expect(state.currentStep).toBe(0);
|
|
32
|
-
expect(state.errors).toEqual([]);
|
|
33
|
-
expect(state.observations).toEqual([]);
|
|
34
|
-
});
|
|
35
|
-
|
|
36
|
-
test('should update state correctly', () => {
|
|
37
|
-
const newState: Partial<AgentState> = {
|
|
38
|
-
currentTask: 'Fix bug in login',
|
|
39
|
-
plan: ['Locate login file', 'Fix validation', 'Test changes'],
|
|
40
|
-
currentStep: 1
|
|
41
|
-
};
|
|
42
|
-
|
|
43
|
-
agent.setState(newState);
|
|
44
|
-
const state = agent.getState();
|
|
45
|
-
|
|
46
|
-
expect(state.currentTask).toBe('Fix bug in login');
|
|
47
|
-
expect(state.plan).toHaveLength(3);
|
|
48
|
-
expect(state.currentStep).toBe(1);
|
|
49
|
-
});
|
|
50
|
-
});
|
|
51
|
-
|
|
52
|
-
describe('planning', () => {
|
|
53
|
-
test('should generate plan for task', async () => {
|
|
54
|
-
const task = 'Add user authentication to the API';
|
|
55
|
-
|
|
56
|
-
// Mock LLM response for planning
|
|
57
|
-
const mockPlan = [
|
|
58
|
-
'Analyze current API structure',
|
|
59
|
-
'Install authentication dependencies',
|
|
60
|
-
'Create auth middleware',
|
|
61
|
-
'Add login/logout endpoints',
|
|
62
|
-
'Update existing endpoints with auth checks',
|
|
63
|
-
'Write tests for authentication'
|
|
64
|
-
];
|
|
65
|
-
|
|
66
|
-
// The agent should generate a plan based on the task
|
|
67
|
-
await agent.plan(task);
|
|
68
|
-
|
|
69
|
-
const state = agent.getState();
|
|
70
|
-
expect(state.currentTask).toBe(task);
|
|
71
|
-
expect(state.plan.length).toBeGreaterThan(0);
|
|
72
|
-
});
|
|
73
|
-
|
|
74
|
-
test('should handle planning errors gracefully', async () => {
|
|
75
|
-
const task = 'Invalid task that causes error';
|
|
76
|
-
|
|
77
|
-
// Even with errors, planning should not throw
|
|
78
|
-
await expect(agent.plan(task)).resolves.not.toThrow();
|
|
79
|
-
|
|
80
|
-
const state = agent.getState();
|
|
81
|
-
expect(state.currentTask).toBe(task);
|
|
82
|
-
});
|
|
83
|
-
});
|
|
84
|
-
|
|
85
|
-
describe('task execution', () => {
|
|
86
|
-
test('should execute single step', async () => {
|
|
87
|
-
// Set up agent with a plan
|
|
88
|
-
agent.setState({
|
|
89
|
-
currentTask: 'Fix typo in README',
|
|
90
|
-
plan: ['View README.md', 'Fix typo', 'Verify changes'],
|
|
91
|
-
currentStep: 0
|
|
92
|
-
});
|
|
93
|
-
|
|
94
|
-
// Mock function calling for view_file
|
|
95
|
-
mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
|
|
96
|
-
success: true,
|
|
97
|
-
content: '# README\n\nThis is a typpo in the readme.'
|
|
98
|
-
}]);
|
|
99
|
-
|
|
100
|
-
const result = await agent.executeStep();
|
|
101
|
-
|
|
102
|
-
expect(result.completed).toBe(false);
|
|
103
|
-
expect(result.action).toBe('View README.md');
|
|
104
|
-
expect(mockFunctionCalling.callFunctions).toHaveBeenCalled();
|
|
105
|
-
});
|
|
106
|
-
|
|
107
|
-
test('should handle step execution errors', async () => {
|
|
108
|
-
agent.setState({
|
|
109
|
-
currentTask: 'Run failing command',
|
|
110
|
-
plan: ['Execute broken command'],
|
|
111
|
-
currentStep: 0
|
|
112
|
-
});
|
|
113
|
-
|
|
114
|
-
// Mock function calling to throw error
|
|
115
|
-
mockFunctionCalling.callFunctions.mockRejectedValueOnce(
|
|
116
|
-
new Error('Command not found')
|
|
117
|
-
);
|
|
118
|
-
|
|
119
|
-
const result = await agent.executeStep();
|
|
120
|
-
|
|
121
|
-
expect(result.completed).toBe(false);
|
|
122
|
-
expect(result.error).toBe('Command not found');
|
|
123
|
-
|
|
124
|
-
const state = agent.getState();
|
|
125
|
-
expect(state.errors).toHaveLength(1);
|
|
126
|
-
expect(state.errors[0]).toContain('Command not found');
|
|
127
|
-
});
|
|
128
|
-
|
|
129
|
-
test('should mark task as completed when all steps done', async () => {
|
|
130
|
-
agent.setState({
|
|
131
|
-
currentTask: 'Simple task',
|
|
132
|
-
plan: ['Step 1', 'Step 2'],
|
|
133
|
-
currentStep: 1,
|
|
134
|
-
completedSteps: ['Step 1']
|
|
135
|
-
});
|
|
136
|
-
|
|
137
|
-
// Mock successful execution
|
|
138
|
-
mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
|
|
139
|
-
success: true
|
|
140
|
-
}]);
|
|
141
|
-
|
|
142
|
-
const result = await agent.executeStep();
|
|
143
|
-
|
|
144
|
-
expect(result.completed).toBe(true);
|
|
145
|
-
expect(result.action).toBe('Step 2');
|
|
146
|
-
|
|
147
|
-
const state = agent.getState();
|
|
148
|
-
expect(state.completedSteps).toHaveLength(2);
|
|
149
|
-
});
|
|
150
|
-
});
|
|
151
|
-
|
|
152
|
-
describe('parallel execution', () => {
|
|
153
|
-
test('should identify parallelizable steps', () => {
|
|
154
|
-
const plan = [
|
|
155
|
-
'Download file A',
|
|
156
|
-
'Download file B',
|
|
157
|
-
'Process file A',
|
|
158
|
-
'Process file B',
|
|
159
|
-
'Merge results'
|
|
160
|
-
];
|
|
161
|
-
|
|
162
|
-
const parallel = agent.identifyParallelSteps(plan);
|
|
163
|
-
|
|
164
|
-
// Downloads can be parallel
|
|
165
|
-
expect(parallel[0]).toEqual([0, 1]);
|
|
166
|
-
// Processing depends on downloads
|
|
167
|
-
expect(parallel[1]).toEqual([2]);
|
|
168
|
-
expect(parallel[2]).toEqual([3]);
|
|
169
|
-
// Merge depends on processing
|
|
170
|
-
expect(parallel[3]).toEqual([4]);
|
|
171
|
-
});
|
|
172
|
-
|
|
173
|
-
test('should execute parallel steps concurrently', async () => {
|
|
174
|
-
agent.setState({
|
|
175
|
-
currentTask: 'Parallel downloads',
|
|
176
|
-
plan: ['Download file1.txt', 'Download file2.txt', 'Merge files'],
|
|
177
|
-
currentStep: 0
|
|
178
|
-
});
|
|
179
|
-
|
|
180
|
-
// Mock both downloads to succeed
|
|
181
|
-
mockFunctionCalling.callFunctions
|
|
182
|
-
.mockResolvedValueOnce([{ success: true, file: 'file1.txt' }])
|
|
183
|
-
.mockResolvedValueOnce([{ success: true, file: 'file2.txt' }]);
|
|
184
|
-
|
|
185
|
-
// Execute should handle parallel steps
|
|
186
|
-
const result1 = await agent.executeStep();
|
|
187
|
-
expect(result1.action).toContain('Download');
|
|
188
|
-
|
|
189
|
-
// The agent should recognize these can be parallel
|
|
190
|
-
const state = agent.getState();
|
|
191
|
-
expect(state.currentStep).toBeLessThanOrEqual(2);
|
|
192
|
-
});
|
|
193
|
-
});
|
|
194
|
-
|
|
195
|
-
describe('self-correction', () => {
|
|
196
|
-
test('should retry failed steps with corrections', async () => {
|
|
197
|
-
agent.setState({
|
|
198
|
-
currentTask: 'Fix syntax error',
|
|
199
|
-
plan: ['Edit file with error'],
|
|
200
|
-
currentStep: 0
|
|
201
|
-
});
|
|
202
|
-
|
|
203
|
-
// First attempt fails
|
|
204
|
-
mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
|
|
205
|
-
success: false,
|
|
206
|
-
error: 'Syntax error in edit'
|
|
207
|
-
}]);
|
|
208
|
-
|
|
209
|
-
// Agent should detect error and retry
|
|
210
|
-
const result1 = await agent.executeStep();
|
|
211
|
-
expect(result1.error).toBeDefined();
|
|
212
|
-
|
|
213
|
-
// Second attempt with correction succeeds
|
|
214
|
-
mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
|
|
215
|
-
success: true
|
|
216
|
-
}]);
|
|
217
|
-
|
|
218
|
-
const result2 = await agent.executeStep();
|
|
219
|
-
expect(result2.error).toBeUndefined();
|
|
220
|
-
expect(result2.retryCount).toBeGreaterThan(0);
|
|
221
|
-
});
|
|
222
|
-
|
|
223
|
-
test('should give up after max retries', async () => {
|
|
224
|
-
agent.setState({
|
|
225
|
-
currentTask: 'Impossible task',
|
|
226
|
-
plan: ['Do impossible thing'],
|
|
227
|
-
currentStep: 0
|
|
228
|
-
});
|
|
229
|
-
|
|
230
|
-
// All attempts fail
|
|
231
|
-
mockFunctionCalling.callFunctions.mockRejectedValue(
|
|
232
|
-
new Error('Cannot do impossible thing')
|
|
233
|
-
);
|
|
234
|
-
|
|
235
|
-
let lastResult;
|
|
236
|
-
for (let i = 0; i < 5; i++) {
|
|
237
|
-
lastResult = await agent.executeStep();
|
|
238
|
-
}
|
|
239
|
-
|
|
240
|
-
expect(lastResult!.error).toBeDefined();
|
|
241
|
-
expect(lastResult!.aborted).toBe(true);
|
|
242
|
-
});
|
|
243
|
-
});
|
|
244
|
-
|
|
245
|
-
describe('observation handling', () => {
|
|
246
|
-
test('should collect and store observations', async () => {
|
|
247
|
-
agent.setState({
|
|
248
|
-
currentTask: 'Analyze codebase',
|
|
249
|
-
plan: ['List files', 'Read main file'],
|
|
250
|
-
currentStep: 0
|
|
251
|
-
});
|
|
252
|
-
|
|
253
|
-
// Mock file listing
|
|
254
|
-
mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
|
|
255
|
-
success: true,
|
|
256
|
-
output: 'file1.js\nfile2.js\nindex.js'
|
|
257
|
-
}]);
|
|
258
|
-
|
|
259
|
-
await agent.executeStep();
|
|
260
|
-
|
|
261
|
-
const state = agent.getState();
|
|
262
|
-
expect(state.observations).toHaveLength(1);
|
|
263
|
-
expect(state.observations[0]).toContain('file1.js');
|
|
264
|
-
});
|
|
265
|
-
|
|
266
|
-
test('should use observations for context', async () => {
|
|
267
|
-
// Pre-populate observations
|
|
268
|
-
agent.setState({
|
|
269
|
-
currentTask: 'Fix bug',
|
|
270
|
-
plan: ['Find bug location', 'Fix bug'],
|
|
271
|
-
currentStep: 1,
|
|
272
|
-
observations: ['Bug is in auth.js on line 42']
|
|
273
|
-
});
|
|
274
|
-
|
|
275
|
-
// The agent should use the observation context
|
|
276
|
-
mockFunctionCalling.callFunctions.mockResolvedValueOnce([{
|
|
277
|
-
success: true,
|
|
278
|
-
result: 'Fixed bug in auth.js'
|
|
279
|
-
}]);
|
|
280
|
-
|
|
281
|
-
const result = await agent.executeStep();
|
|
282
|
-
expect(result.completed).toBe(true);
|
|
283
|
-
});
|
|
284
|
-
});
|
|
285
|
-
|
|
286
|
-
describe('complete task execution', () => {
|
|
287
|
-
test('should execute entire task from plan to completion', async () => {
|
|
288
|
-
const task = 'Add logging to application';
|
|
289
|
-
|
|
290
|
-
// Mock successful execution of all steps
|
|
291
|
-
mockFunctionCalling.callFunctions
|
|
292
|
-
.mockResolvedValueOnce([{ success: true }]) // Install logger
|
|
293
|
-
.mockResolvedValueOnce([{ success: true }]) // Create logger config
|
|
294
|
-
.mockResolvedValueOnce([{ success: true }]) // Add logging statements
|
|
295
|
-
.mockResolvedValueOnce([{ success: true }]); // Test logging
|
|
296
|
-
|
|
297
|
-
await agent.plan(task);
|
|
298
|
-
const result = await agent.execute(task);
|
|
299
|
-
|
|
300
|
-
expect(result.success).toBe(true);
|
|
301
|
-
expect(result.completedSteps.length).toBeGreaterThan(0);
|
|
302
|
-
expect(result.errors).toHaveLength(0);
|
|
303
|
-
});
|
|
304
|
-
});
|
|
305
|
-
});
|