@browserflow-ai/exploration 0.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (58):
  1. package/dist/adapters/claude-cli.d.ts +57 -0
  2. package/dist/adapters/claude-cli.d.ts.map +1 -0
  3. package/dist/adapters/claude-cli.js +195 -0
  4. package/dist/adapters/claude-cli.js.map +1 -0
  5. package/dist/adapters/claude.d.ts +54 -0
  6. package/dist/adapters/claude.d.ts.map +1 -0
  7. package/dist/adapters/claude.js +160 -0
  8. package/dist/adapters/claude.js.map +1 -0
  9. package/dist/adapters/index.d.ts +6 -0
  10. package/dist/adapters/index.d.ts.map +1 -0
  11. package/dist/adapters/index.js +4 -0
  12. package/dist/adapters/index.js.map +1 -0
  13. package/dist/adapters/types.d.ts +196 -0
  14. package/dist/adapters/types.d.ts.map +1 -0
  15. package/dist/adapters/types.js +3 -0
  16. package/dist/adapters/types.js.map +1 -0
  17. package/dist/agent-browser-session.d.ts +62 -0
  18. package/dist/agent-browser-session.d.ts.map +1 -0
  19. package/dist/agent-browser-session.js +272 -0
  20. package/dist/agent-browser-session.js.map +1 -0
  21. package/dist/evidence.d.ts +111 -0
  22. package/dist/evidence.d.ts.map +1 -0
  23. package/dist/evidence.js +144 -0
  24. package/dist/evidence.js.map +1 -0
  25. package/dist/explorer.d.ts +180 -0
  26. package/dist/explorer.d.ts.map +1 -0
  27. package/dist/explorer.js +393 -0
  28. package/dist/explorer.js.map +1 -0
  29. package/dist/index.d.ts +15 -0
  30. package/dist/index.d.ts.map +1 -0
  31. package/dist/index.js +15 -0
  32. package/dist/index.js.map +1 -0
  33. package/dist/locator-candidates.d.ts +127 -0
  34. package/dist/locator-candidates.d.ts.map +1 -0
  35. package/dist/locator-candidates.js +358 -0
  36. package/dist/locator-candidates.js.map +1 -0
  37. package/dist/step-executor.d.ts +99 -0
  38. package/dist/step-executor.d.ts.map +1 -0
  39. package/dist/step-executor.js +646 -0
  40. package/dist/step-executor.js.map +1 -0
  41. package/package.json +34 -0
  42. package/src/adapters/claude-cli.test.ts +134 -0
  43. package/src/adapters/claude-cli.ts +240 -0
  44. package/src/adapters/claude.test.ts +195 -0
  45. package/src/adapters/claude.ts +190 -0
  46. package/src/adapters/index.ts +21 -0
  47. package/src/adapters/types.ts +207 -0
  48. package/src/agent-browser-session.test.ts +369 -0
  49. package/src/agent-browser-session.ts +349 -0
  50. package/src/evidence.test.ts +239 -0
  51. package/src/evidence.ts +203 -0
  52. package/src/explorer.test.ts +321 -0
  53. package/src/explorer.ts +565 -0
  54. package/src/index.ts +51 -0
  55. package/src/locator-candidates.test.ts +602 -0
  56. package/src/locator-candidates.ts +441 -0
  57. package/src/step-executor.test.ts +696 -0
  58. package/src/step-executor.ts +783 -0
package/package.json ADDED
@@ -0,0 +1,34 @@
1
+ {
2
+ "name": "@browserflow-ai/exploration",
3
+ "version": "0.0.6",
4
+ "description": "AI exploration engine for BrowserFlow - Human-in-the-Loop E2E Test Generation",
5
+ "type": "module",
6
+ "license": "MIT",
7
+ "repository": {
8
+ "type": "git",
9
+ "url": "https://github.com/akatz-ai/browserflow.git",
10
+ "directory": "packages/exploration"
11
+ },
12
+ "main": "./dist/index.js",
13
+ "types": "./dist/index.d.ts",
14
+ "exports": {
15
+ ".": {
16
+ "import": "./dist/index.js",
17
+ "types": "./dist/index.d.ts"
18
+ }
19
+ },
20
+ "scripts": {
21
+ "build": "tsc",
22
+ "test": "bun test",
23
+ "typecheck": "tsc --noEmit"
24
+ },
25
+ "files": [
26
+ "dist",
27
+ "src"
28
+ ],
29
+ "dependencies": {
30
+ "@anthropic-ai/sdk": "^0.71.2",
31
+ "@browserflow-ai/core": "0.0.6",
32
+ "agent-browser": "^0.5.0"
33
+ }
34
+ }
@@ -0,0 +1,134 @@
1
+ // @browserflow-ai/exploration - Claude CLI Adapter tests (with mocks)
2
+
3
+ import { describe, test, expect, mock, beforeEach } from 'bun:test';
4
+ import { ClaudeCliAdapter } from './claude-cli';
5
+ import type { EnhancedSnapshot } from './types';
6
+
7
// These tests run against the real adapter: nothing is mocked. Code paths that
// would need a working `claude` binary are avoided by using an empty snapshot
// or a CLI path that does not exist.
describe('ClaudeCliAdapter', () => {
  let adapter: ClaudeCliAdapter;

  beforeEach(() => {
    adapter = new ClaudeCliAdapter({ model: 'haiku' });
  });

  test('has correct name', () => {
    expect(adapter.name).toBe('claude-cli');
  });

  test('accepts custom configuration', () => {
    const customAdapter = new ClaudeCliAdapter({
      model: 'sonnet',
      timeout: 60000,
      cliPath: '/usr/local/bin/claude',
    });
    expect(customAdapter.name).toBe('claude-cli');
  });

  describe('explore', () => {
    test('returns valid exploration output structure', async () => {
      const result = await adapter.explore({
        spec: { name: 'test-spec', steps: [] },
        specPath: 'specs/test.yaml',
        baseUrl: 'http://localhost:3001',
        outputDir: '/tmp/test',
      });

      expect(result.spec).toBe('test-spec');
      expect(result.specPath).toBe('specs/test.yaml');
      expect(result.explorationId).toMatch(/^exp-\d+-[a-z0-9]+$/);
      expect(result.overallStatus).toBe('completed');
      expect(result.browser).toBe('chromium');
      expect(result.viewport).toEqual({ width: 1280, height: 720 });
    });

    test('uses provided browser and viewport', async () => {
      const result = await adapter.explore({
        spec: { name: 'test-spec', steps: [] },
        specPath: 'specs/test.yaml',
        baseUrl: 'http://localhost:3001',
        outputDir: '/tmp/test',
        browser: 'firefox',
        viewport: { width: 800, height: 600 },
      });

      expect(result.browser).toBe('firefox');
      expect(result.viewport).toEqual({ width: 800, height: 600 });
    });
  });

  describe('retryWithFeedback', () => {
    test('delegates to explore', async () => {
      const previousExploration = {
        spec: 'old-spec',
        specPath: 'specs/old.yaml',
        explorationId: 'exp-123',
        timestamp: new Date().toISOString(),
        durationMs: 1000,
        browser: 'chromium' as const,
        viewport: { width: 1280, height: 720 },
        baseUrl: 'http://localhost:3001',
        steps: [],
        outcomeChecks: [],
        overallStatus: 'completed' as const,
        errors: [],
      };

      const result = await adapter.retryWithFeedback({
        spec: { name: 'retry-spec', steps: [] },
        specPath: 'specs/retry.yaml',
        baseUrl: 'http://localhost:3001',
        outputDir: '/tmp/test',
        previousExploration,
        reviewFeedback: {
          explorationId: 'exp-123',
          reviewer: 'test',
          steps: [],
          verdict: 'rejected',
        },
      });

      expect(result.spec).toBe('retry-spec');
    });
  });

  describe('findElement', () => {
    test('returns NOT_FOUND for an empty snapshot without invoking the CLI', async () => {
      // The CLI path does not exist, so if the adapter tried to spawn it the
      // reasoning would start with "CLI error" instead of the early-return text.
      const offlineAdapter = new ClaudeCliAdapter({
        cliPath: '/nonexistent/claude',
        timeout: 1000,
      });

      const emptySnapshot: EnhancedSnapshot = {
        tree: '(no interactive elements)',
        refs: {},
      };

      const result = await offlineAdapter.findElement('the Add button', emptySnapshot);

      expect(result.ref).toBe('NOT_FOUND');
      expect(result.confidence).toBe(0);
      expect(result.reasoning).toBe('Snapshot contains no interactive elements');
    });

    test('handles CLI errors gracefully', async () => {
      // Create adapter with non-existent CLI path to trigger a spawn error
      const badAdapter = new ClaudeCliAdapter({
        cliPath: '/nonexistent/claude',
        timeout: 1000,
      });

      const snapshot: EnhancedSnapshot = {
        tree: '- button "Add" [ref=e1]',
        refs: { e1: { role: 'button', name: 'Add' } },
      };

      const result = await badAdapter.findElement('the Add button', snapshot);

      expect(result.ref).toBe('NOT_FOUND');
      expect(result.confidence).toBe(0);
      expect(result.reasoning).toContain('CLI error');
    });
  });
});
121
+
122
describe('ClaudeCliAdapter interface compliance', () => {
  test('implements AIAdapter interface', () => {
    const instance = new ClaudeCliAdapter();

    // Required property: the adapter's name
    expect(typeof instance.name).toBe('string');

    // Required methods: each one must be present and callable
    const requiredMethods = ['findElement', 'explore', 'retryWithFeedback'] as const;
    for (const method of requiredMethods) {
      expect(typeof instance[method]).toBe('function');
    }
  });
});
@@ -0,0 +1,240 @@
1
+ // @browserflow-ai/exploration - Claude CLI Adapter
2
+ // Uses the `claude` CLI tool instead of the Anthropic SDK
3
+ // This allows users to leverage their existing Claude Code authentication
4
+
5
+ import { spawn } from 'node:child_process';
6
+
7
// Diagnostics are opt-in: run with BF_DEBUG=1 in the environment to enable them
const DEBUG = process.env.BF_DEBUG === '1';

/**
 * Write a diagnostic line to stderr, tagged with `[claude-cli]`.
 * Does nothing unless the DEBUG flag is on.
 *
 * @param args - Values forwarded as-is to `console.error` after the tag
 */
function debug(...args: unknown[]): void {
  if (!DEBUG) {
    return;
  }
  console.error('[claude-cli]', ...args);
}
15
+ import type {
16
+ AIAdapter,
17
+ ExploreParams,
18
+ ExplorationOutput,
19
+ RetryParams,
20
+ EnhancedSnapshot,
21
+ FindElementResult,
22
+ } from './types';
23
+
24
/**
 * Options accepted by the Claude CLI adapter's constructor.
 * Every field is optional; the adapter substitutes the default noted on each.
 */
export interface ClaudeCliAdapterConfig {
  /** Executable to launch for the claude CLI (default: 'claude') */
  cliPath?: string;
  /** Model name handed to the CLI (default: haiku) */
  model?: string;
  /** How long to wait for the CLI, in milliseconds (default: 30000) */
  timeout?: number;
}
35
+
36
/**
 * Instructions for the element-finding task. They play the role of a system
 * prompt but are placed at the start of the user prompt text.
 */
const ELEMENT_FINDER_PROMPT = [
  'You are helping find UI elements based on natural language descriptions.',
  "Given an accessibility snapshot of a web page, identify the element that best matches the user's description.",
  '',
  'Rules:',
  '1. Return the ref (like "e1", "e2") of the matching element',
  '2. If multiple elements could match, pick the most likely based on context',
  '3. If no element matches, use ref "NOT_FOUND" with confidence 0',
  "4. Consider the element's role, name, text content, and position in the hierarchy",
  '5. Be precise - prefer exact matches over partial matches',
].join('\n');
48
+
49
/**
 * Claude CLI adapter for AI-powered browser exploration.
 *
 * Uses the `claude` CLI tool to make LLM calls, allowing users to:
 * - Use their existing Claude Code authentication
 * - Leverage local CLI configuration
 * - Avoid managing API keys separately
 */
export class ClaudeCliAdapter implements AIAdapter {
  readonly name = 'claude-cli';

  private model: string;
  private cliPath: string;
  private timeout: number;

  /**
   * @param config - Optional overrides. Omitted fields default to model
   *   `haiku`, executable `claude`, and a 30000 ms timeout.
   */
  constructor(config: ClaudeCliAdapterConfig = {}) {
    this.model = config.model ?? 'haiku';
    this.cliPath = config.cliPath ?? 'claude';
    this.timeout = config.timeout ?? 30000;
  }

  /**
   * Execute the claude CLI with a prompt and return what it wrote to stdout.
   *
   * The promise settles exactly once, on whichever happens first: spawn
   * failure, process close, or timeout. The timeout timer is cleared on every
   * one of those paths so a finished call never leaves a pending timer behind.
   *
   * @param prompt - Full prompt text, passed to the CLI via `-p`
   * @returns The text the CLI wrote to stdout
   * @throws Error when the CLI cannot be spawned, exits with a non-zero code,
   *   or does not finish within the configured timeout
   */
  private async runClaude(prompt: string): Promise<string> {
    return new Promise((resolve, reject) => {
      // Note: --dangerously-skip-permissions is required for non-interactive mode
      // because the CLI may otherwise wait for trust/permission dialogs
      const args = ['--model', this.model, '--dangerously-skip-permissions', '-p', prompt];

      debug('Spawning CLI:', this.cliPath, args.slice(0, 2).join(' '), '...');
      debug('Prompt length:', prompt.length, 'chars');
      const startTime = Date.now();

      const proc = spawn(this.cliPath, args, {
        // stdin must be 'ignore' - otherwise claude CLI waits for input
        stdio: ['ignore', 'pipe', 'pipe'],
      });

      // Decode on the stream so a multi-byte UTF-8 character that straddles
      // two chunks is reassembled instead of being corrupted.
      proc.stdout.setEncoding('utf8');
      proc.stderr.setEncoding('utf8');

      let stdout = '';
      let stderr = '';
      let settled = false;
      let timer: ReturnType<typeof setTimeout> | undefined;

      // Single exit point: first caller wins, later callers are ignored.
      const settle = (outcome: () => void): void => {
        if (settled) {
          return;
        }
        settled = true;
        clearTimeout(timer);
        outcome();
      };

      proc.stdout.on('data', (chunk: string) => {
        stdout += chunk;
        debug('stdout chunk:', chunk.slice(0, 100));
      });

      proc.stderr.on('data', (chunk: string) => {
        stderr += chunk;
        debug('stderr chunk:', chunk.slice(0, 200));
      });

      proc.on('error', (err: Error) => {
        debug('spawn error:', err.message);
        settle(() => reject(new Error(`Failed to spawn claude CLI: ${err.message}`)));
      });

      proc.on('close', (code: number | null) => {
        const elapsed = Date.now() - startTime;
        debug('CLI exited with code', code, 'after', elapsed, 'ms');
        debug('stdout length:', stdout.length, 'stderr length:', stderr.length);

        settle(() => {
          if (code === 0) {
            resolve(stdout);
          } else {
            reject(new Error(`claude CLI exited with code ${code}: ${stderr}`));
          }
        });
      });

      // Handle timeout
      timer = setTimeout(() => {
        debug('TIMEOUT after', this.timeout, 'ms - killing process');
        proc.kill('SIGTERM');
        settle(() => reject(new Error(`claude CLI timed out after ${this.timeout}ms`)));
      }, this.timeout);
    });
  }

  /**
   * Parse a JSON object out of a claude CLI response.
   *
   * Tries, in order: the contents of the first markdown code fence (or the
   * whole trimmed response when there is no fence), then the span from the
   * first `{` to the last `}` so that JSON surrounded by prose still parses.
   * Only a plain object is accepted; arrays, `null` and primitives are
   * rejected because callers read named fields from the result.
   *
   * @param response - Raw text returned by the CLI
   * @returns The parsed object
   * @throws Error when no candidate parses to a JSON object
   */
  private parseJsonResponse(response: string): Record<string, unknown> {
    const fenced = response.match(/```(?:json)?\s*([\s\S]*?)```/);
    const candidates: string[] = [fenced ? fenced[1].trim() : response.trim()];

    const firstBrace = response.indexOf('{');
    const lastBrace = response.lastIndexOf('}');
    if (firstBrace !== -1 && lastBrace > firstBrace) {
      candidates.push(response.slice(firstBrace, lastBrace + 1));
    }

    for (const candidate of candidates) {
      try {
        const value: unknown = JSON.parse(candidate);
        if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
          return value as Record<string, unknown>;
        }
      } catch {
        // Not valid JSON - fall through to the next candidate
      }
    }

    throw new Error(`Failed to parse JSON response: ${response}`);
  }

  /**
   * Find element from natural language query using Claude CLI.
   *
   * Never rejects: CLI and parse failures are reported as a `NOT_FOUND`
   * result whose reasoning starts with `CLI error:`. The model's answer is
   * sanitized before it is returned: a ref that is not one of the snapshot's
   * refs is treated as `NOT_FOUND`, and confidence is forced into [0, 1]
   * (non-numeric values become 0).
   *
   * @param query - Natural language description of the element
   * @param snapshot - Browser snapshot with element tree and refs
   * @returns Promise resolving to element ref with reasoning
   */
  async findElement(query: string, snapshot: EnhancedSnapshot): Promise<FindElementResult> {
    const availableRefs = Object.keys(snapshot.refs);

    debug('findElement called with query:', query);
    debug('Snapshot tree length:', snapshot.tree.length);
    debug('Available refs:', availableRefs.length, availableRefs.slice(0, 5).join(', '), '...');

    // If snapshot is empty, return early
    if (!availableRefs.length || snapshot.tree === '(no interactive elements)') {
      debug('Empty snapshot - returning NOT_FOUND immediately');
      return {
        ref: 'NOT_FOUND',
        confidence: 0,
        reasoning: 'Snapshot contains no interactive elements',
      };
    }

    const prompt = `${ELEMENT_FINDER_PROMPT}

Find the element matching this description: "${query}"

Accessibility tree:
${snapshot.tree}

Available refs: ${availableRefs.join(', ')}

Return ONLY a JSON object with these fields:
- ref: the element reference (e.g., "e3") or "NOT_FOUND" if no match
- confidence: a number 0-1
- reasoning: why you selected this element

JSON response:`;

    try {
      debug('Calling runClaude...');
      const response = await this.runClaude(prompt);
      debug('Got response:', response.slice(0, 200));
      const parsed = this.parseJsonResponse(response);
      debug('Parsed result:', parsed);

      const ref = String(parsed.ref ?? 'NOT_FOUND').trim();
      const reasoning = String(parsed.reasoning ?? 'No reasoning provided');

      if (ref === 'NOT_FOUND') {
        return { ref, confidence: 0, reasoning };
      }

      // Guard against a ref the model made up: only refs present in the
      // snapshot can be acted on by the caller.
      if (!availableRefs.includes(ref)) {
        debug('Model returned ref not present in snapshot:', ref);
        return {
          ref: 'NOT_FOUND',
          confidence: 0,
          reasoning: `Model returned unknown ref "${ref}": ${reasoning}`,
        };
      }

      const rawConfidence = Number(parsed.confidence ?? 0);
      const confidence = Number.isFinite(rawConfidence)
        ? Math.min(1, Math.max(0, rawConfidence))
        : 0;

      return { ref, confidence, reasoning };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      debug('Error in findElement:', errorMessage);
      return {
        ref: 'NOT_FOUND',
        confidence: 0,
        reasoning: `CLI error: ${errorMessage}`,
      };
    }
  }

  /**
   * Run exploration on a spec using Claude CLI.
   *
   * Note: The primary AI method is findElement(). This explore() method is a stub
   * that returns a minimal valid structure. Full exploration is orchestrated by
   * the Explorer class which uses findElement() for AI-powered element discovery.
   *
   * @param params - Spec, paths and optional browser/viewport to echo back
   * @returns A `completed` exploration with no steps, checks or errors;
   *   browser defaults to `chromium` and viewport to 1280x720
   */
  async explore(params: ExploreParams): Promise<ExplorationOutput> {
    const explorationId = `exp-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

    return {
      spec: params.spec.name,
      specPath: params.specPath,
      explorationId,
      timestamp: new Date().toISOString(),
      durationMs: 0,
      browser: params.browser ?? 'chromium',
      viewport: params.viewport ?? { width: 1280, height: 720 },
      baseUrl: params.baseUrl,
      steps: [],
      outcomeChecks: [],
      overallStatus: 'completed',
      errors: [],
    };
  }

  /**
   * Retry exploration with review feedback.
   *
   * Currently delegates straight to explore(); the previous exploration and
   * the review feedback carried by `params` are not consulted.
   */
  async retryWithFeedback(params: RetryParams): Promise<ExplorationOutput> {
    return this.explore(params);
  }
}
@@ -0,0 +1,195 @@
1
+ // @browserflow-ai/exploration - Claude adapter tests
2
+ import { describe, it, expect, beforeEach, mock } from 'bun:test';
3
+ import { ClaudeAdapter } from './claude';
4
+
5
// Builds the canned reply used by default: the model "calls" the
// select_element tool and picks ref e5. A fresh object is produced per call.
const buildDefaultReply = () => ({
  content: [
    {
      type: 'tool_use',
      id: 'tool_1',
      name: 'select_element',
      input: {
        ref: 'e5',
        confidence: 0.95,
        reasoning: 'The element with ref e5 is a button with text "Submit" which matches the query for a submit button.',
      },
    },
  ],
  stop_reason: 'tool_use',
});

// Stand-in for the SDK's `messages.create`; tests override it per call
const mockCreate = mock(() => Promise.resolve(buildDefaultReply()));

// Replace the Anthropic SDK module with a stub whose default export is a
// class exposing only `messages.create`
class MockAnthropic {
  messages = {
    create: mockCreate,
  };
}

mock.module('@anthropic-ai/sdk', () => ({
  default: MockAnthropic,
}));
32
+
33
// Exercises ClaudeAdapter against the mocked Anthropic SDK: every call to
// `messages.create` is served by `mockCreate`, so no network access happens.
describe('ClaudeAdapter', () => {
  let adapter: ClaudeAdapter;

  beforeEach(() => {
    mockCreate.mockClear();
    adapter = new ClaudeAdapter();
  });

  describe('constructor', () => {
    it('should create adapter with default config', () => {
      expect(adapter.name).toBe('claude');
    });

    it('should accept custom model config', () => {
      const customAdapter = new ClaudeAdapter({
        model: 'claude-opus-4-20250514',
        maxTokens: 4096,
      });
      expect(customAdapter.name).toBe('claude');
    });
  });

  describe('findElement', () => {
    const sampleSnapshot = {
      tree: `
        <page url="http://localhost:3000/login">
          <form>
            <input ref="e1" type="email" placeholder="Email" />
            <input ref="e2" type="password" placeholder="Password" />
            <button ref="e5" type="submit">Submit</button>
          </form>
        </page>
      `,
      refs: {
        e1: { tag: 'input', type: 'email', placeholder: 'Email' },
        e2: { tag: 'input', type: 'password', placeholder: 'Password' },
        e5: { tag: 'button', type: 'submit', text: 'Submit' },
      },
    };

    it('should find element from natural language query', async () => {
      const result = await adapter.findElement('submit button', sampleSnapshot);

      expect(result.ref).toBe('e5');
      expect(result.confidence).toBeGreaterThan(0);
      expect(result.reasoning).toBeDefined();
    });

    it('should call Claude API with correct parameters', async () => {
      await adapter.findElement('email input field', sampleSnapshot);

      expect(mockCreate).toHaveBeenCalledTimes(1);
      const callArgs = mockCreate.mock.calls[0][0];

      expect(callArgs.model).toMatch(/claude/);
      expect(callArgs.max_tokens).toBeGreaterThan(0);
      expect(callArgs.messages).toBeDefined();
      expect(callArgs.tools).toBeDefined();
    });

    // NOTE(review): despite its name, this test only checks that the query
    // text reaches the user message; it does not assert on the snapshot tree.
    it('should include snapshot tree in prompt', async () => {
      await adapter.findElement('password field', sampleSnapshot);

      const callArgs = mockCreate.mock.calls[0][0];
      const userMessage = callArgs.messages.find((m: { role: string }) => m.role === 'user');

      expect(userMessage.content).toContain('password field');
    });

    it('should use tool_use for structured output', async () => {
      await adapter.findElement('submit button', sampleSnapshot);

      const callArgs = mockCreate.mock.calls[0][0];
      expect(callArgs.tools).toEqual(
        expect.arrayContaining([
          expect.objectContaining({
            name: 'select_element',
          }),
        ])
      );
    });

    it('should return NOT_FOUND when element not found', async () => {
      mockCreate.mockImplementationOnce(() =>
        Promise.resolve({
          content: [
            {
              type: 'tool_use',
              id: 'tool_1',
              name: 'select_element',
              input: {
                ref: 'NOT_FOUND',
                confidence: 0,
                reasoning: 'No element matches the description "purple unicorn button".',
              },
            },
          ],
          stop_reason: 'tool_use',
        })
      );

      const result = await adapter.findElement('purple unicorn button', sampleSnapshot);

      expect(result.ref).toBe('NOT_FOUND');
      expect(result.confidence).toBe(0);
    });

    it('should handle API errors gracefully', async () => {
      mockCreate.mockImplementationOnce(() => Promise.reject(new Error('API rate limit exceeded')));

      await expect(adapter.findElement('button', sampleSnapshot)).rejects.toThrow('API rate limit exceeded');
    });

    it('should extract ref from text response as fallback', async () => {
      mockCreate.mockImplementationOnce(() =>
        Promise.resolve({
          content: [
            {
              type: 'text',
              text: 'The submit button is element e5. I selected this because it has type="submit" and the text "Submit".',
            },
          ],
          stop_reason: 'end_turn',
        })
      );

      const result = await adapter.findElement('submit button', sampleSnapshot);

      expect(result.ref).toBe('e5');
    });
  });

  describe('findElement with ambiguous queries', () => {
    const snapshotWithMultipleButtons = {
      tree: `
        <page>
          <button ref="e1">Cancel</button>
          <button ref="e2">Submit</button>
          <button ref="e3">Submit</button>
        </page>
      `,
      refs: {
        e1: { tag: 'button', text: 'Cancel' },
        e2: { tag: 'button', text: 'Submit' },
        e3: { tag: 'button', text: 'Submit' },
      },
    };

    it('should pick most likely element for ambiguous query', async () => {
      // The default mock answers "e5", which is not in this snapshot. Answer
      // with a ref that is, so the assertion below checks a real candidate.
      mockCreate.mockImplementationOnce(() =>
        Promise.resolve({
          content: [
            {
              type: 'tool_use',
              id: 'tool_1',
              name: 'select_element',
              input: {
                ref: 'e2',
                confidence: 0.6,
                reasoning: 'Both e2 and e3 are Submit buttons; e2 appears first in the page.',
              },
            },
          ],
          stop_reason: 'tool_use',
        })
      );

      const result = await adapter.findElement('submit button', snapshotWithMultipleButtons);

      // Should return one of the submit buttons present in the snapshot
      expect(['e2', 'e3']).toContain(result.ref);
    });

    it('should include reasoning for ambiguous cases', async () => {
      const result = await adapter.findElement('submit button', snapshotWithMultipleButtons);

      expect(result.reasoning).toBeDefined();
      expect(result.reasoning.length).toBeGreaterThan(0);
    });
  });
});