@browserflow-ai/exploration 0.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/dist/adapters/claude-cli.d.ts +57 -0
- package/dist/adapters/claude-cli.d.ts.map +1 -0
- package/dist/adapters/claude-cli.js +195 -0
- package/dist/adapters/claude-cli.js.map +1 -0
- package/dist/adapters/claude.d.ts +54 -0
- package/dist/adapters/claude.d.ts.map +1 -0
- package/dist/adapters/claude.js +160 -0
- package/dist/adapters/claude.js.map +1 -0
- package/dist/adapters/index.d.ts +6 -0
- package/dist/adapters/index.d.ts.map +1 -0
- package/dist/adapters/index.js +4 -0
- package/dist/adapters/index.js.map +1 -0
- package/dist/adapters/types.d.ts +196 -0
- package/dist/adapters/types.d.ts.map +1 -0
- package/dist/adapters/types.js +3 -0
- package/dist/adapters/types.js.map +1 -0
- package/dist/agent-browser-session.d.ts +62 -0
- package/dist/agent-browser-session.d.ts.map +1 -0
- package/dist/agent-browser-session.js +272 -0
- package/dist/agent-browser-session.js.map +1 -0
- package/dist/evidence.d.ts +111 -0
- package/dist/evidence.d.ts.map +1 -0
- package/dist/evidence.js +144 -0
- package/dist/evidence.js.map +1 -0
- package/dist/explorer.d.ts +180 -0
- package/dist/explorer.d.ts.map +1 -0
- package/dist/explorer.js +393 -0
- package/dist/explorer.js.map +1 -0
- package/dist/index.d.ts +15 -0
- package/dist/index.d.ts.map +1 -0
- package/dist/index.js +15 -0
- package/dist/index.js.map +1 -0
- package/dist/locator-candidates.d.ts +127 -0
- package/dist/locator-candidates.d.ts.map +1 -0
- package/dist/locator-candidates.js +358 -0
- package/dist/locator-candidates.js.map +1 -0
- package/dist/step-executor.d.ts +99 -0
- package/dist/step-executor.d.ts.map +1 -0
- package/dist/step-executor.js +646 -0
- package/dist/step-executor.js.map +1 -0
- package/package.json +34 -0
- package/src/adapters/claude-cli.test.ts +134 -0
- package/src/adapters/claude-cli.ts +240 -0
- package/src/adapters/claude.test.ts +195 -0
- package/src/adapters/claude.ts +190 -0
- package/src/adapters/index.ts +21 -0
- package/src/adapters/types.ts +207 -0
- package/src/agent-browser-session.test.ts +369 -0
- package/src/agent-browser-session.ts +349 -0
- package/src/evidence.test.ts +239 -0
- package/src/evidence.ts +203 -0
- package/src/explorer.test.ts +321 -0
- package/src/explorer.ts +565 -0
- package/src/index.ts +51 -0
- package/src/locator-candidates.test.ts +602 -0
- package/src/locator-candidates.ts +441 -0
- package/src/step-executor.test.ts +696 -0
- package/src/step-executor.ts +783 -0
package/package.json
ADDED
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
{
|
|
2
|
+
"name": "@browserflow-ai/exploration",
|
|
3
|
+
"version": "0.0.6",
|
|
4
|
+
"description": "AI exploration engine for BrowserFlow - Human-in-the-Loop E2E Test Generation",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"license": "MIT",
|
|
7
|
+
"repository": {
|
|
8
|
+
"type": "git",
|
|
9
|
+
"url": "https://github.com/akatz-ai/browserflow.git",
|
|
10
|
+
"directory": "packages/exploration"
|
|
11
|
+
},
|
|
12
|
+
"main": "./dist/index.js",
|
|
13
|
+
"types": "./dist/index.d.ts",
|
|
14
|
+
"exports": {
|
|
15
|
+
".": {
|
|
16
|
+
"import": "./dist/index.js",
|
|
17
|
+
"types": "./dist/index.d.ts"
|
|
18
|
+
}
|
|
19
|
+
},
|
|
20
|
+
"scripts": {
|
|
21
|
+
"build": "tsc",
|
|
22
|
+
"test": "bun test",
|
|
23
|
+
"typecheck": "tsc --noEmit"
|
|
24
|
+
},
|
|
25
|
+
"files": [
|
|
26
|
+
"dist",
|
|
27
|
+
"src"
|
|
28
|
+
],
|
|
29
|
+
"dependencies": {
|
|
30
|
+
"@anthropic-ai/sdk": "^0.71.2",
|
|
31
|
+
"@browserflow-ai/core": "0.0.6",
|
|
32
|
+
"agent-browser": "^0.5.0"
|
|
33
|
+
}
|
|
34
|
+
}
|
|
@@ -0,0 +1,134 @@
|
|
|
1
|
+
// @browserflow-ai/exploration - Claude CLI Adapter tests (with mocks)
|
|
2
|
+
|
|
3
|
+
import { describe, test, expect, mock, beforeEach } from 'bun:test';
|
|
4
|
+
import { ClaudeCliAdapter } from './claude-cli';
|
|
5
|
+
import type { EnhancedSnapshot } from './types';
|
|
6
|
+
|
|
7
|
+
// Placeholder spawn mock. It is NOT registered with `node:child_process`
// anywhere in this file, so every test below runs against the real `spawn`.
const mockSpawn = mock(() => {});

describe('ClaudeCliAdapter', () => {
  let adapter: ClaudeCliAdapter;

  beforeEach(() => {
    adapter = new ClaudeCliAdapter({ model: 'haiku' });
  });

  test('has correct name', () => {
    expect(adapter.name).toBe('claude-cli');
  });

  test('accepts custom configuration', () => {
    const customAdapter = new ClaudeCliAdapter({
      model: 'sonnet',
      timeout: 60000,
      cliPath: '/usr/local/bin/claude',
    });
    expect(customAdapter.name).toBe('claude-cli');
  });

  describe('explore', () => {
    test('returns valid exploration output structure', async () => {
      const result = await adapter.explore({
        spec: { name: 'test-spec', steps: [] },
        specPath: 'specs/test.yaml',
        baseUrl: 'http://localhost:3001',
        outputDir: '/tmp/test',
      });

      expect(result.spec).toBe('test-spec');
      expect(result.specPath).toBe('specs/test.yaml');
      expect(result.explorationId).toMatch(/^exp-\d+-[a-z0-9]+$/);
      expect(result.overallStatus).toBe('completed');
      expect(result.browser).toBe('chromium');
      expect(result.viewport).toEqual({ width: 1280, height: 720 });
    });

    test('uses provided browser and viewport', async () => {
      const result = await adapter.explore({
        spec: { name: 'test-spec', steps: [] },
        specPath: 'specs/test.yaml',
        baseUrl: 'http://localhost:3001',
        outputDir: '/tmp/test',
        browser: 'firefox',
        viewport: { width: 800, height: 600 },
      });

      expect(result.browser).toBe('firefox');
      expect(result.viewport).toEqual({ width: 800, height: 600 });
    });
  });

  describe('retryWithFeedback', () => {
    test('delegates to explore', async () => {
      const previousExploration = {
        spec: 'old-spec',
        specPath: 'specs/old.yaml',
        explorationId: 'exp-123',
        timestamp: new Date().toISOString(),
        durationMs: 1000,
        browser: 'chromium' as const,
        viewport: { width: 1280, height: 720 },
        baseUrl: 'http://localhost:3001',
        steps: [],
        outcomeChecks: [],
        overallStatus: 'completed' as const,
        errors: [],
      };

      const result = await adapter.retryWithFeedback({
        spec: { name: 'retry-spec', steps: [] },
        specPath: 'specs/retry.yaml',
        baseUrl: 'http://localhost:3001',
        outputDir: '/tmp/test',
        previousExploration,
        reviewFeedback: {
          explorationId: 'exp-123',
          reviewer: 'test',
          steps: [],
          verdict: 'rejected',
        },
      });

      // The output is built from the retry params, not from the previous run.
      expect(result.spec).toBe('retry-spec');
    });
  });

  describe('findElement error handling', () => {
    // These tests exercise findElement's failure paths. The private JSON
    // parser is never reached here: the first test fails at spawn time and
    // the second returns before the CLI would be launched.

    test('handles CLI errors gracefully', async () => {
      // A CLI path that does not exist makes the spawn itself fail.
      const badAdapter = new ClaudeCliAdapter({
        cliPath: '/nonexistent/claude',
        timeout: 1000,
      });

      const snapshot: EnhancedSnapshot = {
        tree: '- button "Add" [ref=e1]',
        refs: { e1: { role: 'button', name: 'Add' } },
      };

      const result = await badAdapter.findElement('the Add button', snapshot);

      expect(result.ref).toBe('NOT_FOUND');
      expect(result.confidence).toBe(0);
      expect(result.reasoning).toContain('CLI error');
    });

    test('returns NOT_FOUND without invoking the CLI when the snapshot has no refs', async () => {
      // Same unusable CLI path as above: if the adapter tried to spawn it,
      // the reasoning would start with "CLI error" instead.
      const badAdapter = new ClaudeCliAdapter({
        cliPath: '/nonexistent/claude',
        timeout: 1000,
      });

      const emptySnapshot: EnhancedSnapshot = {
        tree: '(no interactive elements)',
        refs: {},
      };

      const result = await badAdapter.findElement('anything', emptySnapshot);

      expect(result.ref).toBe('NOT_FOUND');
      expect(result.confidence).toBe(0);
      expect(result.reasoning).toBe('Snapshot contains no interactive elements');
    });
  });
});
|
|
121
|
+
|
|
122
|
+
describe('ClaudeCliAdapter interface compliance', () => {
  test('implements AIAdapter interface', () => {
    const adapter = new ClaudeCliAdapter();

    // Required property: a string identifier.
    expect(typeof adapter.name).toBe('string');

    // Required methods: each must be callable on a default-constructed adapter.
    const requiredMethods = ['findElement', 'explore', 'retryWithFeedback'] as const;
    for (const methodName of requiredMethods) {
      expect(typeof adapter[methodName]).toBe('function');
    }
  });
});
|
|
@@ -0,0 +1,240 @@
|
|
|
1
|
+
// @browserflow-ai/exploration - Claude CLI Adapter
|
|
2
|
+
// Uses the `claude` CLI tool instead of the Anthropic SDK
|
|
3
|
+
// This allows users to leverage their existing Claude Code authentication
|
|
4
|
+
|
|
5
|
+
import { spawn } from 'node:child_process';
|
|
6
|
+
|
|
7
|
+
// Diagnostics switch: true only when the BF_DEBUG environment variable is
// exactly '1'. Read once, when this module is loaded.
const DEBUG = process.env.BF_DEBUG === '1';

/**
 * Print diagnostic values to stderr, prefixed with `[claude-cli]`.
 * A no-op unless DEBUG is enabled.
 *
 * @param args - Values forwarded unchanged to `console.error`
 */
function debug(...args: unknown[]): void {
  if (!DEBUG) return;
  console.error('[claude-cli]', ...args);
}
|
|
15
|
+
import type {
|
|
16
|
+
AIAdapter,
|
|
17
|
+
ExploreParams,
|
|
18
|
+
ExplorationOutput,
|
|
19
|
+
RetryParams,
|
|
20
|
+
EnhancedSnapshot,
|
|
21
|
+
FindElementResult,
|
|
22
|
+
} from './types';
|
|
23
|
+
|
|
24
|
+
/**
 * Options accepted by the ClaudeCliAdapter constructor.
 * Every field is optional; the adapter substitutes the default noted on
 * each field when it is omitted.
 */
export interface ClaudeCliAdapterConfig {
  /** Executable to launch for the claude CLI (default: 'claude') */
  cliPath?: string;
  /** Model name passed to the CLI via `--model` (default: haiku) */
  model?: string;
  /** How long to wait for the CLI before giving up, in milliseconds (default: 30000) */
  timeout?: number;
}
|
|
35
|
+
|
|
36
|
+
/**
 * Instruction preamble for element lookup. The CLI call has no separate
 * system-prompt channel here, so this text is placed at the top of the
 * user prompt instead.
 */
const ELEMENT_FINDER_PROMPT = [
  'You are helping find UI elements based on natural language descriptions.',
  "Given an accessibility snapshot of a web page, identify the element that best matches the user's description.",
  '',
  'Rules:',
  '1. Return the ref (like "e1", "e2") of the matching element',
  '2. If multiple elements could match, pick the most likely based on context',
  '3. If no element matches, use ref "NOT_FOUND" with confidence 0',
  "4. Consider the element's role, name, text content, and position in the hierarchy",
  '5. Be precise - prefer exact matches over partial matches',
].join('\n');
|
|
48
|
+
|
|
49
|
+
/**
 * AI adapter that performs element lookup by shelling out to the `claude`
 * CLI rather than calling the Anthropic SDK.
 *
 * Going through the CLI lets users:
 * - reuse their existing Claude Code authentication
 * - pick up local CLI configuration
 * - avoid managing API keys separately
 */
export class ClaudeCliAdapter implements AIAdapter {
  readonly name = 'claude-cli';

  private readonly model: string;
  private readonly cliPath: string;
  private readonly timeout: number;

  /**
   * @param config - Optional overrides; omitted fields fall back to
   *   model 'haiku', CLI path 'claude' and a 30000 ms timeout.
   */
  constructor(config: ClaudeCliAdapterConfig = {}) {
    this.model = config.model ?? 'haiku';
    this.cliPath = config.cliPath ?? 'claude';
    this.timeout = config.timeout ?? 30000;
  }

  /**
   * Run the claude CLI once with the given prompt and collect its stdout.
   *
   * @param prompt - Full prompt text, passed to the CLI via `-p`
   * @returns Everything the CLI wrote to stdout, when it exits with code 0
   * @throws Error if the process cannot be spawned, exits with a non-zero
   *   (or null) code, or does not finish within the configured timeout
   */
  private async runClaude(prompt: string): Promise<string> {
    return new Promise<string>((resolve, reject) => {
      // Note: --dangerously-skip-permissions is required for non-interactive mode
      // because the CLI may otherwise wait for trust/permission dialogs.
      // NOTE(review): the prompt embeds page-derived text (the accessibility
      // tree). Running with permissions skipped means injected instructions in
      // that text are not gated by a permission prompt — confirm this is an
      // accepted risk or restrict the CLI's tools.
      const args = ['--model', this.model, '--dangerously-skip-permissions', '-p', prompt];

      debug('Spawning CLI:', this.cliPath, args.slice(0, 2).join(' '), '...');
      debug('Prompt length:', prompt.length, 'chars');
      const startTime = Date.now();

      const proc = spawn(this.cliPath, args, {
        // stdin must be 'ignore' - otherwise claude CLI waits for input
        stdio: ['ignore', 'pipe', 'pipe'],
      });

      let stdout = '';
      let stderr = '';
      let settled = false;
      let timer: ReturnType<typeof setTimeout> | undefined;

      // Single exit point: whichever of error / close / timeout happens first
      // decides the outcome and always cancels the pending timeout.
      const settle = (finish: () => void): void => {
        if (settled) return;
        settled = true;
        if (timer !== undefined) clearTimeout(timer);
        finish();
      };

      // Decode at the stream level so a multi-byte UTF-8 character split
      // across two chunks is not corrupted by per-chunk Buffer.toString().
      proc.stdout.setEncoding('utf8');
      proc.stderr.setEncoding('utf8');

      proc.stdout.on('data', (chunk: string) => {
        stdout += chunk;
        debug('stdout chunk:', chunk.slice(0, 100));
      });

      proc.stderr.on('data', (chunk: string) => {
        stderr += chunk;
        debug('stderr chunk:', chunk.slice(0, 200));
      });

      proc.on('error', (err: Error) => {
        debug('spawn error:', err.message);
        settle(() => reject(new Error(`Failed to spawn claude CLI: ${err.message}`)));
      });

      proc.on('close', (code: number | null) => {
        const elapsed = Date.now() - startTime;
        debug('CLI exited with code', code, 'after', elapsed, 'ms');
        debug('stdout length:', stdout.length, 'stderr length:', stderr.length);

        settle(() => {
          if (code === 0) {
            resolve(stdout);
          } else {
            reject(new Error(`claude CLI exited with code ${code}: ${stderr}`));
          }
        });
      });

      // Handle timeout
      timer = setTimeout(() => {
        debug('TIMEOUT after', this.timeout, 'ms - killing process');
        proc.kill('SIGTERM');
        settle(() => reject(new Error(`claude CLI timed out after ${this.timeout}ms`)));
      }, this.timeout);
    });
  }

  /**
   * Extract a JSON object from a CLI response.
   *
   * Tries, in order: the contents of the first markdown code fence (or the
   * whole trimmed response when there is no fence), then the span from the
   * first `{` to the last `}` so that a reply with prose around bare JSON
   * still parses. Only a plain JSON object is accepted; arrays, strings,
   * numbers and `null` are rejected.
   *
   * @param response - Raw text returned by the CLI
   * @returns The parsed object
   * @throws Error if no candidate parses to a JSON object
   */
  private parseJsonResponse(response: string): Record<string, unknown> {
    const fenced = response.match(/```(?:json)?\s*([\s\S]*?)```/);
    const candidates: string[] = [fenced?.[1]?.trim() ?? response.trim()];

    const firstBrace = response.indexOf('{');
    const lastBrace = response.lastIndexOf('}');
    if (firstBrace !== -1 && lastBrace > firstBrace) {
      candidates.push(response.slice(firstBrace, lastBrace + 1));
    }

    for (const candidate of candidates) {
      let value: unknown;
      try {
        value = JSON.parse(candidate);
      } catch {
        continue; // try the next candidate
      }
      if (typeof value === 'object' && value !== null && !Array.isArray(value)) {
        return value as Record<string, unknown>;
      }
    }

    throw new Error(`Failed to parse JSON response: ${response}`);
  }

  /**
   * Find element from natural language query using Claude CLI.
   *
   * Never rejects: CLI failures, timeouts and unparseable replies are all
   * reported as a `NOT_FOUND` result whose reasoning starts with "CLI error".
   *
   * @param query - Natural language description of the element
   * @param snapshot - Browser snapshot with element tree and refs
   * @returns Promise resolving to the chosen ref, a confidence in the range
   *   0-1 (non-numeric values become 0) and the reasoning text
   */
  async findElement(query: string, snapshot: EnhancedSnapshot): Promise<FindElementResult> {
    const availableRefs = Object.keys(snapshot.refs);

    debug('findElement called with query:', query);
    debug('Snapshot tree length:', snapshot.tree.length);
    debug('Available refs:', availableRefs.length, availableRefs.slice(0, 5).join(', '), '...');

    // Nothing to choose from: answer immediately without launching the CLI.
    if (!availableRefs.length || snapshot.tree === '(no interactive elements)') {
      debug('Empty snapshot - returning NOT_FOUND immediately');
      return {
        ref: 'NOT_FOUND',
        confidence: 0,
        reasoning: 'Snapshot contains no interactive elements',
      };
    }

    const prompt = `${ELEMENT_FINDER_PROMPT}

Find the element matching this description: "${query}"

Accessibility tree:
${snapshot.tree}

Available refs: ${availableRefs.join(', ')}

Return ONLY a JSON object with these fields:
- ref: the element reference (e.g., "e3") or "NOT_FOUND" if no match
- confidence: a number 0-1
- reasoning: why you selected this element

JSON response:`;

    try {
      debug('Calling runClaude...');
      const response = await this.runClaude(prompt);
      debug('Got response:', response.slice(0, 200));
      const parsed = this.parseJsonResponse(response);
      debug('Parsed result:', parsed);

      // The model is asked for a number in 0-1 but may return anything;
      // map non-numeric values to 0 and clamp the rest into range.
      const rawConfidence = Number(parsed.confidence ?? 0);
      const confidence = Number.isFinite(rawConfidence)
        ? Math.min(1, Math.max(0, rawConfidence))
        : 0;

      return {
        ref: String(parsed.ref ?? 'NOT_FOUND'),
        confidence,
        reasoning: String(parsed.reasoning ?? 'No reasoning provided'),
      };
    } catch (error) {
      const errorMessage = error instanceof Error ? error.message : String(error);
      debug('Error in findElement:', errorMessage);
      return {
        ref: 'NOT_FOUND',
        confidence: 0,
        reasoning: `CLI error: ${errorMessage}`,
      };
    }
  }

  /**
   * Run exploration on a spec using Claude CLI.
   *
   * Note: The primary AI method is findElement(). This explore() method is a stub
   * that returns a minimal valid structure with no steps and status
   * 'completed'; it does not launch the CLI. Full exploration is orchestrated by
   * the Explorer class which uses findElement() for AI-powered element discovery.
   *
   * @param params - Spec, paths and optional browser/viewport overrides
   * @returns An empty exploration record; browser defaults to 'chromium' and
   *   viewport to 1280x720 when not supplied
   */
  async explore(params: ExploreParams): Promise<ExplorationOutput> {
    const explorationId = `exp-${Date.now()}-${Math.random().toString(36).slice(2, 8)}`;

    return {
      spec: params.spec.name,
      specPath: params.specPath,
      explorationId,
      timestamp: new Date().toISOString(),
      durationMs: 0,
      browser: params.browser ?? 'chromium',
      viewport: params.viewport ?? { width: 1280, height: 720 },
      baseUrl: params.baseUrl,
      steps: [],
      outcomeChecks: [],
      overallStatus: 'completed',
      errors: [],
    };
  }

  /**
   * Retry exploration with review feedback.
   *
   * Currently forwards straight to explore(); the previous exploration and
   * the review feedback carried by `params` are not consulted.
   */
  async retryWithFeedback(params: RetryParams): Promise<ExplorationOutput> {
    return this.explore(params);
  }
}
|
|
@@ -0,0 +1,195 @@
|
|
|
1
|
+
// @browserflow-ai/exploration - Claude adapter tests
|
|
2
|
+
import { describe, it, expect, beforeEach, mock } from 'bun:test';
|
|
3
|
+
import { ClaudeAdapter } from './claude';
|
|
4
|
+
|
|
5
|
+
// Stand-in for `messages.create` on the Anthropic client. By default every
// call resolves to a single tool_use block that selects element e5.
const mockCreate = mock(async () => ({
  content: [
    {
      type: 'tool_use',
      id: 'tool_1',
      name: 'select_element',
      input: {
        ref: 'e5',
        confidence: 0.95,
        reasoning: 'The element with ref e5 is a button with text "Submit" which matches the query for a submit button.',
      },
    },
  ],
  stop_reason: 'tool_use',
}));

// Replace the SDK module so that constructing the default export yields an
// object whose `messages.create` is the mock above.
mock.module('@anthropic-ai/sdk', () => {
  class MockAnthropic {
    messages = {
      create: mockCreate,
    };
  }

  return { default: MockAnthropic };
});
|
|
32
|
+
|
|
33
|
+
describe('ClaudeAdapter', () => {
  let adapter: ClaudeAdapter;

  beforeEach(() => {
    // Reset recorded calls so each test can inspect calls[0] safely.
    mockCreate.mockClear();
    adapter = new ClaudeAdapter();
  });

  describe('constructor', () => {
    it('should create adapter with default config', () => {
      expect(adapter.name).toBe('claude');
    });

    it('should accept custom model config', () => {
      const customAdapter = new ClaudeAdapter({
        model: 'claude-opus-4-20250514',
        maxTokens: 4096,
      });
      expect(customAdapter.name).toBe('claude');
    });
  });

  describe('findElement', () => {
    const sampleSnapshot = {
      tree: `
<page url="http://localhost:3000/login">
<form>
<input ref="e1" type="email" placeholder="Email" />
<input ref="e2" type="password" placeholder="Password" />
<button ref="e5" type="submit">Submit</button>
</form>
</page>
`,
      refs: {
        e1: { tag: 'input', type: 'email', placeholder: 'Email' },
        e2: { tag: 'input', type: 'password', placeholder: 'Password' },
        e5: { tag: 'button', type: 'submit', text: 'Submit' },
      },
    };

    it('should find element from natural language query', async () => {
      const result = await adapter.findElement('submit button', sampleSnapshot);

      expect(result.ref).toBe('e5');
      expect(result.confidence).toBeGreaterThan(0);
      expect(result.reasoning).toBeDefined();
    });

    it('should call Claude API with correct parameters', async () => {
      await adapter.findElement('email input field', sampleSnapshot);

      expect(mockCreate).toHaveBeenCalledTimes(1);
      const callArgs = mockCreate.mock.calls[0][0];

      expect(callArgs.model).toMatch(/claude/);
      expect(callArgs.max_tokens).toBeGreaterThan(0);
      expect(callArgs.messages).toBeDefined();
      expect(callArgs.tools).toBeDefined();
    });

    it('should include snapshot tree in prompt', async () => {
      await adapter.findElement('password field', sampleSnapshot);

      const callArgs = mockCreate.mock.calls[0][0];
      const userMessage = callArgs.messages.find((m: { role: string }) => m.role === 'user');

      // NOTE(review): this only verifies that the query text reaches the user
      // message; nothing here asserts that the snapshot tree itself is
      // included. Consider asserting on a fragment of sampleSnapshot.tree once
      // the prompt layout is confirmed.
      expect(userMessage.content).toContain('password field');
    });

    it('should use tool_use for structured output', async () => {
      await adapter.findElement('submit button', sampleSnapshot);

      const callArgs = mockCreate.mock.calls[0][0];
      expect(callArgs.tools).toEqual(
        expect.arrayContaining([
          expect.objectContaining({
            name: 'select_element',
          }),
        ])
      );
    });

    it('should return NOT_FOUND when element not found', async () => {
      mockCreate.mockImplementationOnce(() =>
        Promise.resolve({
          content: [
            {
              type: 'tool_use',
              id: 'tool_1',
              name: 'select_element',
              input: {
                ref: 'NOT_FOUND',
                confidence: 0,
                reasoning: 'No element matches the description "purple unicorn button".',
              },
            },
          ],
          stop_reason: 'tool_use',
        })
      );

      const result = await adapter.findElement('purple unicorn button', sampleSnapshot);

      expect(result.ref).toBe('NOT_FOUND');
      expect(result.confidence).toBe(0);
    });

    it('should handle API errors gracefully', async () => {
      mockCreate.mockImplementationOnce(() => Promise.reject(new Error('API rate limit exceeded')));

      await expect(adapter.findElement('button', sampleSnapshot)).rejects.toThrow('API rate limit exceeded');
    });

    it('should extract ref from text response as fallback', async () => {
      mockCreate.mockImplementationOnce(() =>
        Promise.resolve({
          content: [
            {
              type: 'text',
              text: 'The submit button is element e5. I selected this because it has type="submit" and the text "Submit".',
            },
          ],
          stop_reason: 'end_turn',
        })
      );

      const result = await adapter.findElement('submit button', sampleSnapshot);

      expect(result.ref).toBe('e5');
    });
  });

  describe('findElement with ambiguous queries', () => {
    const snapshotWithMultipleButtons = {
      tree: `
<page>
<button ref="e1">Cancel</button>
<button ref="e2">Submit</button>
<button ref="e3">Submit</button>
</page>
`,
      refs: {
        e1: { tag: 'button', text: 'Cancel' },
        e2: { tag: 'button', text: 'Submit' },
        e3: { tag: 'button', text: 'Submit' },
      },
    };

    it('should pick most likely element for ambiguous query', async () => {
      // The default mock reply selects e5, which does not exist in this
      // snapshot. Queue a reply that picks one of the two real Submit buttons
      // so the assertion checks refs that are actually on the page.
      mockCreate.mockImplementationOnce(() =>
        Promise.resolve({
          content: [
            {
              type: 'tool_use',
              id: 'tool_1',
              name: 'select_element',
              input: {
                ref: 'e2',
                confidence: 0.6,
                reasoning: 'Two buttons are labelled "Submit"; e2 is the first one in document order.',
              },
            },
          ],
          stop_reason: 'tool_use',
        })
      );

      const result = await adapter.findElement('submit button', snapshotWithMultipleButtons);

      // Should return one of the submit buttons
      expect(['e2', 'e3']).toContain(result.ref);
    });

    it('should include reasoning for ambiguous cases', async () => {
      const result = await adapter.findElement('submit button', snapshotWithMultipleButtons);

      expect(result.reasoning).toBeDefined();
      expect(result.reasoning.length).toBeGreaterThan(0);
    });
  });
});
|