btcp-browser-agent 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/package.json +8 -9
- package/packages/core/dist/actions.d.ts +97 -0
- package/packages/core/dist/actions.js +940 -0
- package/packages/core/dist/errors.d.ts +138 -0
- package/packages/core/dist/errors.js +157 -0
- package/packages/core/dist/index.d.ts +120 -0
- package/packages/core/dist/index.js +134 -0
- package/packages/core/dist/ref-map.d.ts +16 -0
- package/packages/core/dist/ref-map.js +91 -0
- package/packages/core/dist/snapshot.d.ts +37 -0
- package/packages/core/dist/snapshot.js +751 -0
- package/packages/core/dist/types.d.ts +396 -0
- package/packages/core/dist/types.js +7 -0
- package/packages/extension/dist/background.d.ts +227 -0
- package/packages/extension/dist/background.js +737 -0
- package/packages/extension/dist/content.d.ts +18 -0
- package/packages/extension/dist/content.js +149 -0
- package/packages/extension/dist/index.d.ts +228 -0
- package/packages/extension/dist/index.js +350 -0
- package/packages/extension/dist/session-manager.d.ts +87 -0
- package/packages/extension/dist/session-manager.js +322 -0
- package/packages/extension/{src/session-types.ts → dist/session-types.d.ts} +113 -144
- package/packages/extension/dist/session-types.js +5 -0
- package/packages/extension/dist/types.d.ts +88 -0
- package/packages/extension/dist/types.js +7 -0
- package/CLAUDE.md +0 -230
- package/SKILL.md +0 -143
- package/SNAPSHOT_IMPROVEMENTS.md +0 -302
- package/USAGE.md +0 -146
- package/dist/index.d.ts.map +0 -1
- package/dist/index.js.map +0 -1
- package/docs/browser-cli-design.md +0 -500
- package/examples/chrome-extension/CHANGELOG.md +0 -210
- package/examples/chrome-extension/DEBUG.md +0 -231
- package/examples/chrome-extension/ERROR_FIXED.md +0 -147
- package/examples/chrome-extension/QUICK_TEST.md +0 -189
- package/examples/chrome-extension/README.md +0 -149
- package/examples/chrome-extension/SESSION_ONLY_MODE.md +0 -305
- package/examples/chrome-extension/TEST_WITH_YOUR_TABS.md +0 -97
- package/examples/chrome-extension/build.js +0 -43
- package/examples/chrome-extension/manifest.json +0 -37
- package/examples/chrome-extension/package-lock.json +0 -1063
- package/examples/chrome-extension/package.json +0 -21
- package/examples/chrome-extension/popup.html +0 -195
- package/examples/chrome-extension/src/background.ts +0 -12
- package/examples/chrome-extension/src/content.ts +0 -7
- package/examples/chrome-extension/src/popup.ts +0 -303
- package/examples/chrome-extension/src/scenario-google-github.ts +0 -389
- package/examples/chrome-extension/test-page.html +0 -127
- package/examples/chrome-extension/tests/README.md +0 -206
- package/examples/chrome-extension/tests/scenario-google-to-github-star.ts +0 -380
- package/examples/chrome-extension/tsconfig.json +0 -14
- package/examples/snapshots/README.md +0 -207
- package/examples/snapshots/amazon-com-detail.html +0 -9528
- package/examples/snapshots/amazon-com-detail.snapshot.txt +0 -997
- package/examples/snapshots/convert-snapshots.ts +0 -97
- package/examples/snapshots/edition-cnn-com.html +0 -13292
- package/examples/snapshots/edition-cnn-com.snapshot.txt +0 -562
- package/examples/snapshots/github-com-microsoft-vscode.html +0 -2916
- package/examples/snapshots/github-com-microsoft-vscode.snapshot.txt +0 -455
- package/examples/snapshots/google-search.html +0 -20012
- package/examples/snapshots/google-search.snapshot.txt +0 -195
- package/examples/snapshots/metadata.json +0 -86
- package/examples/snapshots/npr-org-templates.html +0 -2031
- package/examples/snapshots/npr-org-templates.snapshot.txt +0 -224
- package/examples/snapshots/stackoverflow-com.html +0 -5216
- package/examples/snapshots/stackoverflow-com.snapshot.txt +0 -2404
- package/examples/snapshots/test-all-mode.html +0 -46
- package/examples/snapshots/test-all-mode.snapshot.txt +0 -5
- package/examples/snapshots/validate.test.ts +0 -296
- package/packages/cli/package.json +0 -42
- package/packages/cli/src/__tests__/cli.test.ts +0 -434
- package/packages/cli/src/__tests__/errors.test.ts +0 -226
- package/packages/cli/src/__tests__/executor.test.ts +0 -275
- package/packages/cli/src/__tests__/formatter.test.ts +0 -260
- package/packages/cli/src/__tests__/parser.test.ts +0 -288
- package/packages/cli/src/__tests__/suggestions.test.ts +0 -255
- package/packages/cli/src/commands/back.ts +0 -22
- package/packages/cli/src/commands/check.ts +0 -33
- package/packages/cli/src/commands/clear.ts +0 -33
- package/packages/cli/src/commands/click.ts +0 -32
- package/packages/cli/src/commands/closetab.ts +0 -31
- package/packages/cli/src/commands/eval.ts +0 -41
- package/packages/cli/src/commands/fill.ts +0 -30
- package/packages/cli/src/commands/focus.ts +0 -33
- package/packages/cli/src/commands/forward.ts +0 -22
- package/packages/cli/src/commands/goto.ts +0 -34
- package/packages/cli/src/commands/help.ts +0 -162
- package/packages/cli/src/commands/hover.ts +0 -34
- package/packages/cli/src/commands/index.ts +0 -129
- package/packages/cli/src/commands/newtab.ts +0 -35
- package/packages/cli/src/commands/press.ts +0 -40
- package/packages/cli/src/commands/reload.ts +0 -25
- package/packages/cli/src/commands/screenshot.ts +0 -27
- package/packages/cli/src/commands/scroll.ts +0 -64
- package/packages/cli/src/commands/select.ts +0 -35
- package/packages/cli/src/commands/snapshot.ts +0 -21
- package/packages/cli/src/commands/tab.ts +0 -32
- package/packages/cli/src/commands/tabs.ts +0 -26
- package/packages/cli/src/commands/text.ts +0 -27
- package/packages/cli/src/commands/title.ts +0 -17
- package/packages/cli/src/commands/type.ts +0 -38
- package/packages/cli/src/commands/uncheck.ts +0 -33
- package/packages/cli/src/commands/url.ts +0 -17
- package/packages/cli/src/commands/wait.ts +0 -54
- package/packages/cli/src/errors.ts +0 -164
- package/packages/cli/src/executor.ts +0 -68
- package/packages/cli/src/formatter.ts +0 -215
- package/packages/cli/src/index.ts +0 -257
- package/packages/cli/src/parser.ts +0 -195
- package/packages/cli/src/suggestions.ts +0 -207
- package/packages/cli/src/terminal/Terminal.ts +0 -365
- package/packages/cli/src/terminal/index.ts +0 -5
- package/packages/cli/src/types.ts +0 -155
- package/packages/cli/tsconfig.json +0 -20
- package/packages/core/package.json +0 -35
- package/packages/core/src/actions.ts +0 -1210
- package/packages/core/src/errors.ts +0 -296
- package/packages/core/src/index.test.ts +0 -638
- package/packages/core/src/index.ts +0 -220
- package/packages/core/src/ref-map.ts +0 -107
- package/packages/core/src/snapshot.ts +0 -873
- package/packages/core/src/types.ts +0 -536
- package/packages/core/tsconfig.json +0 -23
- package/packages/extension/README.md +0 -129
- package/packages/extension/package.json +0 -43
- package/packages/extension/src/background.ts +0 -888
- package/packages/extension/src/content.ts +0 -172
- package/packages/extension/src/index.ts +0 -579
- package/packages/extension/src/session-manager.ts +0 -385
- package/packages/extension/src/types.ts +0 -162
- package/packages/extension/tsconfig.json +0 -28
- package/src/index.ts +0 -64
- package/tsconfig.build.json +0 -12
- package/tsconfig.json +0 -26
- package/vitest.config.ts +0 -13
package/CLAUDE.md
DELETED
|
@@ -1,230 +0,0 @@
|
|
|
1
|
-
# CLAUDE.md
|
|
2
|
-
|
|
3
|
-
This file provides guidance to Claude Code (claude.ai/code) when working with code in this repository.
|
|
4
|
-
|
|
5
|
-
## Project Overview
|
|
6
|
-
|
|
7
|
-
BTCP Browser Agent is a browser-native implementation for AI agents to interact with web pages. It provides a clean separation between browser-level operations (tabs, navigation, screenshots) and DOM-level operations (clicking, typing, reading elements).
|
|
8
|
-
|
|
9
|
-
## Build Commands
|
|
10
|
-
|
|
11
|
-
```bash
|
|
12
|
-
# Build all packages
|
|
13
|
-
npm run build
|
|
14
|
-
|
|
15
|
-
# Build individual packages (in order)
|
|
16
|
-
npm run build:packages
|
|
17
|
-
|
|
18
|
-
# Build Chrome extension example
|
|
19
|
-
cd examples/chrome-extension && npm run build
|
|
20
|
-
|
|
21
|
-
# Watch mode for extension development
|
|
22
|
-
cd examples/chrome-extension && npm run watch
|
|
23
|
-
|
|
24
|
-
# Clean build artifacts
|
|
25
|
-
npm run clean
|
|
26
|
-
```
|
|
27
|
-
|
|
28
|
-
## Testing
|
|
29
|
-
|
|
30
|
-
```bash
|
|
31
|
-
# Run all tests
|
|
32
|
-
npm test
|
|
33
|
-
|
|
34
|
-
# Watch mode for test development
|
|
35
|
-
npm run test:watch
|
|
36
|
-
|
|
37
|
-
# Type checking only
|
|
38
|
-
npm run typecheck
|
|
39
|
-
```
|
|
40
|
-
|
|
41
|
-
## Architecture
|
|
42
|
-
|
|
43
|
-
### Three-Layer Command Flow
|
|
44
|
-
|
|
45
|
-
Commands flow through three distinct layers:
|
|
46
|
-
|
|
47
|
-
1. **Client Layer** (`packages/extension/src/index.ts`)
|
|
48
|
-
- `createClient()` provides high-level API methods
|
|
49
|
-
- Sends commands via `chrome.runtime.sendMessage`
|
|
50
|
-
- Used in popups, external scripts
|
|
51
|
-
|
|
52
|
-
2. **Background Layer** (`packages/extension/src/background.ts`)
|
|
53
|
-
- `BackgroundAgent` handles browser-level operations (tabs, navigation, screenshots)
|
|
54
|
-
- Routes DOM commands to ContentAgent via `chrome.tabs.sendMessage`
|
|
55
|
-
- `SessionManager` maintains tab group state across extension lifecycle
|
|
56
|
-
|
|
57
|
-
3. **Content Layer** (`packages/core/src/actions.ts`)
|
|
58
|
-
- `DOMActions` executes all DOM operations
|
|
59
|
-
- `createSnapshot()` generates accessibility tree with element refs
|
|
60
|
-
- Runs in content script context (one per tab)
|
|
61
|
-
|
|
62
|
-
### Command Type System
|
|
63
|
-
|
|
64
|
-
Commands are strongly typed with a discriminated union:
|
|
65
|
-
|
|
66
|
-
- **CoreAction** (46 actions in `packages/core/src/types.ts`): DOM operations like `click`, `type`, `snapshot`, `highlight`
|
|
67
|
-
- **ExtensionAction** (11 actions in `packages/extension/src/types.ts`): Browser operations like `navigate`, `screenshot`, `tabNew`
|
|
68
|
-
- All commands extend `BaseCommand` with `id` and `action` fields
|
|
69
|
-
|
|
70
|
-
### Element References and Snapshot API
|
|
71
|
-
|
|
72
|
-
The snapshot system creates stable element references:
|
|
73
|
-
|
|
74
|
-
```typescript
|
|
75
|
-
// Snapshot returns a string (accessibility tree with embedded @ref:N markers)
|
|
76
|
-
const tree = await client.snapshot();
|
|
77
|
-
// Example output: "- BUTTON \"Submit\" [@ref:1]"
|
|
78
|
-
|
|
79
|
-
// Use refs in commands: { action: 'click', selector: '@ref:5' }
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
**Important**: The snapshot API returns a `string` directly (the accessibility tree). Element refs are embedded as `@ref:N` markers in the tree and stored internally in a `RefMap` (WeakRef-based for memory management). Refs persist across commands within the same content script session and are used internally for the `highlight` feature.
|
|
83
|
-
|
|
84
|
-
## Package Structure
|
|
85
|
-
|
|
86
|
-
```
|
|
87
|
-
btcp-browser-agent/ # Monorepo root
|
|
88
|
-
├── packages/
|
|
89
|
-
│ ├── core/ # @btcp/core - DOM operations (ContentAgent)
|
|
90
|
-
│ │ ├── src/actions.ts # DOMActions class - all DOM command handlers
|
|
91
|
-
│ │ ├── src/snapshot.ts # Accessibility tree generation
|
|
92
|
-
│ │ ├── src/types.ts # CoreAction types (46 actions)
|
|
93
|
-
│ │ └── src/ref-map.ts # Element reference management
|
|
94
|
-
│ │
|
|
95
|
-
│ ├── extension/ # @btcp/extension - Browser operations
|
|
96
|
-
│ │ ├── src/background.ts # BackgroundAgent (service worker)
|
|
97
|
-
│ │ ├── src/session-manager.ts # Tab group persistence
|
|
98
|
-
│ │ ├── src/content.ts # Content script setup
|
|
99
|
-
│ │ └── src/index.ts # Client API + types
|
|
100
|
-
│ │
|
|
101
|
-
│ └── cli/ # @btcp/cli - Command-line interface
|
|
102
|
-
│ ├── src/commands/ # 28 CLI command implementations
|
|
103
|
-
│ ├── src/parser.ts # Natural language command parser
|
|
104
|
-
│ └── src/executor.ts # Command execution + suggestions
|
|
105
|
-
│
|
|
106
|
-
└── examples/
|
|
107
|
-
├── chrome-extension/ # Full extension demo (popup + background + content)
|
|
108
|
-
└── snapshots/ # Snapshot generation utilities
|
|
109
|
-
```
|
|
110
|
-
|
|
111
|
-
## Key Concepts
|
|
112
|
-
|
|
113
|
-
### Highlight vs CLI Commands
|
|
114
|
-
|
|
115
|
-
The core `DOMActions` class supports `highlight` and `clearHighlight` actions, but these are NOT registered as CLI commands. To use them:
|
|
116
|
-
|
|
117
|
-
- In popup: Use `client.execute({ id: '...', action: 'highlight' })`
|
|
118
|
-
- NOT: `cli.execute('highlight')` (will fail with "Unknown command")
|
|
119
|
-
|
|
120
|
-
The CLI package (`packages/cli/src/commands/index.ts`) has 28 registered commands. If a core action is missing from the CLI registry, use the raw `client.execute()` method.
|
|
121
|
-
|
|
122
|
-
### Session Management
|
|
123
|
-
|
|
124
|
-
The extension uses Chrome Tab Groups for session isolation:
|
|
125
|
-
|
|
126
|
-
- `SessionManager` persists active session to `chrome.storage.session`
|
|
127
|
-
- On extension reload, reconnects to stored group if it still exists
|
|
128
|
-
- All extension operations are scoped to the active session's tabs
|
|
129
|
-
- Create session: `client.groupCreate()` → returns `{ group: GroupInfo }`
|
|
130
|
-
|
|
131
|
-
### Multi-Tab Operations
|
|
132
|
-
|
|
133
|
-
Background agent supports two patterns for multi-tab work:
|
|
134
|
-
|
|
135
|
-
```typescript
|
|
136
|
-
// Pattern 1: tab() handle - interact without switching active tab
|
|
137
|
-
const tab = agent.tab(tabId);
|
|
138
|
-
await tab.snapshot();
|
|
139
|
-
await tab.click('@ref:5');
|
|
140
|
-
|
|
141
|
-
// Pattern 2: Pass tabId in execute options
|
|
142
|
-
await agent.execute(command, { tabId });
|
|
143
|
-
```
|
|
144
|
-
|
|
145
|
-
Both avoid unnecessary tab switching for better performance.
|
|
146
|
-
|
|
147
|
-
### Snapshot Formats
|
|
148
|
-
|
|
149
|
-
Snapshots support two output formats:
|
|
150
|
-
|
|
151
|
-
- `format: 'tree'` (default): Flat accessibility tree with refs
|
|
152
|
-
- `format: 'html'`: HTML structure with semantic xpaths
|
|
153
|
-
|
|
154
|
-
The tree format is optimized for AI consumption and includes role-based element descriptions.
|
|
155
|
-
|
|
156
|
-
## Development Workflow
|
|
157
|
-
|
|
158
|
-
### Working on Core Package
|
|
159
|
-
|
|
160
|
-
```bash
|
|
161
|
-
# 1. Make changes in packages/core/src/
|
|
162
|
-
# 2. Build core package
|
|
163
|
-
npm run build:packages
|
|
164
|
-
|
|
165
|
-
# 3. Run tests
|
|
166
|
-
npm test -- packages/core
|
|
167
|
-
|
|
168
|
-
# 4. If working on extension example, rebuild it
|
|
169
|
-
cd examples/chrome-extension && npm run build
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
### Working on Extension Example
|
|
173
|
-
|
|
174
|
-
```bash
|
|
175
|
-
cd examples/chrome-extension
|
|
176
|
-
|
|
177
|
-
# Watch mode - auto-rebuilds on changes
|
|
178
|
-
npm run watch
|
|
179
|
-
|
|
180
|
-
# Load unpacked extension in Chrome:
|
|
181
|
-
# 1. Open chrome://extensions
|
|
182
|
-
# 2. Enable "Developer mode"
|
|
183
|
-
# 3. Click "Load unpacked"
|
|
184
|
-
# 4. Select examples/chrome-extension/dist/
|
|
185
|
-
```
|
|
186
|
-
|
|
187
|
-
### Adding New Core Actions
|
|
188
|
-
|
|
189
|
-
1. Add action type to `CoreAction` in `packages/core/src/types.ts`
|
|
190
|
-
2. Create command interface extending `BaseCommand`
|
|
191
|
-
3. Add case to `DOMActions.dispatch()` in `packages/core/src/actions.ts`
|
|
192
|
-
4. Implement handler method in `DOMActions` class
|
|
193
|
-
5. Optionally create CLI command wrapper in `packages/cli/src/commands/`
|
|
194
|
-
|
|
195
|
-
### Adding New CLI Commands
|
|
196
|
-
|
|
197
|
-
1. Create command file in `packages/cli/src/commands/[name].ts`
|
|
198
|
-
2. Export `CommandHandler` with `name`, `description`, `execute()`, `examples`
|
|
199
|
-
3. Register in `packages/cli/src/commands/index.ts`
|
|
200
|
-
4. Add suggestions mapping in `packages/cli/src/suggestions.ts` if needed
|
|
201
|
-
|
|
202
|
-
## Common Pitfalls
|
|
203
|
-
|
|
204
|
-
1. **CLI vs Client API**: CLI commands are a subset of core actions. Use `client.execute()` for actions not in CLI registry.
|
|
205
|
-
|
|
206
|
-
2. **Ref Lifecycle**: Element refs are cleared on navigation and only valid within the content script that created them.
|
|
207
|
-
|
|
208
|
-
3. **Cross-Origin**: ContentAgent only works same-origin when used standalone. Extension context bypasses this via content scripts.
|
|
209
|
-
|
|
210
|
-
4. **Build Order**: Always build packages before examples (`npm run build:packages` then `cd examples/chrome-extension && npm run build`).
|
|
211
|
-
|
|
212
|
-
5. **Session Persistence**: Session state survives extension reload via `chrome.storage.session`, but actual tab groups can be deleted by the user.
|
|
213
|
-
|
|
214
|
-
## Import Paths
|
|
215
|
-
|
|
216
|
-
The package exports multiple entry points:
|
|
217
|
-
|
|
218
|
-
```typescript
|
|
219
|
-
// Main re-exports (convenience)
|
|
220
|
-
import { createContentAgent } from '@btcp/browser-agent';
|
|
221
|
-
|
|
222
|
-
// Direct package imports (preferred for clarity)
|
|
223
|
-
import { createContentAgent } from '@btcp/browser-agent/core';
|
|
224
|
-
import { BackgroundAgent, createClient } from '@btcp/browser-agent/extension';
|
|
225
|
-
import { createCLI } from '@btcp/browser-agent/cli';
|
|
226
|
-
|
|
227
|
-
// Extension-specific entry points
|
|
228
|
-
import '@btcp/browser-agent/extension/content'; // Content script setup
|
|
229
|
-
import '@btcp/browser-agent/extension/background'; // Background script setup
|
|
230
|
-
```
|
package/SKILL.md
DELETED
|
@@ -1,143 +0,0 @@
|
|
|
1
|
-
---
|
|
2
|
-
name: agent-browser
|
|
3
|
-
description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
|
|
4
|
-
---
|
|
5
|
-
|
|
6
|
-
# Browser Automation with agent-browser
|
|
7
|
-
|
|
8
|
-
## Quick start
|
|
9
|
-
|
|
10
|
-
```bash
|
|
11
|
-
agent-browser open <url> # Navigate to page
|
|
12
|
-
agent-browser snapshot -i # Get interactive elements with refs
|
|
13
|
-
agent-browser click @e1 # Click element by ref
|
|
14
|
-
agent-browser fill @e2 "text" # Fill input by ref
|
|
15
|
-
agent-browser close # Close browser
|
|
16
|
-
```
|
|
17
|
-
|
|
18
|
-
## Core workflow
|
|
19
|
-
|
|
20
|
-
1. Navigate: `agent-browser open <url>`
|
|
21
|
-
2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
|
|
22
|
-
3. Interact using refs from the snapshot
|
|
23
|
-
4. Re-snapshot after navigation or significant DOM changes
|
|
24
|
-
|
|
25
|
-
## Commands
|
|
26
|
-
|
|
27
|
-
### Navigation
|
|
28
|
-
```bash
|
|
29
|
-
agent-browser open <url> # Navigate to URL
|
|
30
|
-
agent-browser back # Go back
|
|
31
|
-
agent-browser forward # Go forward
|
|
32
|
-
agent-browser reload # Reload page
|
|
33
|
-
agent-browser close # Close browser
|
|
34
|
-
```
|
|
35
|
-
|
|
36
|
-
### Snapshot (page analysis)
|
|
37
|
-
```bash
|
|
38
|
-
agent-browser snapshot # Full accessibility tree
|
|
39
|
-
agent-browser snapshot -i # Interactive elements only (recommended)
|
|
40
|
-
agent-browser snapshot -c # Compact output
|
|
41
|
-
agent-browser snapshot -d 3 # Limit depth to 3
|
|
42
|
-
```
|
|
43
|
-
|
|
44
|
-
### Interactions (use @refs from snapshot)
|
|
45
|
-
```bash
|
|
46
|
-
agent-browser click @e1 # Click
|
|
47
|
-
agent-browser dblclick @e1 # Double-click
|
|
48
|
-
agent-browser fill @e2 "text" # Clear and type
|
|
49
|
-
agent-browser type @e2 "text" # Type without clearing
|
|
50
|
-
agent-browser press Enter # Press key
|
|
51
|
-
agent-browser press Control+a # Key combination
|
|
52
|
-
agent-browser hover @e1 # Hover
|
|
53
|
-
agent-browser check @e1 # Check checkbox
|
|
54
|
-
agent-browser uncheck @e1 # Uncheck checkbox
|
|
55
|
-
agent-browser select @e1 "value" # Select dropdown
|
|
56
|
-
agent-browser scroll down 500 # Scroll page
|
|
57
|
-
agent-browser scrollintoview @e1 # Scroll element into view
|
|
58
|
-
```
|
|
59
|
-
|
|
60
|
-
### Get information
|
|
61
|
-
```bash
|
|
62
|
-
agent-browser get text @e1 # Get element text
|
|
63
|
-
agent-browser get value @e1 # Get input value
|
|
64
|
-
agent-browser get title # Get page title
|
|
65
|
-
agent-browser get url # Get current URL
|
|
66
|
-
```
|
|
67
|
-
|
|
68
|
-
### Screenshots
|
|
69
|
-
```bash
|
|
70
|
-
agent-browser screenshot # Screenshot to stdout
|
|
71
|
-
agent-browser screenshot path.png # Save to file
|
|
72
|
-
agent-browser screenshot --full # Full page
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
### Wait
|
|
76
|
-
```bash
|
|
77
|
-
agent-browser wait @e1 # Wait for element
|
|
78
|
-
agent-browser wait 2000 # Wait milliseconds
|
|
79
|
-
agent-browser wait --text "Success" # Wait for text
|
|
80
|
-
agent-browser wait --load networkidle # Wait for network idle
|
|
81
|
-
```
|
|
82
|
-
|
|
83
|
-
### Semantic locators (alternative to refs)
|
|
84
|
-
```bash
|
|
85
|
-
agent-browser find role button click --name "Submit"
|
|
86
|
-
agent-browser find text "Sign In" click
|
|
87
|
-
agent-browser find label "Email" fill "user@test.com"
|
|
88
|
-
```
|
|
89
|
-
|
|
90
|
-
## Example: Form submission
|
|
91
|
-
|
|
92
|
-
```bash
|
|
93
|
-
agent-browser open https://example.com/form
|
|
94
|
-
agent-browser snapshot -i
|
|
95
|
-
# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
|
|
96
|
-
|
|
97
|
-
agent-browser fill @e1 "user@example.com"
|
|
98
|
-
agent-browser fill @e2 "password123"
|
|
99
|
-
agent-browser click @e3
|
|
100
|
-
agent-browser wait --load networkidle
|
|
101
|
-
agent-browser snapshot -i # Check result
|
|
102
|
-
```
|
|
103
|
-
|
|
104
|
-
## Example: Authentication with saved state
|
|
105
|
-
|
|
106
|
-
```bash
|
|
107
|
-
# Login once
|
|
108
|
-
agent-browser open https://app.example.com/login
|
|
109
|
-
agent-browser snapshot -i
|
|
110
|
-
agent-browser fill @e1 "username"
|
|
111
|
-
agent-browser fill @e2 "password"
|
|
112
|
-
agent-browser click @e3
|
|
113
|
-
agent-browser wait --url "**/dashboard"
|
|
114
|
-
agent-browser state save auth.json
|
|
115
|
-
|
|
116
|
-
# Later sessions: load saved state
|
|
117
|
-
agent-browser state load auth.json
|
|
118
|
-
agent-browser open https://app.example.com/dashboard
|
|
119
|
-
```
|
|
120
|
-
|
|
121
|
-
## Sessions (parallel browsers)
|
|
122
|
-
|
|
123
|
-
```bash
|
|
124
|
-
agent-browser --session test1 open site-a.com
|
|
125
|
-
agent-browser --session test2 open site-b.com
|
|
126
|
-
agent-browser session list
|
|
127
|
-
```
|
|
128
|
-
|
|
129
|
-
## JSON output (for parsing)
|
|
130
|
-
|
|
131
|
-
Add `--json` for machine-readable output:
|
|
132
|
-
```bash
|
|
133
|
-
agent-browser snapshot -i --json
|
|
134
|
-
agent-browser get text @e1 --json
|
|
135
|
-
```
|
|
136
|
-
|
|
137
|
-
## Debugging
|
|
138
|
-
|
|
139
|
-
```bash
|
|
140
|
-
agent-browser open example.com --headed # Show browser window
|
|
141
|
-
agent-browser console # View console messages
|
|
142
|
-
agent-browser errors # View page errors
|
|
143
|
-
```
|
package/SNAPSHOT_IMPROVEMENTS.md
DELETED
|
@@ -1,302 +0,0 @@
|
|
|
1
|
-
# Snapshot System Improvements - Implementation Summary
|
|
2
|
-
|
|
3
|
-
**Date:** 2026-01-16
|
|
4
|
-
**Status:** ✅ Complete
|
|
5
|
-
|
|
6
|
-
## Overview
|
|
7
|
-
|
|
8
|
-
Comprehensive improvements to the BTCP Browser Agent snapshot system focusing on completeness, robustness, and AI agent usability. All improvements are backward compatible with opt-in enhanced features.
|
|
9
|
-
|
|
10
|
-
## Critical Issues Fixed
|
|
11
|
-
|
|
12
|
-
### 1. CSS Selector Generation Error Recovery ✅
|
|
13
|
-
**Problem:** Catastrophic failures on complex sites (Amazon: 3 refs, Stack Overflow: 9 elements)
|
|
14
|
-
**Solution:** Multi-layer fallback strategy
|
|
15
|
-
- Try-catch around CSS.escape operations
|
|
16
|
-
- Fallback to simplified selector generation
|
|
17
|
-
- Graceful degradation prevents complete failures
|
|
18
|
-
|
|
19
|
-
**Results:**
|
|
20
|
-
- Amazon: 3 refs → 101 refs (3,267% improvement)
|
|
21
|
-
- Stack Overflow: 9 elements → 227 refs (2,422% improvement)
|
|
22
|
-
|
|
23
|
-
### 2. Adaptive Depth Thresholds Rebalanced ✅
|
|
24
|
-
**Problem:** Overly aggressive depth limiting (depth 3 on 800+ elements)
|
|
25
|
-
**Solution:** Context-aware thresholds prioritizing completeness
|
|
26
|
-
- Interactive mode: 1500/3000 element thresholds (vs 300/500/800)
|
|
27
|
-
- Minimum depth: 5 (vs 3) for usability
|
|
28
|
-
- Mode-specific adjustment (interactive gets higher limits)
|
|
29
|
-
|
|
30
|
-
**Configuration:**
|
|
31
|
-
```typescript
|
|
32
|
-
minDepth?: number; // Default: 5 (was hardcoded 3)
|
|
33
|
-
```
|
|
34
|
-
|
|
35
|
-
### 3. Error Boundary with Partial Results ✅
|
|
36
|
-
**Problem:** Complete failure on processing interruptions
|
|
37
|
-
**Solution:** Try-catch wrapper with metrics reporting
|
|
38
|
-
- Captures partial results on error
|
|
39
|
-
- Reports processing errors in warnings section
|
|
40
|
-
- Maintains ref metadata for captured elements
|
|
41
|
-
|
|
42
|
-
## Quality & Transparency Features
|
|
43
|
-
|
|
44
|
-
### 4. Enhanced Snapshot Header with Quality Metrics ✅
|
|
45
|
-
**Before:**
|
|
46
|
-
```
|
|
47
|
-
SNAPSHOT: elements=800 depth=3/10 (auto-limited: extremely large page)
|
|
48
|
-
```
|
|
49
|
-
|
|
50
|
-
**After:**
|
|
51
|
-
```
|
|
52
|
-
SNAPSHOT: elements=1288 refs=101 captured=101/101 quality=high depth=10/10 mode=interactive,compact
|
|
53
|
-
```
|
|
54
|
-
|
|
55
|
-
**New Metrics:**
|
|
56
|
-
- `refs=X` - Total interactive references captured
|
|
57
|
-
- `captured=X/Y` - Captured vs total interactive elements ratio
|
|
58
|
-
- `quality=high|medium|low` - AI-friendly quality indicator
|
|
59
|
-
|
|
60
|
-
**Quality Calculation:**
|
|
61
|
-
- High: ≥80% capture rate, no depth limiting
|
|
62
|
-
- Medium: ≥50% capture rate OR (depth limited AND ≥60% capture)
|
|
63
|
-
- Low: <50% capture rate
|
|
64
|
-
|
|
65
|
-
### 5. Element Importance Scoring ✅
|
|
66
|
-
**Purpose:** Help AI agents prioritize actions
|
|
67
|
-
|
|
68
|
-
**Importance Levels:**
|
|
69
|
-
- `primary` - CTAs, submit buttons, primary navigation
|
|
70
|
-
- `secondary` - Standard interactive elements
|
|
71
|
-
- `utility` - Close buttons, back-to-top, dismissals
|
|
72
|
-
|
|
73
|
-
**Detection Strategy:**
|
|
74
|
-
- Class names: `.primary`, `.cta`, `.btn-primary`
|
|
75
|
-
- Submit buttons: `type="submit"`
|
|
76
|
-
- Navigation links: `element.closest('nav')`
|
|
77
|
-
- Utility patterns: "close", "dismiss", "cancel" in labels
|
|
78
|
-
|
|
79
|
-
**Added to refs metadata:**
|
|
80
|
-
```typescript
|
|
81
|
-
refs: {
|
|
82
|
-
"@ref:5": {
|
|
83
|
-
importance: 'primary',
|
|
84
|
-
// ... other metadata
|
|
85
|
-
}
|
|
86
|
-
}
|
|
87
|
-
```
|
|
88
|
-
|
|
89
|
-
### 6. Link Context Extraction ✅
|
|
90
|
-
**Problem:** Ambiguous link text ("click here", "learn more") unusable for AI
|
|
91
|
-
|
|
92
|
-
**Solution:** Extract surrounding text context for disambiguation
|
|
93
|
-
|
|
94
|
-
**Triggers:** Links with ambiguous text
|
|
95
|
-
- "click here", "learn more", "read more", "more", "here", "link"
|
|
96
|
-
|
|
97
|
-
**Output:**
|
|
98
|
-
```
|
|
99
|
-
LINK "Learn more" @ref:10 href=/docs
|
|
100
|
-
→ context: "Feature XYZ allows advanced automation. Learn more about..."
|
|
101
|
-
```
|
|
102
|
-
|
|
103
|
-
**Implementation:**
|
|
104
|
-
- Clones parent element
|
|
105
|
-
- Removes link node
|
|
106
|
-
- Extracts first 100 chars of surrounding text
|
|
107
|
-
- Only shown when context adds value
|
|
108
|
-
|
|
109
|
-
### 7. Content Preview for Long Text ✅
|
|
110
|
-
**Purpose:** Provide article/description previews without bloat
|
|
111
|
-
|
|
112
|
-
**Trigger:** Text blocks >200 characters with `contentPreview: true`
|
|
113
|
-
|
|
114
|
-
**Output:**
|
|
115
|
-
```
|
|
116
|
-
TEXT "This is a very long article text that continues for many paragraphs and discusses..."
|
|
117
|
-
→ (1,234 additional characters not shown)
|
|
118
|
-
```
|
|
119
|
-
|
|
120
|
-
## Enhanced Type Definitions
|
|
121
|
-
|
|
122
|
-
### Extended SnapshotOptions
|
|
123
|
-
```typescript
|
|
124
|
-
interface SnapshotOptions {
|
|
125
|
-
// ... existing options
|
|
126
|
-
minDepth?: number; // Minimum depth (default: 5)
|
|
127
|
-
samplingStrategy?: 'importance' | 'balanced' | 'depth-first'; // Reserved for future
|
|
128
|
-
contentPreview?: boolean; // Long text preview (default: true)
|
|
129
|
-
landmarks?: boolean; // Landmark grouping (default: true)
|
|
130
|
-
incremental?: boolean; // Delta snapshots (reserved for future)
|
|
131
|
-
baseSnapshot?: SnapshotData; // Base for incremental (reserved for future)
|
|
132
|
-
}
|
|
133
|
-
```
|
|
134
|
-
|
|
135
|
-
### Enhanced SnapshotData
|
|
136
|
-
```typescript
|
|
137
|
-
interface SnapshotData {
|
|
138
|
-
tree: string;
|
|
139
|
-
refs: Record<string, {
|
|
140
|
-
// ... existing fields
|
|
141
|
-
importance?: 'primary' | 'secondary' | 'utility'; // NEW
|
|
142
|
-
context?: string; // NEW
|
|
143
|
-
}>;
|
|
144
|
-
metadata?: { // NEW
|
|
145
|
-
totalInteractiveElements?: number;
|
|
146
|
-
capturedElements?: number;
|
|
147
|
-
quality?: 'high' | 'medium' | 'low';
|
|
148
|
-
depthLimited?: boolean;
|
|
149
|
-
warnings?: string[];
|
|
150
|
-
};
|
|
151
|
-
}
|
|
152
|
-
```
|
|
153
|
-
|
|
154
|
-
## Performance Impact
|
|
155
|
-
|
|
156
|
-
### Generation Time
|
|
157
|
-
- Amazon: 1,704ms (acceptable for complex page)
|
|
158
|
-
- Stack Overflow: 434ms (excellent for large page)
|
|
159
|
-
- GitHub: 66ms (optimal for well-structured page)
|
|
160
|
-
- **All < 5s target** ✅
|
|
161
|
-
|
|
162
|
-
### Output Size
|
|
163
|
-
- Amazon: 10.3 KB (3 refs → 101 refs, still compact)
|
|
164
|
-
- Stack Overflow: 23.5 KB (9 elements → 227 refs)
|
|
165
|
-
- All pages < 50KB ✅
|
|
166
|
-
|
|
167
|
-
### Compression Ratio
|
|
168
|
-
- Amazon: 2.9 MB → 10.3 KB (**99.65% reduction**)
|
|
169
|
-
- Stack Overflow: 858 KB → 23.5 KB (**97.26% reduction**)
|
|
170
|
-
- GitHub: 455 KB → 2.9 KB (**99.36% reduction**)
|
|
171
|
-
|
|
172
|
-
## Validation Results
|
|
173
|
-
|
|
174
|
-
**Overall:** 92.4% validation pass rate (61/66 checks)
|
|
175
|
-
|
|
176
|
-
### Perfect Categories (100% pass)
|
|
177
|
-
- ✅ Page header structure
|
|
178
|
-
- ✅ Snapshot header with statistics
|
|
179
|
-
- ✅ Heading level formatting
|
|
180
|
-
- ✅ Children indicators
|
|
181
|
-
- ✅ Bounding boxes (all 569 refs)
|
|
182
|
-
- ✅ Viewport detection (all 569 refs)
|
|
183
|
-
- ✅ Performance (<5s)
|
|
184
|
-
- ✅ Output size (<50KB)
|
|
185
|
-
|
|
186
|
-
### Partial Pass Categories
|
|
187
|
-
- ⚠️ Button labels: 50% (due to icon-only buttons in source HTML)
|
|
188
|
-
- ⚠️ Link labels: 67% (due to icon-only links in source HTML)
|
|
189
|
-
|
|
190
|
-
**Note:** Label failures reflect accessibility issues in source HTML, not snapshot quality issues.
|
|
191
|
-
|
|
192
|
-
## Real-World Test Results
|
|
193
|
-
|
|
194
|
-
### Amazon Product Detail Page
|
|
195
|
-
- **Elements:** 109 captured (1,288 total in page)
|
|
196
|
-
- **Refs:** 101 interactive elements
|
|
197
|
-
- **Quality:** High (100% capture rate)
|
|
198
|
-
- **Depth:** Full 10/10 (no limiting)
|
|
199
|
-
- **Usable:** ✅ All product actions captured
|
|
200
|
-
|
|
201
|
-
### Stack Overflow Question Page
|
|
202
|
-
- **Elements:** 241 captured (925 total in page)
|
|
203
|
-
- **Refs:** 227 interactive elements
|
|
204
|
-
- **Quality:** High (100% capture rate)
|
|
205
|
-
- **Depth:** Full 10/10 (no limiting)
|
|
206
|
-
- **Usable:** ✅ All voting, commenting, navigation captured
|
|
207
|
-
|
|
208
|
-
### CNN News Article
|
|
209
|
-
- **Elements:** 43 captured
|
|
210
|
-
- **Refs:** 39 interactive elements
|
|
211
|
-
- **Quality:** High (100% capture rate)
|
|
212
|
-
- **Validation:** 11/11 checks passed (perfect)
|
|
213
|
-
|
|
214
|
-
### GitHub Repository
|
|
215
|
-
- **Elements:** 37 captured
|
|
216
|
-
- **Refs:** 35 interactive elements
|
|
217
|
-
- **Quality:** High (100% capture rate)
|
|
218
|
-
- **Validation:** 11/11 checks passed (perfect)
|
|
219
|
-
|
|
220
|
-
## Code Quality
|
|
221
|
-
|
|
222
|
-
### Error Handling
|
|
223
|
-
- CSS selector generation: Try-catch with fallback
|
|
224
|
-
- Element processing: Error boundary with partial results
|
|
225
|
-
- Count pass: Try-catch with estimation fallback
|
|
226
|
-
- Ref generation: Minimal fallback on error
|
|
227
|
-
|
|
228
|
-
### Code Impact
|
|
229
|
-
- ~500 lines added to `snapshot.ts`
|
|
230
|
-
- ~200 lines modified (refactoring)
|
|
231
|
-
- Type definitions extended
|
|
232
|
-
- Zero breaking changes (backward compatible)
|
|
233
|
-
|
|
234
|
-
## Future Enhancements (Reserved)
|
|
235
|
-
|
|
236
|
-
### Planned Features (API ready)
|
|
237
|
-
1. **Incremental Snapshots** - Delta snapshots for dynamic pages
|
|
238
|
-
2. **Smart Element Sampling** - Importance-based sampling under depth limits
|
|
239
|
-
3. **Landmark-Based Navigation** - Grouping by ARIA landmarks
|
|
240
|
-
|
|
241
|
-
### Options Reserved
|
|
242
|
-
```typescript
|
|
243
|
-
samplingStrategy?: 'importance' | 'balanced' | 'depth-first';
|
|
244
|
-
incremental?: boolean;
|
|
245
|
-
baseSnapshot?: SnapshotData;
|
|
246
|
-
landmarks?: boolean; // Currently enabled but not grouping yet
|
|
247
|
-
```
|
|
248
|
-
|
|
249
|
-
## Migration Guide
|
|
250
|
-
|
|
251
|
-
### For Existing Code
|
|
252
|
-
**No changes required** - all improvements are opt-in or automatic enhancements.
|
|
253
|
-
|
|
254
|
-
### To Enable New Features
|
|
255
|
-
```typescript
|
|
256
|
-
const snapshot = createSnapshot(document, refMap, {
|
|
257
|
-
maxDepth: 10,
|
|
258
|
-
minDepth: 5, // NEW: Enforce minimum depth
|
|
259
|
-
contentPreview: true, // NEW: Enable long text preview
|
|
260
|
-
landmarks: true, // NEW: Enable landmark detection
|
|
261
|
-
interactive: true,
|
|
262
|
-
compact: true
|
|
263
|
-
});
|
|
264
|
-
|
|
265
|
-
// Access new metadata
|
|
266
|
-
console.log(snapshot.metadata?.quality); // 'high' | 'medium' | 'low'
|
|
267
|
-
console.log(snapshot.metadata?.capturedElements); // Number of refs captured
|
|
268
|
-
console.log(snapshot.metadata?.totalInteractiveElements); // Total available
|
|
269
|
-
|
|
270
|
-
// Access enhanced refs
|
|
271
|
-
Object.entries(snapshot.refs).forEach(([ref, data]) => {
|
|
272
|
-
if (data.importance === 'primary') {
|
|
273
|
-
// Prioritize primary actions
|
|
274
|
-
}
|
|
275
|
-
if (data.context) {
|
|
276
|
-
// Use context for ambiguous links
|
|
277
|
-
}
|
|
278
|
-
});
|
|
279
|
-
```
|
|
280
|
-
|
|
281
|
-
## Metrics Summary
|
|
282
|
-
|
|
283
|
-
| Metric | Before | After | Improvement |
|
|
284
|
-
|--------|--------|-------|-------------|
|
|
285
|
-
| Amazon refs | 3 | 101 | +3,267% |
|
|
286
|
-
| Stack Overflow refs | 9 | 227 | +2,422% |
|
|
287
|
-
| Validation pass rate | ~85% | 92.4% | +7.4 pp |
|
|
288
|
-
| Error recovery | None | Full | ∞ |
|
|
289
|
-
| Quality transparency | None | High/Med/Low | New feature |
|
|
290
|
-
| AI usability | Low | High | Significant |
|
|
291
|
-
|
|
292
|
-
## Conclusion
|
|
293
|
-
|
|
294
|
-
The snapshot system has been transformed from a brittle prototype with catastrophic failures on complex sites to a production-ready system with:
|
|
295
|
-
|
|
296
|
-
✅ **Robustness** - Graceful degradation on all error types
|
|
297
|
-
✅ **Completeness** - 97-100% capture rate on complex real-world sites
|
|
298
|
-
✅ **Transparency** - Quality metrics guide AI agent behavior
|
|
299
|
-
✅ **Performance** - All operations <5s with 97-99% compression
|
|
300
|
-
✅ **Usability** - Enhanced metadata (importance, context) for better AI decisions
|
|
301
|
-
|
|
302
|
-
**Ready for production use** including complex e-commerce, Q&A platforms, news sites, and code repositories.
|