btcp-browser-agent 0.1.16 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +21 -21
- package/README.md +338 -338
- package/package.json +69 -69
- package/packages/core/dist/actions.js +35 -35
- package/packages/core/dist/snapshot.js +50 -0
- package/packages/core/dist/types.d.ts +2 -1
- package/packages/extension/dist/background.js +1 -1
- package/packages/extension/dist/session-manager.d.ts +10 -1
- package/packages/extension/dist/session-manager.js +49 -10
package/LICENSE
CHANGED
|
@@ -1,21 +1,21 @@
|
|
|
1
|
-
MIT License
|
|
2
|
-
|
|
3
|
-
Copyright (c) 2026 browser-tool-calling-protocol
|
|
4
|
-
|
|
5
|
-
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
-
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
-
in the Software without restriction, including without limitation the rights
|
|
8
|
-
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
-
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
-
furnished to do so, subject to the following conditions:
|
|
11
|
-
|
|
12
|
-
The above copyright notice and this permission notice shall be included in all
|
|
13
|
-
copies or substantial portions of the Software.
|
|
14
|
-
|
|
15
|
-
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
-
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
-
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
-
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
-
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
-
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
-
SOFTWARE.
|
|
1
|
+
MIT License
|
|
2
|
+
|
|
3
|
+
Copyright (c) 2026 browser-tool-calling-protocol
|
|
4
|
+
|
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
|
7
|
+
in the Software without restriction, including without limitation the rights
|
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
|
10
|
+
furnished to do so, subject to the following conditions:
|
|
11
|
+
|
|
12
|
+
The above copyright notice and this permission notice shall be included in all
|
|
13
|
+
copies or substantial portions of the Software.
|
|
14
|
+
|
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
|
21
|
+
SOFTWARE.
|
package/README.md
CHANGED
|
@@ -1,338 +1,338 @@
|
|
|
1
|
-
# btcp-browser-agent
|
|
2
|
-
|
|
3
|
-
Give AI agents the power to see and control any browser.
|
|
4
|
-
|
|
5
|
-
A lightweight foundation for building AI systems that need browser access — automation, testing, web agents, or any browser-based workflow.
|
|
6
|
-
|
|
7
|
-
## Why This Package?
|
|
8
|
-
|
|
9
|
-
AI agents struggle with browsers because:
|
|
10
|
-
- Raw HTML is too noisy (thousands of nodes)
|
|
11
|
-
- CSS selectors break when layouts change
|
|
12
|
-
- No stable way to reference elements across turns
|
|
13
|
-
|
|
14
|
-
**Browser Agent solves this with smart snapshots:**
|
|
15
|
-
|
|
16
|
-
```
|
|
17
|
-
BUTTON "Submit" [@ref:0]
|
|
18
|
-
TEXTBOX "Email" [required] [@ref:1]
|
|
19
|
-
LINK "Forgot password?" [@ref:2]
|
|
20
|
-
```
|
|
21
|
-
|
|
22
|
-
One command gives your agent a clean, semantic view of any page. Stable `@ref` markers let it interact without fragile selectors.
|
|
23
|
-
|
|
24
|
-
## Features
|
|
25
|
-
|
|
26
|
-
- **Smart Snapshots** - Accessibility tree format optimized for AI comprehension
|
|
27
|
-
- **Stable Element Refs** - `@ref:N` markers that survive DOM changes within a session
|
|
28
|
-
- **Full Browser Control** - Navigation, tabs, screenshots, keyboard/mouse
|
|
29
|
-
- **46 DOM Actions** - Click, type, fill, scroll, hover, and more
|
|
30
|
-
- **Two Modes** - Chrome extension (full control) or standalone (same-origin)
|
|
31
|
-
|
|
32
|
-
## Quick Example
|
|
33
|
-
|
|
34
|
-
```typescript
|
|
35
|
-
import { createClient } from 'btcp-browser-agent/extension';
|
|
36
|
-
|
|
37
|
-
const agent = createClient();
|
|
38
|
-
|
|
39
|
-
// Navigate and understand the page
|
|
40
|
-
await agent.navigate('https://example.com');
|
|
41
|
-
const snapshot = await agent.snapshot();
|
|
42
|
-
// Returns: BUTTON "Login" [@ref:0], TEXTBOX "Email" [@ref:1], ...
|
|
43
|
-
|
|
44
|
-
// Interact using refs - no CSS selectors needed
|
|
45
|
-
await agent.fill('@ref:1', 'user@example.com');
|
|
46
|
-
await agent.click('@ref:0');
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
## Use Cases
|
|
50
|
-
|
|
51
|
-
- **AI Assistants** - Let LLMs browse the web and complete tasks for users
|
|
52
|
-
- **Browser Agents** - Foundation for autonomous web agents that research, navigate, and act
|
|
53
|
-
- **Automated Testing** - Reliable UI tests with stable element refs that don't break on layout changes
|
|
54
|
-
- **Web Automation** - Form filling, data extraction, multi-step workflow automation
|
|
55
|
-
- **Web Scraping** - Extract structured data with semantic understanding of page content
|
|
56
|
-
|
|
57
|
-
## Installation
|
|
58
|
-
|
|
59
|
-
```bash
|
|
60
|
-
npm install btcp-browser-agent
|
|
61
|
-
```
|
|
62
|
-
|
|
63
|
-
## Usage Modes
|
|
64
|
-
|
|
65
|
-
### Extension Mode (Full Browser Control)
|
|
66
|
-
|
|
67
|
-
For Chrome extensions with cross-origin access, tab management, and screenshots.
|
|
68
|
-
|
|
69
|
-
**Background Script:**
|
|
70
|
-
```typescript
|
|
71
|
-
import { BackgroundAgent, setupMessageListener } from 'btcp-browser-agent/extension';
|
|
72
|
-
|
|
73
|
-
// Option 1: Just set up message routing
|
|
74
|
-
setupMessageListener();
|
|
75
|
-
|
|
76
|
-
// Option 2: Use BackgroundAgent directly for programmatic control
|
|
77
|
-
const agent = new BackgroundAgent();
|
|
78
|
-
await agent.navigate('https://example.com');
|
|
79
|
-
await agent.screenshot();
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
**Content Script:**
|
|
83
|
-
```typescript
|
|
84
|
-
import { createContentAgent } from 'btcp-browser-agent';
|
|
85
|
-
|
|
86
|
-
const agent = createContentAgent();
|
|
87
|
-
|
|
88
|
-
// Take a snapshot
|
|
89
|
-
const { data } = await agent.execute({ action: 'snapshot' });
|
|
90
|
-
console.log(data.tree); // Accessibility tree with refs
|
|
91
|
-
|
|
92
|
-
// Click an element using ref from snapshot
|
|
93
|
-
await agent.execute({ action: 'click', selector: '@ref:5' });
|
|
94
|
-
```
|
|
95
|
-
|
|
96
|
-
**Popup (sending commands via messaging):**
|
|
97
|
-
```typescript
|
|
98
|
-
import { createClient } from 'btcp-browser-agent';
|
|
99
|
-
|
|
100
|
-
const client = createClient();
|
|
101
|
-
|
|
102
|
-
// Navigate and interact
|
|
103
|
-
await client.navigate('https://example.com');
|
|
104
|
-
const snapshot = await client.snapshot();
|
|
105
|
-
await client.click('@ref:5');
|
|
106
|
-
const screenshot = await client.screenshot();
|
|
107
|
-
```
|
|
108
|
-
|
|
109
|
-
### Standalone Mode (No Extension)
|
|
110
|
-
|
|
111
|
-
For use directly in a web page (limited to same-origin, no tab management):
|
|
112
|
-
|
|
113
|
-
```typescript
|
|
114
|
-
import { createContentAgent } from 'btcp-browser-agent';
|
|
115
|
-
|
|
116
|
-
const agent = createContentAgent();
|
|
117
|
-
|
|
118
|
-
// Take a snapshot
|
|
119
|
-
const { data } = await agent.execute({ action: 'snapshot' });
|
|
120
|
-
|
|
121
|
-
// Interact with elements
|
|
122
|
-
await agent.execute({ action: 'click', selector: '@ref:5' });
|
|
123
|
-
await agent.execute({ action: 'fill', selector: '@ref:3', value: 'Hello' });
|
|
124
|
-
```
|
|
125
|
-
|
|
126
|
-
## API Reference
|
|
127
|
-
|
|
128
|
-
### BackgroundAgent (Extension Background Script)
|
|
129
|
-
|
|
130
|
-
High-level browser orchestrator that runs in the extension's background script.
|
|
131
|
-
|
|
132
|
-
```typescript
|
|
133
|
-
import { BackgroundAgent } from 'btcp-browser-agent/extension';
|
|
134
|
-
|
|
135
|
-
const agent = new BackgroundAgent();
|
|
136
|
-
|
|
137
|
-
// Tab Management
|
|
138
|
-
await agent.newTab({ url: 'https://example.com' });
|
|
139
|
-
await agent.switchTab(tabId);
|
|
140
|
-
await agent.closeTab(tabId);
|
|
141
|
-
const tabs = await agent.listTabs();
|
|
142
|
-
|
|
143
|
-
// Navigation
|
|
144
|
-
await agent.navigate('https://example.com');
|
|
145
|
-
await agent.back();
|
|
146
|
-
await agent.forward();
|
|
147
|
-
await agent.reload();
|
|
148
|
-
|
|
149
|
-
// Screenshots
|
|
150
|
-
const screenshot = await agent.screenshot({ format: 'png' });
|
|
151
|
-
|
|
152
|
-
// Execute commands (routes to ContentAgent for DOM operations)
|
|
153
|
-
await agent.execute({ action: 'click', selector: '#submit' });
|
|
154
|
-
```
|
|
155
|
-
|
|
156
|
-
#### Multi-Tab Operations
|
|
157
|
-
|
|
158
|
-
```typescript
|
|
159
|
-
// Open tabs
|
|
160
|
-
const tab1 = await agent.newTab({ url: 'https://google.com' });
|
|
161
|
-
const tab2 = await agent.newTab({ url: 'https://github.com', active: false });
|
|
162
|
-
|
|
163
|
-
// Method 1: tab() handle - interact without switching
|
|
164
|
-
const githubTab = agent.tab(tab2.id);
|
|
165
|
-
await githubTab.snapshot();
|
|
166
|
-
await githubTab.click('@ref:5');
|
|
167
|
-
|
|
168
|
-
// Method 2: Specify tabId in execute
|
|
169
|
-
await agent.execute(
|
|
170
|
-
{ action: 'getText', selector: 'h1' },
|
|
171
|
-
{ tabId: tab2.id }
|
|
172
|
-
);
|
|
173
|
-
|
|
174
|
-
// Active tab stays tab1 (no switching needed)
|
|
175
|
-
```
|
|
176
|
-
|
|
177
|
-
### ContentAgent (Content Script)
|
|
178
|
-
|
|
179
|
-
DOM automation agent that runs in content scripts or web pages.
|
|
180
|
-
|
|
181
|
-
```typescript
|
|
182
|
-
import { createContentAgent } from 'btcp-browser-agent';
|
|
183
|
-
|
|
184
|
-
const agent = createContentAgent();
|
|
185
|
-
|
|
186
|
-
// Execute commands
|
|
187
|
-
const response = await agent.execute({ action: 'snapshot' });
|
|
188
|
-
```
|
|
189
|
-
|
|
190
|
-
#### Available Actions
|
|
191
|
-
|
|
192
|
-
**DOM Reading:**
|
|
193
|
-
| Action | Description |
|
|
194
|
-
|--------|-------------|
|
|
195
|
-
| `snapshot` | Get accessibility tree with element refs |
|
|
196
|
-
| `getText` | Get element text content |
|
|
197
|
-
| `getAttribute` | Get element attribute value |
|
|
198
|
-
| `isVisible` | Check if element is visible |
|
|
199
|
-
| `isEnabled` | Check if element is enabled |
|
|
200
|
-
| `isChecked` | Check if checkbox/radio is checked |
|
|
201
|
-
| `getBoundingBox` | Get element dimensions |
|
|
202
|
-
|
|
203
|
-
**Element Interaction:**
|
|
204
|
-
| Action | Description |
|
|
205
|
-
|--------|-------------|
|
|
206
|
-
| `click` | Click an element |
|
|
207
|
-
| `dblclick` | Double-click an element |
|
|
208
|
-
| `type` | Type text (keystroke by keystroke) |
|
|
209
|
-
| `fill` | Fill input (instant) |
|
|
210
|
-
| `clear` | Clear input value |
|
|
211
|
-
| `check` | Check checkbox |
|
|
212
|
-
| `uncheck` | Uncheck checkbox |
|
|
213
|
-
| `select` | Select dropdown option |
|
|
214
|
-
| `hover` | Hover over element |
|
|
215
|
-
| `focus` | Focus element |
|
|
216
|
-
| `blur` | Remove focus |
|
|
217
|
-
|
|
218
|
-
**Keyboard/Mouse:**
|
|
219
|
-
| Action | Description |
|
|
220
|
-
|--------|-------------|
|
|
221
|
-
| `press` | Press a key |
|
|
222
|
-
| `keyDown` | Key down event |
|
|
223
|
-
| `keyUp` | Key up event |
|
|
224
|
-
|
|
225
|
-
**Other:**
|
|
226
|
-
| Action | Description |
|
|
227
|
-
|--------|-------------|
|
|
228
|
-
| `scroll` | Scroll page or element |
|
|
229
|
-
| `scrollIntoView` | Scroll element into view |
|
|
230
|
-
| `wait` | Wait for element state |
|
|
231
|
-
| `evaluate` | Execute JavaScript |
|
|
232
|
-
|
|
233
|
-
### Element Refs
|
|
234
|
-
|
|
235
|
-
The `snapshot` action returns element references for stable selection:
|
|
236
|
-
|
|
237
|
-
```typescript
|
|
238
|
-
const { data } = await agent.execute({ action: 'snapshot' });
|
|
239
|
-
// data.tree: "BUTTON 'Submit' [@ref:5]\nTEXTBOX 'Email' [@ref:3]"
|
|
240
|
-
|
|
241
|
-
// Use refs in subsequent commands
|
|
242
|
-
await agent.execute({ action: 'click', selector: '@ref:5' });
|
|
243
|
-
```
|
|
244
|
-
|
|
245
|
-
### Script Injection
|
|
246
|
-
|
|
247
|
-
Inject custom JavaScript into the page's main world and communicate with it:
|
|
248
|
-
|
|
249
|
-
```typescript
|
|
250
|
-
// Inject a helper script
|
|
251
|
-
await client.scriptInject(`
|
|
252
|
-
window.addEventListener('message', (e) => {
|
|
253
|
-
if (e.data?.type === 'btcp:script-command') {
|
|
254
|
-
const { commandId, payload } = e.data;
|
|
255
|
-
// Handle command and respond
|
|
256
|
-
window.postMessage({
|
|
257
|
-
type: 'btcp:script-ack',
|
|
258
|
-
commandId,
|
|
259
|
-
result: { /* your data */ }
|
|
260
|
-
}, '*');
|
|
261
|
-
}
|
|
262
|
-
});
|
|
263
|
-
`, { scriptId: 'helper' });
|
|
264
|
-
|
|
265
|
-
// Send commands to injected script
|
|
266
|
-
const result = await client.scriptSend(
|
|
267
|
-
{ action: 'getData', id: '123' },
|
|
268
|
-
{ scriptId: 'helper' }
|
|
269
|
-
);
|
|
270
|
-
```
|
|
271
|
-
|
|
272
|
-
**Why script injection?**
|
|
273
|
-
- Access page-level APIs (fetch with page cookies, window globals)
|
|
274
|
-
- Interact with page frameworks (React state, etc.)
|
|
275
|
-
- Execute code with full page context
|
|
276
|
-
|
|
277
|
-
## Architecture
|
|
278
|
-
|
|
279
|
-
The package provides a clean separation between browser-level and DOM-level operations:
|
|
280
|
-
|
|
281
|
-
```
|
|
282
|
-
┌─────────────────────────────────────────────────────────────────┐
|
|
283
|
-
│ Background Script (Extension Service Worker) │
|
|
284
|
-
│ ┌─────────────────────────────────────────────────────────────┐│
|
|
285
|
-
│ │ BackgroundAgent ││
|
|
286
|
-
│ │ - Tab management (create, close, switch, list) ││
|
|
287
|
-
│ │ - Navigation (goto, back, forward, reload) ││
|
|
288
|
-
│ │ - Screenshots (chrome.tabs.captureVisibleTab) ││
|
|
289
|
-
│ │ - Routes DOM commands → ContentAgent ││
|
|
290
|
-
│ └─────────────────────────────────────────────────────────────┘│
|
|
291
|
-
└─────────────────────────────────────────────────────────────────┘
|
|
292
|
-
│
|
|
293
|
-
chrome.tabs.sendMessage
|
|
294
|
-
▼
|
|
295
|
-
┌─────────────────────────────────────────────────────────────────┐
|
|
296
|
-
│ Content Script (Per Tab) │
|
|
297
|
-
│ ┌─────────────────────────────────────────────────────────────┐│
|
|
298
|
-
│ │ ContentAgent ││
|
|
299
|
-
│ │ - DOM snapshot (accessibility tree) ││
|
|
300
|
-
│ │ - Element interaction (click, type, fill, hover) ││
|
|
301
|
-
│ │ - DOM queries (getText, getAttribute, isVisible) ││
|
|
302
|
-
│ │ - Keyboard/mouse events ││
|
|
303
|
-
│ └─────────────────────────────────────────────────────────────┘│
|
|
304
|
-
└─────────────────────────────────────────────────────────────────┘
|
|
305
|
-
```
|
|
306
|
-
|
|
307
|
-
## Package Structure
|
|
308
|
-
|
|
309
|
-
```
|
|
310
|
-
btcp-browser-agent/
|
|
311
|
-
├── @btcp/core # ContentAgent - DOM operations
|
|
312
|
-
│ ├── createContentAgent()
|
|
313
|
-
│ ├── DOMActions
|
|
314
|
-
│ └── createSnapshot()
|
|
315
|
-
│
|
|
316
|
-
├── @btcp/extension # BackgroundAgent - Browser operations
|
|
317
|
-
│ ├── BackgroundAgent
|
|
318
|
-
│ ├── setupMessageListener()
|
|
319
|
-
│ └── createClient()
|
|
320
|
-
│
|
|
321
|
-
└── btcp-browser-agent # Main package - re-exports both
|
|
322
|
-
```
|
|
323
|
-
|
|
324
|
-
## Capabilities Comparison
|
|
325
|
-
|
|
326
|
-
| Capability | ContentAgent (Standalone) | BackgroundAgent (Extension) |
|
|
327
|
-
|------------|--------------------------|--------------------------|
|
|
328
|
-
| DOM Snapshot | Yes | Yes (via ContentAgent) |
|
|
329
|
-
| Element Clicks | Yes | Yes (via ContentAgent) |
|
|
330
|
-
| Form Filling | Yes | Yes (via ContentAgent) |
|
|
331
|
-
| Cross-origin | Same-origin only | Any page |
|
|
332
|
-
| Tab Management | No | Yes |
|
|
333
|
-
| Navigation | No | Yes |
|
|
334
|
-
| Screenshots | No | Yes |
|
|
335
|
-
|
|
336
|
-
## License
|
|
337
|
-
|
|
338
|
-
MIT
|
|
1
|
+
# btcp-browser-agent
|
|
2
|
+
|
|
3
|
+
Give AI agents the power to see and control any browser.
|
|
4
|
+
|
|
5
|
+
A lightweight foundation for building AI systems that need browser access — automation, testing, web agents, or any browser-based workflow.
|
|
6
|
+
|
|
7
|
+
## Why This Package?
|
|
8
|
+
|
|
9
|
+
AI agents struggle with browsers because:
|
|
10
|
+
- Raw HTML is too noisy (thousands of nodes)
|
|
11
|
+
- CSS selectors break when layouts change
|
|
12
|
+
- No stable way to reference elements across turns
|
|
13
|
+
|
|
14
|
+
**Browser Agent solves this with smart snapshots:**
|
|
15
|
+
|
|
16
|
+
```
|
|
17
|
+
BUTTON "Submit" [@ref:0]
|
|
18
|
+
TEXTBOX "Email" [required] [@ref:1]
|
|
19
|
+
LINK "Forgot password?" [@ref:2]
|
|
20
|
+
```
|
|
21
|
+
|
|
22
|
+
One command gives your agent a clean, semantic view of any page. Stable `@ref` markers let it interact without fragile selectors.
|
|
23
|
+
|
|
24
|
+
## Features
|
|
25
|
+
|
|
26
|
+
- **Smart Snapshots** - Accessibility tree format optimized for AI comprehension
|
|
27
|
+
- **Stable Element Refs** - `@ref:N` markers that survive DOM changes within a session
|
|
28
|
+
- **Full Browser Control** - Navigation, tabs, screenshots, keyboard/mouse
|
|
29
|
+
- **46 DOM Actions** - Click, type, fill, scroll, hover, and more
|
|
30
|
+
- **Two Modes** - Chrome extension (full control) or standalone (same-origin)
|
|
31
|
+
|
|
32
|
+
## Quick Example
|
|
33
|
+
|
|
34
|
+
```typescript
|
|
35
|
+
import { createClient } from 'btcp-browser-agent/extension';
|
|
36
|
+
|
|
37
|
+
const agent = createClient();
|
|
38
|
+
|
|
39
|
+
// Navigate and understand the page
|
|
40
|
+
await agent.navigate('https://example.com');
|
|
41
|
+
const snapshot = await agent.snapshot();
|
|
42
|
+
// Returns: BUTTON "Login" [@ref:0], TEXTBOX "Email" [@ref:1], ...
|
|
43
|
+
|
|
44
|
+
// Interact using refs - no CSS selectors needed
|
|
45
|
+
await agent.fill('@ref:1', 'user@example.com');
|
|
46
|
+
await agent.click('@ref:0');
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
## Use Cases
|
|
50
|
+
|
|
51
|
+
- **AI Assistants** - Let LLMs browse the web and complete tasks for users
|
|
52
|
+
- **Browser Agents** - Foundation for autonomous web agents that research, navigate, and act
|
|
53
|
+
- **Automated Testing** - Reliable UI tests with stable element refs that don't break on layout changes
|
|
54
|
+
- **Web Automation** - Form filling, data extraction, multi-step workflow automation
|
|
55
|
+
- **Web Scraping** - Extract structured data with semantic understanding of page content
|
|
56
|
+
|
|
57
|
+
## Installation
|
|
58
|
+
|
|
59
|
+
```bash
|
|
60
|
+
npm install btcp-browser-agent
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
## Usage Modes
|
|
64
|
+
|
|
65
|
+
### Extension Mode (Full Browser Control)
|
|
66
|
+
|
|
67
|
+
For Chrome extensions with cross-origin access, tab management, and screenshots.
|
|
68
|
+
|
|
69
|
+
**Background Script:**
|
|
70
|
+
```typescript
|
|
71
|
+
import { BackgroundAgent, setupMessageListener } from 'btcp-browser-agent/extension';
|
|
72
|
+
|
|
73
|
+
// Option 1: Just set up message routing
|
|
74
|
+
setupMessageListener();
|
|
75
|
+
|
|
76
|
+
// Option 2: Use BackgroundAgent directly for programmatic control
|
|
77
|
+
const agent = new BackgroundAgent();
|
|
78
|
+
await agent.navigate('https://example.com');
|
|
79
|
+
await agent.screenshot();
|
|
80
|
+
```
|
|
81
|
+
|
|
82
|
+
**Content Script:**
|
|
83
|
+
```typescript
|
|
84
|
+
import { createContentAgent } from 'btcp-browser-agent';
|
|
85
|
+
|
|
86
|
+
const agent = createContentAgent();
|
|
87
|
+
|
|
88
|
+
// Take a snapshot
|
|
89
|
+
const { data } = await agent.execute({ action: 'snapshot' });
|
|
90
|
+
console.log(data.tree); // Accessibility tree with refs
|
|
91
|
+
|
|
92
|
+
// Click an element using ref from snapshot
|
|
93
|
+
await agent.execute({ action: 'click', selector: '@ref:5' });
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
**Popup (sending commands via messaging):**
|
|
97
|
+
```typescript
|
|
98
|
+
import { createClient } from 'btcp-browser-agent';
|
|
99
|
+
|
|
100
|
+
const client = createClient();
|
|
101
|
+
|
|
102
|
+
// Navigate and interact
|
|
103
|
+
await client.navigate('https://example.com');
|
|
104
|
+
const snapshot = await client.snapshot();
|
|
105
|
+
await client.click('@ref:5');
|
|
106
|
+
const screenshot = await client.screenshot();
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Standalone Mode (No Extension)
|
|
110
|
+
|
|
111
|
+
For use directly in a web page (limited to same-origin, no tab management):
|
|
112
|
+
|
|
113
|
+
```typescript
|
|
114
|
+
import { createContentAgent } from 'btcp-browser-agent';
|
|
115
|
+
|
|
116
|
+
const agent = createContentAgent();
|
|
117
|
+
|
|
118
|
+
// Take a snapshot
|
|
119
|
+
const { data } = await agent.execute({ action: 'snapshot' });
|
|
120
|
+
|
|
121
|
+
// Interact with elements
|
|
122
|
+
await agent.execute({ action: 'click', selector: '@ref:5' });
|
|
123
|
+
await agent.execute({ action: 'fill', selector: '@ref:3', value: 'Hello' });
|
|
124
|
+
```
|
|
125
|
+
|
|
126
|
+
## API Reference
|
|
127
|
+
|
|
128
|
+
### BackgroundAgent (Extension Background Script)
|
|
129
|
+
|
|
130
|
+
High-level browser orchestrator that runs in the extension's background script.
|
|
131
|
+
|
|
132
|
+
```typescript
|
|
133
|
+
import { BackgroundAgent } from 'btcp-browser-agent/extension';
|
|
134
|
+
|
|
135
|
+
const agent = new BackgroundAgent();
|
|
136
|
+
|
|
137
|
+
// Tab Management
|
|
138
|
+
await agent.newTab({ url: 'https://example.com' });
|
|
139
|
+
await agent.switchTab(tabId);
|
|
140
|
+
await agent.closeTab(tabId);
|
|
141
|
+
const tabs = await agent.listTabs();
|
|
142
|
+
|
|
143
|
+
// Navigation
|
|
144
|
+
await agent.navigate('https://example.com');
|
|
145
|
+
await agent.back();
|
|
146
|
+
await agent.forward();
|
|
147
|
+
await agent.reload();
|
|
148
|
+
|
|
149
|
+
// Screenshots
|
|
150
|
+
const screenshot = await agent.screenshot({ format: 'png' });
|
|
151
|
+
|
|
152
|
+
// Execute commands (routes to ContentAgent for DOM operations)
|
|
153
|
+
await agent.execute({ action: 'click', selector: '#submit' });
|
|
154
|
+
```
|
|
155
|
+
|
|
156
|
+
#### Multi-Tab Operations
|
|
157
|
+
|
|
158
|
+
```typescript
|
|
159
|
+
// Open tabs
|
|
160
|
+
const tab1 = await agent.newTab({ url: 'https://google.com' });
|
|
161
|
+
const tab2 = await agent.newTab({ url: 'https://github.com', active: false });
|
|
162
|
+
|
|
163
|
+
// Method 1: tab() handle - interact without switching
|
|
164
|
+
const githubTab = agent.tab(tab2.id);
|
|
165
|
+
await githubTab.snapshot();
|
|
166
|
+
await githubTab.click('@ref:5');
|
|
167
|
+
|
|
168
|
+
// Method 2: Specify tabId in execute
|
|
169
|
+
await agent.execute(
|
|
170
|
+
{ action: 'getText', selector: 'h1' },
|
|
171
|
+
{ tabId: tab2.id }
|
|
172
|
+
);
|
|
173
|
+
|
|
174
|
+
// Active tab stays tab1 (no switching needed)
|
|
175
|
+
```
|
|
176
|
+
|
|
177
|
+
### ContentAgent (Content Script)
|
|
178
|
+
|
|
179
|
+
DOM automation agent that runs in content scripts or web pages.
|
|
180
|
+
|
|
181
|
+
```typescript
|
|
182
|
+
import { createContentAgent } from 'btcp-browser-agent';
|
|
183
|
+
|
|
184
|
+
const agent = createContentAgent();
|
|
185
|
+
|
|
186
|
+
// Execute commands
|
|
187
|
+
const response = await agent.execute({ action: 'snapshot' });
|
|
188
|
+
```
|
|
189
|
+
|
|
190
|
+
#### Available Actions
|
|
191
|
+
|
|
192
|
+
**DOM Reading:**
|
|
193
|
+
| Action | Description |
|
|
194
|
+
|--------|-------------|
|
|
195
|
+
| `snapshot` | Get accessibility tree with element refs |
|
|
196
|
+
| `getText` | Get element text content |
|
|
197
|
+
| `getAttribute` | Get element attribute value |
|
|
198
|
+
| `isVisible` | Check if element is visible |
|
|
199
|
+
| `isEnabled` | Check if element is enabled |
|
|
200
|
+
| `isChecked` | Check if checkbox/radio is checked |
|
|
201
|
+
| `getBoundingBox` | Get element dimensions |
|
|
202
|
+
|
|
203
|
+
**Element Interaction:**
|
|
204
|
+
| Action | Description |
|
|
205
|
+
|--------|-------------|
|
|
206
|
+
| `click` | Click an element |
|
|
207
|
+
| `dblclick` | Double-click an element |
|
|
208
|
+
| `type` | Type text (keystroke by keystroke) |
|
|
209
|
+
| `fill` | Fill input (instant) |
|
|
210
|
+
| `clear` | Clear input value |
|
|
211
|
+
| `check` | Check checkbox |
|
|
212
|
+
| `uncheck` | Uncheck checkbox |
|
|
213
|
+
| `select` | Select dropdown option |
|
|
214
|
+
| `hover` | Hover over element |
|
|
215
|
+
| `focus` | Focus element |
|
|
216
|
+
| `blur` | Remove focus |
|
|
217
|
+
|
|
218
|
+
**Keyboard/Mouse:**
|
|
219
|
+
| Action | Description |
|
|
220
|
+
|--------|-------------|
|
|
221
|
+
| `press` | Press a key |
|
|
222
|
+
| `keyDown` | Key down event |
|
|
223
|
+
| `keyUp` | Key up event |
|
|
224
|
+
|
|
225
|
+
**Other:**
|
|
226
|
+
| Action | Description |
|
|
227
|
+
|--------|-------------|
|
|
228
|
+
| `scroll` | Scroll page or element |
|
|
229
|
+
| `scrollIntoView` | Scroll element into view |
|
|
230
|
+
| `wait` | Wait for element state |
|
|
231
|
+
| `evaluate` | Execute JavaScript |
|
|
232
|
+
|
|
233
|
+
### Element Refs
|
|
234
|
+
|
|
235
|
+
The `snapshot` action returns element references for stable selection:
|
|
236
|
+
|
|
237
|
+
```typescript
|
|
238
|
+
const { data } = await agent.execute({ action: 'snapshot' });
|
|
239
|
+
// data.tree: "BUTTON 'Submit' [@ref:5]\nTEXTBOX 'Email' [@ref:3]"
|
|
240
|
+
|
|
241
|
+
// Use refs in subsequent commands
|
|
242
|
+
await agent.execute({ action: 'click', selector: '@ref:5' });
|
|
243
|
+
```
|
|
244
|
+
|
|
245
|
+
### Script Injection
|
|
246
|
+
|
|
247
|
+
Inject custom JavaScript into the page's main world and communicate with it:
|
|
248
|
+
|
|
249
|
+
```typescript
|
|
250
|
+
// Inject a helper script
|
|
251
|
+
await client.scriptInject(`
|
|
252
|
+
window.addEventListener('message', (e) => {
|
|
253
|
+
if (e.data?.type === 'btcp:script-command') {
|
|
254
|
+
const { commandId, payload } = e.data;
|
|
255
|
+
// Handle command and respond
|
|
256
|
+
window.postMessage({
|
|
257
|
+
type: 'btcp:script-ack',
|
|
258
|
+
commandId,
|
|
259
|
+
result: { /* your data */ }
|
|
260
|
+
}, '*');
|
|
261
|
+
}
|
|
262
|
+
});
|
|
263
|
+
`, { scriptId: 'helper' });
|
|
264
|
+
|
|
265
|
+
// Send commands to injected script
|
|
266
|
+
const result = await client.scriptSend(
|
|
267
|
+
{ action: 'getData', id: '123' },
|
|
268
|
+
{ scriptId: 'helper' }
|
|
269
|
+
);
|
|
270
|
+
```
|
|
271
|
+
|
|
272
|
+
**Why script injection?**
|
|
273
|
+
- Access page-level APIs (fetch with page cookies, window globals)
|
|
274
|
+
- Interact with page frameworks (React state, etc.)
|
|
275
|
+
- Execute code with full page context
|
|
276
|
+
|
|
277
|
+
## Architecture
|
|
278
|
+
|
|
279
|
+
The package provides a clean separation between browser-level and DOM-level operations:
|
|
280
|
+
|
|
281
|
+
```
|
|
282
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
283
|
+
│ Background Script (Extension Service Worker) │
|
|
284
|
+
│ ┌─────────────────────────────────────────────────────────────┐│
|
|
285
|
+
│ │ BackgroundAgent ││
|
|
286
|
+
│ │ - Tab management (create, close, switch, list) ││
|
|
287
|
+
│ │ - Navigation (goto, back, forward, reload) ││
|
|
288
|
+
│ │ - Screenshots (chrome.tabs.captureVisibleTab) ││
|
|
289
|
+
│ │ - Routes DOM commands → ContentAgent ││
|
|
290
|
+
│ └─────────────────────────────────────────────────────────────┘│
|
|
291
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
292
|
+
│
|
|
293
|
+
chrome.tabs.sendMessage
|
|
294
|
+
▼
|
|
295
|
+
┌─────────────────────────────────────────────────────────────────┐
|
|
296
|
+
│ Content Script (Per Tab) │
|
|
297
|
+
│ ┌─────────────────────────────────────────────────────────────┐│
|
|
298
|
+
│ │ ContentAgent ││
|
|
299
|
+
│ │ - DOM snapshot (accessibility tree) ││
|
|
300
|
+
│ │ - Element interaction (click, type, fill, hover) ││
|
|
301
|
+
│ │ - DOM queries (getText, getAttribute, isVisible) ││
|
|
302
|
+
│ │ - Keyboard/mouse events ││
|
|
303
|
+
│ └─────────────────────────────────────────────────────────────┘│
|
|
304
|
+
└─────────────────────────────────────────────────────────────────┘
|
|
305
|
+
```
|
|
306
|
+
|
|
307
|
+
## Package Structure
|
|
308
|
+
|
|
309
|
+
```
|
|
310
|
+
btcp-browser-agent/
|
|
311
|
+
├── @btcp/core # ContentAgent - DOM operations
|
|
312
|
+
│ ├── createContentAgent()
|
|
313
|
+
│ ├── DOMActions
|
|
314
|
+
│ └── createSnapshot()
|
|
315
|
+
│
|
|
316
|
+
├── @btcp/extension # BackgroundAgent - Browser operations
|
|
317
|
+
│ ├── BackgroundAgent
|
|
318
|
+
│ ├── setupMessageListener()
|
|
319
|
+
│ └── createClient()
|
|
320
|
+
│
|
|
321
|
+
└── btcp-browser-agent # Main package - re-exports both
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
## Capabilities Comparison
|
|
325
|
+
|
|
326
|
+
| Capability | ContentAgent (Standalone) | BackgroundAgent (Extension) |
|
|
327
|
+
|------------|--------------------------|--------------------------|
|
|
328
|
+
| DOM Snapshot | Yes | Yes (via ContentAgent) |
|
|
329
|
+
| Element Clicks | Yes | Yes (via ContentAgent) |
|
|
330
|
+
| Form Filling | Yes | Yes (via ContentAgent) |
|
|
331
|
+
| Cross-origin | Same-origin only | Any page |
|
|
332
|
+
| Tab Management | No | Yes |
|
|
333
|
+
| Navigation | No | Yes |
|
|
334
|
+
| Screenshots | No | Yes |
|
|
335
|
+
|
|
336
|
+
## License
|
|
337
|
+
|
|
338
|
+
MIT
|
package/package.json
CHANGED
|
@@ -1,69 +1,69 @@
|
|
|
1
|
-
{
|
|
2
|
-
"name": "btcp-browser-agent",
|
|
3
|
-
"version": "0.1.16",
|
|
4
|
-
"description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
|
|
5
|
-
"type": "module",
|
|
6
|
-
"main": "dist/index.js",
|
|
7
|
-
"types": "dist/index.d.ts",
|
|
8
|
-
"exports": {
|
|
9
|
-
".": {
|
|
10
|
-
"types": "./dist/index.d.ts",
|
|
11
|
-
"import": "./dist/index.js",
|
|
12
|
-
"default": "./dist/index.js"
|
|
13
|
-
},
|
|
14
|
-
"./core": {
|
|
15
|
-
"types": "./packages/core/dist/index.d.ts",
|
|
16
|
-
"import": "./packages/core/dist/index.js",
|
|
17
|
-
"default": "./packages/core/dist/index.js"
|
|
18
|
-
},
|
|
19
|
-
"./extension": {
|
|
20
|
-
"types": "./packages/extension/dist/index.d.ts",
|
|
21
|
-
"import": "./packages/extension/dist/index.js",
|
|
22
|
-
"default": "./packages/extension/dist/index.js"
|
|
23
|
-
},
|
|
24
|
-
"./extension/content": {
|
|
25
|
-
"types": "./packages/extension/dist/content.d.ts",
|
|
26
|
-
"import": "./packages/extension/dist/content.js",
|
|
27
|
-
"default": "./packages/extension/dist/content.js"
|
|
28
|
-
},
|
|
29
|
-
"./extension/background": {
|
|
30
|
-
"types": "./packages/extension/dist/background.d.ts",
|
|
31
|
-
"import": "./packages/extension/dist/background.js",
|
|
32
|
-
"default": "./packages/extension/dist/background.js"
|
|
33
|
-
}
|
|
34
|
-
},
|
|
35
|
-
"scripts": {
|
|
36
|
-
"build": "npm run build:packages && tsc -p tsconfig.build.json",
|
|
37
|
-
"build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
|
|
38
|
-
"clean": "rm -rf dist packages/*/dist",
|
|
39
|
-
"prepare": "npm run build",
|
|
40
|
-
"test": "vitest run",
|
|
41
|
-
"test:watch": "vitest",
|
|
42
|
-
"typecheck": "tsc --noEmit"
|
|
43
|
-
},
|
|
44
|
-
"workspaces": [
|
|
45
|
-
"packages/core",
|
|
46
|
-
"packages/extension",
|
|
47
|
-
"packages/cli"
|
|
48
|
-
],
|
|
49
|
-
"files": [
|
|
50
|
-
"dist",
|
|
51
|
-
"packages/core/dist",
|
|
52
|
-
"packages/extension/dist",
|
|
53
|
-
"!**/__tests__",
|
|
54
|
-
"!**/*.map"
|
|
55
|
-
],
|
|
56
|
-
"license": "Apache-2.0",
|
|
57
|
-
"repository": {
|
|
58
|
-
"type": "git",
|
|
59
|
-
"url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
|
|
60
|
-
},
|
|
61
|
-
"dependencies": {},
|
|
62
|
-
"devDependencies": {
|
|
63
|
-
"@types/chrome": "^0.0.268",
|
|
64
|
-
"@types/node": "^20.10.0",
|
|
65
|
-
"jsdom": "^24.0.0",
|
|
66
|
-
"typescript": "^5.3.0",
|
|
67
|
-
"vitest": "^2.0.0"
|
|
68
|
-
}
|
|
69
|
-
}
|
|
1
|
+
{
|
|
2
|
+
"name": "btcp-browser-agent",
|
|
3
|
+
"version": "0.1.17",
|
|
4
|
+
"description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
|
|
5
|
+
"type": "module",
|
|
6
|
+
"main": "dist/index.js",
|
|
7
|
+
"types": "dist/index.d.ts",
|
|
8
|
+
"exports": {
|
|
9
|
+
".": {
|
|
10
|
+
"types": "./dist/index.d.ts",
|
|
11
|
+
"import": "./dist/index.js",
|
|
12
|
+
"default": "./dist/index.js"
|
|
13
|
+
},
|
|
14
|
+
"./core": {
|
|
15
|
+
"types": "./packages/core/dist/index.d.ts",
|
|
16
|
+
"import": "./packages/core/dist/index.js",
|
|
17
|
+
"default": "./packages/core/dist/index.js"
|
|
18
|
+
},
|
|
19
|
+
"./extension": {
|
|
20
|
+
"types": "./packages/extension/dist/index.d.ts",
|
|
21
|
+
"import": "./packages/extension/dist/index.js",
|
|
22
|
+
"default": "./packages/extension/dist/index.js"
|
|
23
|
+
},
|
|
24
|
+
"./extension/content": {
|
|
25
|
+
"types": "./packages/extension/dist/content.d.ts",
|
|
26
|
+
"import": "./packages/extension/dist/content.js",
|
|
27
|
+
"default": "./packages/extension/dist/content.js"
|
|
28
|
+
},
|
|
29
|
+
"./extension/background": {
|
|
30
|
+
"types": "./packages/extension/dist/background.d.ts",
|
|
31
|
+
"import": "./packages/extension/dist/background.js",
|
|
32
|
+
"default": "./packages/extension/dist/background.js"
|
|
33
|
+
}
|
|
34
|
+
},
|
|
35
|
+
"scripts": {
|
|
36
|
+
"build": "npm run build:packages && tsc -p tsconfig.build.json",
|
|
37
|
+
"build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
|
|
38
|
+
"clean": "rm -rf dist packages/*/dist",
|
|
39
|
+
"prepare": "npm run build",
|
|
40
|
+
"test": "vitest run",
|
|
41
|
+
"test:watch": "vitest",
|
|
42
|
+
"typecheck": "tsc --noEmit"
|
|
43
|
+
},
|
|
44
|
+
"workspaces": [
|
|
45
|
+
"packages/core",
|
|
46
|
+
"packages/extension",
|
|
47
|
+
"packages/cli"
|
|
48
|
+
],
|
|
49
|
+
"files": [
|
|
50
|
+
"dist",
|
|
51
|
+
"packages/core/dist",
|
|
52
|
+
"packages/extension/dist",
|
|
53
|
+
"!**/__tests__",
|
|
54
|
+
"!**/*.map"
|
|
55
|
+
],
|
|
56
|
+
"license": "Apache-2.0",
|
|
57
|
+
"repository": {
|
|
58
|
+
"type": "git",
|
|
59
|
+
"url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
|
|
60
|
+
},
|
|
61
|
+
"dependencies": {},
|
|
62
|
+
"devDependencies": {
|
|
63
|
+
"@types/chrome": "^0.0.268",
|
|
64
|
+
"@types/node": "^20.10.0",
|
|
65
|
+
"jsdom": "^24.0.0",
|
|
66
|
+
"typescript": "^5.3.0",
|
|
67
|
+
"vitest": "^2.0.0"
|
|
68
|
+
}
|
|
69
|
+
}
|
|
@@ -864,15 +864,15 @@ export class DOMActions {
|
|
|
864
864
|
// Create overlay container with absolute positioning covering entire document
|
|
865
865
|
this.overlayContainer = this.document.createElement('div');
|
|
866
866
|
this.overlayContainer.id = 'btcp-highlight-overlay';
|
|
867
|
-
this.overlayContainer.style.cssText = `
|
|
868
|
-
position: absolute;
|
|
869
|
-
top: 0;
|
|
870
|
-
left: 0;
|
|
871
|
-
width: ${this.document.documentElement.scrollWidth}px;
|
|
872
|
-
height: ${this.document.documentElement.scrollHeight}px;
|
|
873
|
-
pointer-events: none;
|
|
874
|
-
z-index: 999999;
|
|
875
|
-
contain: layout style paint;
|
|
867
|
+
this.overlayContainer.style.cssText = `
|
|
868
|
+
position: absolute;
|
|
869
|
+
top: 0;
|
|
870
|
+
left: 0;
|
|
871
|
+
width: ${this.document.documentElement.scrollWidth}px;
|
|
872
|
+
height: ${this.document.documentElement.scrollHeight}px;
|
|
873
|
+
pointer-events: none;
|
|
874
|
+
z-index: 999999;
|
|
875
|
+
contain: layout style paint;
|
|
876
876
|
`;
|
|
877
877
|
let highlightedCount = 0;
|
|
878
878
|
// Create border overlays and labels for each ref
|
|
@@ -893,17 +893,17 @@ export class DOMActions {
|
|
|
893
893
|
const border = this.document.createElement('div');
|
|
894
894
|
border.className = 'btcp-ref-border';
|
|
895
895
|
border.dataset.ref = ref;
|
|
896
|
-
border.style.cssText = `
|
|
897
|
-
position: absolute;
|
|
898
|
-
width: ${bbox.width}px;
|
|
899
|
-
height: ${bbox.height}px;
|
|
900
|
-
transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
|
|
901
|
-
border: 2px solid rgba(59, 130, 246, 0.8);
|
|
902
|
-
border-radius: 2px;
|
|
903
|
-
box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
|
|
904
|
-
pointer-events: none;
|
|
905
|
-
will-change: transform;
|
|
906
|
-
contain: layout style paint;
|
|
896
|
+
border.style.cssText = `
|
|
897
|
+
position: absolute;
|
|
898
|
+
width: ${bbox.width}px;
|
|
899
|
+
height: ${bbox.height}px;
|
|
900
|
+
transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
|
|
901
|
+
border: 2px solid rgba(59, 130, 246, 0.8);
|
|
902
|
+
border-radius: 2px;
|
|
903
|
+
box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
|
|
904
|
+
pointer-events: none;
|
|
905
|
+
will-change: transform;
|
|
906
|
+
contain: layout style paint;
|
|
907
907
|
`;
|
|
908
908
|
// Create label
|
|
909
909
|
const label = this.document.createElement('div');
|
|
@@ -911,21 +911,21 @@ export class DOMActions {
|
|
|
911
911
|
label.dataset.ref = ref;
|
|
912
912
|
// Extract number from ref (e.g., "@ref:5" -> "5")
|
|
913
913
|
label.textContent = ref.replace('@ref:', '');
|
|
914
|
-
label.style.cssText = `
|
|
915
|
-
position: absolute;
|
|
916
|
-
transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
|
|
917
|
-
background: rgba(59, 130, 246, 0.9);
|
|
918
|
-
color: white;
|
|
919
|
-
padding: 2px 6px;
|
|
920
|
-
border-radius: 3px;
|
|
921
|
-
font-family: monospace;
|
|
922
|
-
font-size: 11px;
|
|
923
|
-
font-weight: bold;
|
|
924
|
-
box-shadow: 0 2px 4px rgba(0,0,0,0.3);
|
|
925
|
-
pointer-events: none;
|
|
926
|
-
white-space: nowrap;
|
|
927
|
-
will-change: transform;
|
|
928
|
-
contain: layout style paint;
|
|
914
|
+
label.style.cssText = `
|
|
915
|
+
position: absolute;
|
|
916
|
+
transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
|
|
917
|
+
background: rgba(59, 130, 246, 0.9);
|
|
918
|
+
color: white;
|
|
919
|
+
padding: 2px 6px;
|
|
920
|
+
border-radius: 3px;
|
|
921
|
+
font-family: monospace;
|
|
922
|
+
font-size: 11px;
|
|
923
|
+
font-weight: bold;
|
|
924
|
+
box-shadow: 0 2px 4px rgba(0,0,0,0.3);
|
|
925
|
+
pointer-events: none;
|
|
926
|
+
white-space: nowrap;
|
|
927
|
+
will-change: transform;
|
|
928
|
+
contain: layout style paint;
|
|
929
929
|
`;
|
|
930
930
|
this.overlayContainer.appendChild(border);
|
|
931
931
|
this.overlayContainer.appendChild(label);
|
|
@@ -723,6 +723,53 @@ function getSectionName(element) {
|
|
|
723
723
|
}
|
|
724
724
|
return '';
|
|
725
725
|
}
|
|
726
|
+
/**
|
|
727
|
+
* Create head snapshot - lightweight HTTP HEAD-style page overview
|
|
728
|
+
* Returns page metadata without DOM traversal for fast verification
|
|
729
|
+
*/
|
|
730
|
+
function createHeadSnapshot(document, _refMap, options) {
|
|
731
|
+
const { root = document.body } = options;
|
|
732
|
+
const win = document.defaultView || window;
|
|
733
|
+
// Count elements (lightweight - no deep traversal)
|
|
734
|
+
const allElements = root.querySelectorAll('*');
|
|
735
|
+
const interactiveSelector = 'button, a[href], input, textarea, select, [role="button"], [tabindex]:not([tabindex="-1"])';
|
|
736
|
+
const interactiveElements = root.querySelectorAll(interactiveSelector);
|
|
737
|
+
// Page status detection
|
|
738
|
+
const viewportArea = win.innerWidth * win.innerHeight;
|
|
739
|
+
const hasInteractive = interactiveElements.length > 0;
|
|
740
|
+
const isComplete = document.readyState === 'complete';
|
|
741
|
+
let status = 'loading';
|
|
742
|
+
if (viewportArea === 0) {
|
|
743
|
+
status = 'loading';
|
|
744
|
+
}
|
|
745
|
+
else if (!hasInteractive) {
|
|
746
|
+
status = 'empty';
|
|
747
|
+
}
|
|
748
|
+
else if (isComplete) {
|
|
749
|
+
status = 'ready';
|
|
750
|
+
}
|
|
751
|
+
else {
|
|
752
|
+
status = 'interactive';
|
|
753
|
+
}
|
|
754
|
+
// Build output
|
|
755
|
+
const output = [
|
|
756
|
+
`URL: ${document.location?.href || 'about:blank'}`,
|
|
757
|
+
`TITLE: ${document.title || 'Untitled'}`,
|
|
758
|
+
`VIEWPORT: ${win.innerWidth}x${win.innerHeight}`,
|
|
759
|
+
`STATUS: ${status}`,
|
|
760
|
+
`ELEMENTS: total=${allElements.length} interactive=${interactiveElements.length}`,
|
|
761
|
+
`READY_STATE: ${document.readyState}`
|
|
762
|
+
].join('\n');
|
|
763
|
+
return {
|
|
764
|
+
tree: output,
|
|
765
|
+
refs: {}, // No refs in head mode
|
|
766
|
+
metadata: {
|
|
767
|
+
totalInteractiveElements: interactiveElements.length,
|
|
768
|
+
capturedElements: 0,
|
|
769
|
+
quality: 'high'
|
|
770
|
+
}
|
|
771
|
+
};
|
|
772
|
+
}
|
|
726
773
|
/**
|
|
727
774
|
* Create outline snapshot - structural overview with metadata
|
|
728
775
|
*/
|
|
@@ -1219,6 +1266,9 @@ export function createSnapshot(document, refMap, options = {}) {
|
|
|
1219
1266
|
const { root = document.body, maxDepth = 50, includeHidden = false, mode = 'interactive', format = 'tree', grep: grepPattern } = options;
|
|
1220
1267
|
// Dispatch based on mode
|
|
1221
1268
|
const effectiveMode = mode;
|
|
1269
|
+
if (effectiveMode === 'head') {
|
|
1270
|
+
return createHeadSnapshot(document, refMap, { ...options, root });
|
|
1271
|
+
}
|
|
1222
1272
|
if (effectiveMode === 'outline') {
|
|
1223
1273
|
return createOutlineSnapshot(document, refMap, { ...options, root });
|
|
1224
1274
|
}
|
|
@@ -96,7 +96,7 @@ export interface GrepOptions {
|
|
|
96
96
|
/**
|
|
97
97
|
* Snapshot mode determines what content to capture
|
|
98
98
|
*/
|
|
99
|
-
export type SnapshotMode = 'interactive' | 'outline' | 'content';
|
|
99
|
+
export type SnapshotMode = 'interactive' | 'outline' | 'content' | 'head';
|
|
100
100
|
/**
|
|
101
101
|
* Snapshot output format
|
|
102
102
|
*/
|
|
@@ -115,6 +115,7 @@ export interface SnapshotCommand extends BaseCommand {
|
|
|
115
115
|
baseSnapshot?: SnapshotData;
|
|
116
116
|
/**
|
|
117
117
|
* Snapshot mode:
|
|
118
|
+
* - 'head': Lightweight page overview (URL, title, element counts, status)
|
|
118
119
|
* - 'interactive': Find clickable elements (default)
|
|
119
120
|
* - 'outline': Understand page structure with xpaths + metadata
|
|
120
121
|
* - 'content': Extract text content from sections
|
|
@@ -446,7 +446,7 @@ export class BackgroundAgent {
|
|
|
446
446
|
resolve({
|
|
447
447
|
id: command.id,
|
|
448
448
|
success: false,
|
|
449
|
-
error: chrome.runtime.lastError
|
|
449
|
+
error: chrome.runtime.lastError?.message || 'Failed to send message to tab',
|
|
450
450
|
});
|
|
451
451
|
}
|
|
452
452
|
else if (!response) {
|
|
@@ -27,6 +27,7 @@ export declare class SessionManager {
|
|
|
27
27
|
private initializationPromise;
|
|
28
28
|
private maxSession;
|
|
29
29
|
private maxOpenTab;
|
|
30
|
+
private ensureSessionPromise;
|
|
30
31
|
constructor(options?: SessionManagerOptions);
|
|
31
32
|
/**
|
|
32
33
|
* Wait for SessionManager to finish initialization
|
|
@@ -62,8 +63,10 @@ export declare class SessionManager {
|
|
|
62
63
|
reconnectSession(groupId: number): Promise<boolean>;
|
|
63
64
|
/**
|
|
64
65
|
* Create a new tab group
|
|
66
|
+
* @param options Group creation options
|
|
67
|
+
* @param internal If true, bypasses session limit check (used by ensureSession)
|
|
65
68
|
*/
|
|
66
|
-
createGroup(options?: GroupCreateOptions): Promise<GroupInfo>;
|
|
69
|
+
createGroup(options?: GroupCreateOptions, internal?: boolean): Promise<GroupInfo>;
|
|
67
70
|
/**
|
|
68
71
|
* Update an existing tab group
|
|
69
72
|
*/
|
|
@@ -136,8 +139,14 @@ export declare class SessionManager {
|
|
|
136
139
|
/**
|
|
137
140
|
* Ensure a session exists - restore from storage, use existing, or create new
|
|
138
141
|
* Returns the session group ID (creates if needed)
|
|
142
|
+
*
|
|
143
|
+
* This method is atomic - concurrent calls will wait for the same promise
|
|
139
144
|
*/
|
|
140
145
|
ensureSession(): Promise<number>;
|
|
146
|
+
/**
|
|
147
|
+
* Internal implementation of ensureSession
|
|
148
|
+
*/
|
|
149
|
+
private _doEnsureSession;
|
|
141
150
|
/**
|
|
142
151
|
* Get the primary tab in session (ensures session exists first)
|
|
143
152
|
* Returns the first tab in the session group
|
|
@@ -15,6 +15,7 @@ export class SessionManager {
|
|
|
15
15
|
initializationPromise;
|
|
16
16
|
maxSession;
|
|
17
17
|
maxOpenTab;
|
|
18
|
+
ensureSessionPromise = null;
|
|
18
19
|
constructor(options = {}) {
|
|
19
20
|
this.maxSession = options.maxSession ?? 1;
|
|
20
21
|
this.maxOpenTab = options.maxOpenTab ?? 1;
|
|
@@ -222,17 +223,21 @@ export class SessionManager {
|
|
|
222
223
|
}
|
|
223
224
|
/**
|
|
224
225
|
* Create a new tab group
|
|
226
|
+
* @param options Group creation options
|
|
227
|
+
* @param internal If true, bypasses session limit check (used by ensureSession)
|
|
225
228
|
*/
|
|
226
|
-
async createGroup(options = {}) {
|
|
229
|
+
async createGroup(options = {}, internal = false) {
|
|
227
230
|
// Wait for initialization to complete first
|
|
228
231
|
await this.waitForInitialization();
|
|
229
|
-
console.log('[SessionManager] createGroup called with options:', options);
|
|
230
|
-
// Check if we can create a new session
|
|
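231
|
-
const canCreate = await this.canCreateSession();
|
|
232
|
-
if (!canCreate) {
|
|
233
|
-
const count = await this.getSessionCount();
|
|
234
|
-
throw new Error(`Maximum session limit reached (${count}/${this.maxSession}). ` +
|
|
235
|
-
`Close an existing session before creating a new one.`);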
|
|
232
|
+
console.log('[SessionManager] createGroup called with options:', options, 'internal:', internal);
|
|
233
|
+
// Check if we can create a new session (skip if internal call from ensureSession)
|
|
234
|
+
if (!internal) {
|
|
235
|
+
const canCreate = await this.canCreateSession();
|
|
236
|
+
if (!canCreate) {
|
|
237
|
+
const count = await this.getSessionCount();
|
|
238
|
+
throw new Error(`Maximum session limit reached (${count}/${this.maxSession}). ` +
|
|
239
|
+
`Close an existing session before creating a new one.`);
|
|
240
|
+
}
|
|
236
241
|
}
|
|
237
242
|
const { tabIds = [], title = this.generateSessionName(), color = 'blue', collapsed = false, } = options;
|
|
238
243
|
// If no tabIds provided, create a new blank tab for the session
|
|
@@ -512,18 +517,42 @@ export class SessionManager {
|
|
|
512
517
|
/**
|
|
513
518
|
* Ensure a session exists - restore from storage, use existing, or create new
|
|
514
519
|
* Returns the session group ID (creates if needed)
|
|
520
|
+
*
|
|
521
|
+
* This method is atomic - concurrent calls will wait for the same promise
|
|
515
522
|
*/
|
|
516
523
|
async ensureSession() {
|
|
524
|
+
// Return existing promise if already in progress
|
|
525
|
+
if (this.ensureSessionPromise) {
|
|
526
|
+
console.log('[SessionManager] ensureSession already in progress, waiting...');
|
|
527
|
+
return this.ensureSessionPromise;
|
|
528
|
+
}
|
|
529
|
+
// Create new promise and store it
|
|
530
|
+
this.ensureSessionPromise = this._doEnsureSession();
|
|
531
|
+
try {
|
|
532
|
+
return await this.ensureSessionPromise;
|
|
533
|
+
}
|
|
534
|
+
finally {
|
|
535
|
+
// Clear promise when done
|
|
536
|
+
this.ensureSessionPromise = null;
|
|
537
|
+
}
|
|
538
|
+
}
|
|
539
|
+
/**
|
|
540
|
+
* Internal implementation of ensureSession
|
|
541
|
+
*/
|
|
542
|
+
async _doEnsureSession() {
|
|
517
543
|
await this.waitForInitialization();
|
|
544
|
+
console.log('[SessionManager] Starting ensureSession...');
|
|
518
545
|
// Step 1: Already have active session
|
|
519
546
|
if (this.activeSessionGroupId !== null) {
|
|
520
547
|
// Verify it still exists
|
|
521
548
|
try {
|
|
522
549
|
await chrome.tabGroups.get(this.activeSessionGroupId);
|
|
550
|
+
console.log('[SessionManager] Active session still valid:', this.activeSessionGroupId);
|
|
523
551
|
return this.activeSessionGroupId;
|
|
524
552
|
}
|
|
525
553
|
catch {
|
|
526
554
|
// Group no longer exists, continue to restore/create
|
|
555
|
+
console.log('[SessionManager] Active session no longer exists');
|
|
527
556
|
this.activeSessionGroupId = null;
|
|
528
557
|
}
|
|
529
558
|
}
|
|
@@ -531,23 +560,33 @@ export class SessionManager {
|
|
|
531
560
|
const result = await chrome.storage.session.get(SESSION_STORAGE_KEY);
|
|
532
561
|
const stored = result[SESSION_STORAGE_KEY];
|
|
533
562
|
if (stored?.groupId) {
|
|
563
|
+
console.log('[SessionManager] Step 2: Attempting to reconnect to stored session:', stored.groupId);
|
|
534
564
|
const reconnected = await this.reconnectSession(stored.groupId);
|
|
535
565
|
if (reconnected && this.activeSessionGroupId !== null) {
|
|
566
|
+
console.log('[SessionManager] Reconnected to stored session:', this.activeSessionGroupId);
|
|
536
567
|
return this.activeSessionGroupId;
|
|
537
568
|
}
|
|
538
569
|
}
|
|
539
570
|
// Step 3: Find existing BTCP group
|
|
571
|
+
console.log('[SessionManager] Step 3: Looking for existing BTCP groups...');
|
|
540
572
|
const groups = await chrome.tabGroups.query({});
|
|
573
|
+
console.log('[SessionManager] Found tab groups:', groups.map(g => ({ id: g.id, title: g.title })));
|
|
541
574
|
const btcpGroup = groups.find(g => g.title?.startsWith('BTCP'));
|
|
542
575
|
if (btcpGroup) {
|
|
576
|
+
console.log('[SessionManager] Found existing BTCP group:', btcpGroup.id, btcpGroup.title);
|
|
543
577
|
const used = await this.useExistingGroupAsSession(btcpGroup.id);
|
|
544
578
|
if (used && this.activeSessionGroupId !== null) {
|
|
579
|
+
console.log('[SessionManager] Successfully reused existing session:', this.activeSessionGroupId);
|
|
545
580
|
return this.activeSessionGroupId;
|
|
546
581
|
}
|
|
547
582
|
}
|
|
548
|
-
|
|
583
|
+
else {
|
|
584
|
+
console.log('[SessionManager] No existing BTCP groups found');
|
|
585
|
+
}
|
|
586
|
+
// Step 4: Create new session (bypass limit check since we already tried to reuse)
|
|
549
587
|
console.log('[SessionManager] No existing session found, creating new one...');
|
|
550
|
-
const newGroup = await this.createGroup({ color: 'blue' });
|
|
588
|
+
const newGroup = await this.createGroup({ color: 'blue' }, true); // internal=true bypasses limit
|
|
589
|
+
console.log('[SessionManager] Created new session:', newGroup.id);
|
|
551
590
|
return newGroup.id;
|
|
552
591
|
}
|
|
553
592
|
/**
|