surfagent 1.1.1 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/AGENT.md +212 -0
- package/API.md +137 -0
- package/CLAUDE.md +6 -0
- package/SURFAGENT_BUG_REPORT.md +199 -0
- package/dist/api/act.d.ts +19 -0
- package/dist/api/act.js +101 -1
- package/dist/api/recon.js +12 -2
- package/dist/api/server.js +13 -2
- package/dist/browser.js +4 -4
- package/dist/cli.js +4 -3
- package/package.json +1 -1
- package/src/api/act.ts +119 -1
- package/src/api/recon.ts +10 -2
- package/src/api/server.ts +14 -2
- package/src/browser.ts +4 -4
- package/src/cli.ts +3 -3
package/AGENT.md
ADDED
|
@@ -0,0 +1,212 @@
|
|
|
1
|
+
## IMPORTANT: Never kill Chrome
|
|
2
|
+
|
|
3
|
+
`surfagent` launches a **separate Chrome window** with its own profile. The user's personal Chrome stays untouched. NEVER run `pkill Chrome`, `killall Chrome`, or any command that kills Chrome processes. If you need to restart the debug session, use `surfagent start` — it will launch a new one without affecting anything.
|
|
4
|
+
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
## Critical Rule: Always Close Unused Tabs
|
|
8
|
+
|
|
9
|
+
After completing any task, close tabs you no longer need:
|
|
10
|
+
```bash
|
|
11
|
+
curl -X POST localhost:3456/click -H 'Content-Type: application/json' -d '{"tab":"0","text":"close"}'
|
|
12
|
+
# Or via CLI:
|
|
13
|
+
node dist/browser.js close all
|
|
14
|
+
```
|
|
15
|
+
|
|
16
|
+
---
|
|
17
|
+
|
|
18
|
+
## Setup
|
|
19
|
+
|
|
20
|
+
### Start everything (one command)
|
|
21
|
+
|
|
22
|
+
```bash
|
|
23
|
+
surfagent start
|
|
24
|
+
```
|
|
25
|
+
|
|
26
|
+
This will:
|
|
27
|
+
1. Check whether a Chrome debug session is already running on port 9222
|
|
28
|
+
2. If not, launch a **new Chrome window** with a separate profile (`/tmp/surfagent-chrome`)
|
|
29
|
+
3. Copy cookies from the user's default Chrome profile (preserves logins)
|
|
30
|
+
4. Start the API server on `http://localhost:3456`
|
|
31
|
+
|
|
32
|
+
The user's personal Chrome browser is NOT affected. A second Chrome window will appear — this is the debug session that the API controls.
|
|
33
|
+
|
|
34
|
+
### Other commands
|
|
35
|
+
|
|
36
|
+
```bash
|
|
37
|
+
surfagent start # Chrome + API (recommended)
|
|
38
|
+
surfagent chrome # Launch Chrome debug session only
|
|
39
|
+
surfagent api # Start API only (Chrome must already be running)
|
|
40
|
+
surfagent health # Check if Chrome and API are running
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
---
|
|
44
|
+
|
|
45
|
+
## API Endpoints (preferred for agents)
|
|
46
|
+
|
|
47
|
+
The API is the primary interface. Always **recon first, then act**.
|
|
48
|
+
|
|
49
|
+
See `API.md` for full documentation with examples.
|
|
50
|
+
|
|
51
|
+
```bash
|
|
52
|
+
# Recon a page — get full map of elements, forms, selectors
|
|
53
|
+
curl -X POST localhost:3456/recon -H 'Content-Type: application/json' -d '{"url":"https://example.com","keepTab":true}'
|
|
54
|
+
|
|
55
|
+
# Read page content — structured text, tables, notifications
|
|
56
|
+
curl -X POST localhost:3456/read -H 'Content-Type: application/json' -d '{"tab":"0"}'
|
|
57
|
+
|
|
58
|
+
# Fill form fields — real CDP keystrokes
|
|
59
|
+
curl -X POST localhost:3456/fill -H 'Content-Type: application/json' -d '{"tab":"0","fields":[{"selector":"#email","value":"test@example.com"}],"submit":"enter"}'
|
|
60
|
+
|
|
61
|
+
# Click an element
|
|
62
|
+
curl -X POST localhost:3456/click -H 'Content-Type: application/json' -d '{"tab":"0","text":"Submit"}'
|
|
63
|
+
|
|
64
|
+
# Scroll
|
|
65
|
+
curl -X POST localhost:3456/scroll -H 'Content-Type: application/json' -d '{"tab":"0","direction":"down","amount":1000}'
|
|
66
|
+
|
|
67
|
+
# Navigate (same tab)
|
|
68
|
+
curl -X POST localhost:3456/navigate -H 'Content-Type: application/json' -d '{"tab":"0","url":"https://example.com"}'
|
|
69
|
+
|
|
70
|
+
# Go back
|
|
71
|
+
curl -X POST localhost:3456/navigate -H 'Content-Type: application/json' -d '{"tab":"0","back":true}'
|
|
72
|
+
|
|
73
|
+
# Run JavaScript in a tab or iframe
|
|
74
|
+
curl -X POST localhost:3456/eval -H 'Content-Type: application/json' -d '{"tab":"0","expression":"document.title"}'
|
|
75
|
+
|
|
76
|
+
# Bring tab to front
|
|
77
|
+
curl -X POST localhost:3456/focus -H 'Content-Type: application/json' -d '{"tab":"0"}'
|
|
78
|
+
|
|
79
|
+
# Raw key typing — no clear step, for Google Sheets / contenteditable / canvas
|
|
80
|
+
curl -X POST localhost:3456/type -H 'Content-Type: application/json' -d '{"tab":"0","keys":"Hello","submit":"tab"}'
|
|
81
|
+
|
|
82
|
+
# Captcha detection and interaction (experimental)
|
|
83
|
+
curl -X POST localhost:3456/captcha -H 'Content-Type: application/json' -d '{"tab":"0","action":"detect"}'
|
|
84
|
+
|
|
85
|
+
# Dispatch DOM events — for React SPAs where /click or /fill submit fails
|
|
86
|
+
curl -X POST localhost:3456/dispatch -H 'Content-Type: application/json' -d '{"tab":"0","selector":"form[role=search]","event":"submit"}'
|
|
87
|
+
|
|
88
|
+
# React debug — find event handlers on element and ancestors
|
|
89
|
+
curl -X POST localhost:3456/dispatch -H 'Content-Type: application/json' -d '{"tab":"0","selector":"[role=option]","reactDebug":true}'
|
|
90
|
+
|
|
91
|
+
# List tabs
|
|
92
|
+
curl localhost:3456/tabs
|
|
93
|
+
|
|
94
|
+
# Health check
|
|
95
|
+
curl localhost:3456/health
|
|
96
|
+
```
|
|
97
|
+
|
|
98
|
+
### Google Sheets
|
|
99
|
+
|
|
100
|
+
Google Sheets requires `/type` instead of `/fill` for cell input (because `/fill` does Ctrl+A which selects all cells). Use the name box to navigate, then `/type` to enter data:
|
|
101
|
+
|
|
102
|
+
```bash
|
|
103
|
+
# 1. Click the name box
|
|
104
|
+
curl -X POST localhost:3456/click -H 'Content-Type: application/json' -d '{"tab":"sheets","selector":"#t-name-box"}'
|
|
105
|
+
|
|
106
|
+
# 2. Navigate to a cell
|
|
107
|
+
curl -X POST localhost:3456/fill -H 'Content-Type: application/json' -d '{"tab":"sheets","fields":[{"selector":"#t-name-box","value":"A1","clear":true}],"submit":"enter"}'
|
|
108
|
+
|
|
109
|
+
# 3. Type into the cell (Tab moves right, Enter moves down)
|
|
110
|
+
curl -X POST localhost:3456/type -H 'Content-Type: application/json' -d '{"tab":"sheets","keys":"=SUM(B2:B10)","submit":"tab"}'
|
|
111
|
+
```
|
|
112
|
+
|
|
113
|
+
Some Sheets buttons (Add Sheet +, toolbar) only respond to CDP mouse events, not DOM clicks. See `API.md` for the CDP mouse click pattern.
|
|
114
|
+
|
|
115
|
+
**Warning:** Navigating away from unsaved Sheets triggers a native Chrome "Leave page?" dialog that blocks ALL CDP commands. See `API.md` > "Native Chrome Dialogs" for detection and dismissal via Swift AX API.
|
|
116
|
+
|
|
117
|
+
### Tab Targeting
|
|
118
|
+
|
|
119
|
+
All endpoints accept a `tab` field:
|
|
120
|
+
- `"0"` — by index
|
|
121
|
+
- `"github"` — partial URL/title match
|
|
122
|
+
- `"cdpn.io"` — matches cross-origin iframes too
|
|
123
|
+
|
|
124
|
+
---
|
|
125
|
+
|
|
126
|
+
## CLI Commands
|
|
127
|
+
|
|
128
|
+
The CLI is useful for quick manual operations and debugging.
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
node dist/browser.js list # List all open tabs
|
|
132
|
+
node dist/browser.js content <tab> # Get text content from a tab
|
|
133
|
+
node dist/browser.js content <tab> -s "sel" # Get text from specific CSS selector
|
|
134
|
+
node dist/browser.js content-all # Get content from all tabs
|
|
135
|
+
node dist/browser.js elements <tab> # List interactive elements
|
|
136
|
+
node dist/browser.js click <tab> <text> # Click element by text
|
|
137
|
+
node dist/browser.js type <tab> <text> # Type into input field
|
|
138
|
+
node dist/browser.js open <url> # Navigate to URL
|
|
139
|
+
node dist/browser.js open <url> --new-tab # Open in new tab
|
|
140
|
+
node dist/browser.js screenshot <tab> -o f # Take screenshot
|
|
141
|
+
node dist/browser.js search <query> # Search across all tabs
|
|
142
|
+
node dist/browser.js html <tab> # Get full HTML of page
|
|
143
|
+
node dist/browser.js html <tab> -s "sel" # Get HTML of specific element
|
|
144
|
+
node dist/browser.js desc <tab> # Get item description from JSON-LD/meta
|
|
145
|
+
node dist/browser.js close <tab> # Close a specific tab
|
|
146
|
+
node dist/browser.js close all # Close all tabs except first
|
|
147
|
+
```
|
|
148
|
+
|
|
149
|
+
---
|
|
150
|
+
|
|
151
|
+
## Architecture
|
|
152
|
+
|
|
153
|
+
```
|
|
154
|
+
surfagent start
|
|
155
|
+
│
|
|
156
|
+
├── Launches Chrome (separate window, separate profile)
|
|
157
|
+
│ └── --remote-debugging-port=9222
|
|
158
|
+
│ --user-data-dir=/tmp/surfagent-chrome
|
|
159
|
+
│
|
|
160
|
+
└── Starts API Server (:3456)
|
|
161
|
+
│
|
|
162
|
+
├── src/api/recon.ts (page reconnaissance)
|
|
163
|
+
├── src/api/act.ts (fill, click, scroll, read, navigate, eval, dispatch, captcha)
|
|
164
|
+
└── src/api/server.ts (HTTP routing)
|
|
165
|
+
│
|
|
166
|
+
└── src/chrome/ (CDP connection layer)
|
|
167
|
+
│
|
|
168
|
+
▼
|
|
169
|
+
Chrome (:9222) ← separate window, user's Chrome untouched
|
|
170
|
+
```
|
|
171
|
+
|
|
172
|
+
---
|
|
173
|
+
|
|
174
|
+
## Troubleshooting
|
|
175
|
+
|
|
176
|
+
**"Cannot connect to Chrome"**
|
|
177
|
+
- Run `surfagent start` — it handles everything
|
|
178
|
+
- If Chrome is already running with debug mode, use `surfagent api` to just start the API
|
|
179
|
+
|
|
180
|
+
**A second Chrome window appeared**
|
|
181
|
+
- This is expected. `surfagent` runs its own Chrome with a separate profile. Your personal Chrome is not affected. Close the surfagent Chrome window when you're done.
|
|
182
|
+
|
|
183
|
+
**Elements not found**
|
|
184
|
+
- Always `/recon` first to see what's available
|
|
185
|
+
- Try partial text matching with `/click`
|
|
186
|
+
- Some elements are `role="option"` or `li` with `aria-label` — use selector from recon
|
|
187
|
+
|
|
188
|
+
**Form fields not filling**
|
|
189
|
+
- Use the API `/fill` endpoint — it uses real CDP keystrokes
|
|
190
|
+
- For SPAs, use `"submit": "enter"` instead of clicking submit buttons
|
|
191
|
+
|
|
192
|
+
**React SPA — click/submit doesn't trigger navigation**
|
|
193
|
+
- This happens when React event handlers are on ancestor elements, not the element you're clicking
|
|
194
|
+
- Use `"submit": "form"` in `/fill` — dispatches a native submit event on the nearest `<form>`, which React picks up
|
|
195
|
+
- For non-form cases, use `/dispatch` with `"reactDebug": true` to find which ancestor has the handler, then `/dispatch` the right event on it
|
|
196
|
+
- Example (X.com search): `/fill` with `submit:"form"` works where `submit:"enter"` fails because the autocomplete combobox swallows the Enter key
|
|
197
|
+
|
|
198
|
+
**Links opening new tabs instead of navigating**
|
|
199
|
+
- The API `/click` handles `target="_blank"` automatically
|
|
200
|
+
- It overrides the target and navigates in the same tab
|
|
201
|
+
|
|
202
|
+
**Cross-origin iframes**
|
|
203
|
+
- Target them by their domain: `"tab": "cdpn.io"`
|
|
204
|
+
- CDP connects to iframes as separate targets
|
|
205
|
+
|
|
206
|
+
**Tab hidden behind other tabs**
|
|
207
|
+
- Use `/focus` to bring it to front
|
|
208
|
+
- `/navigate` does this automatically
|
|
209
|
+
|
|
210
|
+
**Too many tabs open**
|
|
211
|
+
- Run `node dist/browser.js close all` to close every tab except the first
|
|
212
|
+
- Always clean up after tasks
|
package/API.md
CHANGED
|
@@ -324,6 +324,7 @@ Fill form fields using real CDP keyboard input. This simulates actual keystrokes
|
|
|
324
324
|
|
|
325
325
|
**Submit options:**
|
|
326
326
|
- `"enter"` — press Enter key via CDP. Best option for single-page apps (SPAs).
|
|
327
|
+
- `"form"` — dispatch a native `submit` event on the nearest `<form>` ancestor. **Use this for React SPAs** where Enter is intercepted by autocomplete/combobox widgets (e.g. X.com search, GitHub search).
|
|
327
328
|
- `"auto"` — finds and clicks the nearest `button[type="submit"]` or `input[type="submit"]`.
|
|
328
329
|
- `"#my-button"` — clicks a specific selector.
|
|
329
330
|
|
|
@@ -447,6 +448,55 @@ Check if the API can connect to Chrome.
|
|
|
447
448
|
|
|
448
449
|
---
|
|
449
450
|
|
|
451
|
+
### POST /dispatch
|
|
452
|
+
|
|
453
|
+
Dispatch any DOM event on any element. Built to solve React/Vue/Angular SPAs where `.click()` and CDP key events don't trigger framework event handlers.
|
|
454
|
+
|
|
455
|
+
**Dispatch an event:**
|
|
456
|
+
```json
|
|
457
|
+
{ "tab": "0", "selector": "form[role=search]", "event": "submit" }
|
|
458
|
+
```
|
|
459
|
+
|
|
460
|
+
**With options:**
|
|
461
|
+
```json
|
|
462
|
+
{ "tab": "0", "selector": "#my-input", "event": "keydown", "eventInit": { "key": "Enter", "code": "Enter" } }
|
|
463
|
+
```
|
|
464
|
+
|
|
465
|
+
**React debug mode** — find all React event handlers on an element and its ancestors:
|
|
466
|
+
```json
|
|
467
|
+
{ "tab": "0", "selector": "[role=option]", "reactDebug": true }
|
|
468
|
+
```
|
|
469
|
+
|
|
470
|
+
**Parameters:**
|
|
471
|
+
- `selector` (string, required) — CSS selector for target element
|
|
472
|
+
- `event` (string, required unless reactDebug) — Event type: `"submit"`, `"click"`, `"input"`, `"change"`, `"keydown"`, `"pointerdown"`, etc.
|
|
473
|
+
- `bubbles` (boolean) — Default: `true`. Set to `false` to prevent event bubbling.
|
|
474
|
+
- `cancelable` (boolean) — Default: `true`
|
|
475
|
+
- `detail` (any) — Payload for CustomEvent
|
|
476
|
+
- `eventInit` (object) — Extra properties merged into the event constructor (e.g. `{key: "Enter"}` for KeyboardEvent)
|
|
477
|
+
- `reactDebug` (boolean) — Instead of dispatching, return all React event handlers found walking up the DOM tree from the selector
|
|
478
|
+
|
|
479
|
+
**Event response:**
|
|
480
|
+
```json
|
|
481
|
+
{ "success": true, "dispatched": "submit on FORM[role=search]", "_dispatchMs": 25 }
|
|
482
|
+
```
|
|
483
|
+
|
|
484
|
+
**React debug response:**
|
|
485
|
+
```json
|
|
486
|
+
{
|
|
487
|
+
"success": true,
|
|
488
|
+
"reactHandlers": [
|
|
489
|
+
{ "tag": "FORM", "role": "search", "testid": null, "className": "...", "handlers": ["onSubmit"] },
|
|
490
|
+
{ "tag": "DIV", "role": null, "testid": null, "className": "...", "handlers": ["onKeyDown"] },
|
|
491
|
+
{ "tag": "DIV", "role": null, "testid": null, "className": "...", "handlers": ["onClick"] }
|
|
492
|
+
]
|
|
493
|
+
}
|
|
494
|
+
```
|
|
495
|
+
|
|
496
|
+
**When to use:** When `/click` or `/fill` submit doesn't trigger navigation or actions on React SPAs. Use `reactDebug` first to find which ancestor has the handler, then dispatch the right event on it.
|
|
497
|
+
|
|
498
|
+
---
|
|
499
|
+
|
|
450
500
|
### POST /type
|
|
451
501
|
|
|
452
502
|
Raw CDP key typing without clearing the field first. Use this for apps like **Google Sheets**, contenteditable elements, or any context where `/fill`'s Ctrl+A clear step causes side effects (e.g., selecting all cells instead of clearing a field).
|
|
@@ -613,9 +663,32 @@ const CDP = require('chrome-remote-interface');
|
|
|
613
663
|
|
|
614
664
|
**Using the menu search:** Google Sheets has a menu search box (`input[aria-label="Menus"]` or `input[aria-label="Menus (Option+/)"]`). Use `/fill` to type a command (e.g., "Insert chart"), then `/click` on the matching result.
|
|
615
665
|
|
|
666
|
+
**Clearing/removing wrong cell entries:** You cannot send Delete or Backspace keys to Google Sheets via CDP — they don't reach the grid. Instead, overwrite the cell with a space:
|
|
667
|
+
|
|
668
|
+
```
|
|
669
|
+
1. POST /click { "tab": "sheets", "selector": "#t-name-box" }
|
|
670
|
+
2. POST /fill { "tab": "sheets", "fields": [{ "selector": "#t-name-box", "value": "F1", "clear": true }], "submit": "enter" }
|
|
671
|
+
→ Navigate to the cell you want to clear
|
|
672
|
+
3. POST /type { "tab": "sheets", "keys": " ", "submit": "enter" }
|
|
673
|
+
→ Overwrite with a space (effectively blanks the cell)
|
|
674
|
+
```
|
|
675
|
+
|
|
676
|
+
Repeat for each cell. Do NOT try `Delete`, `Backspace`, or `Cmd+Z` via CDP key events — they are silently ignored by the Sheets grid. The `/type` space-overwrite is the only reliable method.
|
|
677
|
+
|
|
678
|
+
**Avoiding wrong entries in the first place:** When entering rows of data, always navigate to the first cell of each new row via the name box. Pressing Enter after the last column does NOT return to column A — it moves down within the same column. So after completing a row with Tab across columns:
|
|
679
|
+
|
|
680
|
+
```
|
|
681
|
+
# Wrong: pressing Enter after last column stays in that column
|
|
682
|
+
# Right: use name box to jump to start of next row
|
|
683
|
+
POST /click { "tab": "sheets", "selector": "#t-name-box" }
|
|
684
|
+
POST /fill { "tab": "sheets", "fields": [{ "selector": "#t-name-box", "value": "A3", "clear": true }], "submit": "enter" }
|
|
685
|
+
```
|
|
686
|
+
|
|
616
687
|
**Key gotchas:**
|
|
617
688
|
- Never use `/fill` directly on Google Sheets cells — it will wipe data via Ctrl+A
|
|
618
689
|
- Always navigate to a cell via the name box first, then `/type`
|
|
690
|
+
- Always use the name box to navigate to the start of each new row — Tab+Enter does not wrap back to column A
|
|
691
|
+
- CDP keyboard events (Delete, Backspace, Cmd+Z) do not work on the Sheets grid — use space-overwrite instead
|
|
619
692
|
- Some buttons (Add Sheet, menu items) only respond to CDP mouse events, not DOM clicks
|
|
620
693
|
- Navigating away from unsaved Sheets triggers a native Chrome dialog — see the "Native Chrome Dialogs" section below
|
|
621
694
|
|
|
@@ -758,6 +831,70 @@ if let windows = windowsRef as? [AXUIElement] {
|
|
|
758
831
|
|
|
759
832
|
---
|
|
760
833
|
|
|
834
|
+
## React SPAs — When `/click` and `/fill` Submit Don't Work
|
|
835
|
+
|
|
836
|
+
React, Vue, and Angular use synthetic event systems with event delegation. Sometimes `.click()` and CDP key events don't trigger framework handlers — especially on comboboxes, autocomplete widgets, and custom dropdowns.
|
|
837
|
+
|
|
838
|
+
**Symptoms:**
|
|
839
|
+
- `/click` returns `success: true` but nothing happens
|
|
840
|
+
- `/fill` with `submit: "enter"` fills the input but doesn't navigate
|
|
841
|
+
- CDP `Input.dispatchMouseEvent` / `Input.dispatchKeyEvent` are silently ignored
|
|
842
|
+
|
|
843
|
+
**Diagnosis — use `/dispatch` with `reactDebug`:**
|
|
844
|
+
|
|
845
|
+
```bash
|
|
846
|
+
# Find which elements have React handlers and what events they listen for
|
|
847
|
+
curl -X POST localhost:3456/dispatch -H 'Content-Type: application/json' -d '{"tab":"0","selector":"[role=option]","reactDebug":true}'
|
|
848
|
+
```
|
|
849
|
+
|
|
850
|
+
This walks up the DOM from your target element, inspecting `__reactProps$*` on each ancestor, and returns every React event handler it finds. The response tells you exactly which element to target and which event to dispatch.
|
|
851
|
+
|
|
852
|
+
**Fix — dispatch the right event on the right element:**
|
|
853
|
+
|
|
854
|
+
```bash
|
|
855
|
+
# Dispatch a submit event on a form (most common fix for search boxes)
|
|
856
|
+
curl -X POST localhost:3456/dispatch -H 'Content-Type: application/json' -d '{"tab":"0","selector":"form[role=search]","event":"submit"}'
|
|
857
|
+
|
|
858
|
+
# Or dispatch a click on the ancestor that has the onClick handler
|
|
859
|
+
curl -X POST localhost:3456/dispatch -H 'Content-Type: application/json' -d '{"tab":"0","selector":"div[data-testid=wrapper]","event":"click"}'
|
|
860
|
+
```
|
|
861
|
+
|
|
862
|
+
**Or use `/fill` with `submit: "form"` (one-step shortcut):**
|
|
863
|
+
|
|
864
|
+
```bash
|
|
865
|
+
curl -X POST localhost:3456/fill -H 'Content-Type: application/json' -d '{"tab":"0","fields":[{"selector":"input[aria-label=\"Search query\"]","value":"my query"}],"submit":"form"}'
|
|
866
|
+
```
|
|
867
|
+
|
|
868
|
+
### X.com (Twitter) Search — worked example
|
|
869
|
+
|
|
870
|
+
X.com's search combobox is a textbook case. The `role="option"` autocomplete suggestions have **zero event handlers** — the `onClick` lives on a distant ancestor DIV, `onKeyDown` on a separate container, and `onSubmit` on the form.
|
|
871
|
+
|
|
872
|
+
**What works:**
|
|
873
|
+
```
|
|
874
|
+
POST /fill { "tab": "0", "fields": [{ "selector": "input[aria-label=\"Search query\"]", "value": "query" }], "submit": "form" }
|
|
875
|
+
```
|
|
876
|
+
|
|
877
|
+
**Fallback — URL navigation:**
|
|
878
|
+
```
|
|
879
|
+
POST /navigate { "tab": "0", "url": "https://x.com/search?q=your%20query&src=typed_query&f=top" }
|
|
880
|
+
```
|
|
881
|
+
Query parameters: `q` (query), `f` (`top`, `latest`, `people`, `photos`, `videos`).
|
|
882
|
+
|
|
883
|
+
### General debugging workflow for any React SPA
|
|
884
|
+
|
|
885
|
+
```
|
|
886
|
+
1. Try /click or /fill with submit:"enter" first — it works on most sites
|
|
887
|
+
2. If it fails:
|
|
888
|
+
POST /dispatch { "tab": "0", "selector": "THE_STUCK_ELEMENT", "reactDebug": true }
|
|
889
|
+
→ Read the handler tree to find which ancestor has which handler
|
|
890
|
+
3. Dispatch the right event:
|
|
891
|
+
POST /dispatch { "tab": "0", "selector": "THE_ANCESTOR", "event": "THE_EVENT" }
|
|
892
|
+
4. If it's a form with an input, use submit:"form" shortcut:
|
|
893
|
+
POST /fill { "tab": "0", "fields": [...], "submit": "form" }
|
|
894
|
+
```
|
|
895
|
+
|
|
896
|
+
---
|
|
897
|
+
|
|
761
898
|
## Important Notes
|
|
762
899
|
|
|
763
900
|
- **Always recon before acting.** The selectors you need come from the recon response.
|
package/CLAUDE.md
CHANGED
|
@@ -82,6 +82,12 @@ curl -X POST localhost:3456/type -H 'Content-Type: application/json' -d '{"tab":
|
|
|
82
82
|
# Captcha detection and interaction (experimental)
|
|
83
83
|
curl -X POST localhost:3456/captcha -H 'Content-Type: application/json' -d '{"tab":"0","action":"detect"}'
|
|
84
84
|
|
|
85
|
+
# Dispatch DOM events (React SPA workaround when /click or /fill submit fails)
|
|
86
|
+
curl -X POST localhost:3456/dispatch -H 'Content-Type: application/json' -d '{"tab":"0","selector":"form[role=search]","event":"submit"}'
|
|
87
|
+
|
|
88
|
+
# React debug — find event handlers on element ancestors
|
|
89
|
+
curl -X POST localhost:3456/dispatch -H 'Content-Type: application/json' -d '{"tab":"0","selector":"[role=option]","reactDebug":true}'
|
|
90
|
+
|
|
85
91
|
# List tabs
|
|
86
92
|
curl localhost:3456/tabs
|
|
87
93
|
|
|
@@ -0,0 +1,199 @@
|
|
|
1
|
+
# Surfagent v1.1.0 — QA Bug Report
|
|
2
|
+
|
|
3
|
+
**Date:** 2026-04-13
|
|
4
|
+
**Tester:** Claude (automated QA)
|
|
5
|
+
**Environment:** macOS Darwin 24.5.0, Chrome via CDP port 9222, API port 3500
|
|
6
|
+
**Scope:** All API endpoints, CLI commands, error handling, edge cases (single-tab browsing)
|
|
7
|
+
|
|
8
|
+
---
|
|
9
|
+
|
|
10
|
+
## BUGS
|
|
11
|
+
|
|
12
|
+
### BUG-1: `/click` response `clicked` field returns empty text [Medium]
|
|
13
|
+
|
|
14
|
+
**Steps:** Click any element by text or selector on any page.
|
|
15
|
+
**Expected:** `clicked` field should contain the element tag + visible text (e.g. `"A: Show HN"`).
|
|
16
|
+
**Actual:** Returns empty text like `"A: "`, `"TEXTAREA: "` on many elements.
|
|
17
|
+
|
|
18
|
+
**Root cause:** In `act.ts:219`, the click handler uses `el.innerText || el.value` but many elements (links with nested spans, textareas before user input) return empty `innerText`. The code also doesn't check `el.textContent` or `el.getAttribute('aria-label')` as fallbacks.
|
|
19
|
+
|
|
20
|
+
**File:** `src/api/act.ts`, lines 218-219
|
|
21
|
+
**Suggested fix:** Use the same `getText()` helper from `recon.ts` which checks `innerText`, `textContent`, `value`, `aria-label`, `title`, `alt`, and `name`.
|
|
22
|
+
|
|
23
|
+
---
|
|
24
|
+
|
|
25
|
+
### BUG-2: HTML entities in tab titles not decoded [Low]
|
|
26
|
+
|
|
27
|
+
**Steps:** Open a page with `&` in the title (e.g. BBC). Call `GET /tabs` or CLI `list`.
|
|
28
|
+
**Expected:** Title shows `&` (decoded).
|
|
29
|
+
**Actual:** Title shows `&amp;` (undecoded HTML entity).
|
|
30
|
+
|
|
31
|
+
**Example:** `"BBC Home - Breaking News, World News, US News, Sports, Business, Innovation, Climate, Culture, Travel, Video &amp; Audio"`
|
|
32
|
+
|
|
33
|
+
**Root cause:** Chrome CDP returns HTML-encoded titles for some pages. `tabs.ts` passes them through without decoding.
|
|
34
|
+
|
|
35
|
+
**File:** `src/chrome/tabs.ts`
|
|
36
|
+
**Suggested fix:** Decode HTML entities in the title after receiving it from CDP (e.g. replace `&amp;` -> `&`, `&lt;` -> `<`, etc.).
|
|
37
|
+
|
|
38
|
+
---
|
|
39
|
+
|
|
40
|
+
### BUG-3: `reconUrl` title discrepancy vs `reconTab` [Low]
|
|
41
|
+
|
|
42
|
+
**Steps:** Call `/recon` with `{"url":"https://httpbin.org/html", "keepTab":true}`. Then call `/recon` with `{"tab":"httpbin"}`.
|
|
43
|
+
**Expected:** Both return the same title.
|
|
44
|
+
**Actual:**
|
|
45
|
+
- `reconUrl` returns `title: ""` (uses `document.title` which is empty)
|
|
46
|
+
- `reconTab` returns `title: "httpbin.org/html"` (uses CDP target title, which Chrome auto-generates)
|
|
47
|
+
|
|
48
|
+
**Root cause:** `reconUrl` reads title via `document.title` in JS. `reconTab` reads from CDP target metadata. When a page has no `<title>` tag, these diverge.
|
|
49
|
+
|
|
50
|
+
**File:** `src/api/recon.ts`, lines 345-348 vs 434
|
|
51
|
+
**Suggested fix:** In `reconUrl`, fall back to CDP target title when `document.title` is empty.
|
|
52
|
+
|
|
53
|
+
---
|
|
54
|
+
|
|
55
|
+
### BUG-4: Windows platform: `getChromePath()` uses Unix-only `test -f` [Medium]
|
|
56
|
+
|
|
57
|
+
**Steps:** Run `surfagent start` on Windows.
|
|
58
|
+
**Expected:** Chrome is found at standard Windows paths.
|
|
59
|
+
**Actual:** `execSync('test -f "..."')` always fails on Windows (no `test` command in cmd.exe).
|
|
60
|
+
|
|
61
|
+
**File:** `src/cli.ts`, lines 56-58
|
|
62
|
+
**Suggested fix:** Use `fs.existsSync()` instead of `execSync('test -f ...')` for cross-platform compatibility.
|
|
63
|
+
|
|
64
|
+
---
|
|
65
|
+
|
|
66
|
+
### BUG-5: `-h` flag conflict in CLI — `--host` shadows `--help` [Medium]
|
|
67
|
+
|
|
68
|
+
**Steps:** Run `node dist/browser.js -h`
|
|
69
|
+
**Expected:** Shows help output.
|
|
70
|
+
**Actual:** Error: `option '-h, --host <string>' argument missing`
|
|
71
|
+
|
|
72
|
+
**Root cause:** Commander.js reserves `-h` for `--help` by default, but the program overrides it with `-h, --host`.
|
|
73
|
+
|
|
74
|
+
**File:** `src/browser.ts`, line 27
|
|
75
|
+
**Suggested fix:** Change host shorthand to `-H` or `--host` only (no shorthand). Keep `-h` for help.
|
|
76
|
+
|
|
77
|
+
---
|
|
78
|
+
|
|
79
|
+
### BUG-6: CLI version mismatch [Low]
|
|
80
|
+
|
|
81
|
+
**Steps:** Run `node dist/browser.js --version`
|
|
82
|
+
**Expected:** Shows `1.1.0` (matching package.json).
|
|
83
|
+
**Actual:** Shows `1.0.0`.
|
|
84
|
+
|
|
85
|
+
**File:** `src/browser.ts`, line 20 — `.version('1.0.0')` should be `1.1.0`
|
|
86
|
+
**Suggested fix:** Read version from package.json or update hardcoded string.
|
|
87
|
+
|
|
88
|
+
---
|
|
89
|
+
|
|
90
|
+
### BUG-7: CLI program name is "browser" not "surfagent" [Low]
|
|
91
|
+
|
|
92
|
+
**Steps:** Run `node dist/browser.js --help`
|
|
93
|
+
**Expected:** `Usage: surfagent [options] [command]`
|
|
94
|
+
**Actual:** `Usage: browser [options] [command]`
|
|
95
|
+
|
|
96
|
+
**File:** `src/browser.ts`, line 19 — `.name('browser')` should be `.name('surfagent')`
|
|
97
|
+
|
|
98
|
+
---
|
|
99
|
+
|
|
100
|
+
### BUG-8: Help text references wrong repo URL [Low]
|
|
101
|
+
|
|
102
|
+
**Steps:** Run `npx surfagent help`
|
|
103
|
+
**Actual output:** `Full API docs: https://github.com/AllAboutAI-YT/webpilot#readme`
|
|
104
|
+
**Expected:** Should reference `https://github.com/AllAboutAI-YT/surfagent#readme`
|
|
105
|
+
|
|
106
|
+
**File:** `src/cli.ts`, line 133
|
|
107
|
+
|
|
108
|
+
---
|
|
109
|
+
|
|
110
|
+
### BUG-9: 404 error message lists only 3 of 12+ endpoints [Low]
|
|
111
|
+
|
|
112
|
+
**Steps:** Hit any unknown endpoint (e.g. `GET /notreal`).
|
|
113
|
+
**Actual:** `"Not found. Endpoints: POST /recon, GET /tabs, GET /health"`
|
|
114
|
+
**Expected:** Should list all endpoints or link to docs.
|
|
115
|
+
|
|
116
|
+
**File:** `src/api/server.ts`, line 211
|
|
117
|
+
**Suggested fix:** List all endpoints or say `"Not found. See GET /health for status or docs at <url>"`
|
|
118
|
+
|
|
119
|
+
---
|
|
120
|
+
|
|
121
|
+
### BUG-10: Error messages echo back full user input (info leak / response bloat) [Low]
|
|
122
|
+
|
|
123
|
+
**Steps:** Send a request with an extremely long tab pattern (e.g. 10,000 chars).
|
|
124
|
+
**Actual:** Error response includes the full 10K string: `"Tab not found: AAAA...AAAA"`
|
|
125
|
+
**Expected:** Error should truncate the echoed input.
|
|
126
|
+
|
|
127
|
+
**File:** `src/api/act.ts`, line 25
|
|
128
|
+
**Suggested fix:** Truncate `tabPattern` in error messages to ~100 chars.
|
|
129
|
+
|
|
130
|
+
---
|
|
131
|
+
|
|
132
|
+
## IMPROVEMENTS
|
|
133
|
+
|
|
134
|
+
### IMP-1: No `/close` API endpoint [High]
|
|
135
|
+
|
|
136
|
+
The CLI has a `close` command but there is no HTTP API equivalent. Agents using the API cannot close tabs without using the CLI. The CLAUDE.md docs suggest using `/click` with text "close" which is unreliable.
|
|
137
|
+
|
|
138
|
+
**Suggested:** Add `POST /close` endpoint with `{"tab": "...", "all": false}`.
|
|
139
|
+
|
|
140
|
+
---
|
|
141
|
+
|
|
142
|
+
### IMP-2: `/fill` returns HTTP 200 even when all fields fail [Medium]
|
|
143
|
+
|
|
144
|
+
When every field in a `/fill` request fails (e.g. all selectors not found), the response is HTTP 200 with `success: false` in each field. An agent checking only HTTP status would think the operation succeeded.
|
|
145
|
+
|
|
146
|
+
**Suggested:** Return HTTP 207 (Multi-Status) or at minimum add a top-level `"allSucceeded": false` field.
|
|
147
|
+
|
|
148
|
+
---
|
|
149
|
+
|
|
150
|
+
### IMP-3: `/click` silently caps `waitAfter` at 10 seconds [Low]
|
|
151
|
+
|
|
152
|
+
`/click` supports `waitAfter` but caps it at 10 seconds silently. There's no feedback that the wait was capped.
|
|
153
|
+
|
|
154
|
+
**Suggested:** Return `"waitCapped": true` in response when `waitAfter` exceeds 10s.
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
### IMP-4: `/scroll` contentPreview unreliable on pages with fixed/sticky sidebars [Low]
|
|
159
|
+
|
|
160
|
+
The scroll content preview skips `position: fixed/sticky` elements via `getComputedStyle` check on `el.closest('nav, aside, ...')`, but many sticky sidebars use `position: sticky` on a non-nav/aside element. Some content may be incorrectly filtered.
|
|
161
|
+
|
|
162
|
+
---
|
|
163
|
+
|
|
164
|
+
### IMP-5: `/dismiss` has limited cookie banner coverage [Low]
|
|
165
|
+
|
|
166
|
+
Tested on BBC — no cookie banner dismissed despite BBC having a consent banner. The consent patterns list is good but may miss banners that use custom markup (not standard `<button>` elements, or that appear inside shadow DOM or iframes).
|
|
167
|
+
|
|
168
|
+
---
|
|
169
|
+
|
|
170
|
+
### IMP-6: No `POST /screenshot` API endpoint [Medium]
|
|
171
|
+
|
|
172
|
+
The CLI has a `screenshot` command but there's no HTTP API equivalent. Screenshots are essential for agent visual verification.
|
|
173
|
+
|
|
174
|
+
**Suggested:** Add `POST /screenshot` returning base64-encoded image or saving to a path.
|
|
175
|
+
|
|
176
|
+
---
|
|
177
|
+
|
|
178
|
+
### IMP-7: `startChrome()` copies cookies but not LocalStorage/IndexedDB [Low]
|
|
179
|
+
|
|
180
|
+
Cookies are copied from the default Chrome profile, preserving some logins. But many modern apps use LocalStorage or IndexedDB for auth tokens (e.g. SPAs), which are not copied.
|
|
181
|
+
|
|
182
|
+
---
|
|
183
|
+
|
|
184
|
+
### IMP-8: No rate limiting or request queuing [Low]
|
|
185
|
+
|
|
186
|
+
Concurrent CDP connections to the same tab could cause race conditions. While basic parallel reads worked in testing, concurrent writes (fill + click on same tab) could conflict.
|
|
187
|
+
|
|
188
|
+
**Suggested:** Add optional request queuing per tab to serialize mutations.
|
|
189
|
+
|
|
190
|
+
---
|
|
191
|
+
|
|
192
|
+
## SUMMARY
|
|
193
|
+
|
|
194
|
+
| Category | Count | Critical | High | Medium | Low |
|
|
195
|
+
|----------|-------|----------|------|--------|-----|
|
|
196
|
+
| Bugs | 10 | 0 | 0 | 3 | 7 |
|
|
197
|
+
| Improvements | 8 | 0 | 1 | 2 | 5 |
|
|
198
|
+
|
|
199
|
+
**Overall assessment:** The core functionality (recon, navigate, fill, click, scroll, read, eval) works reliably. Error handling is solid across all endpoints with proper HTTP status codes and descriptive messages. The main issues are cosmetic/documentation bugs and missing API parity with the CLI. The most impactful improvements would be adding `/close` and `/screenshot` API endpoints.
|
package/dist/api/act.d.ts
CHANGED
|
@@ -99,3 +99,22 @@ export declare function typeKeys(tabPattern: string, keys: string, options: {
|
|
|
99
99
|
typed: number;
|
|
100
100
|
submitted?: boolean;
|
|
101
101
|
}>;
|
|
102
|
+
/**
 * Request payload accepted by dispatchEvent (the POST /dispatch endpoint).
 *
 * - tab: tab pattern used to locate the target Chrome tab.
 * - selector: CSS selector; the first matching element is the event target.
 * - event: DOM event type to dispatch (e.g. "submit", "click").
 * - bubbles / cancelable: event flags; both are treated as true unless explicitly false.
 * - detail: payload for a CustomEvent; only used for event types without a dedicated constructor.
 * - eventInit: extra event-init properties merged over bubbles/cancelable.
 * - reactDebug: when true, no event is dispatched; React "on*" handler props found on the
 *   element and its ancestors are returned instead.
 */
export interface DispatchRequest {
|
|
103
|
+
tab: string;
|
|
104
|
+
selector: string;
|
|
105
|
+
event: string;
|
|
106
|
+
bubbles?: boolean;
|
|
107
|
+
cancelable?: boolean;
|
|
108
|
+
detail?: any;
|
|
109
|
+
eventInit?: Record<string, any>;
|
|
110
|
+
reactDebug?: boolean;
|
|
111
|
+
}
|
|
112
|
+
/**
 * Dispatches a DOM event on the element selected by `request.selector` in the tab
 * matching `request.tab`, or lists React handler props when `request.reactDebug` is set.
 *
 * @param request - What to dispatch and where (see DispatchRequest).
 * @param options - CDP connection settings; the implementation falls back to port 9222
 *   and host "localhost" when they are omitted.
 * @returns `success` plus either `dispatched` (a short description of the event and target),
 *   `reactHandlers` (reactDebug mode), or `error` (e.g. element not found).
 */
export declare function dispatchEvent(request: DispatchRequest, options: {
|
|
113
|
+
port?: number;
|
|
114
|
+
host?: string;
|
|
115
|
+
}): Promise<{
|
|
116
|
+
success: boolean;
|
|
117
|
+
dispatched?: string;
|
|
118
|
+
reactHandlers?: any[];
|
|
119
|
+
error?: string;
|
|
120
|
+
}>;
|
package/dist/api/act.js
CHANGED
|
@@ -20,7 +20,7 @@ async function resolveTab(tabPattern, port, host) {
|
|
|
20
20
|
}
|
|
21
21
|
}
|
|
22
22
|
if (!tab)
|
|
23
|
-
throw new Error(`Tab not found: ${tabPattern}`);
|
|
23
|
+
throw new Error(`Tab not found: ${tabPattern.length > 100 ? tabPattern.substring(0, 100) + '...' : tabPattern}`);
|
|
24
24
|
return tab;
|
|
25
25
|
}
|
|
26
26
|
export async function fillFields(request, options) {
|
|
@@ -142,6 +142,29 @@ export async function fillFields(request, options) {
|
|
|
142
142
|
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
143
143
|
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
144
144
|
}
|
|
145
|
+
else if (request.submit === 'form') {
|
|
146
|
+
// Dispatch native submit event on nearest form — works on React SPAs where Enter is intercepted
|
|
147
|
+
// (e.g. X.com search combobox, autocomplete widgets that swallow Enter key)
|
|
148
|
+
await client.Runtime.evaluate({
|
|
149
|
+
expression: `
|
|
150
|
+
(function() {
|
|
151
|
+
// Find the last filled field and its nearest form ancestor
|
|
152
|
+
const lastSelector = ${JSON.stringify(request.fields.length > 0 ? request.fields[request.fields.length - 1].selector : null)};
|
|
153
|
+
let form;
|
|
154
|
+
if (lastSelector) {
|
|
155
|
+
const field = document.querySelector(lastSelector);
|
|
156
|
+
form = field ? field.closest('form') : null;
|
|
157
|
+
}
|
|
158
|
+
if (!form) {
|
|
159
|
+
form = document.querySelector('form');
|
|
160
|
+
}
|
|
161
|
+
if (!form) throw new Error('No form found');
|
|
162
|
+
form.dispatchEvent(new Event('submit', { bubbles: true, cancelable: true }));
|
|
163
|
+
})()
|
|
164
|
+
`,
|
|
165
|
+
returnByValue: true
|
|
166
|
+
});
|
|
167
|
+
}
|
|
145
168
|
else {
|
|
146
169
|
const submitSelector = request.submit === 'auto'
|
|
147
170
|
? 'button[type="submit"], input[type="submit"]'
|
|
@@ -707,3 +730,80 @@ export async function typeKeys(tabPattern, keys, options) {
|
|
|
707
730
|
throw error;
|
|
708
731
|
}
|
|
709
732
|
}
|
|
733
|
+
/**
 * Dispatch a DOM event on the first element matching a CSS selector in a tab,
 * or — when `request.reactDebug` is true — list the React `on*` handler props
 * found on that element and its ancestors without dispatching anything.
 *
 * @param {object} request
 * @param {string} request.tab - Tab pattern handed to resolveTab.
 * @param {string} request.selector - CSS selector of the target element.
 * @param {string} [request.event] - DOM event type (unused in reactDebug mode).
 * @param {boolean} [request.bubbles] - Treated as true unless explicitly false.
 * @param {boolean} [request.cancelable] - Treated as true unless explicitly false.
 * @param {*} [request.detail] - CustomEvent payload; only used for event types
 *   that have no dedicated constructor below. Falsy payloads (0, false, '') are kept.
 * @param {object} [request.eventInit] - Extra init properties, merged over bubbles/cancelable.
 * @param {boolean} [request.reactDebug] - Inspect React handlers instead of dispatching.
 * @param {{port?: number, host?: string}} options - CDP endpoint (defaults: 9222, 'localhost').
 * @returns {Promise<{success: boolean, dispatched?: string, reactHandlers?: any[], error?: string}>}
 *   `success: false` with `error` when the element is missing or the page-side script throws
 *   (for example on an invalid selector).
 * @throws Propagates errors from tab resolution, the CDP connection and Runtime.evaluate itself.
 */
export async function dispatchEvent(request, options) {
    const port = options.port || 9222;
    const host = options.host || 'localhost';
    const tab = await resolveTab(request.tab, port, host);
    const client = await connectToTab(tab.id, port, host);
    try {
        const result = await client.Runtime.evaluate({
            // Every request value is embedded through JSON.stringify so it cannot break out of the script.
            expression: `
                (function() {
                    const selector = ${JSON.stringify(request.selector)};
                    const eventType = ${JSON.stringify(request.event)};
                    const bubbles = ${request.bubbles !== false};
                    const cancelable = ${request.cancelable !== false};
                    const detail = ${JSON.stringify(request.detail ?? null)};
                    const extraInit = ${JSON.stringify(request.eventInit || {})};
                    const reactDebug = ${JSON.stringify(!!request.reactDebug)};

                    const el = document.querySelector(selector);
                    if (!el) return { success: false, error: 'Element not found: ' + selector };

                    // React debug: walk up tree and find all React event handlers
                    if (reactDebug) {
                        const handlers = [];
                        let current = el;
                        while (current && current !== document.documentElement) {
                            const propsKey = Object.keys(current).find(k => k.startsWith('__reactProps'));
                            if (propsKey) {
                                const props = current[propsKey] || {};
                                const reactHandlers = Object.keys(props).filter(k => typeof props[k] === 'function' && k.startsWith('on'));
                                if (reactHandlers.length > 0) {
                                    handlers.push({
                                        tag: current.tagName,
                                        role: current.getAttribute('role'),
                                        testid: current.getAttribute('data-testid'),
                                        className: (current.className || '').toString().substring(0, 60),
                                        handlers: reactHandlers
                                    });
                                }
                            }
                            current = current.parentElement;
                        }
                        return { success: true, reactHandlers: handlers };
                    }

                    // Build the event object
                    let event;
                    const init = { bubbles, cancelable, ...extraInit };

                    // Use specific event constructors for better compatibility
                    if (eventType === 'click' || eventType === 'mousedown' || eventType === 'mouseup' || eventType === 'dblclick') {
                        event = new MouseEvent(eventType, init);
                    } else if (eventType === 'keydown' || eventType === 'keyup' || eventType === 'keypress') {
                        event = new KeyboardEvent(eventType, init);
                    } else if (eventType === 'input' || eventType === 'change') {
                        event = new Event(eventType, init);
                    } else if (eventType === 'pointerdown' || eventType === 'pointerup' || eventType === 'pointermove') {
                        event = new PointerEvent(eventType, init);
                    } else if (detail !== null) {
                        event = new CustomEvent(eventType, { ...init, detail });
                    } else {
                        event = new Event(eventType, init);
                    }

                    el.dispatchEvent(event);
                    return { success: true, dispatched: eventType + ' on ' + el.tagName + (el.getAttribute('role') ? '[role=' + el.getAttribute('role') + ']' : '') };
                })()
            `,
            returnByValue: true
        });
        // A throw inside the page script does not reject evaluate(); it is reported via
        // exceptionDetails and leaves result.value undefined. Surface it as a normal failure
        // so callers always receive an object with a `success` field.
        const failure = result.exceptionDetails;
        if (failure) {
            return {
                success: false,
                error: failure.exception?.description ?? failure.text ?? 'Page-side evaluation failed'
            };
        }
        return result.result.value;
    }
    finally {
        // Single cleanup point: the CDP client is closed on success and on every error path.
        await client.close();
    }
}
|
package/dist/api/recon.js
CHANGED
|
@@ -286,7 +286,17 @@ export async function reconUrl(url, options) {
|
|
|
286
286
|
expression: 'document.title',
|
|
287
287
|
returnByValue: true
|
|
288
288
|
});
|
|
289
|
-
|
|
289
|
+
let title = titleResult.result.value || '';
|
|
290
|
+
// Fall back to CDP target title when document.title is empty (e.g. pages with no <title> tag)
|
|
291
|
+
if (!title) {
|
|
292
|
+
try {
|
|
293
|
+
const targets = await CDP.List({ port, host });
|
|
294
|
+
const t = targets.find((t) => t.id === target.id);
|
|
295
|
+
if (t?.title)
|
|
296
|
+
title = t.title;
|
|
297
|
+
}
|
|
298
|
+
catch { }
|
|
299
|
+
}
|
|
290
300
|
// Get current URL (may have redirected)
|
|
291
301
|
const urlResult = await client.Runtime.evaluate({
|
|
292
302
|
expression: 'window.location.href',
|
|
@@ -307,7 +317,7 @@ export async function reconUrl(url, options) {
|
|
|
307
317
|
}
|
|
308
318
|
return {
|
|
309
319
|
url: finalUrl,
|
|
310
|
-
title,
|
|
320
|
+
title: title || target.title || '',
|
|
311
321
|
tabId: target.id,
|
|
312
322
|
timestamp: new Date().toISOString(),
|
|
313
323
|
meta: data.meta,
|
package/dist/api/server.js
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import http from 'node:http';
|
|
3
3
|
import { reconUrl, reconTab } from './recon.js';
|
|
4
|
-
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays, typeKeys } from './act.js';
|
|
4
|
+
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays, typeKeys, dispatchEvent } from './act.js';
|
|
5
5
|
import { getAllTabs } from '../chrome/tabs.js';
|
|
6
6
|
const PORT = parseInt(process.env.API_PORT || '3456', 10);
|
|
7
7
|
const CDP_PORT = parseInt(process.env.CDP_PORT || '9222', 10);
|
|
@@ -150,6 +150,16 @@ const server = http.createServer(async (req, res) => {
|
|
|
150
150
|
const result = await typeKeys(body.tab, body.keys, { port: CDP_PORT, host: CDP_HOST, submit: body.submit });
|
|
151
151
|
return json(res, 200, result);
|
|
152
152
|
}
|
|
153
|
+
// POST /dispatch — dispatch DOM events on elements (React SPA workaround)
|
|
154
|
+
if (path === '/dispatch' && req.method === 'POST') {
|
|
155
|
+
const body = parseBody(await readBody(req));
|
|
156
|
+
if (!body.tab || !body.selector || (!body.event && !body.reactDebug)) {
|
|
157
|
+
return json(res, 400, { error: 'Provide "tab", "selector", and "event" (e.g. "submit", "click"). Add "reactDebug":true to inspect React handlers instead.' });
|
|
158
|
+
}
|
|
159
|
+
const start = Date.now();
|
|
160
|
+
const result = await dispatchEvent(body, { port: CDP_PORT, host: CDP_HOST });
|
|
161
|
+
return json(res, 200, { ...result, _dispatchMs: Date.now() - start });
|
|
162
|
+
}
|
|
153
163
|
// POST /navigate — go to url, back, or forward in same tab
|
|
154
164
|
if (path === '/navigate' && req.method === 'POST') {
|
|
155
165
|
const body = parseBody(await readBody(req));
|
|
@@ -180,7 +190,7 @@ const server = http.createServer(async (req, res) => {
|
|
|
180
190
|
return json(res, 503, { status: 'error', cdpConnected: false });
|
|
181
191
|
}
|
|
182
192
|
}
|
|
183
|
-
json(res, 404, { error: 'Not found. Endpoints: POST /recon,
|
|
193
|
+
json(res, 404, { error: 'Not found. Endpoints: POST /recon, /read, /fill, /click, /type, /scroll, /navigate, /eval, /dispatch, /dismiss, /captcha, /focus | GET /tabs, /health' });
|
|
184
194
|
}
|
|
185
195
|
catch (error) {
|
|
186
196
|
const message = error instanceof Error ? error.message : String(error);
|
|
@@ -212,6 +222,7 @@ server.listen(PORT, () => {
|
|
|
212
222
|
console.log(` POST /recon — { url: "..." } or { tab: "0" }`);
|
|
213
223
|
console.log(` POST /fill — { tab, fields: [{ selector, value }], submit? }`);
|
|
214
224
|
console.log(` POST /click — { tab, selector? , text? }`);
|
|
225
|
+
console.log(` POST /dispatch— { tab, selector, event, reactDebug? }`);
|
|
215
226
|
console.log(` GET /tabs — list open Chrome tabs`);
|
|
216
227
|
console.log(` GET /health — check CDP connection`);
|
|
217
228
|
});
|
package/dist/browser.js
CHANGED
|
@@ -14,13 +14,13 @@ import { descCommand } from './commands/desc.js';
|
|
|
14
14
|
import { closeCommand } from './commands/close.js';
|
|
15
15
|
const program = new Command();
|
|
16
16
|
program
|
|
17
|
-
.name('
|
|
18
|
-
.description('CLI
|
|
19
|
-
.version('1.
|
|
17
|
+
.name('surfagent')
|
|
18
|
+
.description('Browser automation CLI for AI agents — interact with Chrome tabs via CDP')
|
|
19
|
+
.version('1.1.1');
|
|
20
20
|
// Global options
|
|
21
21
|
program
|
|
22
22
|
.option('-p, --port <number>', 'Chrome debugging port', '9222')
|
|
23
|
-
.option('-
|
|
23
|
+
.option('-H, --host <string>', 'Chrome debugging host', 'localhost');
|
|
24
24
|
// List command
|
|
25
25
|
program
|
|
26
26
|
.command('list')
|
package/dist/cli.js
CHANGED
|
@@ -1,5 +1,6 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
import { execSync, spawn } from 'node:child_process';
|
|
3
|
+
import fs from 'node:fs';
|
|
3
4
|
import http from 'node:http';
|
|
4
5
|
import path from 'node:path';
|
|
5
6
|
import { fileURLToPath } from 'node:url';
|
|
@@ -49,8 +50,8 @@ function getChromePath() {
|
|
|
49
50
|
};
|
|
50
51
|
for (const p of paths[os] || []) {
|
|
51
52
|
try {
|
|
52
|
-
|
|
53
|
-
|
|
53
|
+
if (fs.existsSync(p))
|
|
54
|
+
return p;
|
|
54
55
|
}
|
|
55
56
|
catch {
|
|
56
57
|
continue;
|
|
@@ -120,7 +121,7 @@ Environment variables:
|
|
|
120
121
|
CHROME_USER_DATA_DIR Chrome profile directory (default: /tmp/surfagent-chrome)
|
|
121
122
|
|
|
122
123
|
After starting, your AI agent can call http://localhost:3456
|
|
123
|
-
Full API docs: https://github.com/AllAboutAI-YT/
|
|
124
|
+
Full API docs: https://github.com/AllAboutAI-YT/surfagent#readme
|
|
124
125
|
`);
|
|
125
126
|
return;
|
|
126
127
|
}
|
package/package.json
CHANGED
package/src/api/act.ts
CHANGED
|
@@ -22,7 +22,7 @@ async function resolveTab(tabPattern: string, port: number, host: string): Promi
|
|
|
22
22
|
}
|
|
23
23
|
}
|
|
24
24
|
|
|
25
|
-
if (!tab) throw new Error(`Tab not found: ${tabPattern}`);
|
|
25
|
+
if (!tab) throw new Error(`Tab not found: ${tabPattern.length > 100 ? tabPattern.substring(0, 100) + '...' : tabPattern}`);
|
|
26
26
|
return tab;
|
|
27
27
|
}
|
|
28
28
|
|
|
@@ -163,6 +163,28 @@ export async function fillFields(
|
|
|
163
163
|
// Press Enter via CDP — works on SPAs like YouTube
|
|
164
164
|
await cdp.Input.dispatchKeyEvent({ type: 'keyDown', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
165
165
|
await cdp.Input.dispatchKeyEvent({ type: 'keyUp', key: 'Enter', code: 'Enter', windowsVirtualKeyCode: 13, nativeVirtualKeyCode: 13 });
|
|
166
|
+
} else if (request.submit === 'form') {
|
|
167
|
+
// Dispatch native submit event on nearest form — works on React SPAs where Enter is intercepted
|
|
168
|
+
// (e.g. X.com search combobox, autocomplete widgets that swallow Enter key)
|
|
169
|
+
await client.Runtime.evaluate({
|
|
170
|
+
expression: `
|
|
171
|
+
(function() {
|
|
172
|
+
// Find the last filled field and its nearest form ancestor
|
|
173
|
+
const lastSelector = ${JSON.stringify(request.fields.length > 0 ? request.fields[request.fields.length - 1].selector : null)};
|
|
174
|
+
let form;
|
|
175
|
+
if (lastSelector) {
|
|
176
|
+
const field = document.querySelector(lastSelector);
|
|
177
|
+
form = field ? field.closest('form') : null;
|
|
178
|
+
}
|
|
179
|
+
if (!form) {
|
|
180
|
+
form = document.querySelector('form');
|
|
181
|
+
}
|
|
182
|
+
if (!form) throw new Error('No form found');
|
|
183
|
+
form.dispatchEvent(new Event('submit', { bubbles: true, cancelable: true }));
|
|
184
|
+
})()
|
|
185
|
+
`,
|
|
186
|
+
returnByValue: true
|
|
187
|
+
});
|
|
166
188
|
} else {
|
|
167
189
|
const submitSelector = request.submit === 'auto'
|
|
168
190
|
? 'button[type="submit"], input[type="submit"]'
|
|
@@ -815,3 +837,99 @@ export async function typeKeys(
|
|
|
815
837
|
throw error;
|
|
816
838
|
}
|
|
817
839
|
}
|
|
840
|
+
|
|
841
|
+
// POST /dispatch — dispatch any DOM event on any element
|
|
842
|
+
// Solves React SPAs where .click() and CDP key events don't trigger synthetic event handlers
|
|
843
|
+
/**
 * Request payload for dispatchEvent (POST /dispatch): identifies a tab and an element,
 * and describes either the DOM event to dispatch on it or a React handler inspection.
 */
export interface DispatchRequest {
|
|
844
|
+
tab: string; // Tab pattern used to resolve the target tab
|
|
845
|
+
selector: string; // CSS selector for target element
|
|
846
|
+
event: string; // Event type: "submit", "click", "input", "change", "focus", "blur", etc. Not used when reactDebug is true
|
|
847
|
+
bubbles?: boolean; // Default: true
|
|
848
|
+
cancelable?: boolean; // Default: true
|
|
849
|
+
detail?: any; // For CustomEvent detail payload; only consulted for event types without a dedicated constructor
|
|
850
|
+
eventInit?: Record<string, any>; // Extra event init properties (e.g. {key: "Enter"} for KeyboardEvent)
|
|
851
|
+
reactDebug?: boolean; // If true, return React event handlers found on element and ancestors
|
|
852
|
+
}
|
|
853
|
+
|
|
854
|
+
/**
 * Dispatch a DOM event on the first element matching a CSS selector in a tab,
 * or — when `request.reactDebug` is true — list the React `on*` handler props
 * found on that element and its ancestors without dispatching anything.
 *
 * @param request - Target tab/element and the event to build (see DispatchRequest).
 *   A falsy `detail` payload (0, false, '') is preserved and still produces a CustomEvent.
 * @param options - CDP endpoint; defaults to port 9222 on 'localhost'.
 * @returns `success: true` with `dispatched` or `reactHandlers`; `success: false` with
 *   `error` when the element is missing or the page-side script throws (for example on
 *   an invalid selector).
 * @throws Propagates errors from tab resolution, the CDP connection and Runtime.evaluate itself.
 */
export async function dispatchEvent(
  request: DispatchRequest,
  options: { port?: number; host?: string }
): Promise<{ success: boolean; dispatched?: string; reactHandlers?: any[]; error?: string }> {
  const port = options.port || 9222;
  const host = options.host || 'localhost';

  const tab = await resolveTab(request.tab, port, host);
  const client = await connectToTab(tab.id, port, host);

  try {
    const result = await client.Runtime.evaluate({
      // Every request value is embedded through JSON.stringify so it cannot break out of the script.
      expression: `
        (function() {
          const selector = ${JSON.stringify(request.selector)};
          const eventType = ${JSON.stringify(request.event)};
          const bubbles = ${request.bubbles !== false};
          const cancelable = ${request.cancelable !== false};
          const detail = ${JSON.stringify(request.detail ?? null)};
          const extraInit = ${JSON.stringify(request.eventInit || {})};
          const reactDebug = ${JSON.stringify(!!request.reactDebug)};

          const el = document.querySelector(selector);
          if (!el) return { success: false, error: 'Element not found: ' + selector };

          // React debug: walk up tree and find all React event handlers
          if (reactDebug) {
            const handlers = [];
            let current = el;
            while (current && current !== document.documentElement) {
              const propsKey = Object.keys(current).find(k => k.startsWith('__reactProps'));
              if (propsKey) {
                const props = current[propsKey] || {};
                const reactHandlers = Object.keys(props).filter(k => typeof props[k] === 'function' && k.startsWith('on'));
                if (reactHandlers.length > 0) {
                  handlers.push({
                    tag: current.tagName,
                    role: current.getAttribute('role'),
                    testid: current.getAttribute('data-testid'),
                    className: (current.className || '').toString().substring(0, 60),
                    handlers: reactHandlers
                  });
                }
              }
              current = current.parentElement;
            }
            return { success: true, reactHandlers: handlers };
          }

          // Build the event object
          let event;
          const init = { bubbles, cancelable, ...extraInit };

          // Use specific event constructors for better compatibility
          if (eventType === 'click' || eventType === 'mousedown' || eventType === 'mouseup' || eventType === 'dblclick') {
            event = new MouseEvent(eventType, init);
          } else if (eventType === 'keydown' || eventType === 'keyup' || eventType === 'keypress') {
            event = new KeyboardEvent(eventType, init);
          } else if (eventType === 'input' || eventType === 'change') {
            event = new Event(eventType, init);
          } else if (eventType === 'pointerdown' || eventType === 'pointerup' || eventType === 'pointermove') {
            event = new PointerEvent(eventType, init);
          } else if (detail !== null) {
            event = new CustomEvent(eventType, { ...init, detail });
          } else {
            event = new Event(eventType, init);
          }

          el.dispatchEvent(event);
          return { success: true, dispatched: eventType + ' on ' + el.tagName + (el.getAttribute('role') ? '[role=' + el.getAttribute('role') + ']' : '') };
        })()
      `,
      returnByValue: true
    });

    // A throw inside the page script does not reject evaluate(); it is reported via
    // exceptionDetails and leaves result.value undefined. Surface it as a normal failure
    // so callers always receive an object with a `success` field.
    const failure = result.exceptionDetails;
    if (failure) {
      return {
        success: false,
        error: failure.exception?.description ?? failure.text ?? 'Page-side evaluation failed'
      };
    }

    return result.result.value as any;
  } finally {
    // Single cleanup point: the CDP client is closed on success and on every error path.
    await client.close();
  }
}
|
package/src/api/recon.ts
CHANGED
|
@@ -343,7 +343,15 @@ export async function reconUrl(
|
|
|
343
343
|
expression: 'document.title',
|
|
344
344
|
returnByValue: true
|
|
345
345
|
});
|
|
346
|
-
|
|
346
|
+
let title = titleResult.result.value as string || '';
|
|
347
|
+
// Fall back to CDP target title when document.title is empty (e.g. pages with no <title> tag)
|
|
348
|
+
if (!title) {
|
|
349
|
+
try {
|
|
350
|
+
const targets = await CDP.List({ port, host });
|
|
351
|
+
const t = targets.find((t: any) => t.id === target.id);
|
|
352
|
+
if (t?.title) title = t.title;
|
|
353
|
+
} catch {}
|
|
354
|
+
}
|
|
347
355
|
|
|
348
356
|
// Get current URL (may have redirected)
|
|
349
357
|
const urlResult = await client.Runtime.evaluate({
|
|
@@ -370,7 +378,7 @@ export async function reconUrl(
|
|
|
370
378
|
|
|
371
379
|
return {
|
|
372
380
|
url: finalUrl,
|
|
373
|
-
title,
|
|
381
|
+
title: title || target.title || '',
|
|
374
382
|
tabId: target.id,
|
|
375
383
|
timestamp: new Date().toISOString(),
|
|
376
384
|
meta: data.meta,
|
package/src/api/server.ts
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
import http from 'node:http';
|
|
4
4
|
import { reconUrl, reconTab } from './recon.js';
|
|
5
|
-
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays, typeKeys } from './act.js';
|
|
5
|
+
import { fillFields, clickElement, scrollPage, navigatePage, evalInTab, focusTab, readPage, captchaInteract, dismissOverlays, typeKeys, dispatchEvent } from './act.js';
|
|
6
6
|
import { getAllTabs } from '../chrome/tabs.js';
|
|
7
7
|
|
|
8
8
|
const PORT = parseInt(process.env.API_PORT || '3456', 10);
|
|
@@ -176,6 +176,17 @@ const server = http.createServer(async (req, res) => {
|
|
|
176
176
|
return json(res, 200, result);
|
|
177
177
|
}
|
|
178
178
|
|
|
179
|
+
// POST /dispatch — dispatch DOM events on elements (React SPA workaround)
|
|
180
|
+
if (path === '/dispatch' && req.method === 'POST') {
|
|
181
|
+
const body = parseBody(await readBody(req));
|
|
182
|
+
if (!body.tab || !body.selector || (!body.event && !body.reactDebug)) {
|
|
183
|
+
return json(res, 400, { error: 'Provide "tab", "selector", and "event" (e.g. "submit", "click"). Add "reactDebug":true to inspect React handlers instead.' });
|
|
184
|
+
}
|
|
185
|
+
const start = Date.now();
|
|
186
|
+
const result = await dispatchEvent(body, { port: CDP_PORT, host: CDP_HOST });
|
|
187
|
+
return json(res, 200, { ...result, _dispatchMs: Date.now() - start });
|
|
188
|
+
}
|
|
189
|
+
|
|
179
190
|
// POST /navigate — go to url, back, or forward in same tab
|
|
180
191
|
if (path === '/navigate' && req.method === 'POST') {
|
|
181
192
|
const body = parseBody(await readBody(req));
|
|
@@ -208,7 +219,7 @@ const server = http.createServer(async (req, res) => {
|
|
|
208
219
|
}
|
|
209
220
|
}
|
|
210
221
|
|
|
211
|
-
json(res, 404, { error: 'Not found. Endpoints: POST /recon,
|
|
222
|
+
json(res, 404, { error: 'Not found. Endpoints: POST /recon, /read, /fill, /click, /type, /scroll, /navigate, /eval, /dispatch, /dismiss, /captcha, /focus | GET /tabs, /health' });
|
|
212
223
|
} catch (error) {
|
|
213
224
|
const message = error instanceof Error ? error.message : String(error);
|
|
214
225
|
console.error(`[${new Date().toISOString()}] Error:`, message);
|
|
@@ -242,6 +253,7 @@ server.listen(PORT, () => {
|
|
|
242
253
|
console.log(` POST /recon — { url: "..." } or { tab: "0" }`);
|
|
243
254
|
console.log(` POST /fill — { tab, fields: [{ selector, value }], submit? }`);
|
|
244
255
|
console.log(` POST /click — { tab, selector? , text? }`);
|
|
256
|
+
console.log(` POST /dispatch— { tab, selector, event, reactDebug? }`);
|
|
245
257
|
console.log(` GET /tabs — list open Chrome tabs`);
|
|
246
258
|
console.log(` GET /health — check CDP connection`);
|
|
247
259
|
});
|
package/src/browser.ts
CHANGED
|
@@ -17,14 +17,14 @@ import { closeCommand } from './commands/close.js';
|
|
|
17
17
|
const program = new Command();
|
|
18
18
|
|
|
19
19
|
program
|
|
20
|
-
.name('
|
|
21
|
-
.description('CLI
|
|
22
|
-
.version('1.
|
|
20
|
+
.name('surfagent')
|
|
21
|
+
.description('Browser automation CLI for AI agents — interact with Chrome tabs via CDP')
|
|
22
|
+
.version('1.1.1');
|
|
23
23
|
|
|
24
24
|
// Global options
|
|
25
25
|
program
|
|
26
26
|
.option('-p, --port <number>', 'Chrome debugging port', '9222')
|
|
27
|
-
.option('-
|
|
27
|
+
.option('-H, --host <string>', 'Chrome debugging host', 'localhost');
|
|
28
28
|
|
|
29
29
|
// List command
|
|
30
30
|
program
|
package/src/cli.ts
CHANGED
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
#!/usr/bin/env node
|
|
2
2
|
|
|
3
3
|
import { execSync, spawn } from 'node:child_process';
|
|
4
|
+
import fs from 'node:fs';
|
|
4
5
|
import http from 'node:http';
|
|
5
6
|
import path from 'node:path';
|
|
6
7
|
import { fileURLToPath } from 'node:url';
|
|
@@ -54,8 +55,7 @@ function getChromePath(): string | null {
|
|
|
54
55
|
|
|
55
56
|
for (const p of paths[os] || []) {
|
|
56
57
|
try {
|
|
57
|
-
|
|
58
|
-
return p;
|
|
58
|
+
if (fs.existsSync(p)) return p;
|
|
59
59
|
} catch {
|
|
60
60
|
continue;
|
|
61
61
|
}
|
|
@@ -130,7 +130,7 @@ Environment variables:
|
|
|
130
130
|
CHROME_USER_DATA_DIR Chrome profile directory (default: /tmp/surfagent-chrome)
|
|
131
131
|
|
|
132
132
|
After starting, your AI agent can call http://localhost:3456
|
|
133
|
-
Full API docs: https://github.com/AllAboutAI-YT/
|
|
133
|
+
Full API docs: https://github.com/AllAboutAI-YT/surfagent#readme
|
|
134
134
|
`);
|
|
135
135
|
return;
|
|
136
136
|
}
|