browser-ctl 0.2.4__tar.gz → 0.2.6__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/PKG-INFO +8 -2
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/README.md +7 -1
- browser_ctl-0.2.6/browser_ctl/SKILL.md +193 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/cli.py +33 -24
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/client.py +98 -50
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/extension/background.js +130 -5
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/extension/manifest.json +1 -1
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/server.py +6 -4
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl.egg-info/PKG-INFO +8 -2
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/pyproject.toml +1 -1
- browser_ctl-0.2.4/browser_ctl/SKILL.md +0 -238
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/LICENSE +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/__init__.py +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/__main__.py +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/extension/icon-128.png +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/extension/icon-16.png +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/extension/icon-32.png +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl/extension/icon-48.png +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl.egg-info/SOURCES.txt +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl.egg-info/dependency_links.txt +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl.egg-info/entry_points.txt +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl.egg-info/requires.txt +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/browser_ctl.egg-info/top_level.txt +0 -0
- {browser_ctl-0.2.4 → browser_ctl-0.2.6}/setup.cfg +0 -0
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: browser-ctl
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Control your browser from the command line via a Chrome extension + WebSocket bridge
|
|
5
5
|
Author-email: geb <853934146@qq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -68,6 +68,7 @@ Tools like [browser-use](https://github.com/browser-use/browser-use), [Playwrigh
|
|
|
68
68
|
| **Complex SDK integration** — requires importing libraries and writing async code | browser-use, Stagehand | Pure CLI with JSON output — any LLM can call `bctl click "button"` |
|
|
69
69
|
| **Heavy dependencies** — Playwright alone pulls ~50 MB of packages + browser binary | Playwright, Puppeteer | CLI is stdlib-only; server needs only `aiohttp` |
|
|
70
70
|
| **Token-inefficient for LLMs** — verbose API calls waste context window tokens | SDK-based tools | Concise commands: `bctl text h1` vs pages of boilerplate |
|
|
71
|
+
| **Broken clicks on SPAs** — programmatic clicks get blocked by popup blockers | Puppeteer, Playwright | Intercepts `window.open()` and navigates via `chrome.tabs` — SPA-compatible |
|
|
71
72
|
|
|
72
73
|
<br>
|
|
73
74
|
|
|
@@ -218,6 +219,10 @@ All `<sel>` arguments accept CSS selectors **or** element refs from `snapshot` (
|
|
|
218
219
|
| `bctl ping` | Check server & extension status |
|
|
219
220
|
| `bctl serve` | Start server in foreground |
|
|
220
221
|
| `bctl stop` | Stop server |
|
|
222
|
+
| `bctl setup` | Install extension to `~/.browser-ctl/extension/` + open Chrome extensions page |
|
|
223
|
+
| `bctl setup cursor` | Install AI skill (`SKILL.md`) into Cursor IDE |
|
|
224
|
+
| `bctl setup opencode` | Install AI skill into OpenCode |
|
|
225
|
+
| `bctl setup <path>` | Install AI skill to a custom directory |
|
|
221
226
|
|
|
222
227
|
<br>
|
|
223
228
|
|
|
@@ -379,9 +384,10 @@ Non-zero exit code on errors — works naturally with `set -e` and `&&` chains.
|
|
|
379
384
|
|
|
380
385
|
| Component | Details |
|
|
381
386
|
|-----------|---------|
|
|
382
|
-
| **CLI** | Stdlib only,
|
|
387
|
+
| **CLI** | Stdlib only, raw-socket HTTP (zero heavy imports, ~5ms cold start) |
|
|
383
388
|
| **Bridge Server** | Async relay (aiohttp), auto-daemonizes |
|
|
384
389
|
| **Extension** | MV3 service worker, auto-reconnects via `chrome.alarms` |
|
|
390
|
+
| **Click** | Three-phase: pointer events → MAIN-world click → `window.open()` interception for SPA compatibility |
|
|
385
391
|
| **Eval** | Dual strategy: MAIN-world injection (fast) + CDP fallback (CSP-safe) |
|
|
386
392
|
|
|
387
393
|
<br>
|
|
@@ -42,6 +42,7 @@ Tools like [browser-use](https://github.com/browser-use/browser-use), [Playwrigh
|
|
|
42
42
|
| **Complex SDK integration** — requires importing libraries and writing async code | browser-use, Stagehand | Pure CLI with JSON output — any LLM can call `bctl click "button"` |
|
|
43
43
|
| **Heavy dependencies** — Playwright alone pulls ~50 MB of packages + browser binary | Playwright, Puppeteer | CLI is stdlib-only; server needs only `aiohttp` |
|
|
44
44
|
| **Token-inefficient for LLMs** — verbose API calls waste context window tokens | SDK-based tools | Concise commands: `bctl text h1` vs pages of boilerplate |
|
|
45
|
+
| **Broken clicks on SPAs** — programmatic clicks get blocked by popup blockers | Puppeteer, Playwright | Intercepts `window.open()` and navigates via `chrome.tabs` — SPA-compatible |
|
|
45
46
|
|
|
46
47
|
<br>
|
|
47
48
|
|
|
@@ -192,6 +193,10 @@ All `<sel>` arguments accept CSS selectors **or** element refs from `snapshot` (
|
|
|
192
193
|
| `bctl ping` | Check server & extension status |
|
|
193
194
|
| `bctl serve` | Start server in foreground |
|
|
194
195
|
| `bctl stop` | Stop server |
|
|
196
|
+
| `bctl setup` | Install extension to `~/.browser-ctl/extension/` + open Chrome extensions page |
|
|
197
|
+
| `bctl setup cursor` | Install AI skill (`SKILL.md`) into Cursor IDE |
|
|
198
|
+
| `bctl setup opencode` | Install AI skill into OpenCode |
|
|
199
|
+
| `bctl setup <path>` | Install AI skill to a custom directory |
|
|
195
200
|
|
|
196
201
|
<br>
|
|
197
202
|
|
|
@@ -353,9 +358,10 @@ Non-zero exit code on errors — works naturally with `set -e` and `&&` chains.
|
|
|
353
358
|
|
|
354
359
|
| Component | Details |
|
|
355
360
|
|-----------|---------|
|
|
356
|
-
| **CLI** | Stdlib only,
|
|
361
|
+
| **CLI** | Stdlib only, raw-socket HTTP (zero heavy imports, ~5ms cold start) |
|
|
357
362
|
| **Bridge Server** | Async relay (aiohttp), auto-daemonizes |
|
|
358
363
|
| **Extension** | MV3 service worker, auto-reconnects via `chrome.alarms` |
|
|
364
|
+
| **Click** | Three-phase: pointer events → MAIN-world click → `window.open()` interception for SPA compatibility |
|
|
359
365
|
| **Eval** | Dual strategy: MAIN-world injection (fast) + CDP fallback (CSP-safe) |
|
|
360
366
|
|
|
361
367
|
<br>
|
|
@@ -0,0 +1,193 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: browser-ctl
|
|
3
|
+
description: Control the user's Chrome browser via CLI commands that return JSON. Use when the user asks to interact with a browser, navigate web pages, click elements, extract page content, take screenshots, download files, or perform any browser automation task.
|
|
4
|
+
---
|
|
5
|
+
|
|
6
|
+
# Browser-Ctl
|
|
7
|
+
|
|
8
|
+
Control Chrome via CLI. All commands return JSON to stdout.
|
|
9
|
+
|
|
10
|
+
## Prerequisites
|
|
11
|
+
|
|
12
|
+
- Chrome with the Browser-Ctl extension loaded
|
|
13
|
+
- Bridge server (auto-starts on first `bctl` command)
|
|
14
|
+
|
|
15
|
+
## Always Start With
|
|
16
|
+
|
|
17
|
+
```bash
|
|
18
|
+
bctl ping
|
|
19
|
+
```
|
|
20
|
+
|
|
21
|
+
If extension is not connected, tell the user to check Chrome and the extension.
|
|
22
|
+
|
|
23
|
+
## Core Principle: Text-First Page Perception
|
|
24
|
+
|
|
25
|
+
**NEVER use `bctl screenshot` to understand page state.** Use text-based commands:
|
|
26
|
+
|
|
27
|
+
1. `bctl status` — current URL + title
|
|
28
|
+
2. `bctl text "<sel>"` — read visible text
|
|
29
|
+
3. `bctl select "<sel>"` — discover page structure (tag, text, id, class, href, src, aria-label)
|
|
30
|
+
4. `bctl snapshot` — list all interactive elements with refs (e0, e1, …)
|
|
31
|
+
5. `bctl count "<sel>"` — check if elements exist and how many
|
|
32
|
+
6. `bctl attr "<sel>" "<name>"` — get specific attributes
|
|
33
|
+
|
|
34
|
+
Only use `bctl screenshot` when the user explicitly asks for a visual capture.
|
|
35
|
+
|
|
36
|
+
## Commands
|
|
37
|
+
|
|
38
|
+
### Navigation
|
|
39
|
+
```
|
|
40
|
+
bctl navigate <url> Go to URL (aliases: nav, go; auto-prepends https://)
|
|
41
|
+
bctl back Go back
|
|
42
|
+
bctl forward Go forward (alias: fwd)
|
|
43
|
+
bctl reload Reload page
|
|
44
|
+
```
|
|
45
|
+
|
|
46
|
+
### Interaction
|
|
47
|
+
All `<sel>` accept CSS selectors or snapshot refs (e.g. `e5`).
|
|
48
|
+
```
|
|
49
|
+
bctl click <sel> [-i N] [-t text] Click element; -t filters by visible text
|
|
50
|
+
bctl dblclick <sel> [-i N] [-t text] Double-click
|
|
51
|
+
bctl hover <sel> [-i N] [-t text] Hover (triggers mouseover)
|
|
52
|
+
bctl focus <sel> [-i N] [-t text] Focus element
|
|
53
|
+
bctl type <sel> <text> Type text (replaces existing; React-compatible)
|
|
54
|
+
bctl input-text <sel> <text> [--clear] [--delay ms] Char-by-char (rich editors)
|
|
55
|
+
bctl press <key> Press key: Enter, Escape, Tab, ArrowDown, etc.
|
|
56
|
+
bctl check <sel> [-i N] [-t text] Check checkbox/radio
|
|
57
|
+
bctl uncheck <sel> [-i N] [-t text] Uncheck checkbox
|
|
58
|
+
bctl scroll <dir|sel> [n] Scroll: up/down/top/bottom/<selector> [pixels]
|
|
59
|
+
bctl select-option <sel> <val> [--text] Select dropdown option (alias: sopt)
|
|
60
|
+
bctl drag <src> [target] [--dx N --dy N] Drag element to target or by offset
|
|
61
|
+
```
|
|
62
|
+
|
|
63
|
+
### Query
|
|
64
|
+
```
|
|
65
|
+
bctl snapshot [--all] List interactive elements as e0, e1, … (alias: snap)
|
|
66
|
+
bctl text [sel] Get text content (default: body)
|
|
67
|
+
bctl html [sel] Get innerHTML
|
|
68
|
+
bctl attr <sel> [name] Get attribute(s) [-i N for Nth element]
|
|
69
|
+
bctl select <sel> [-l N] List matching elements (alias: sel, limit default: 20)
|
|
70
|
+
bctl count <sel> Count matching elements
|
|
71
|
+
bctl status Current page URL and title
|
|
72
|
+
bctl is-visible <sel> Check if element is visible (returns rect)
|
|
73
|
+
bctl get-value <sel> Get form element value (input/select/textarea)
|
|
74
|
+
```
|
|
75
|
+
|
|
76
|
+
### JavaScript
|
|
77
|
+
```
|
|
78
|
+
bctl eval <code> Execute JS in page context (MAIN world)
|
|
79
|
+
```
|
|
80
|
+
|
|
81
|
+
### Tabs
|
|
82
|
+
```
|
|
83
|
+
bctl tabs List all tabs (id, url, title, active)
|
|
84
|
+
bctl tab <id> Switch to tab
|
|
85
|
+
bctl new-tab [url] Open new tab
|
|
86
|
+
bctl close-tab [id] Close tab (default: active)
|
|
87
|
+
```
|
|
88
|
+
|
|
89
|
+
### Screenshot & Download
|
|
90
|
+
```
|
|
91
|
+
bctl screenshot [path] Capture screenshot (alias: ss)
|
|
92
|
+
bctl download <target> Download file/image (alias: dl) [-o path] [-i N]
|
|
93
|
+
bctl upload <sel> <files> Upload file(s) to <input type="file">
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
Downloads use `chrome.downloads` API and carry the browser's full auth session — use
|
|
97
|
+
this instead of `curl` for sites requiring login.
|
|
98
|
+
|
|
99
|
+
### Wait
|
|
100
|
+
```
|
|
101
|
+
bctl wait <sel|seconds> Wait for element or sleep [timeout]
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Dialog
|
|
105
|
+
```
|
|
106
|
+
bctl dialog [accept|dismiss] [--text <val>] Handle next alert/confirm/prompt
|
|
107
|
+
```
|
|
108
|
+
|
|
109
|
+
### Batch / Pipe
|
|
110
|
+
```
|
|
111
|
+
bctl pipe Read commands from stdin (one per line, JSONL output)
|
|
112
|
+
bctl batch '<c1>' '<c2>' Execute multiple commands in one call
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
Use `bctl pipe` for 2+ consecutive commands on the same page — merges into a single
|
|
116
|
+
browser call, reducing overhead by ~90%.
|
|
117
|
+
|
|
118
|
+
### Server
|
|
119
|
+
```
|
|
120
|
+
bctl ping Check server and extension status
|
|
121
|
+
bctl serve Start server (foreground)
|
|
122
|
+
bctl stop Stop server
|
|
123
|
+
```
|
|
124
|
+
|
|
125
|
+
## Output Format
|
|
126
|
+
|
|
127
|
+
All commands return JSON:
|
|
128
|
+
- Success: `{"success": true, "data": {...}}`
|
|
129
|
+
- Error: `{"success": false, "error": "..."}`
|
|
130
|
+
|
|
131
|
+
Parse with `jq`: `bctl status | jq -r '.data.title'`
|
|
132
|
+
|
|
133
|
+
## Best Practices
|
|
134
|
+
|
|
135
|
+
### Snapshot-first Workflow
|
|
136
|
+
Use `bctl snapshot` to get a numbered list of interactive elements, then operate by
|
|
137
|
+
ref. This eliminates guessing CSS selectors on unfamiliar pages:
|
|
138
|
+
```bash
|
|
139
|
+
bctl snapshot # List all interactive elements
|
|
140
|
+
bctl click e3 # Click the element with ref e3
|
|
141
|
+
bctl type e7 "hello world" # Type into the element with ref e7
|
|
142
|
+
```
|
|
143
|
+
|
|
144
|
+
### Click by Text (SPA-friendly)
|
|
145
|
+
Use `-t` to filter by visible text — ideal for SPAs where class names are dynamic:
|
|
146
|
+
```bash
|
|
147
|
+
bctl click "button" -t "Submit" # click button containing "Submit"
|
|
148
|
+
```
|
|
149
|
+
|
|
150
|
+
### SPA Video Sites (Tencent Video, Bilibili, etc.)
|
|
151
|
+
`bctl click` intercepts `window.open()` calls from SPA frameworks and opens the
|
|
152
|
+
target URL via `chrome.tabs.create`. Just click like a normal user:
|
|
153
|
+
```bash
|
|
154
|
+
bctl go "https://v.qq.com" && bctl wait 2
|
|
155
|
+
bctl type "input" "西游记" && bctl press Enter && bctl wait 3
|
|
156
|
+
bctl click ".root.list-item .poster-view" -i 0 # opens video in new tab
|
|
157
|
+
```
|
|
158
|
+
|
|
159
|
+
Fallback — extract content ID and navigate directly:
|
|
160
|
+
```bash
|
|
161
|
+
bctl attr ".root.list-item [dt-eid='poster']" "dt-params" | grep -o 'cid=[^&]*'
|
|
162
|
+
bctl go "https://v.qq.com/x/cover/<cid>.html"
|
|
163
|
+
```
|
|
164
|
+
|
|
165
|
+
### Waiting Strategy
|
|
166
|
+
- After navigation: `bctl wait 2` (2–3 seconds is usually enough) or `bctl wait "<selector>" 10`
|
|
167
|
+
- After hover for overlay: `bctl wait 1`
|
|
168
|
+
- AI generation: **poll** with `bctl wait 5 && bctl count "selector"` in a loop
|
|
169
|
+
|
|
170
|
+
### Data Extraction
|
|
171
|
+
Prefer `bctl select` over `bctl eval` — it's more reliable, works on all sites,
|
|
172
|
+
and returns text/href/id/class/aria-label automatically.
|
|
173
|
+
|
|
174
|
+
## Efficiency Tips
|
|
175
|
+
|
|
176
|
+
1. **NEVER screenshot to "see" the page.** Use `status` + `text` + `select` + `snapshot`.
|
|
177
|
+
2. **Use `count` before `click`** when you expect multiple matches.
|
|
178
|
+
3. **Use `download` for authenticated resources** — never `curl` from sites behind login.
|
|
179
|
+
4. **Use `hover` before clicking overlay buttons** — many UIs hide actions until hover.
|
|
180
|
+
5. **Check `tabs` after tab-opening actions** — popups may switch the active tab.
|
|
181
|
+
6. **Chain commands** with `&&`: `bctl go "https://example.com" && bctl wait 2 && bctl status`
|
|
182
|
+
|
|
183
|
+
## Known Limitations
|
|
184
|
+
|
|
185
|
+
- `eval` blocked by Trusted Types on some sites (Gemini, YouTube) — use `attr`/`select` instead
|
|
186
|
+
- `screenshot` captures visible viewport only — scroll for full-page capture
|
|
187
|
+
- Without `-i`, `click` always hits the FIRST match — use `count` to check first
|
|
188
|
+
|
|
189
|
+
## Error Handling
|
|
190
|
+
|
|
191
|
+
- `bctl ping` shows `"extension": false` → user must check Chrome and the extension
|
|
192
|
+
- Selector fails → use `bctl select` or `bctl count` to debug
|
|
193
|
+
- Dynamic content → use `bctl wait` before interacting
|
|
@@ -8,24 +8,23 @@ via HTTP POST to localhost:19876/command.
|
|
|
8
8
|
from __future__ import annotations
|
|
9
9
|
|
|
10
10
|
import argparse
|
|
11
|
-
import base64
|
|
12
11
|
import json
|
|
13
12
|
import os
|
|
14
|
-
import platform
|
|
15
13
|
import shlex
|
|
16
|
-
import shutil
|
|
17
|
-
import subprocess
|
|
18
14
|
import sys
|
|
19
15
|
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
16
|
+
# Lazy-loaded — avoids importing urllib/subprocess on module load.
|
|
17
|
+
# Populated on first use via _client().
|
|
18
|
+
_client_mod = None
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
def _client():
|
|
22
|
+
"""Lazy import of browser_ctl.client to avoid startup overhead."""
|
|
23
|
+
global _client_mod
|
|
24
|
+
if _client_mod is None:
|
|
25
|
+
from browser_ctl import client as _mod
|
|
26
|
+
_client_mod = _mod
|
|
27
|
+
return _client_mod
|
|
29
28
|
|
|
30
29
|
SKILL_TARGETS = {
|
|
31
30
|
"cursor": os.path.join(os.path.expanduser("~"), ".cursor", "skills-cursor"),
|
|
@@ -68,7 +67,7 @@ CONTENT_SCRIPT_OPS = frozenset({
|
|
|
68
67
|
|
|
69
68
|
def handle_pipe(args):
|
|
70
69
|
"""Read commands from stdin, execute them with smart batching, print JSONL."""
|
|
71
|
-
|
|
70
|
+
_client().ensure_server_optimistic()
|
|
72
71
|
parser = build_parser()
|
|
73
72
|
|
|
74
73
|
# Collect all commands first
|
|
@@ -93,7 +92,7 @@ def handle_pipe(args):
|
|
|
93
92
|
|
|
94
93
|
def handle_batch(args):
|
|
95
94
|
"""Execute multiple commands given as CLI arguments with smart batching."""
|
|
96
|
-
|
|
95
|
+
_client().ensure_server_optimistic()
|
|
97
96
|
parser = build_parser()
|
|
98
97
|
|
|
99
98
|
pending: list[tuple[str, dict]] = []
|
|
@@ -130,7 +129,7 @@ def _execute_with_batching(commands: list[tuple[str, dict]], continue_on_error:
|
|
|
130
129
|
|
|
131
130
|
if len(batch) == 1:
|
|
132
131
|
# Single command — use normal endpoint (no overhead)
|
|
133
|
-
result = send_raw(batch[0]["action"], batch[0]["params"])
|
|
132
|
+
result = _client().send_raw(batch[0]["action"], batch[0]["params"])
|
|
134
133
|
print(json.dumps(result, ensure_ascii=False), flush=True)
|
|
135
134
|
if not result.get("success"):
|
|
136
135
|
had_error = True
|
|
@@ -138,7 +137,7 @@ def _execute_with_batching(commands: list[tuple[str, dict]], continue_on_error:
|
|
|
138
137
|
sys.exit(1)
|
|
139
138
|
else:
|
|
140
139
|
# Multiple consecutive content-script ops — use /batch
|
|
141
|
-
result = send_batch(batch)
|
|
140
|
+
result = _client().send_batch(batch)
|
|
142
141
|
if result.get("success") and "results" in result.get("data", {}):
|
|
143
142
|
for r in result["data"]["results"]:
|
|
144
143
|
print(json.dumps(r, ensure_ascii=False), flush=True)
|
|
@@ -154,7 +153,7 @@ def _execute_with_batching(commands: list[tuple[str, dict]], continue_on_error:
|
|
|
154
153
|
sys.exit(1)
|
|
155
154
|
else:
|
|
156
155
|
# Non-batchable command — send individually
|
|
157
|
-
result = send_raw(action, params)
|
|
156
|
+
result = _client().send_raw(action, params)
|
|
158
157
|
print(json.dumps(result, ensure_ascii=False), flush=True)
|
|
159
158
|
if not result.get("success"):
|
|
160
159
|
had_error = True
|
|
@@ -352,14 +351,15 @@ def build_parser() -> argparse.ArgumentParser:
|
|
|
352
351
|
|
|
353
352
|
def handle_screenshot(args):
|
|
354
353
|
"""Screenshot needs special handling for file save."""
|
|
355
|
-
|
|
356
|
-
result = send_raw("screenshot", {})
|
|
354
|
+
_client().ensure_server_optimistic()
|
|
355
|
+
result = _client().send_raw("screenshot", {})
|
|
357
356
|
if not result.get("success"):
|
|
358
357
|
print(json.dumps(result, ensure_ascii=False))
|
|
359
358
|
sys.exit(1)
|
|
360
359
|
|
|
361
360
|
if args.path:
|
|
362
361
|
# Save to file
|
|
362
|
+
import base64
|
|
363
363
|
b64 = result["data"]["base64"]
|
|
364
364
|
img_bytes = base64.b64decode(b64)
|
|
365
365
|
with open(args.path, "wb") as f:
|
|
@@ -377,7 +377,7 @@ def handle_download(args):
|
|
|
377
377
|
we send only the basename to the extension and then move the downloaded
|
|
378
378
|
file to the requested location.
|
|
379
379
|
"""
|
|
380
|
-
|
|
380
|
+
_client().ensure_server_optimistic()
|
|
381
381
|
|
|
382
382
|
target = args.target
|
|
383
383
|
output = args.output
|
|
@@ -394,13 +394,14 @@ def handle_download(args):
|
|
|
394
394
|
else:
|
|
395
395
|
params["filename"] = output
|
|
396
396
|
|
|
397
|
-
result = send_raw("download", params)
|
|
397
|
+
result = _client().send_raw("download", params)
|
|
398
398
|
if not result.get("success"):
|
|
399
399
|
print(json.dumps(result, ensure_ascii=False))
|
|
400
400
|
sys.exit(1)
|
|
401
401
|
|
|
402
402
|
# Move downloaded file to the requested absolute path
|
|
403
403
|
if move_to and result.get("data", {}).get("filename"):
|
|
404
|
+
import shutil
|
|
404
405
|
src_path = result["data"]["filename"]
|
|
405
406
|
try:
|
|
406
407
|
shutil.move(src_path, move_to)
|
|
@@ -417,6 +418,7 @@ def handle_download(args):
|
|
|
417
418
|
|
|
418
419
|
def handle_serve(args):
|
|
419
420
|
"""Run server in foreground."""
|
|
421
|
+
from browser_ctl.client import DEFAULT_PORT
|
|
420
422
|
os.execvp(sys.executable, [sys.executable, "-m", "browser_ctl.server", "--port", str(DEFAULT_PORT)])
|
|
421
423
|
|
|
422
424
|
|
|
@@ -464,6 +466,11 @@ def _get_package_version() -> str:
|
|
|
464
466
|
|
|
465
467
|
def _install_extension() -> str | None:
|
|
466
468
|
"""Copy extension to ~/.browser-ctl/extension/ and try to open Chrome extensions page."""
|
|
469
|
+
import platform
|
|
470
|
+
import shutil
|
|
471
|
+
import subprocess
|
|
472
|
+
from browser_ctl.client import BCTL_HOME
|
|
473
|
+
|
|
467
474
|
src = _get_extension_source_dir()
|
|
468
475
|
if not src:
|
|
469
476
|
return None
|
|
@@ -513,6 +520,8 @@ def _install_extension() -> str | None:
|
|
|
513
520
|
|
|
514
521
|
def _install_skill(target_dir: str) -> str:
|
|
515
522
|
"""Copy SKILL.md into <target_dir>/browser-ctl/."""
|
|
523
|
+
import shutil
|
|
524
|
+
|
|
516
525
|
src = os.path.join(os.path.dirname(os.path.abspath(__file__)), "SKILL.md")
|
|
517
526
|
if not os.path.isfile(src):
|
|
518
527
|
raise FileNotFoundError("SKILL.md not found in browser_ctl package.")
|
|
@@ -687,7 +696,7 @@ def main():
|
|
|
687
696
|
handle_serve(args)
|
|
688
697
|
return
|
|
689
698
|
if cmd == "stop":
|
|
690
|
-
stop_server()
|
|
699
|
+
_client().stop_server()
|
|
691
700
|
return
|
|
692
701
|
|
|
693
702
|
# Screenshot (special handling)
|
|
@@ -712,7 +721,7 @@ def main():
|
|
|
712
721
|
|
|
713
722
|
# Standard command: parse args, send to server
|
|
714
723
|
action, params = args_to_action_params(cmd, args)
|
|
715
|
-
send_command(action, params)
|
|
724
|
+
_client().send_command(action, params)
|
|
716
725
|
|
|
717
726
|
|
|
718
727
|
if __name__ == "__main__":
|
|
@@ -2,25 +2,93 @@
|
|
|
2
2
|
|
|
3
3
|
Handles server lifecycle (start/stop/health) and command relay.
|
|
4
4
|
Zero external dependencies (stdlib only).
|
|
5
|
+
Uses raw sockets for minimal import overhead (~5ms vs ~30ms for urllib).
|
|
5
6
|
"""
|
|
6
7
|
|
|
7
8
|
from __future__ import annotations
|
|
8
9
|
|
|
9
10
|
import json
|
|
10
11
|
import os
|
|
11
|
-
import subprocess
|
|
12
|
+
import socket
|
|
12
13
|
import sys
|
|
13
|
-
import tempfile
|
|
14
|
-
import time
|
|
15
|
-
import urllib.error
|
|
16
|
-
import urllib.request
|
|
17
14
|
|
|
18
15
|
DEFAULT_PORT = 19876
|
|
19
|
-
|
|
20
|
-
PID_FILE = os.path.join(tempfile.gettempdir(), f"bctl-{DEFAULT_PORT}.pid")
|
|
16
|
+
_HOST = "127.0.0.1"
|
|
21
17
|
|
|
22
18
|
BCTL_HOME = os.path.join(os.path.expanduser("~"), ".browser-ctl")
|
|
23
19
|
|
|
20
|
+
|
|
21
|
+
def _pid_file() -> str:
|
|
22
|
+
import tempfile
|
|
23
|
+
return os.path.join(tempfile.gettempdir(), f"bctl-{DEFAULT_PORT}.pid")
|
|
24
|
+
|
|
25
|
+
|
|
26
|
+
# ---------------------------------------------------------------------------
|
|
27
|
+
# Lightweight HTTP via raw sockets (avoids importing urllib — saves ~25ms)
|
|
28
|
+
# ---------------------------------------------------------------------------
|
|
29
|
+
|
|
30
|
+
|
|
31
|
+
def _http_post(path: str, body: bytes, timeout: float = 35) -> dict:
|
|
32
|
+
"""Send HTTP POST to bridge server, return parsed JSON response."""
|
|
33
|
+
try:
|
|
34
|
+
sock = socket.create_connection((_HOST, DEFAULT_PORT), timeout=timeout)
|
|
35
|
+
except (ConnectionRefusedError, OSError):
|
|
36
|
+
return {"success": False, "error": "Cannot connect to server"}
|
|
37
|
+
try:
|
|
38
|
+
req = (
|
|
39
|
+
f"POST {path} HTTP/1.0\r\n"
|
|
40
|
+
f"Host: {_HOST}:{DEFAULT_PORT}\r\n"
|
|
41
|
+
f"Content-Type: application/json\r\n"
|
|
42
|
+
f"Content-Length: {len(body)}\r\n"
|
|
43
|
+
f"\r\n"
|
|
44
|
+
).encode("utf-8") + body
|
|
45
|
+
sock.sendall(req)
|
|
46
|
+
|
|
47
|
+
# Read response
|
|
48
|
+
chunks = []
|
|
49
|
+
while True:
|
|
50
|
+
chunk = sock.recv(65536)
|
|
51
|
+
if not chunk:
|
|
52
|
+
break
|
|
53
|
+
chunks.append(chunk)
|
|
54
|
+
data = b"".join(chunks)
|
|
55
|
+
finally:
|
|
56
|
+
sock.close()
|
|
57
|
+
|
|
58
|
+
# Parse HTTP response — skip headers, find JSON body
|
|
59
|
+
parts = data.split(b"\r\n\r\n", 1)
|
|
60
|
+
if len(parts) < 2:
|
|
61
|
+
return {"success": False, "error": "Invalid response from server"}
|
|
62
|
+
try:
|
|
63
|
+
return json.loads(parts[1].decode("utf-8"))
|
|
64
|
+
except (json.JSONDecodeError, UnicodeDecodeError):
|
|
65
|
+
return {"success": False, "error": "Invalid response from server"}
|
|
66
|
+
|
|
67
|
+
|
|
68
|
+
def _http_get(path: str, timeout: float = 1) -> int:
|
|
69
|
+
"""Send HTTP GET, return status code (0 on failure)."""
|
|
70
|
+
try:
|
|
71
|
+
sock = socket.create_connection((_HOST, DEFAULT_PORT), timeout=timeout)
|
|
72
|
+
except (ConnectionRefusedError, OSError):
|
|
73
|
+
return 0
|
|
74
|
+
try:
|
|
75
|
+
req = (
|
|
76
|
+
f"GET {path} HTTP/1.0\r\n"
|
|
77
|
+
f"Host: {_HOST}:{DEFAULT_PORT}\r\n"
|
|
78
|
+
f"\r\n"
|
|
79
|
+
).encode("utf-8")
|
|
80
|
+
sock.sendall(req)
|
|
81
|
+
# Only need the status line
|
|
82
|
+
resp = sock.recv(1024)
|
|
83
|
+
finally:
|
|
84
|
+
sock.close()
|
|
85
|
+
try:
|
|
86
|
+
status_line = resp.split(b"\r\n", 1)[0]
|
|
87
|
+
return int(status_line.split(b" ", 2)[1])
|
|
88
|
+
except (IndexError, ValueError):
|
|
89
|
+
return 0
|
|
90
|
+
|
|
91
|
+
|
|
24
92
|
# ---------------------------------------------------------------------------
|
|
25
93
|
# Server management
|
|
26
94
|
# ---------------------------------------------------------------------------
|
|
@@ -28,23 +96,17 @@ BCTL_HOME = os.path.join(os.path.expanduser("~"), ".browser-ctl")
|
|
|
28
96
|
|
|
29
97
|
def is_server_running() -> bool:
|
|
30
98
|
"""Check if bridge server is running (PID exists AND HTTP health check passes)."""
|
|
31
|
-
|
|
99
|
+
pid_file = _pid_file()
|
|
100
|
+
if not os.path.exists(pid_file):
|
|
32
101
|
return False
|
|
33
102
|
try:
|
|
34
|
-
with open(PID_FILE) as f:
|
|
103
|
+
with open(pid_file) as f:
|
|
35
104
|
pid = int(f.read().strip())
|
|
36
105
|
os.kill(pid, 0) # Check process exists
|
|
37
106
|
except (OSError, ValueError):
|
|
38
107
|
return False
|
|
39
108
|
# Process exists — verify it is actually accepting HTTP connections.
|
|
40
|
-
|
|
41
|
-
# shutting down (port already closed).
|
|
42
|
-
try:
|
|
43
|
-
req = urllib.request.Request(f"{SERVER_URL}/health")
|
|
44
|
-
resp = urllib.request.urlopen(req, timeout=1)
|
|
45
|
-
return resp.status == 200
|
|
46
|
-
except Exception:
|
|
47
|
-
return False
|
|
109
|
+
return _http_get("/health") == 200
|
|
48
110
|
|
|
49
111
|
|
|
50
112
|
def start_server() -> bool:
|
|
@@ -52,6 +114,7 @@ def start_server() -> bool:
|
|
|
52
114
|
if is_server_running():
|
|
53
115
|
return False
|
|
54
116
|
|
|
117
|
+
import subprocess
|
|
55
118
|
cmd = [sys.executable, "-m", "browser_ctl.server", "--port", str(DEFAULT_PORT), "--daemon"]
|
|
56
119
|
subprocess.Popen(
|
|
57
120
|
cmd,
|
|
@@ -61,15 +124,11 @@ def start_server() -> bool:
|
|
|
61
124
|
)
|
|
62
125
|
|
|
63
126
|
# Wait for server to become responsive
|
|
127
|
+
import time
|
|
64
128
|
for _ in range(60): # 3 seconds max
|
|
65
129
|
time.sleep(0.05)
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
resp = urllib.request.urlopen(req, timeout=0.5)
|
|
69
|
-
if resp.status == 200:
|
|
70
|
-
return True
|
|
71
|
-
except Exception:
|
|
72
|
-
pass
|
|
130
|
+
if _http_get("/health") == 200:
|
|
131
|
+
return True
|
|
73
132
|
|
|
74
133
|
print(json.dumps({"success": False, "error": "Failed to start bridge server"}))
|
|
75
134
|
sys.exit(1)
|
|
@@ -83,6 +142,7 @@ def stop_server():
|
|
|
83
142
|
result = send_raw("shutdown", {})
|
|
84
143
|
if result.get("success"):
|
|
85
144
|
# Wait briefly for server to fully stop and clean up PID file
|
|
145
|
+
import time
|
|
86
146
|
for _ in range(20):
|
|
87
147
|
time.sleep(0.05)
|
|
88
148
|
if not is_server_running():
|
|
@@ -106,24 +166,16 @@ def ensure_server():
|
|
|
106
166
|
def send_raw(action: str, params: dict) -> dict:
|
|
107
167
|
"""Send command to bridge server, return parsed response."""
|
|
108
168
|
body = json.dumps({"action": action, "params": params}).encode("utf-8")
|
|
109
|
-
|
|
110
|
-
f"{SERVER_URL}/command",
|
|
111
|
-
data=body,
|
|
112
|
-
headers={"Content-Type": "application/json"},
|
|
113
|
-
)
|
|
114
|
-
try:
|
|
115
|
-
resp = urllib.request.urlopen(req, timeout=35)
|
|
116
|
-
return json.loads(resp.read().decode("utf-8"))
|
|
117
|
-
except urllib.error.URLError as e:
|
|
118
|
-
return {"success": False, "error": f"Cannot connect to server: {e}"}
|
|
119
|
-
except json.JSONDecodeError:
|
|
120
|
-
return {"success": False, "error": "Invalid response from server"}
|
|
169
|
+
return _http_post("/command", body)
|
|
121
170
|
|
|
122
171
|
|
|
123
172
|
def send_command(action: str, params: dict):
|
|
124
|
-
"""
|
|
125
|
-
ensure_server()
|
|
173
|
+
"""Optimistic send: try command first, start server only on failure."""
|
|
126
174
|
result = send_raw(action, params)
|
|
175
|
+
if not result.get("success") and "Cannot connect" in result.get("error", ""):
|
|
176
|
+
# Server not running — start it and retry
|
|
177
|
+
start_server()
|
|
178
|
+
result = send_raw(action, params)
|
|
127
179
|
print(json.dumps(result, ensure_ascii=False))
|
|
128
180
|
if not result.get("success"):
|
|
129
181
|
sys.exit(1)
|
|
@@ -132,15 +184,11 @@ def send_command(action: str, params: dict):
|
|
|
132
184
|
def send_batch(commands: list[dict]) -> dict:
|
|
133
185
|
"""Send multiple commands to /batch endpoint, return parsed response."""
|
|
134
186
|
body = json.dumps({"commands": commands}).encode("utf-8")
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
143
|
-
except urllib.error.URLError as e:
|
|
144
|
-
return {"success": False, "error": f"Cannot connect to server: {e}"}
|
|
145
|
-
except json.JSONDecodeError:
|
|
146
|
-
return {"success": False, "error": "Invalid response from server"}
|
|
187
|
+
return _http_post("/batch", body, timeout=120)
|
|
188
|
+
|
|
189
|
+
|
|
190
|
+
def ensure_server_optimistic() -> None:
|
|
191
|
+
"""Start server if not running. Optimistic — only checks on first call."""
|
|
192
|
+
result = send_raw("ping", {})
|
|
193
|
+
if not result.get("success") and "Cannot connect" in result.get("error", ""):
|
|
194
|
+
start_server()
|
|
@@ -143,9 +143,9 @@ async function handleAction(action, params) {
|
|
|
143
143
|
case "reload":
|
|
144
144
|
return await doReload();
|
|
145
145
|
|
|
146
|
-
// -- Interaction
|
|
146
|
+
// -- Interaction --
|
|
147
147
|
case "click":
|
|
148
|
-
return await
|
|
148
|
+
return await doClick(params);
|
|
149
149
|
case "hover":
|
|
150
150
|
return await runInPage("hover", params);
|
|
151
151
|
case "type":
|
|
@@ -438,6 +438,128 @@ function waitForDownload(downloadId, timeoutMs = 30000) {
|
|
|
438
438
|
});
|
|
439
439
|
}
|
|
440
440
|
|
|
441
|
+
// ---------------------------------------------------------------------------
|
|
442
|
+
// Click — Three-phase approach for maximum SPA compatibility:
|
|
443
|
+
// Phase 1 (ISOLATED): Find element, scrollIntoView, dispatch pointer/mouse events
|
|
444
|
+
// Phase 2 (MAIN): Hook window.open to capture blocked popup URLs, then dispatch
|
|
445
|
+
// click event. The click triggers the site's normal handler which
|
|
446
|
+
// may call window.open() — but since isTrusted=false, Chrome blocks
|
|
447
|
+
// the popup. Our hook captures the URL instead.
|
|
448
|
+
// Phase 3 (background): If window.open was intercepted, navigate via chrome.tabs.
|
|
449
|
+
// ---------------------------------------------------------------------------
|
|
450
|
+
|
|
451
|
+
async function doClick(params) {
|
|
452
|
+
const tab = await activeTab();
|
|
453
|
+
|
|
454
|
+
// Phase 1: Find element + dispatch pointer/mouse events (ISOLATED world)
|
|
455
|
+
const phase1 = await chrome.scripting.executeScript({
|
|
456
|
+
target: { tabId: tab.id },
|
|
457
|
+
func: (selector, index, textFilter) => {
|
|
458
|
+
function qs(sel, idx, tf) {
|
|
459
|
+
if (sel && /^e\d+$/.test(sel)) {
|
|
460
|
+
const el = document.querySelector(`[data-bctl-ref="${sel}"]`);
|
|
461
|
+
if (!el) throw new Error(`Ref not found: ${sel}`);
|
|
462
|
+
return el;
|
|
463
|
+
}
|
|
464
|
+
if (!sel) return document.body;
|
|
465
|
+
let els = Array.from(document.querySelectorAll(sel));
|
|
466
|
+
if (tf) {
|
|
467
|
+
const lc = tf.toLowerCase();
|
|
468
|
+
els = els.filter((e) => e.textContent && e.textContent.toLowerCase().includes(lc));
|
|
469
|
+
}
|
|
470
|
+
if (!els.length) throw new Error(`Element not found: ${sel}${tf ? ` (text: "${tf}")` : ""}`);
|
|
471
|
+
const i = idx ?? 0;
|
|
472
|
+
const actual = i < 0 ? els.length + i : i;
|
|
473
|
+
if (actual < 0 || actual >= els.length)
|
|
474
|
+
throw new Error(`Index ${i} out of range (0..${els.length - 1}) for: ${sel}`);
|
|
475
|
+
return els[actual];
|
|
476
|
+
}
|
|
477
|
+
try {
|
|
478
|
+
const el = qs(selector, index, textFilter);
|
|
479
|
+
el.scrollIntoView({ block: "center", behavior: "instant" });
|
|
480
|
+
const rect = el.getBoundingClientRect();
|
|
481
|
+
const cx = rect.left + rect.width / 2;
|
|
482
|
+
const cy = rect.top + rect.height / 2;
|
|
483
|
+
const mOpts = { bubbles: true, cancelable: true, clientX: cx, clientY: cy, button: 0 };
|
|
484
|
+
el.dispatchEvent(new PointerEvent("pointerdown", { ...mOpts, pointerId: 1 }));
|
|
485
|
+
el.dispatchEvent(new MouseEvent("mousedown", mOpts));
|
|
486
|
+
el.dispatchEvent(new PointerEvent("pointerup", { ...mOpts, pointerId: 1 }));
|
|
487
|
+
el.dispatchEvent(new MouseEvent("mouseup", mOpts));
|
|
488
|
+
el.setAttribute("data-bctl-click-target", "1");
|
|
489
|
+
const total = selector ? document.querySelectorAll(selector).length : 1;
|
|
490
|
+
return { success: true, cx, cy, total };
|
|
491
|
+
} catch (e) {
|
|
492
|
+
return { success: false, error: e.message };
|
|
493
|
+
}
|
|
494
|
+
},
|
|
495
|
+
args: [params.selector, params.index, params.text],
|
|
496
|
+
});
|
|
497
|
+
|
|
498
|
+
const info = phase1[0]?.result;
|
|
499
|
+
if (!info || !info.success) throw new Error(info?.error || "Failed to locate element");
|
|
500
|
+
|
|
501
|
+
// Phase 2a: Install window.open hook in MAIN world (persists across ticks)
|
|
502
|
+
await chrome.scripting.executeScript({
|
|
503
|
+
target: { tabId: tab.id },
|
|
504
|
+
func: () => {
|
|
505
|
+
window.__bctlOrigOpen = window.open;
|
|
506
|
+
window.__bctlCapturedUrl = null;
|
|
507
|
+
window.open = function (url) {
|
|
508
|
+
if (url && typeof url === "string" && url.startsWith("http")) {
|
|
509
|
+
window.__bctlCapturedUrl = url;
|
|
510
|
+
}
|
|
511
|
+
return null; // Block the popup — we'll navigate via chrome.tabs
|
|
512
|
+
};
|
|
513
|
+
},
|
|
514
|
+
world: "MAIN",
|
|
515
|
+
});
|
|
516
|
+
|
|
517
|
+
// Phase 2b: Dispatch click in MAIN world (site's async handler will call
|
|
518
|
+
// window.open on a later tick — our hook persists and captures it)
|
|
519
|
+
await chrome.scripting.executeScript({
|
|
520
|
+
target: { tabId: tab.id },
|
|
521
|
+
func: (cx, cy) => {
|
|
522
|
+
const el = document.querySelector("[data-bctl-click-target]");
|
|
523
|
+
if (el) {
|
|
524
|
+
el.removeAttribute("data-bctl-click-target");
|
|
525
|
+
const opts = { bubbles: true, cancelable: true, clientX: cx, clientY: cy, button: 0, view: window };
|
|
526
|
+
el.dispatchEvent(new MouseEvent("click", opts));
|
|
527
|
+
}
|
|
528
|
+
},
|
|
529
|
+
args: [info.cx, info.cy],
|
|
530
|
+
world: "MAIN",
|
|
531
|
+
});
|
|
532
|
+
|
|
533
|
+
// Phase 2c: Wait for async handlers, then read captured URL and restore
|
|
534
|
+
await new Promise((r) => setTimeout(r, 200));
|
|
535
|
+
|
|
536
|
+
const phase2c = await chrome.scripting.executeScript({
|
|
537
|
+
target: { tabId: tab.id },
|
|
538
|
+
func: () => {
|
|
539
|
+
const url = window.__bctlCapturedUrl;
|
|
540
|
+
window.open = window.__bctlOrigOpen;
|
|
541
|
+
delete window.__bctlCapturedUrl;
|
|
542
|
+
delete window.__bctlOrigOpen;
|
|
543
|
+
return { capturedUrl: url };
|
|
544
|
+
},
|
|
545
|
+
world: "MAIN",
|
|
546
|
+
});
|
|
547
|
+
|
|
548
|
+
const clickResult = phase2c[0]?.result;
|
|
549
|
+
|
|
550
|
+
// Phase 3: If window.open was intercepted, navigate via chrome.tabs
|
|
551
|
+
if (clickResult?.capturedUrl) {
|
|
552
|
+
await chrome.tabs.create({ url: clickResult.capturedUrl, active: true });
|
|
553
|
+
}
|
|
554
|
+
|
|
555
|
+
return {
|
|
556
|
+
clicked: params.selector || "body",
|
|
557
|
+
index: params.index ?? 0,
|
|
558
|
+
total: info.total,
|
|
559
|
+
text: params.text || null,
|
|
560
|
+
};
|
|
561
|
+
}
|
|
562
|
+
|
|
441
563
|
// ---------------------------------------------------------------------------
|
|
442
564
|
// Press key (via debugger or content script)
|
|
443
565
|
// ---------------------------------------------------------------------------
|
|
@@ -792,17 +914,20 @@ async function contentScriptHandler(commands) {
|
|
|
792
914
|
case "click": {
|
|
793
915
|
const el = qs(params.selector, params.index, params.text);
|
|
794
916
|
el.scrollIntoView({ block: "center", behavior: "instant" });
|
|
795
|
-
// Dispatch full pointer/mouse sequence
|
|
917
|
+
// Dispatch full pointer/mouse sequence for Vue/React SPA compatibility.
|
|
918
|
+
// We dispatch a MouseEvent("click") WITH coordinates first (for frameworks
|
|
919
|
+
// that use event delegation based on clientX/clientY, e.g. Tencent Video),
|
|
796
920
|
// then call native el.click() which produces a trusted (isTrusted:true) event
|
|
797
|
-
//
|
|
921
|
+
// (for sites like GitHub that require trusted events).
|
|
798
922
|
const rect = el.getBoundingClientRect();
|
|
799
923
|
const cx = rect.left + rect.width / 2;
|
|
800
924
|
const cy = rect.top + rect.height / 2;
|
|
801
|
-
const mOpts = { bubbles: true, cancelable: true, clientX: cx, clientY: cy, button: 0 };
|
|
925
|
+
const mOpts = { bubbles: true, cancelable: true, clientX: cx, clientY: cy, button: 0, view: window };
|
|
802
926
|
el.dispatchEvent(new PointerEvent("pointerdown", { ...mOpts, pointerId: 1 }));
|
|
803
927
|
el.dispatchEvent(new MouseEvent("mousedown", mOpts));
|
|
804
928
|
el.dispatchEvent(new PointerEvent("pointerup", { ...mOpts, pointerId: 1 }));
|
|
805
929
|
el.dispatchEvent(new MouseEvent("mouseup", mOpts));
|
|
930
|
+
el.dispatchEvent(new MouseEvent("click", mOpts));
|
|
806
931
|
el.click();
|
|
807
932
|
const total = params.selector ? document.querySelectorAll(params.selector).length : 1;
|
|
808
933
|
return { clicked: params.selector || "body", index: params.index ?? 0, total, text: params.text || null };
|
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
{
|
|
2
2
|
"manifest_version": 3,
|
|
3
3
|
"name": "Browser-Ctl",
|
|
4
|
-
"version": "0.2.
|
|
4
|
+
"version": "0.2.6",
|
|
5
5
|
"description": "Developer tool for CLI-driven browser automation. Control Chrome via command-line — navigate, click, type, query DOM, capture screenshots, and download files, all through a local WebSocket bridge.",
|
|
6
6
|
"permissions": [
|
|
7
7
|
"tabs",
|
|
@@ -189,13 +189,15 @@ async def health_handler(request: web.Request) -> web.Response:
|
|
|
189
189
|
# Helpers
|
|
190
190
|
# ---------------------------------------------------------------------------
|
|
191
191
|
|
|
192
|
-
# Operations that run inside content scripts and can be batched
|
|
193
|
-
# Operations executed inside content scripts — can be batched into a single
|
|
192
|
+
# Operations that run inside content scripts and can be batched into a single
|
|
194
193
|
# chrome.scripting.executeScript call. "eval" is excluded because it uses
|
|
195
194
|
# MAIN-world script-tag injection + CDP debugger fallback.
|
|
196
195
|
_CONTENT_SCRIPT_OPS = frozenset({
|
|
197
|
-
"click", "
|
|
198
|
-
"
|
|
196
|
+
"click", "dblclick", "hover", "focus", "type", "input-text",
|
|
197
|
+
"press", "check", "uncheck",
|
|
198
|
+
"text", "html", "attr", "select", "count", "snapshot",
|
|
199
|
+
"is-visible", "get-value",
|
|
200
|
+
"scroll", "select-option", "drag", "wait",
|
|
199
201
|
})
|
|
200
202
|
|
|
201
203
|
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: browser-ctl
|
|
3
|
-
Version: 0.2.4
|
|
3
|
+
Version: 0.2.6
|
|
4
4
|
Summary: Control your browser from the command line via a Chrome extension + WebSocket bridge
|
|
5
5
|
Author-email: geb <853934146@qq.com>
|
|
6
6
|
License-Expression: MIT
|
|
@@ -68,6 +68,7 @@ Tools like [browser-use](https://github.com/browser-use/browser-use), [Playwrigh
|
|
|
68
68
|
| **Complex SDK integration** — requires importing libraries and writing async code | browser-use, Stagehand | Pure CLI with JSON output — any LLM can call `bctl click "button"` |
|
|
69
69
|
| **Heavy dependencies** — Playwright alone pulls ~50 MB of packages + browser binary | Playwright, Puppeteer | CLI is stdlib-only; server needs only `aiohttp` |
|
|
70
70
|
| **Token-inefficient for LLMs** — verbose API calls waste context window tokens | SDK-based tools | Concise commands: `bctl text h1` vs pages of boilerplate |
|
|
71
|
+
| **Broken clicks on SPAs** — programmatic clicks get blocked by popup blockers | Puppeteer, Playwright | Intercepts `window.open()` and navigates via `chrome.tabs` — SPA-compatible |
|
|
71
72
|
|
|
72
73
|
<br>
|
|
73
74
|
|
|
@@ -218,6 +219,10 @@ All `<sel>` arguments accept CSS selectors **or** element refs from `snapshot` (
|
|
|
218
219
|
| `bctl ping` | Check server & extension status |
|
|
219
220
|
| `bctl serve` | Start server in foreground |
|
|
220
221
|
| `bctl stop` | Stop server |
|
|
222
|
+
| `bctl setup` | Install extension to `~/.browser-ctl/extension/` + open Chrome extensions page |
|
|
223
|
+
| `bctl setup cursor` | Install AI skill (`SKILL.md`) into Cursor IDE |
|
|
224
|
+
| `bctl setup opencode` | Install AI skill into OpenCode |
|
|
225
|
+
| `bctl setup <path>` | Install AI skill to a custom directory |
|
|
221
226
|
|
|
222
227
|
<br>
|
|
223
228
|
|
|
@@ -379,9 +384,10 @@ Non-zero exit code on errors — works naturally with `set -e` and `&&` chains.
|
|
|
379
384
|
|
|
380
385
|
| Component | Details |
|
|
381
386
|
|-----------|---------|
|
|
382
|
-
| **CLI** | Stdlib only,
|
|
387
|
+
| **CLI** | Stdlib only, raw-socket HTTP (zero heavy imports, ~5ms cold start) |
|
|
383
388
|
| **Bridge Server** | Async relay (aiohttp), auto-daemonizes |
|
|
384
389
|
| **Extension** | MV3 service worker, auto-reconnects via `chrome.alarms` |
|
|
390
|
+
| **Click** | Three-phase: pointer events → MAIN-world click → `window.open()` interception for SPA compatibility |
|
|
385
391
|
| **Eval** | Dual strategy: MAIN-world injection (fast) + CDP fallback (CSP-safe) |
|
|
386
392
|
|
|
387
393
|
<br>
|
|
@@ -1,238 +0,0 @@
|
|
|
1
|
-
# browser-ctl
|
|
2
|
-
|
|
3
|
-
CLI tool for browser automation. Control Chrome from the terminal via `bctl` commands.
|
|
4
|
-
All commands communicate through a Chrome extension + WebSocket bridge and return JSON.
|
|
5
|
-
|
|
6
|
-
## When to Use
|
|
7
|
-
|
|
8
|
-
Use browser-ctl when you need to:
|
|
9
|
-
- Navigate web pages, click elements, type text, press keys
|
|
10
|
-
- Snapshot interactive elements and operate them by ref (e0, e1, …)
|
|
11
|
-
- Query the DOM: get text, HTML, attributes, values, or count elements
|
|
12
|
-
- Take screenshots or download files (preserves browser auth/cookies)
|
|
13
|
-
- Execute arbitrary JavaScript in the page context
|
|
14
|
-
- Manage browser tabs (list, switch, open, close)
|
|
15
|
-
- Automate browser workflows for testing or data extraction
|
|
16
|
-
|
|
17
|
-
## Prerequisites
|
|
18
|
-
|
|
19
|
-
- Chrome with the Browser-Ctl extension loaded
|
|
20
|
-
- Bridge server (auto-starts with any `bctl` command)
|
|
21
|
-
|
|
22
|
-
## Commands
|
|
23
|
-
|
|
24
|
-
### Navigation
|
|
25
|
-
```
|
|
26
|
-
bctl navigate <url> Navigate to URL (aliases: nav, go; auto-prepends https://)
|
|
27
|
-
bctl back Go back
|
|
28
|
-
bctl forward Go forward (alias: fwd)
|
|
29
|
-
bctl reload Reload page
|
|
30
|
-
```
|
|
31
|
-
|
|
32
|
-
### Interaction
|
|
33
|
-
All `<sel>` arguments accept CSS selectors or element refs (e.g. `e5` from `snapshot`).
|
|
34
|
-
```
|
|
35
|
-
bctl click <sel> [-i N] [-t text] Click element; -t filters by visible text (substring)
|
|
36
|
-
bctl dblclick <sel> [-i N] [-t text] Double-click element
|
|
37
|
-
bctl hover <sel> [-i N] [-t text] Hover over element
|
|
38
|
-
bctl focus <sel> [-i N] [-t text] Focus element
|
|
39
|
-
bctl type <sel> <text> Type text (replaces existing; React-compatible)
|
|
40
|
-
bctl input-text <sel> <text> [--clear] [--delay ms] Char-by-char typing (rich text editors)
|
|
41
|
-
bctl press <key> Press key — Enter submits forms, Escape closes dialogs
|
|
42
|
-
bctl check <sel> [-i N] [-t text] Check checkbox/radio
|
|
43
|
-
bctl uncheck <sel> [-i N] [-t text] Uncheck checkbox
|
|
44
|
-
bctl scroll <dir|sel> [n] Scroll page: up/down/top/bottom or element into view
|
|
45
|
-
bctl select-option <sel> <val> [--text] Select <select> dropdown option (alias: sopt)
|
|
46
|
-
bctl drag <src> [target] Drag element to target [--dx N --dy N for offset]
|
|
47
|
-
```
|
|
48
|
-
|
|
49
|
-
### Query
|
|
50
|
-
```
|
|
51
|
-
bctl snapshot [--all] List interactive elements with refs e0, e1, … (alias: snap)
|
|
52
|
-
bctl text [sel] Get text content (default: body)
|
|
53
|
-
bctl html [sel] Get innerHTML
|
|
54
|
-
bctl attr <sel> [name] Get attribute(s) [-i N for Nth element]
|
|
55
|
-
bctl select <sel> [-l N] List matching elements (alias: sel, limit default: 20)
|
|
56
|
-
bctl count <sel> Count matching elements
|
|
57
|
-
bctl status Current page URL and title
|
|
58
|
-
bctl is-visible <sel> Check if element is visible (returns rect)
|
|
59
|
-
bctl get-value <sel> Get value of form element (input/select/textarea)
|
|
60
|
-
```
|
|
61
|
-
|
|
62
|
-
### JavaScript
|
|
63
|
-
```
|
|
64
|
-
bctl eval <code> Execute JS in page context
|
|
65
|
-
```
|
|
66
|
-
|
|
67
|
-
### Tabs
|
|
68
|
-
```
|
|
69
|
-
bctl tabs List all tabs
|
|
70
|
-
bctl tab <id> Switch to tab
|
|
71
|
-
bctl new-tab [url] Open new tab
|
|
72
|
-
bctl close-tab [id] Close tab (default: active)
|
|
73
|
-
```
|
|
74
|
-
|
|
75
|
-
### Screenshot & Download
|
|
76
|
-
```
|
|
77
|
-
bctl screenshot [path] Capture screenshot (alias: ss)
|
|
78
|
-
bctl download <target> Download file/image (alias: dl) [-o path] [-i N]
|
|
79
|
-
bctl upload <sel> <files> Upload file(s) to <input type="file">
|
|
80
|
-
```
|
|
81
|
-
|
|
82
|
-
### Wait
|
|
83
|
-
```
|
|
84
|
-
bctl wait <sel|seconds> Wait for element or sleep [timeout]
|
|
85
|
-
```
|
|
86
|
-
|
|
87
|
-
### Dialog
|
|
88
|
-
```
|
|
89
|
-
bctl dialog [accept|dismiss] [--text <val>] Handle next alert/confirm/prompt
|
|
90
|
-
```
|
|
91
|
-
|
|
92
|
-
### Batch / Pipe
|
|
93
|
-
```
|
|
94
|
-
bctl pipe Read commands from stdin (one per line, JSONL output)
|
|
95
|
-
bctl batch '<c1>' '<c2>' Execute multiple commands in one call
|
|
96
|
-
```
|
|
97
|
-
|
|
98
|
-
### Server
|
|
99
|
-
```
|
|
100
|
-
bctl ping Check server and extension status
|
|
101
|
-
bctl serve Start server (foreground)
|
|
102
|
-
bctl stop Stop server
|
|
103
|
-
```
|
|
104
|
-
|
|
105
|
-
## Output Format
|
|
106
|
-
|
|
107
|
-
All commands return JSON:
|
|
108
|
-
- Success: `{"success": true, "data": {...}}`
|
|
109
|
-
- Error: `{"success": false, "error": "..."}`
|
|
110
|
-
|
|
111
|
-
## Tips & Best Practices
|
|
112
|
-
|
|
113
|
-
### Snapshot-first Workflow (recommended for AI agents)
|
|
114
|
-
- **Use `bctl snapshot` to get a numbered list of interactive elements**, then operate
|
|
115
|
-
by ref (e.g. `bctl click e5`). This eliminates guessing CSS selectors.
|
|
116
|
-
- Refs are assigned as `data-bctl-ref` attributes and persist until the next snapshot.
|
|
117
|
-
- Example:
|
|
118
|
-
```bash
|
|
119
|
-
bctl snapshot # List all interactive elements
|
|
120
|
-
bctl click e3 # Click the 3rd interactive element
|
|
121
|
-
bctl type e7 "hello world" # Type into the 7th element
|
|
122
|
-
bctl input-text e7 "hello" --clear --delay 20 # Char-by-char for rich editors
|
|
123
|
-
```
|
|
124
|
-
|
|
125
|
-
### Data Extraction
|
|
126
|
-
- **Prefer `bctl select` over `bctl eval`** for extracting structured DOM data — it's
|
|
127
|
-
more reliable across all sites, returns text/href/id/class/aria-label automatically,
|
|
128
|
-
and doesn't require complex JS strings.
|
|
129
|
-
- Use `bctl text <sel>` for simple text extraction and `bctl attr <sel> [name]` for
|
|
130
|
-
specific attributes. Chain with `-i N` for Nth element.
|
|
131
|
-
- Reserve `bctl eval` for cases that truly need complex JS logic (e.g. mapping/filtering,
|
|
132
|
-
accessing page-defined variables, or computing derived values).
|
|
133
|
-
|
|
134
|
-
### Search & Scrape Workflow
|
|
135
|
-
A typical pattern for searching a site and extracting results:
|
|
136
|
-
```bash
|
|
137
|
-
bctl go "https://site.com/search?q=keyword" # Navigate
|
|
138
|
-
bctl wait ".results" 10 # Wait for results
|
|
139
|
-
bctl select ".result-item a" -l 10 # Extract links
|
|
140
|
-
bctl attr ".result-item a" href -i 0 # Get specific attribute
|
|
141
|
-
```
|
|
142
|
-
|
|
143
|
-
### Waiting Strategy
|
|
144
|
-
- Always `bctl wait <selector> [timeout]` or `bctl wait <seconds>` after navigation
|
|
145
|
-
before querying — SPAs like YouTube take time to render content.
|
|
146
|
-
- Prefer waiting for a specific element over a fixed delay when possible.
|
|
147
|
-
|
|
148
|
-
### Clicking by Text (SPA-friendly)
|
|
149
|
-
- Use `--text` (`-t`) to filter elements by visible text — ideal for SPAs (React,
|
|
150
|
-
Vue, etc.) where CSS class names are dynamically generated and unreliable.
|
|
151
|
-
- Example: `bctl click "button" -t "Submit"` clicks the first `<button>` whose
|
|
152
|
-
visible text contains "Submit" (case-insensitive substring match).
|
|
153
|
-
- This avoids fragile selectors like `button.css-1a2b3c4` and eliminates the need
|
|
154
|
-
for `bctl eval 'document.querySelector(...).click()'` workarounds.
|
|
155
|
-
|
|
156
|
-
### Batch / Pipe (prefer for multi-step workflows)
|
|
157
|
-
- **Always use `bctl pipe` when performing 2+ consecutive commands** on the same
|
|
158
|
-
page. Consecutive DOM operations (click, type, scroll, wait…) are automatically
|
|
159
|
-
merged into a single browser call, reducing overhead by ~90%.
|
|
160
|
-
- Pipe reads from stdin, one command per line (`#` comments and blank lines OK).
|
|
161
|
-
Each line is a normal bctl command without the `bctl` prefix.
|
|
162
|
-
- Output is JSONL — one JSON object per command.
|
|
163
|
-
- Example (fill a form in one shot):
|
|
164
|
-
```
|
|
165
|
-
bctl pipe <<'EOF'
|
|
166
|
-
type "#email" "user@example.com"
|
|
167
|
-
type "#password" "secret"
|
|
168
|
-
click "button[type=submit]"
|
|
169
|
-
EOF
|
|
170
|
-
```
|
|
171
|
-
|
|
172
|
-
### Shell Quoting
|
|
173
|
-
- Wrap CSS selectors in double quotes: `bctl click "button.submit"`
|
|
174
|
-
- For `bctl eval`, use double quotes for the outer string and single quotes inside:
|
|
175
|
-
`bctl eval "document.querySelector('h1').textContent"`
|
|
176
|
-
|
|
177
|
-
## Examples
|
|
178
|
-
|
|
179
|
-
```bash
|
|
180
|
-
# Navigate and inspect
|
|
181
|
-
bctl go https://example.com
|
|
182
|
-
bctl status
|
|
183
|
-
bctl text h1
|
|
184
|
-
|
|
185
|
-
# Snapshot workflow (recommended)
|
|
186
|
-
bctl snapshot # See all interactive elements as e0, e1, …
|
|
187
|
-
bctl click e3 # Click element by ref
|
|
188
|
-
bctl type e5 "hello" # Type into element by ref
|
|
189
|
-
bctl get-value e5 # Read form value
|
|
190
|
-
bctl is-visible e3 # Check visibility
|
|
191
|
-
|
|
192
|
-
# Click by selector or by text
|
|
193
|
-
bctl click "button.login"
|
|
194
|
-
bctl click "button" -t "Sign in" # click button containing "Sign in"
|
|
195
|
-
bctl dblclick "td.cell" # double-click
|
|
196
|
-
bctl type "input[name=q]" "search query"
|
|
197
|
-
bctl press Enter
|
|
198
|
-
|
|
199
|
-
# Character-by-character input (rich text editors, contenteditable)
|
|
200
|
-
bctl input-text "div[contenteditable]" "hello" --clear --delay 20
|
|
201
|
-
|
|
202
|
-
# Checkbox / radio
|
|
203
|
-
bctl check "input#agree"
|
|
204
|
-
bctl uncheck "input#newsletter"
|
|
205
|
-
|
|
206
|
-
# Scroll a long page
|
|
207
|
-
bctl scroll down # Scroll down ~80% viewport
|
|
208
|
-
bctl scroll down 500 # Scroll down 500px
|
|
209
|
-
bctl scroll up # Scroll up
|
|
210
|
-
bctl scroll top # Scroll to top
|
|
211
|
-
bctl scroll bottom # Scroll to bottom
|
|
212
|
-
bctl scroll "#section-3" # Scroll element into view
|
|
213
|
-
|
|
214
|
-
# Form interaction
|
|
215
|
-
bctl select-option "select#country" "US" # Select by value
|
|
216
|
-
bctl select-option "select#lang" "English" --text # Select by visible text
|
|
217
|
-
bctl upload "input[type=file]" ./photo.jpg # Upload file
|
|
218
|
-
|
|
219
|
-
# Handle dialogs (call BEFORE triggering action)
|
|
220
|
-
bctl dialog accept # Auto-accept next alert/confirm
|
|
221
|
-
bctl dialog dismiss # Dismiss next confirm
|
|
222
|
-
bctl dialog accept --text "yes" # Answer next prompt with "yes"
|
|
223
|
-
|
|
224
|
-
# Drag and drop
|
|
225
|
-
bctl drag ".card-1" ".column-done" # Drag to target element
|
|
226
|
-
bctl drag ".slider-handle" --dx 100 --dy 0 # Drag by pixel offset
|
|
227
|
-
|
|
228
|
-
# Wait then screenshot
|
|
229
|
-
bctl wait ".loaded" 10
|
|
230
|
-
bctl ss page.png
|
|
231
|
-
|
|
232
|
-
# Download with browser auth
|
|
233
|
-
bctl download "https://site.com/file.pdf" -o file.pdf
|
|
234
|
-
|
|
235
|
-
# Extract structured data (prefer select over eval)
|
|
236
|
-
bctl select "a.video-link" -l 10
|
|
237
|
-
bctl eval "JSON.stringify(Array.from(document.querySelectorAll('a')).slice(0,5).map(a=>({text:a.textContent.trim(),href:a.href})))"
|
|
238
|
-
```
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|