browse-agent-cli 0.0.3 → 0.0.5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +4 -0
- package/dist/cli.js +15 -0
- package/package.json +11 -1
- package/skills/SKILL.md +11 -11
- package/skills/references/examples.md +30 -30
package/README.md
CHANGED
|
@@ -36,6 +36,9 @@ browse-agent setup
|
|
|
36
36
|
# Launch browser session
|
|
37
37
|
browse-agent launch
|
|
38
38
|
|
|
39
|
+
# Start only background service (do not launch browser)
|
|
40
|
+
browse-agent launch --server-only
|
|
41
|
+
|
|
39
42
|
# Navigate and extract
|
|
40
43
|
browse-agent navigate "https://example.com"
|
|
41
44
|
browse-agent get-content --format text
|
|
@@ -109,6 +112,7 @@ import {
|
|
|
109
112
|
|
|
110
113
|
- `--browser <name>`: `chrome | chromium | edge | brave`
|
|
111
114
|
- `--headless`
|
|
115
|
+
- `--server-only`
|
|
112
116
|
- `--port <number>`
|
|
113
117
|
- `--servicePort <number>`
|
|
114
118
|
- `--timeout <ms>`
|
package/dist/cli.js
CHANGED
|
@@ -77,6 +77,10 @@ function parseArgs(argv) {
|
|
|
77
77
|
flags.headless = true;
|
|
78
78
|
continue;
|
|
79
79
|
}
|
|
80
|
+
if (arg === "--server-only") {
|
|
81
|
+
flags["server-only"] = true;
|
|
82
|
+
continue;
|
|
83
|
+
}
|
|
80
84
|
if (arg === "--all") {
|
|
81
85
|
flags.all = true;
|
|
82
86
|
continue;
|
|
@@ -129,6 +133,7 @@ Feature commands (require launch first):
|
|
|
129
133
|
Options:
|
|
130
134
|
--browser <name> Browser: chrome | chromium | edge | brave (default: chrome)
|
|
131
135
|
--headless Run in headless mode
|
|
136
|
+
--server-only Start background service only, skip browser launch
|
|
132
137
|
--port <number> WebSocket port for browser-agent (default: 9315)
|
|
133
138
|
--servicePort <number> Local service port (default: 9316)
|
|
134
139
|
--tabId <id> Target tab ID (from navigate or tabs list output)
|
|
@@ -142,6 +147,7 @@ Options:
|
|
|
142
147
|
Examples:
|
|
143
148
|
browse-agent setup
|
|
144
149
|
browse-agent launch --browser edge --headless
|
|
150
|
+
browse-agent launch --server-only
|
|
145
151
|
browse-agent navigate https://example.com
|
|
146
152
|
browse-agent get-content --format text
|
|
147
153
|
browse-agent get-dom "h1" --property innerText
|
|
@@ -299,6 +305,15 @@ try {
|
|
|
299
305
|
case "launch": {
|
|
300
306
|
const service = await ensureServiceRunning(flags);
|
|
301
307
|
const servicePort = service.servicePort;
|
|
308
|
+
if (flags["server-only"] === true) {
|
|
309
|
+
const status = await requestService(servicePort, "/status", "GET");
|
|
310
|
+
console.log(JSON.stringify({
|
|
311
|
+
servicePort,
|
|
312
|
+
mode: "server-only",
|
|
313
|
+
...status
|
|
314
|
+
}, null, 2));
|
|
315
|
+
break;
|
|
316
|
+
}
|
|
302
317
|
if (!service.newlyStarted) {
|
|
303
318
|
const status = await requestService(servicePort, "/status", "GET");
|
|
304
319
|
if (status.running === true) {
|
package/package.json
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
{
|
|
2
2
|
"name": "browse-agent-cli",
|
|
3
|
-
"version": "0.0.3",
|
|
3
|
+
"version": "0.0.5",
|
|
4
4
|
"type": "module",
|
|
5
5
|
"description": "TypeScript CLI for browse-agent",
|
|
6
6
|
"main": "./dist/cli.js",
|
|
@@ -37,5 +37,15 @@
|
|
|
37
37
|
"tsdown": "^0.13.3",
|
|
38
38
|
"typescript": "^5.4.0"
|
|
39
39
|
},
|
|
40
|
+
"bugs": {
|
|
41
|
+
"url": "https://github.com/imlinhanchao/browse-agent/issues"
|
|
42
|
+
},
|
|
43
|
+
"homepage": "https://github.com/imlinhanchao/browse-agent#readme",
|
|
44
|
+
"author": {
|
|
45
|
+
"name": "Hancel.Lin",
|
|
46
|
+
"email": "imlinhanchao@foxmail.com",
|
|
47
|
+
"url": "https://github.com/imlinhanchao"
|
|
48
|
+
},
|
|
49
|
+
"keywords": ["chrome", "browser", "automation", "websocket", "extension"],
|
|
40
50
|
"license": "MIT"
|
|
41
51
|
}
|
package/skills/SKILL.md
CHANGED
|
@@ -6,7 +6,7 @@ argument-hint: "URL to visit, or describe what data to extract from the web"
|
|
|
6
6
|
|
|
7
7
|
# Browse Agent — Web Browsing & Data Extraction
|
|
8
8
|
|
|
9
|
-
Control a real Chrome browser to visit web pages, extract content, take screenshots, query DOM elements, and run JavaScript
|
|
9
|
+
Control a real Chrome browser to visit web pages, extract content, take screenshots, query DOM elements, and run JavaScript.
|
|
10
10
|
|
|
11
11
|
## Prerequisites
|
|
12
12
|
|
|
@@ -19,7 +19,7 @@ Control a real Chrome browser to visit web pages, extract content, take screensh
|
|
|
19
19
|
browse-agent setup
|
|
20
20
|
```
|
|
21
21
|
|
|
22
|
-
Auto-detects local vs global mode.
|
|
22
|
+
Auto-detects local vs global mode. The setup checks existing state — if already installed, it exits immediately.
|
|
23
23
|
|
|
24
24
|
## Usage Procedure — Step-by-Step (Recommended)
|
|
25
25
|
|
|
@@ -28,7 +28,7 @@ Browse **interactively** — one command per step. Observe each result before de
|
|
|
28
28
|
### Step 1: Launch Browser
|
|
29
29
|
|
|
30
30
|
```bash
|
|
31
|
-
browse-agent launch
|
|
31
|
+
browse-agent launch
|
|
32
32
|
```
|
|
33
33
|
|
|
34
34
|
Returns session info as JSON. The browser stays open until you close it.
|
|
@@ -39,22 +39,22 @@ Run each command separately. Read the output, then decide the next action:
|
|
|
39
39
|
|
|
40
40
|
```bash
|
|
41
41
|
# Open a page — returns { tabId, url, title }
|
|
42
|
-
browse-agent navigate "https://example.com"
|
|
42
|
+
browse-agent navigate "https://example.com"
|
|
43
43
|
|
|
44
44
|
# Read the page content — returns { content, url, title }
|
|
45
|
-
browse-agent get-content --format text
|
|
45
|
+
browse-agent get-content --format text
|
|
46
46
|
|
|
47
47
|
# Specify a tab by ID (from navigate output) — use --tabId with any feature command
|
|
48
|
-
browse-agent get-content --format text --tabId 123
|
|
48
|
+
browse-agent get-content --format text --tabId 123
|
|
49
49
|
|
|
50
50
|
# Query specific elements — returns { result }
|
|
51
|
-
browse-agent get-dom "h1" --property innerText
|
|
51
|
+
browse-agent get-dom "h1" --property innerText
|
|
52
52
|
|
|
53
53
|
# Run JavaScript — returns { result }
|
|
54
|
-
browse-agent evaluate "document.title"
|
|
54
|
+
browse-agent evaluate "document.title"
|
|
55
55
|
|
|
56
56
|
# Take a screenshot — returns { data (base64), format, width, height }
|
|
57
|
-
browse-agent screenshot visible 2>/dev/null
|
|
57
|
+
browse-agent screenshot visible
|
|
58
58
|
```
|
|
59
59
|
|
|
60
60
|
**Key principle**: Each command outputs JSON to stdout. Use `--tabId <id>` to target a specific tab (ID comes from `navigate` or `tabs list` output). Parse the output, reason about it, then choose the next command. Don't pre-plan the entire interaction.
|
|
@@ -62,7 +62,7 @@ browse-agent screenshot visible 2>/dev/null
|
|
|
62
62
|
### Step 3: Close Browser
|
|
63
63
|
|
|
64
64
|
```bash
|
|
65
|
-
browse-agent close
|
|
65
|
+
browse-agent close
|
|
66
66
|
```
|
|
67
67
|
|
|
68
68
|
### Typical Interaction Flow
|
|
@@ -108,7 +108,7 @@ await browse(async (agent) => {
|
|
|
108
108
|
});
|
|
109
109
|
```
|
|
110
110
|
|
|
111
|
-
Run with: `node _browse_task.mjs 2>/dev/null`
|
|
111
|
+
Run with: `node _browse_task.mjs`
|
|
112
112
|
|
|
113
113
|
For full API and script examples, see [API Reference](./references/api.md) and [Examples](./references/examples.md).
|
|
114
114
|
|
package/skills/references/examples.md
CHANGED
|
@@ -6,79 +6,79 @@
|
|
|
6
6
|
|
|
7
7
|
```bash
|
|
8
8
|
# 1. Launch browser
|
|
9
|
-
browse-agent launch
|
|
9
|
+
browse-agent launch
|
|
10
10
|
|
|
11
11
|
# 2. Navigate to page
|
|
12
|
-
browse-agent navigate "https://example.com"
|
|
12
|
+
browse-agent navigate "https://example.com"
|
|
13
13
|
|
|
14
14
|
# 3. Read content — decide next step based on output
|
|
15
|
-
browse-agent get-content --format text
|
|
15
|
+
browse-agent get-content --format text
|
|
16
16
|
|
|
17
17
|
# 4. Done — close browser
|
|
18
|
-
browse-agent close
|
|
18
|
+
browse-agent close
|
|
19
19
|
```
|
|
20
20
|
|
|
21
21
|
### Find Specific Content on a Page
|
|
22
22
|
|
|
23
23
|
```bash
|
|
24
|
-
browse-agent launch
|
|
25
|
-
browse-agent navigate "https://news.ycombinator.com"
|
|
24
|
+
browse-agent launch
|
|
25
|
+
browse-agent navigate "https://news.ycombinator.com"
|
|
26
26
|
|
|
27
27
|
# First, read the page to understand structure
|
|
28
|
-
browse-agent get-content --format text
|
|
28
|
+
browse-agent get-content --format text
|
|
29
29
|
|
|
30
30
|
# Then query specific elements based on what you found
|
|
31
|
-
browse-agent get-dom ".titleline > a" --property innerText --all
|
|
31
|
+
browse-agent get-dom ".titleline > a" --property innerText --all
|
|
32
32
|
|
|
33
|
-
browse-agent close
|
|
33
|
+
browse-agent close
|
|
34
34
|
```
|
|
35
35
|
|
|
36
36
|
### Screenshot and Inspect
|
|
37
37
|
|
|
38
38
|
```bash
|
|
39
|
-
browse-agent launch
|
|
40
|
-
browse-agent navigate "https://example.com"
|
|
39
|
+
browse-agent launch
|
|
40
|
+
browse-agent navigate "https://example.com"
|
|
41
41
|
|
|
42
42
|
# Take a screenshot to see the page visually
|
|
43
|
-
browse-agent screenshot visible
|
|
43
|
+
browse-agent screenshot visible
|
|
44
44
|
|
|
45
45
|
# Run JS to count elements, check state, etc.
|
|
46
|
-
browse-agent evaluate "document.querySelectorAll('a').length"
|
|
46
|
+
browse-agent evaluate "document.querySelectorAll('a').length"
|
|
47
47
|
|
|
48
|
-
browse-agent close
|
|
48
|
+
browse-agent close
|
|
49
49
|
```
|
|
50
50
|
|
|
51
51
|
### Multi-Page Exploration
|
|
52
52
|
|
|
53
53
|
```bash
|
|
54
|
-
browse-agent launch
|
|
54
|
+
browse-agent launch
|
|
55
55
|
|
|
56
56
|
# Visit first page
|
|
57
|
-
browse-agent navigate "https://example.com"
|
|
58
|
-
browse-agent get-content --format text
|
|
57
|
+
browse-agent navigate "https://example.com"
|
|
58
|
+
browse-agent get-content --format text
|
|
59
59
|
|
|
60
60
|
# Visit second page (based on what you found)
|
|
61
|
-
browse-agent navigate "https://example.org"
|
|
62
|
-
browse-agent get-content --format text
|
|
61
|
+
browse-agent navigate "https://example.org"
|
|
62
|
+
browse-agent get-content --format text
|
|
63
63
|
|
|
64
64
|
# Manage tabs
|
|
65
|
-
browse-agent tabs list
|
|
66
|
-
browse-agent tabs close 123
|
|
65
|
+
browse-agent tabs list
|
|
66
|
+
browse-agent tabs close 123
|
|
67
67
|
|
|
68
|
-
browse-agent close
|
|
68
|
+
browse-agent close
|
|
69
69
|
```
|
|
70
70
|
|
|
71
71
|
### Use Logged-in Browser Profile
|
|
72
72
|
|
|
73
73
|
```bash
|
|
74
74
|
# Launch with user's default browser profile (preserves cookies/sessions)
|
|
75
|
-
browse-agent launch --browser chrome
|
|
75
|
+
browse-agent launch --browser chrome
|
|
76
76
|
# ⚠ Close all Chrome windows first!
|
|
77
77
|
|
|
78
|
-
USE_USER_PROFILE=true browse-agent launch
|
|
79
|
-
browse-agent navigate "https://github.com/notifications"
|
|
80
|
-
browse-agent get-content --format text
|
|
81
|
-
browse-agent close
|
|
78
|
+
USE_USER_PROFILE=true browse-agent launch
|
|
79
|
+
browse-agent navigate "https://github.com/notifications"
|
|
80
|
+
browse-agent get-content --format text
|
|
81
|
+
browse-agent close
|
|
82
82
|
```
|
|
83
83
|
|
|
84
84
|
## One-Shot Script Examples
|
|
@@ -173,13 +173,13 @@ await browse(async (agent) => {
|
|
|
173
173
|
|
|
174
174
|
```bash
|
|
175
175
|
# Use user's default Chrome profile (keeps login sessions)
|
|
176
|
-
USE_USER_PROFILE=true node _browse_task.mjs
|
|
176
|
+
USE_USER_PROFILE=true node _browse_task.mjs
|
|
177
177
|
|
|
178
178
|
# Use Edge with user profile
|
|
179
|
-
BROWSER=edge USE_USER_PROFILE=true node _browse_task.mjs
|
|
179
|
+
BROWSER=edge USE_USER_PROFILE=true node _browse_task.mjs
|
|
180
180
|
|
|
181
181
|
# Custom executable path
|
|
182
|
-
CHROME_PATH=/path/to/browser node _browse_task.mjs
|
|
182
|
+
CHROME_PATH=/path/to/browser node _browse_task.mjs
|
|
183
183
|
```
|
|
184
184
|
|
|
185
185
|
### Options via Code
|