browse-agent-cli 0.0.4 → 0.0.6

This diff shows the changes between publicly available versions of this package as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
package/README.md CHANGED
@@ -36,6 +36,9 @@ browse-agent setup
36
36
  # Launch browser session
37
37
  browse-agent launch
38
38
 
39
+ # Start only background service (do not launch browser)
40
+ browse-agent launch --server-only
41
+
39
42
  # Navigate and extract
40
43
  browse-agent navigate "https://example.com"
41
44
  browse-agent get-content --format text
@@ -109,6 +112,7 @@ import {
109
112
 
110
113
  - `--browser <name>`: `chrome | chromium | edge | brave`
111
114
  - `--headless`
115
+ - `--server-only`
112
116
  - `--port <number>`
113
117
  - `--servicePort <number>`
114
118
  - `--timeout <ms>`
package/dist/cli.js CHANGED
@@ -77,6 +77,10 @@ function parseArgs(argv) {
77
77
  flags.headless = true;
78
78
  continue;
79
79
  }
80
+ if (arg === "--server-only") {
81
+ flags["server-only"] = true;
82
+ continue;
83
+ }
80
84
  if (arg === "--all") {
81
85
  flags.all = true;
82
86
  continue;
@@ -129,6 +133,7 @@ Feature commands (require launch first):
129
133
  Options:
130
134
  --browser <name> Browser: chrome | chromium | edge | brave (default: chrome)
131
135
  --headless Run in headless mode
136
+ --server-only Start background service only, skip browser launch
132
137
  --port <number> WebSocket port for browser-agent (default: 9315)
133
138
  --servicePort <number> Local service port (default: 9316)
134
139
  --tabId <id> Target tab ID (from navigate or tabs list output)
@@ -142,6 +147,7 @@ Options:
142
147
  Examples:
143
148
  browse-agent setup
144
149
  browse-agent launch --browser edge --headless
150
+ browse-agent launch --server-only
145
151
  browse-agent navigate https://example.com
146
152
  browse-agent get-content --format text
147
153
  browse-agent get-dom "h1" --property innerText
@@ -154,7 +160,7 @@ Examples:
154
160
  `.trim());
155
161
  }
156
162
  function resolveSkillSourceDir() {
157
- return join(dirname(fileURLToPath(import.meta.url)), "..", "skill");
163
+ return join(dirname(fileURLToPath(import.meta.url)), "..", "skills");
158
164
  }
159
165
  function isNonEmptyDirectory(path) {
160
166
  if (!existsSync(path)) return false;
@@ -299,6 +305,15 @@ try {
299
305
  case "launch": {
300
306
  const service = await ensureServiceRunning(flags);
301
307
  const servicePort = service.servicePort;
308
+ if (flags["server-only"] === true) {
309
+ const status = await requestService(servicePort, "/status", "GET");
310
+ console.log(JSON.stringify({
311
+ servicePort,
312
+ mode: "server-only",
313
+ ...status
314
+ }, null, 2));
315
+ break;
316
+ }
302
317
  if (!service.newlyStarted) {
303
318
  const status = await requestService(servicePort, "/status", "GET");
304
319
  if (status.running === true) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "browse-agent-cli",
3
- "version": "0.0.4",
3
+ "version": "0.0.6",
4
4
  "type": "module",
5
5
  "description": "TypeScript CLI for browse-agent",
6
6
  "main": "./dist/cli.js",
package/skills/SKILL.md CHANGED
@@ -28,7 +28,7 @@ Browse **interactively** — one command per step. Observe each result before de
28
28
  ### Step 1: Launch Browser
29
29
 
30
30
  ```bash
31
- browse-agent launch 2>/dev/null
31
+ browse-agent launch
32
32
  ```
33
33
 
34
34
  Returns session info as JSON. The browser stays open until you close it.
@@ -39,22 +39,22 @@ Run each command separately. Read the output, then decide the next action:
39
39
 
40
40
  ```bash
41
41
  # Open a page — returns { tabId, url, title }
42
- browse-agent navigate "https://example.com" 2>/dev/null
42
+ browse-agent navigate "https://example.com"
43
43
 
44
44
  # Read the page content — returns { content, url, title }
45
- browse-agent get-content --format text 2>/dev/null
45
+ browse-agent get-content --format text
46
46
 
47
47
  # Specify a tab by ID (from navigate output) — use --tabId with any feature command
48
- browse-agent get-content --format text --tabId 123 2>/dev/null
48
+ browse-agent get-content --format text --tabId 123
49
49
 
50
50
  # Query specific elements — returns { result }
51
- browse-agent get-dom "h1" --property innerText 2>/dev/null
51
+ browse-agent get-dom "h1" --property innerText
52
52
 
53
53
  # Run JavaScript — returns { result }
54
- browse-agent evaluate "document.title" 2>/dev/null
54
+ browse-agent evaluate "document.title"
55
55
 
56
56
  # Take a screenshot — returns { data (base64), format, width, height }
57
- browse-agent screenshot visible 2>/dev/null
57
+ browse-agent screenshot visible
58
58
  ```
59
59
 
60
60
  **Key principle**: Each command outputs JSON to stdout. Use `--tabId <id>` to target a specific tab (ID comes from `navigate` or `tabs list` output). Parse the output, reason about it, then choose the next command. Don't pre-plan the entire interaction.
@@ -62,7 +62,7 @@ browse-agent screenshot visible 2>/dev/null
62
62
  ### Step 3: Close Browser
63
63
 
64
64
  ```bash
65
- browse-agent close 2>/dev/null
65
+ browse-agent close
66
66
  ```
67
67
 
68
68
  ### Typical Interaction Flow
@@ -108,7 +108,7 @@ await browse(async (agent) => {
108
108
  });
109
109
  ```
110
110
 
111
- Run with: `node _browse_task.mjs 2>/dev/null`
111
+ Run with: `node _browse_task.mjs`
112
112
 
113
113
  For full API and script examples, see [API Reference](./references/api.md) and [Examples](./references/examples.md).
114
114
 
@@ -16,6 +16,7 @@ browse-agent setup --global
16
16
  # Launch browser
17
17
  browse-agent launch
18
18
  browse-agent launch --browser edge --headless
19
+ browse-agent launch --server-only
19
20
 
20
21
  # Check connection
21
22
  browse-agent connect
@@ -67,6 +68,7 @@ browse-agent tabs close 123
67
68
  | `--global` | setup, clear | Use global installation (`~/.browse-agent/`) |
68
69
  | `--browser <name>` | launch | Browser: `chrome` \| `chromium` \| `edge` \| `brave` |
69
70
  | `--headless` | launch | Run in headless mode |
71
+ | `--server-only` | launch | Start service only, skip browser launch |
70
72
  | `--port <number>` | launch, connect, feature cmds | WebSocket port (default: 9315) |
71
73
  | `--tabId <id>` | all feature cmds | Target a specific tab (ID from `navigate` or `tabs list`) |
72
74
  | `--format <type>` | get-content, screenshot | Content format (`text`/`html`) or screenshot format (`png`/`jpeg`) |
@@ -6,79 +6,79 @@
6
6
 
7
7
  ```bash
8
8
  # 1. Launch browser
9
- browse-agent launch 2>/dev/null
9
+ browse-agent launch
10
10
 
11
11
  # 2. Navigate to page
12
- browse-agent navigate "https://example.com" 2>/dev/null
12
+ browse-agent navigate "https://example.com"
13
13
 
14
14
  # 3. Read content — decide next step based on output
15
- browse-agent get-content --format text 2>/dev/null
15
+ browse-agent get-content --format text
16
16
 
17
17
  # 4. Done — close browser
18
- browse-agent close 2>/dev/null
18
+ browse-agent close
19
19
  ```
20
20
 
21
21
  ### Find Specific Content on a Page
22
22
 
23
23
  ```bash
24
- browse-agent launch 2>/dev/null
25
- browse-agent navigate "https://news.ycombinator.com" 2>/dev/null
24
+ browse-agent launch
25
+ browse-agent navigate "https://news.ycombinator.com"
26
26
 
27
27
  # First, read the page to understand structure
28
- browse-agent get-content --format text 2>/dev/null
28
+ browse-agent get-content --format text
29
29
 
30
30
  # Then query specific elements based on what you found
31
- browse-agent get-dom ".titleline > a" --property innerText --all 2>/dev/null
31
+ browse-agent get-dom ".titleline > a" --property innerText --all
32
32
 
33
- browse-agent close 2>/dev/null
33
+ browse-agent close
34
34
  ```
35
35
 
36
36
  ### Screenshot and Inspect
37
37
 
38
38
  ```bash
39
- browse-agent launch 2>/dev/null
40
- browse-agent navigate "https://example.com" 2>/dev/null
39
+ browse-agent launch
40
+ browse-agent navigate "https://example.com"
41
41
 
42
42
  # Take a screenshot to see the page visually
43
- browse-agent screenshot visible 2>/dev/null
43
+ browse-agent screenshot visible
44
44
 
45
45
  # Run JS to count elements, check state, etc.
46
- browse-agent evaluate "document.querySelectorAll('a').length" 2>/dev/null
46
+ browse-agent evaluate "document.querySelectorAll('a').length"
47
47
 
48
- browse-agent close 2>/dev/null
48
+ browse-agent close
49
49
  ```
50
50
 
51
51
  ### Multi-Page Exploration
52
52
 
53
53
  ```bash
54
- browse-agent launch 2>/dev/null
54
+ browse-agent launch
55
55
 
56
56
  # Visit first page
57
- browse-agent navigate "https://example.com" 2>/dev/null
58
- browse-agent get-content --format text 2>/dev/null
57
+ browse-agent navigate "https://example.com"
58
+ browse-agent get-content --format text
59
59
 
60
60
  # Visit second page (based on what you found)
61
- browse-agent navigate "https://example.org" 2>/dev/null
62
- browse-agent get-content --format text 2>/dev/null
61
+ browse-agent navigate "https://example.org"
62
+ browse-agent get-content --format text
63
63
 
64
64
  # Manage tabs
65
- browse-agent tabs list 2>/dev/null
66
- browse-agent tabs close 123 2>/dev/null
65
+ browse-agent tabs list
66
+ browse-agent tabs close 123
67
67
 
68
- browse-agent close 2>/dev/null
68
+ browse-agent close
69
69
  ```
70
70
 
71
71
  ### Use Logged-in Browser Profile
72
72
 
73
73
  ```bash
74
74
  # Launch with user's default browser profile (preserves cookies/sessions)
75
- browse-agent launch --browser chrome 2>/dev/null
75
+ browse-agent launch --browser chrome
76
76
  # ⚠ Close all Chrome windows first!
77
77
 
78
- USE_USER_PROFILE=true browse-agent launch 2>/dev/null
79
- browse-agent navigate "https://github.com/notifications" 2>/dev/null
80
- browse-agent get-content --format text 2>/dev/null
81
- browse-agent close 2>/dev/null
78
+ USE_USER_PROFILE=true browse-agent launch
79
+ browse-agent navigate "https://github.com/notifications"
80
+ browse-agent get-content --format text
81
+ browse-agent close
82
82
  ```
83
83
 
84
84
  ## One-Shot Script Examples
@@ -173,13 +173,13 @@ await browse(async (agent) => {
173
173
 
174
174
  ```bash
175
175
  # Use user's default Chrome profile (keeps login sessions)
176
- USE_USER_PROFILE=true node _browse_task.mjs 2>/dev/null
176
+ USE_USER_PROFILE=true node _browse_task.mjs
177
177
 
178
178
  # Use Edge with user profile
179
- BROWSER=edge USE_USER_PROFILE=true node _browse_task.mjs 2>/dev/null
179
+ BROWSER=edge USE_USER_PROFILE=true node _browse_task.mjs
180
180
 
181
181
  # Custom executable path
182
- CHROME_PATH=/path/to/browser node _browse_task.mjs 2>/dev/null
182
+ CHROME_PATH=/path/to/browser node _browse_task.mjs
183
183
  ```
184
184
 
185
185
  ### Options via Code