tab-agent 0.3.4 → 0.4.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +39 -0
  2. package/README.md +201 -26
  3. package/bin/tab-agent.js +23 -8
  4. package/cli/command.js +113 -9
  5. package/cli/detect-extension.js +96 -14
  6. package/cli/launch-chrome.js +150 -0
  7. package/cli/setup.js +99 -23
  8. package/cli/start.js +65 -13
  9. package/cli/status.js +41 -7
  10. package/extension/content-script.js +218 -17
  11. package/extension/manifest.json +4 -3
  12. package/extension/manifest.safari.json +45 -0
  13. package/extension/popup/popup.html +58 -1
  14. package/extension/popup/popup.js +18 -0
  15. package/extension/service-worker.js +106 -13
  16. package/package.json +14 -3
  17. package/relay/install-native-host.sh +14 -7
  18. package/relay/native-host-wrapper.sh +1 -1
  19. package/relay/native-host.js +3 -1
  20. package/relay/server.js +124 -17
  21. package/skills/claude-code/tab-agent/SKILL.md +92 -0
  22. package/skills/codex/tab-agent/SKILL.md +92 -0
  23. package/relay/node_modules/.package-lock.json +0 -29
  24. package/relay/node_modules/ws/LICENSE +0 -20
  25. package/relay/node_modules/ws/README.md +0 -548
  26. package/relay/node_modules/ws/browser.js +0 -8
  27. package/relay/node_modules/ws/index.js +0 -13
  28. package/relay/node_modules/ws/lib/buffer-util.js +0 -131
  29. package/relay/node_modules/ws/lib/constants.js +0 -19
  30. package/relay/node_modules/ws/lib/event-target.js +0 -292
  31. package/relay/node_modules/ws/lib/extension.js +0 -203
  32. package/relay/node_modules/ws/lib/limiter.js +0 -55
  33. package/relay/node_modules/ws/lib/permessage-deflate.js +0 -528
  34. package/relay/node_modules/ws/lib/receiver.js +0 -706
  35. package/relay/node_modules/ws/lib/sender.js +0 -602
  36. package/relay/node_modules/ws/lib/stream.js +0 -161
  37. package/relay/node_modules/ws/lib/subprotocol.js +0 -62
  38. package/relay/node_modules/ws/lib/validation.js +0 -152
  39. package/relay/node_modules/ws/lib/websocket-server.js +0 -554
  40. package/relay/node_modules/ws/lib/websocket.js +0 -1393
  41. package/relay/node_modules/ws/package.json +0 -69
  42. package/relay/node_modules/ws/wrapper.mjs +0 -8
  43. package/relay/package-lock.json +0 -36
  44. package/relay/package.json +0 -12
  45. package/skills/claude-code/tab-agent.md +0 -57
  46. package/skills/codex/tab-agent.md +0 -38
package/CHANGELOG.md ADDED
@@ -0,0 +1,39 @@
1
+ # Changelog
2
+
3
+ ## [0.4.1] - 2026-03-15
4
+
5
+ ### Fixed
6
+ - `setup` now installs the native host into a stable user-library location instead of pointing Chrome at the repo checkout
7
+ - Native host installation now copies the `ws` dependency from the packaged install, so fresh npm installs can launch reliably
8
+ - Relay server now accepts the current Chrome native-host client identifier, fixing cases where Chrome was connected but commands could not see an active browser
9
+
10
+ ### Docs
11
+ - README now treats `status` as part of the setup flow and documents the native-host recovery steps that go with the new install path
12
+
13
+ ## [0.4.0] - 2026-03-14
14
+
15
+ ### Added
16
+ - Auto-launch Chrome with profile detection, saved defaults, and `--profile`
17
+ - New browser commands: `hover`, `select`, `drag`, `get`, `find`, `cookies`, `storage`, and `pdf`
18
+ - Experimental Safari support with browser selection flags
19
+ - Auto-activate toggle and bulk activation support in the extension popup
20
+
21
+ ### Fixed
22
+ - Published packages now ship a real `extension/manifest.json` for unpacked Chrome installs
23
+ - Setup can auto-detect unpacked Tab Agent extensions from Chrome profile preferences
24
+ - Status output shows the detected extension path and native host path for easier debugging
25
+ - README install steps now call out `@latest` usage to avoid stale cached `npx` installs
26
+
27
+ ## [0.1.0] - 2026-01-30
28
+
29
+ ### Added
30
+ - Chrome extension (Manifest V3)
31
+ - WebSocket relay server
32
+ - Native messaging host
33
+ - AI-readable page snapshots
34
+ - DOM actions: click, type, fill, press, select, hover, scroll
35
+ - Screenshot capture
36
+ - Multi-tab support
37
+ - Audit logging
38
+ - Claude Code skill
39
+ - Codex skill
package/README.md CHANGED
@@ -1,9 +1,16 @@
1
+ ![Tab Agent](./banner-pixel-1.svg)
2
+
1
3
  # Tab Agent
2
4
 
3
5
  [![npm version](https://img.shields.io/npm/v/tab-agent.svg)](https://www.npmjs.com/package/tab-agent)
6
+ [![Chrome](https://img.shields.io/badge/Chrome-Extension-4285F4?logo=googlechrome&logoColor=white)](https://github.com/DrHB/tab-agent)
4
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
8
 
6
- **Browser control for Claude Code and Codex** — click-to-activate security.
9
+ **Let Claude & Codex browse as YOU** — uses your existing logins, click-to-activate security.
10
+
11
+ > No headless browser. No re-authenticating. Your AI uses your actual Chrome sessions.
12
+ >
13
+ > **Safari support is experimental** — [see below](#experimental-safari-support)
7
14
 
8
15
  ```
9
16
  ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
@@ -18,30 +25,68 @@
18
25
  └───────────────────┘
19
26
  ```
20
27
 
28
+ ## Why Tab Agent?
29
+
30
+ Most browser automation tools (Playwright, Puppeteer, agent-browser) spin up a **fresh headless browser** — you start logged out of everything, and many sites actively block it. Sites detect headless browsers through missing plugins, the `navigator.webdriver` flag, and other fingerprints.
31
+
32
+ Tab Agent is different — it uses your real Chrome with your real cookies:
33
+
34
+ ```
35
+ +---------------+--------------------------------------+----------------------------------+
36
+ | | Tab Agent | Headless Browsers |
37
+ +---------------+--------------------------------------+----------------------------------+
38
+ | Your logins | (+) Uses existing sessions | (-) Must re-authenticate |
39
+ | Security | (+) Click to activate specific tabs | (-) Full browser access |
40
+ | Privacy | (+) Credentials never leave browser | (-) Agent sees everything |
41
+ | Detection | (+) Real browser, real cookies | (-) Often blocked by anti-bot |
42
+ | Setup | (+) Uses your Chrome | (-) Downloads separate browser |
43
+ | Visibility | (+) Watch in real browser | (-) Runs hidden/headless |
44
+ +---------------+--------------------------------------+----------------------------------+
45
+ ```
46
+
47
+ **Use Tab Agent when:** Claude or Codex needs to browse as "you" — shopping with your Prime account, checking your GitHub notifications, using sites you're already logged into.
48
+
49
+ **Use headless browsers when:** CI/CD automation, web scraping, or testing with fresh sessions.
50
+
21
51
  ## Features
22
52
 
23
- - **Full browser control** — navigate, click, type, scroll, screenshot, run JavaScript
53
+ - **Full browser control** — navigate, click, type, scroll, hover, drag, screenshot, PDF, run JavaScript
24
54
  - **Uses your login sessions** — access GitHub, Gmail, Amazon without sharing credentials
25
- - **Runs in background** — relay starts automatically, works while you do other things
55
+ - **Runs in background** — run `npx tab-agent@latest start` once, then use commands while it stays running
26
56
  - **Click-to-activate security** — only tabs you explicitly enable, others stay private
27
57
  - **AI-optimized snapshots** — pages converted to text with refs `[e1]`, `[e2]` for easy targeting
28
58
  - **Works with Claude Code & Codex** — installs skills automatically
59
+ - **Auto-launches Chrome** — starts Chrome with profile selection if not already running
29
60
 
30
61
  ## Quick Start
31
62
 
32
63
  ```bash
33
- # 1. Install extension
64
+ # 1. Get the extension files
34
65
  git clone https://github.com/DrHB/tab-agent
66
+ cd tab-agent
35
67
  # Chrome: chrome://extensions → Developer mode → Load unpacked → select extension/
68
+ # The extension/ folder must contain manifest.json
36
69
 
37
70
  # 2. Setup
38
- npx tab-agent setup
71
+ npx tab-agent@latest setup
72
+
73
+ # 3. Start relay
74
+ npx tab-agent@latest start
75
+
76
+ # 4. Verify wiring
77
+ npx tab-agent@latest status
39
78
 
40
- # 3. Activate & go
79
+ # 5. Activate & go
41
80
  # Click extension icon on any tab (turns green)
42
- # Ask Claude: "Search Amazon for mechanical keyboards and find the best rated"
81
+ # Ask Claude/Codex: "Search Amazon for mechanical keyboards and find the best rated"
43
82
  ```
44
83
 
84
+ If you have an older cached `npx` install, keep using `@latest` for `setup`, `start`, and `status` so the CLI/runtime matches the current extension release.
85
+
86
+ <p align="center">
87
+ <img src="assets/toggle-demo.gif" alt="Pin extension and toggle on/off" width="600">
88
+ </p>
89
+
45
90
  ## Example Tasks
46
91
 
47
92
  ```bash
@@ -70,15 +115,41 @@ npx tab-agent click <ref> # Click element (e.g., click e5)
70
115
  npx tab-agent type <ref> <text> # Type into element
71
116
  npx tab-agent fill <ref> <value> # Fill form field
72
117
 
73
- # Navigation
118
+ # Navigation & interaction
74
119
  npx tab-agent navigate <url> # Go to URL
75
120
  npx tab-agent scroll <dir> [amount] # Scroll up/down
76
121
  npx tab-agent press <key> # Press key (Enter, Escape, Tab)
122
+ npx tab-agent hover <ref> # Hover over element
123
+ npx tab-agent select <ref> <value> # Select dropdown option
124
+ npx tab-agent drag <from> <to> # Drag and drop between elements
125
+
126
+ # Data extraction
127
+ npx tab-agent get text <ref> # Get element text
128
+ npx tab-agent get value <ref> # Get input value
129
+ npx tab-agent get attr <ref> href # Get element attribute
130
+ npx tab-agent get url # Get current URL
131
+ npx tab-agent get title # Get page title
132
+
133
+ # Semantic locators
134
+ npx tab-agent find text "Submit" # Find elements by text
135
+ npx tab-agent find role button # Find by ARIA role
136
+ npx tab-agent find label "Email" # Find by label text
137
+ npx tab-agent find placeholder "Search" # Find by placeholder
77
138
 
78
139
  # Utilities
79
140
  npx tab-agent tabs # List active tabs
80
141
  npx tab-agent wait <text> # Wait for text to appear
81
- npx tab-agent screenshot # Capture page (fallback for complex UIs)
142
+ npx tab-agent wait --url <pattern> # Wait for URL to match
143
+ npx tab-agent wait --visible <ref> # Wait for element to be visible
144
+ npx tab-agent screenshot [--full] # Capture page (fallback for complex UIs)
145
+ npx tab-agent pdf [filename.pdf] # Save page as PDF
146
+ npx tab-agent evaluate <script> # Run JavaScript
147
+ npx tab-agent cookies get # View cookies
148
+ npx tab-agent cookies clear # Clear cookies
149
+ npx tab-agent storage get [key] # Read localStorage
150
+ npx tab-agent storage set <key> <val> # Write localStorage
151
+ npx tab-agent storage remove <key> # Remove localStorage key (or use rm)
152
+ npx tab-agent storage clear # Clear localStorage
82
153
  ```
83
154
 
84
155
  **Workflow:** `snapshot` → use refs → `click`/`type` → `snapshot` again → repeat
@@ -89,35 +160,130 @@ npx tab-agent screenshot # Capture page (fallback for complex UIs)
89
160
 
90
161
  ```bash
91
162
  git clone https://github.com/DrHB/tab-agent
163
+ cd tab-agent
92
164
  ```
93
165
 
94
166
  1. Open `chrome://extensions`
95
167
  2. Enable **Developer mode** (top right)
96
168
  3. Click **Load unpacked**
97
169
  4. Select the `extension/` folder
170
+ 5. Confirm the selected `extension/` folder contains `manifest.json`
98
171
 
99
172
  ### 2. Run Setup
100
173
 
101
174
  ```bash
102
- npx tab-agent setup
175
+ npx tab-agent@latest setup
103
176
  ```
104
177
 
105
- This auto-detects your extension and configures everything.
178
+ This auto-detects unpacked and installed Tab Agent extensions, copies the native host into a stable per-user directory, and configures everything.
106
179
 
107
- ### 3. Activate Tabs
180
+ ### 3. Start Relay
181
+
182
+ ```bash
183
+ npx tab-agent@latest start
184
+ ```
185
+
186
+ Keep this running in a terminal while you use `tab-agent` commands.
187
+
188
+ ### 4. Check Status
189
+
190
+ ```bash
191
+ npx tab-agent@latest status
192
+ ```
193
+
194
+ Confirm that all three pieces are healthy before you start driving the browser:
195
+
196
+ - Native Host: Installed
197
+ - Extension: Detected
198
+ - Relay Server: Running
199
+
200
+ ### 5. Activate Tabs
108
201
 
109
202
  Click the Tab Agent icon on any tab you want to control. Green = active.
110
203
 
111
- ## Security Model
204
+ ## Auto-Launch Chrome
205
+
206
+ When you run `npx tab-agent@latest start`, Chrome is automatically launched if it's not already running. You'll be prompted to select which Chrome profile to use:
207
+
208
+ ```
209
+ Chrome is not running.
210
+
211
+ Chrome Profiles:
212
+ 1. Person 1 (Default)
213
+ 2. Work (Profile 1)
214
+ 3. Personal (Profile 2)
112
215
 
113
- | Feature | Tab Agent | Traditional Automation |
114
- |---------|--------------|----------------------|
115
- | **Access** | Only tabs you click to activate | Entire browser |
116
- | **Sessions** | Uses your cookies | Requires credentials |
117
- | **Visibility** | Green badge shows active tabs | Hidden/background |
118
- | **Control** | You choose what AI can access | Full access by default |
216
+ Select profile [1-3]:
217
+ ```
218
+
219
+ ### Non-Interactive Mode
220
+
221
+ Skip the prompt with the `--profile` flag:
222
+
223
+ ```bash
224
+ npx tab-agent@latest start --profile="Profile 1"
225
+ npx tab-agent@latest start --profile="Work"
226
+ ```
227
+
228
+ The flag matches by profile name or directory name (case-insensitive). If Chrome is already running, the prompt is skipped entirely.
229
+
230
+ ## Experimental Safari Support
231
+
232
+ > **Note:** Safari support is experimental and requires source build steps. Relay + CLI routing work, but Safari packaging is still manual in Xcode.
233
+
234
+ ### Prerequisites
235
+
236
+ - macOS 14+ (Sonoma or later)
237
+ - Xcode 15+ (free from App Store)
238
+ - Node.js 18+
239
+
240
+ ### Setup Steps
241
+
242
+ 1. **Start the relay server** (in a terminal):
243
+ ```bash
244
+ npx tab-agent@latest start
245
+ ```
246
+
247
+ 2. **Open the Safari Swift package in Xcode**:
248
+ ```bash
249
+ open -a Xcode safari/Package.swift
250
+ ```
251
+
252
+ 3. **Create/configure a Safari Web Extension target in Xcode**:
253
+ - File → New → Target → Safari Web Extension
254
+ - Point it to the shared extension assets in `extension/`
255
+ - Use `extension/manifest.safari.json` settings as the Safari manifest baseline
119
256
 
120
- Your banking, email, and sensitive tabs stay completely isolated unless you explicitly activate them.
257
+ 4. **Run the app target**:
258
+ - Click "Run" in Xcode (⌘R)
259
+
260
+ 5. **Enable the extension in Safari**:
261
+ - Safari → Settings → Extensions
262
+ - Check "Tab Agent"
263
+
264
+ 6. **Enable unsigned extensions** (required after each Safari restart):
265
+ - Safari → Develop → Allow Unsigned Extensions
266
+
267
+ 7. **Test the connection**:
268
+ ```bash
269
+ npx tab-agent tabs --browser=safari
270
+ ```
271
+
272
+ ### Using with Both Browsers
273
+
274
+ If you have both Chrome and Safari set up:
275
+
276
+ ```bash
277
+ # Auto-detect (uses whichever has activated tabs)
278
+ npx tab-agent snapshot
279
+
280
+ # Explicitly target a browser
281
+ npx tab-agent snapshot --browser=safari
282
+ npx tab-agent snapshot --browser=chrome
283
+
284
+ # See tabs from all browsers
285
+ npx tab-agent tabs
286
+ ```
121
287
 
122
288
  ## Supported Browsers
123
289
 
@@ -125,16 +291,29 @@ Your banking, email, and sensitive tabs stay completely isolated unless you expl
125
291
  - Brave
126
292
  - Microsoft Edge
127
293
  - Chromium
294
+ - Safari (experimental, manual source setup)
128
295
 
129
296
  ## Troubleshooting
130
297
 
131
298
  **Extension not detected?**
132
299
  - Make sure Developer mode is enabled in chrome://extensions
133
300
  - Reload the extension
301
+ - Run `npx tab-agent@latest setup` again after reloading so the native host is paired to the current extension ID
134
302
 
135
303
  **Commands not working?**
304
+ - Make sure relay is running: `npx tab-agent@latest start`
136
305
  - Click the extension icon — must show green "ON"
137
- - Run `npx tab-agent status` to check configuration
306
+ - Run `npx tab-agent@latest status` to check configuration
307
+
308
+ **Popup says "Native host has exited"?**
309
+ - Make sure the relay is running: `npx tab-agent@latest start`
310
+ - Reload the extension in `chrome://extensions`
311
+ - Run `npx tab-agent@latest status` to confirm the native host path and relay status
312
+ - If the extension ID changed, run `npx tab-agent@latest setup` again so Chrome is paired to the current extension
313
+
314
+ **Fresh machine install still acting like an older release?**
315
+ - Use `npx tab-agent@latest setup`, `npx tab-agent@latest start`, and `npx tab-agent@latest status` to bypass stale cached `npx` installs
316
+ - Make sure the unpacked extension and the CLI/runtime come from the same release family
138
317
 
139
318
  **No active tabs?**
140
319
  - Activate at least one tab by clicking the extension icon
@@ -142,7 +321,7 @@ Your banking, email, and sensitive tabs stay completely isolated unless you expl
142
321
  ## How It Works
143
322
 
144
323
  1. **Chrome Extension** — Injects into activated tabs, captures DOM snapshots
145
- 2. **Relay Server** — Bridges AI ↔ Extension via Chrome Native Messaging (runs in background)
324
+ 2. **Relay Server** — Bridges AI ↔ Extension via native messaging (run with `npx tab-agent@latest start`)
146
325
  3. **CLI** — Simple commands for Claude Code and Codex
147
326
 
148
327
  ```
@@ -158,7 +337,3 @@ Claude → npx tab-agent navigate "google.com/flights"
158
337
  ## License
159
338
 
160
339
  MIT
161
-
162
- ---
163
-
164
- **Keywords:** browser automation, claude code, codex, AI browser control, web automation, puppeteer alternative, playwright alternative
package/bin/tab-agent.js CHANGED
@@ -1,8 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
  const command = process.argv[2];
3
+ const hasHelpFlag = process.argv.includes('--help') || process.argv.includes('-h');
3
4
 
4
5
  // Commands that go to the command module
5
- const BROWSER_COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate'];
6
+ const BROWSER_COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate', 'hover', 'select', 'drag', 'get', 'find', 'cookies', 'storage', 'pdf'];
6
7
 
7
8
  if (command === '-v' || command === '--version') {
8
9
  console.log(require('../package.json').version);
@@ -13,6 +14,9 @@ if (BROWSER_COMMANDS.includes(command)) {
13
14
  const { runCommand } = require('../cli/command.js');
14
15
  runCommand(process.argv.slice(2));
15
16
  } else {
17
+ if (hasHelpFlag) {
18
+ showHelp(0);
19
+ }
16
20
  switch (command) {
17
21
  case 'setup':
18
22
  require('../cli/setup.js');
@@ -28,9 +32,9 @@ if (BROWSER_COMMANDS.includes(command)) {
28
32
  }
29
33
  }
30
34
 
31
- function showHelp() {
35
+ function showHelp(exitCode = null) {
32
36
  console.log(`
33
- tabpilot - Give LLMs full control of your browser
37
+ tab-agent - Give LLMs full control of your browser
34
38
 
35
39
  Setup:
36
40
  setup Auto-detect extension, configure native messaging
@@ -48,18 +52,29 @@ Browser Control:
48
52
  tabs List active tabs
49
53
  wait <text|selector> Wait for text or element
50
54
  screenshot [--full] Capture page (fallback)
55
+ hover <ref> Hover over element
56
+ select <ref> <value> Select dropdown option
57
+ drag <from> <to> Drag element to another
58
+ get <prop> [ref] [attr] Get text, value, attr, url, title
59
+ find <by> <query> Find by text, role, label, placeholder, selector
60
+ cookies <get|clear> View or clear cookies
61
+ storage <get|set|remove|rm|clear> Manage localStorage/sessionStorage
62
+ pdf [filename.pdf] Save page as PDF
51
63
 
52
64
  Workflow: snapshot → click/type → snapshot → repeat
53
65
 
54
66
  Examples:
55
- npx tabpilot setup
56
- npx tabpilot snapshot
57
- npx tabpilot click e5
58
- npx tabpilot type e3 "hello world"
59
- npx tabpilot navigate "https://google.com"
67
+ npx tab-agent setup
68
+ npx tab-agent snapshot
69
+ npx tab-agent click e5
70
+ npx tab-agent type e3 "hello world"
71
+ npx tab-agent navigate "https://google.com"
60
72
 
61
73
  Version: ${require('../package.json').version}
62
74
  `);
75
+ if (typeof exitCode === 'number') {
76
+ process.exit(exitCode);
77
+ }
63
78
  if (command && command !== 'help' && command !== '--help' && command !== '-h') {
64
79
  process.exit(1);
65
80
  }
package/cli/command.js CHANGED
@@ -1,9 +1,23 @@
1
1
  // cli/command.js
2
2
  const WebSocket = require('ws');
3
3
 
4
- const COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate'];
4
+ const COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate', 'hover', 'select', 'drag', 'get', 'find', 'cookies', 'storage', 'pdf'];
5
5
 
6
6
  async function runCommand(args) {
7
+ // Extract --browser flag
8
+ let targetBrowser = null;
9
+ const browserFlagIndex = args.findIndex(a => a === '--browser' || a.startsWith('--browser='));
10
+ if (browserFlagIndex !== -1) {
11
+ const flag = args[browserFlagIndex];
12
+ if (flag.includes('=')) {
13
+ targetBrowser = flag.split('=')[1];
14
+ } else if (args[browserFlagIndex + 1]) {
15
+ targetBrowser = args[browserFlagIndex + 1];
16
+ args.splice(browserFlagIndex + 1, 1);
17
+ }
18
+ args.splice(browserFlagIndex, 1);
19
+ }
20
+
7
21
  const [command, ...params] = args;
8
22
 
9
23
  if (!command || command === 'help') {
@@ -37,10 +51,10 @@ async function runCommand(args) {
37
51
 
38
52
  // First get tabs to find tabId
39
53
  if (command === 'tabs') {
40
- ws.send(JSON.stringify({ id: 1, action: 'tabs' }));
54
+ ws.send(JSON.stringify({ id: 1, action: 'tabs', browser: targetBrowser }));
41
55
  } else {
42
56
  // Get active tab first, then run command
43
- ws.send(JSON.stringify({ id: 0, action: 'tabs' }));
57
+ ws.send(JSON.stringify({ id: 0, action: 'tabs', browser: targetBrowser }));
44
58
  }
45
59
  });
46
60
 
@@ -57,7 +71,7 @@ async function runCommand(args) {
57
71
 
58
72
  const tabId = msg.tabs[0].tabId;
59
73
  const payload = buildPayload(command, params, tabId);
60
- ws.send(JSON.stringify({ id: 1, ...payload }));
74
+ ws.send(JSON.stringify({ id: 1, ...payload, browser: targetBrowser }));
61
75
  return;
62
76
  }
63
77
 
@@ -69,6 +83,10 @@ async function runCommand(args) {
69
83
  printSnapshot(msg);
70
84
  } else if (command === 'screenshot') {
71
85
  printScreenshot(msg);
86
+ } else if (command === 'pdf') {
87
+ printPdf(msg);
88
+ } else if (command === 'find') {
89
+ printFind(msg);
72
90
  } else {
73
91
  printResult(msg);
74
92
  }
@@ -108,10 +126,14 @@ function buildPayload(command, params, tabId) {
108
126
  payload.url = params[0];
109
127
  break;
110
128
  case 'wait':
111
- if (params[0]?.startsWith('.') || params[0]?.startsWith('#')) {
129
+ if (params[0] === '--url') {
130
+ payload.urlPattern = params[1];
131
+ } else if (params[0] === '--visible') {
132
+ payload.visibleRef = params[1];
133
+ } else if (params[0]?.startsWith('.') || params[0]?.startsWith('#')) {
112
134
  payload.selector = params[0];
113
135
  } else {
114
- payload.text = params.join(' ');
136
+ payload.text = params.filter(p => !/^\d+$/.test(p)).join(' ');
115
137
  }
116
138
  payload.timeout = parseInt(params.find(p => /^\d+$/.test(p))) || 5000;
117
139
  break;
@@ -123,6 +145,39 @@ function buildPayload(command, params, tabId) {
123
145
  payload.fullPage = true;
124
146
  }
125
147
  break;
148
+ case 'hover':
149
+ payload.ref = params[0];
150
+ break;
151
+ case 'select':
152
+ payload.ref = params[0];
153
+ payload.value = params.slice(1).join(' ');
154
+ break;
155
+ case 'drag':
156
+ payload.fromRef = params[0];
157
+ payload.toRef = params[1];
158
+ break;
159
+ case 'get':
160
+ payload.subcommand = params[0]; // text, html, value, attr, url, title
161
+ payload.ref = params[1];
162
+ payload.attr = params[2]; // for get attr <ref> <name>
163
+ break;
164
+ case 'find':
165
+ payload.by = params[0]; // text, role, label, placeholder, selector
166
+ payload.query = params.slice(1).join(' ');
167
+ break;
168
+ case 'cookies':
169
+ payload.subcommand = params[0]; // get, clear
170
+ break;
171
+ case 'storage':
172
+ payload.subcommand = params[0] === 'rm' ? 'remove' : params[0]; // get, set, remove, clear
173
+ payload.storageType = params.includes('--session') ? 'session' : 'local';
174
+ // Filter out --session flag
175
+ const storageParams = params.filter(p => p !== '--session');
176
+ payload.key = storageParams[1];
177
+ payload.value = storageParams.slice(2).join(' ');
178
+ break;
179
+ case 'pdf':
180
+ break;
126
181
  }
127
182
 
128
183
  return payload;
@@ -146,14 +201,32 @@ Commands:
146
201
  wait <text|selector> Wait for text or element
147
202
  screenshot [--full] Capture page (fallback)
148
203
  evaluate <script> Run JavaScript
204
+ hover <ref> Hover over element
205
+ select <ref> <value> Select dropdown option
206
+ drag <from> <to> Drag element to another
207
+ get <prop> [ref] [attr] Get text, value, attr, url, title
208
+ find <by> <query> Find by text, role, label, placeholder, selector
209
+ cookies <get|clear> View or clear cookies
210
+ storage <get|set|remove|rm|clear> Manage localStorage/sessionStorage
211
+ pdf [filename.pdf] Save page as PDF
212
+
213
+ Options:
214
+ --browser=<chrome|safari> Target specific browser
149
215
 
150
216
  Workflow: snapshot → click/type → snapshot → repeat
151
217
 
152
218
  Examples:
153
219
  npx tab-agent snapshot
154
220
  npx tab-agent click e5
155
- npx tab-agent type e3 "hello world"
156
- npx tab-agent navigate "https://google.com"
221
+ npx tab-agent get text e3
222
+ npx tab-agent find text "Submit"
223
+ npx tab-agent find role button
224
+ npx tab-agent pdf page.pdf
225
+ npx tab-agent drag e1 e5
226
+ npx tab-agent hover e3
227
+ npx tab-agent select e7 "Option 2"
228
+ npx tab-agent cookies get
229
+ npx tab-agent storage get myKey
157
230
  `);
158
231
  }
159
232
 
@@ -164,7 +237,8 @@ function printTabs(msg) {
164
237
  }
165
238
  console.log('Active tabs:\n');
166
239
  msg.tabs.forEach((tab, i) => {
167
- console.log(` ${i + 1}. [${tab.tabId}] ${tab.title}`);
240
+ const browserTag = tab.browser ? `[${tab.browser}] ` : '';
241
+ console.log(` ${i + 1}. ${browserTag}[${tab.tabId}] ${tab.title}`);
168
242
  console.log(` ${tab.url}\n`);
169
243
  });
170
244
  }
@@ -186,6 +260,36 @@ function printScreenshot(msg) {
186
260
  console.log(msg.screenshot);
187
261
  }
188
262
 
263
+ function printPdf(msg) {
264
+ if (!msg.ok) {
265
+ console.error('Error:', msg.error);
266
+ return;
267
+ }
268
+ // Check if a filename was given via params
269
+ const outFile = process.argv.find(a => a.endsWith('.pdf'));
270
+ if (outFile) {
271
+ require('fs').writeFileSync(outFile, Buffer.from(msg.pdf, 'base64'));
272
+ console.log(`PDF saved to ${outFile}`);
273
+ } else {
274
+ console.log(msg.pdf);
275
+ }
276
+ }
277
+
278
+ function printFind(msg) {
279
+ if (!msg.ok) {
280
+ console.error('Error:', msg.error);
281
+ return;
282
+ }
283
+ if (msg.results.length === 0) {
284
+ console.log('No matches found.');
285
+ return;
286
+ }
287
+ console.log(`Found ${msg.count} match${msg.count !== 1 ? 'es' : ''}:\n`);
288
+ msg.results.forEach(r => {
289
+ console.log(` [${r.ref}] ${r.role} "${r.name}"`);
290
+ });
291
+ }
292
+
189
293
  function printResult(msg) {
190
294
  if (!msg.ok) {
191
295
  console.error('Error:', msg.error);