tab-agent 0.3.3 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +29 -0
  2. package/README.md +183 -31
  3. package/bin/tab-agent.js +23 -8
  4. package/cli/command.js +113 -9
  5. package/cli/detect-extension.js +96 -14
  6. package/cli/launch-chrome.js +150 -0
  7. package/cli/setup.js +57 -22
  8. package/cli/start.js +65 -13
  9. package/cli/status.js +41 -7
  10. package/extension/content-script.js +218 -17
  11. package/extension/manifest.json +4 -3
  12. package/extension/manifest.safari.json +45 -0
  13. package/extension/popup/popup.html +58 -1
  14. package/extension/popup/popup.js +18 -0
  15. package/extension/service-worker.js +106 -13
  16. package/package.json +14 -3
  17. package/relay/install-native-host.sh +2 -2
  18. package/relay/native-host-wrapper.sh +1 -1
  19. package/relay/native-host.js +3 -1
  20. package/relay/server.js +124 -17
  21. package/skills/claude-code/tab-agent/SKILL.md +92 -0
  22. package/skills/codex/tab-agent/SKILL.md +92 -0
  23. package/relay/node_modules/.package-lock.json +0 -29
  24. package/relay/node_modules/ws/LICENSE +0 -20
  25. package/relay/node_modules/ws/README.md +0 -548
  26. package/relay/node_modules/ws/browser.js +0 -8
  27. package/relay/node_modules/ws/index.js +0 -13
  28. package/relay/node_modules/ws/lib/buffer-util.js +0 -131
  29. package/relay/node_modules/ws/lib/constants.js +0 -19
  30. package/relay/node_modules/ws/lib/event-target.js +0 -292
  31. package/relay/node_modules/ws/lib/extension.js +0 -203
  32. package/relay/node_modules/ws/lib/limiter.js +0 -55
  33. package/relay/node_modules/ws/lib/permessage-deflate.js +0 -528
  34. package/relay/node_modules/ws/lib/receiver.js +0 -706
  35. package/relay/node_modules/ws/lib/sender.js +0 -602
  36. package/relay/node_modules/ws/lib/stream.js +0 -161
  37. package/relay/node_modules/ws/lib/subprotocol.js +0 -62
  38. package/relay/node_modules/ws/lib/validation.js +0 -152
  39. package/relay/node_modules/ws/lib/websocket-server.js +0 -554
  40. package/relay/node_modules/ws/lib/websocket.js +0 -1393
  41. package/relay/node_modules/ws/package.json +0 -69
  42. package/relay/node_modules/ws/wrapper.mjs +0 -8
  43. package/relay/package-lock.json +0 -36
  44. package/relay/package.json +0 -12
  45. package/skills/claude-code/tab-agent.md +0 -57
  46. package/skills/codex/tab-agent.md +0 -38
package/CHANGELOG.md ADDED
@@ -0,0 +1,29 @@
1
+ # Changelog
2
+
3
+ ## [0.4.0] - 2026-03-14
4
+
5
+ ### Added
6
+ - Auto-launch Chrome with profile detection, saved defaults, and `--profile`
7
+ - New browser commands: `hover`, `select`, `drag`, `get`, `find`, `cookies`, `storage`, and `pdf`
8
+ - Experimental Safari support with browser selection flags
9
+ - Auto-activate toggle and bulk activation support in the extension popup
10
+
11
+ ### Fixed
12
+ - Published packages now ship a real `extension/manifest.json` for unpacked Chrome installs
13
+ - Setup can auto-detect unpacked Tab Agent extensions from Chrome profile preferences
14
+ - Status output shows the detected extension path and native host path for easier debugging
15
+ - README install steps now call out `@latest` usage to avoid stale cached `npx` installs
16
+
17
+ ## [0.1.0] - 2026-01-30
18
+
19
+ ### Added
20
+ - Chrome extension (Manifest V3)
21
+ - WebSocket relay server
22
+ - Native messaging host
23
+ - AI-readable page snapshots
24
+ - DOM actions: click, type, fill, press, select, hover, scroll
25
+ - Screenshot capture
26
+ - Multi-tab support
27
+ - Audit logging
28
+ - Claude Code skill
29
+ - Codex skill
package/README.md CHANGED
@@ -1,16 +1,21 @@
1
+ ![Tab Agent](./banner-pixel-1.svg)
2
+
1
3
  # Tab Agent
2
4
 
3
5
  [![npm version](https://img.shields.io/npm/v/tab-agent.svg)](https://www.npmjs.com/package/tab-agent)
6
+ [![Chrome](https://img.shields.io/badge/Chrome-Extension-4285F4?logo=googlechrome&logoColor=white)](https://github.com/DrHB/tab-agent)
4
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
8
 
6
- **Give LLMs full control of your browser** — securely, with click-to-activate permission.
9
+ **Let Claude & Codex browse as YOU** — uses your existing logins, click-to-activate security.
7
10
 
8
- Works with Claude, ChatGPT, Codex, and any AI that can run shell commands.
11
+ > No headless browser. No re-authenticating. Your AI uses your actual Chrome sessions.
12
+ >
13
+ > **Safari support is experimental** — [see below](#experimental-safari-support)
9
14
 
10
15
  ```
11
16
  ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
12
- │ Claude / GPT │────▶│ Relay Server │────▶│ Extension │
13
- Codex / LLM │◀────│ (background) │◀────│ (Chrome) │
17
+ │ Claude Code │────▶│ Relay Server │────▶│ Extension │
18
+ or Codex │◀────│ (background) │◀────│ (Chrome) │
14
19
  └─────────────────┘ └─────────────────┘ └─────────────────┘
15
20
 
16
21
 
@@ -20,30 +25,65 @@ Works with Claude, ChatGPT, Codex, and any AI that can run shell commands.
20
25
  └───────────────────┘
21
26
  ```
22
27
 
28
+ ## Why Tab Agent?
29
+
30
+ Most browser automation tools (Playwright, Puppeteer, agent-browser) spin up a **fresh headless browser** — you start logged out of everything, and many sites actively block them. They detect headless browsers through missing plugins, `navigator.webdriver` flags, and other fingerprints.
31
+
32
+ Tab Agent is different — it uses your real Chrome with your real cookies:
33
+
34
+ ```
35
+ +---------------+--------------------------------------+----------------------------------+
36
+ | | Tab Agent | Headless Browsers |
37
+ +---------------+--------------------------------------+----------------------------------+
38
+ | Your logins | (+) Uses existing sessions | (-) Must re-authenticate |
39
+ | Security | (+) Click to activate specific tabs | (-) Full browser access |
40
+ | Privacy | (+) Credentials never leave browser | (-) Agent sees everything |
41
+ | Detection | (+) Real browser, real cookies | (-) Often blocked by anti-bot |
42
+ | Setup | (+) Uses your Chrome | (-) Downloads separate browser |
43
+ | Visibility | (+) Watch in real browser | (-) Runs hidden/headless |
44
+ +---------------+--------------------------------------+----------------------------------+
45
+ ```
46
+
47
+ **Use Tab Agent when:** Claude or Codex needs to browse as "you" — shopping with your Prime account, checking your GitHub notifications, using sites you're already logged into.
48
+
49
+ **Use headless browsers when:** CI/CD automation, web scraping, or testing with fresh sessions.
50
+
23
51
  ## Features
24
52
 
25
- - **Full browser control** — navigate, click, type, scroll, screenshot, run JavaScript
53
+ - **Full browser control** — navigate, click, type, scroll, hover, drag, screenshot, PDF, run JavaScript
26
54
  - **Uses your login sessions** — access GitHub, Gmail, Amazon without sharing credentials
27
- - **Runs in background** — relay starts automatically, works while you do other things
55
+ - **Runs in background** — run `npx tab-agent@latest start` once, then use commands while it stays running
28
56
  - **Click-to-activate security** — only tabs you explicitly enable, others stay private
29
57
  - **AI-optimized snapshots** — pages converted to text with refs `[e1]`, `[e2]` for easy targeting
30
- - **Works with any LLM** Claude, ChatGPT, Codex, or custom AI agents
58
+ - **Works with Claude Code & Codex** — installs skills automatically
59
+ - **Auto-launches Chrome** — starts Chrome with profile selection if not already running
31
60
 
32
61
  ## Quick Start
33
62
 
34
63
  ```bash
35
- # 1. Install extension
64
+ # 1. Get the extension files
36
65
  git clone https://github.com/DrHB/tab-agent
66
+ cd tab-agent
37
67
  # Chrome: chrome://extensions → Developer mode → Load unpacked → select extension/
68
+ # The extension/ folder must contain manifest.json
38
69
 
39
70
  # 2. Setup
40
- npx tab-agent setup
71
+ npx tab-agent@latest setup
72
+
73
+ # 3. Start relay
74
+ npx tab-agent@latest start
41
75
 
42
- # 3. Activate & go
76
+ # 4. Activate & go
43
77
  # Click extension icon on any tab (turns green)
44
- # Ask your AI: "Search Amazon for mechanical keyboards and find the best rated"
78
+ # Ask Claude/Codex: "Search Amazon for mechanical keyboards and find the best rated"
45
79
  ```
46
80
 
81
+ If you have an older cached `npx` install, keep using `@latest` for `setup` and `start` so the CLI/runtime matches the current extension release.
82
+
83
+ <p align="center">
84
+ <img src="assets/toggle-demo.gif" alt="Pin extension and toggle on/off" width="600">
85
+ </p>
86
+
47
87
  ## Example Tasks
48
88
 
49
89
  ```bash
@@ -72,15 +112,41 @@ npx tab-agent click <ref> # Click element (e.g., click e5)
72
112
  npx tab-agent type <ref> <text> # Type into element
73
113
  npx tab-agent fill <ref> <value> # Fill form field
74
114
 
75
- # Navigation
115
+ # Navigation & interaction
76
116
  npx tab-agent navigate <url> # Go to URL
77
117
  npx tab-agent scroll <dir> [amount] # Scroll up/down
78
118
  npx tab-agent press <key> # Press key (Enter, Escape, Tab)
119
+ npx tab-agent hover <ref> # Hover over element
120
+ npx tab-agent select <ref> <value> # Select dropdown option
121
+ npx tab-agent drag <from> <to> # Drag and drop between elements
122
+
123
+ # Data extraction
124
+ npx tab-agent get text <ref> # Get element text
125
+ npx tab-agent get value <ref> # Get input value
126
+ npx tab-agent get attr <ref> href # Get element attribute
127
+ npx tab-agent get url # Get current URL
128
+ npx tab-agent get title # Get page title
129
+
130
+ # Semantic locators
131
+ npx tab-agent find text "Submit" # Find elements by text
132
+ npx tab-agent find role button # Find by ARIA role
133
+ npx tab-agent find label "Email" # Find by label text
134
+ npx tab-agent find placeholder "Search" # Find by placeholder
79
135
 
80
136
  # Utilities
81
137
  npx tab-agent tabs # List active tabs
82
138
  npx tab-agent wait <text> # Wait for text to appear
83
- npx tab-agent screenshot # Capture page (fallback for complex UIs)
139
+ npx tab-agent wait --url <pattern> # Wait for URL to match
140
+ npx tab-agent wait --visible <ref> # Wait for element to be visible
141
+ npx tab-agent screenshot [--full] # Capture page (fallback for complex UIs)
142
+ npx tab-agent pdf [filename.pdf] # Save page as PDF
143
+ npx tab-agent evaluate <script> # Run JavaScript
144
+ npx tab-agent cookies get # View cookies
145
+ npx tab-agent cookies clear # Clear cookies
146
+ npx tab-agent storage get [key] # Read localStorage
147
+ npx tab-agent storage set <key> <val> # Write localStorage
148
+ npx tab-agent storage remove <key> # Remove localStorage key (or use rm)
149
+ npx tab-agent storage clear # Clear localStorage
84
150
  ```
85
151
 
86
152
  **Workflow:** `snapshot` → use refs → `click`/`type` → `snapshot` again → repeat
@@ -91,35 +157,118 @@ npx tab-agent screenshot # Capture page (fallback for complex UIs)
91
157
 
92
158
  ```bash
93
159
  git clone https://github.com/DrHB/tab-agent
160
+ cd tab-agent
94
161
  ```
95
162
 
96
163
  1. Open `chrome://extensions`
97
164
  2. Enable **Developer mode** (top right)
98
165
  3. Click **Load unpacked**
99
166
  4. Select the `extension/` folder
167
+ 5. Confirm the selected `extension/` folder contains `manifest.json`
100
168
 
101
169
  ### 2. Run Setup
102
170
 
103
171
  ```bash
104
- npx tab-agent setup
172
+ npx tab-agent@latest setup
105
173
  ```
106
174
 
107
- This auto-detects your extension and configures everything.
175
+ This auto-detects unpacked and installed Tab Agent extensions, installs the native host, and configures everything.
108
176
 
109
- ### 3. Activate Tabs
177
+ ### 3. Start Relay
178
+
179
+ ```bash
180
+ npx tab-agent@latest start
181
+ ```
182
+
183
+ Keep this running in a terminal while you use `tab-agent` commands.
184
+
185
+ ### 4. Activate Tabs
110
186
 
111
187
  Click the Tab Agent icon on any tab you want to control. Green = active.
112
188
 
113
- ## Security Model
189
+ ## Auto-Launch Chrome
190
+
191
+ When you run `npx tab-agent@latest start`, Chrome is automatically launched if it's not already running. You'll be prompted to select which Chrome profile to use:
192
+
193
+ ```
194
+ Chrome is not running.
195
+
196
+ Chrome Profiles:
197
+ 1. Person 1 (Default)
198
+ 2. Work (Profile 1)
199
+ 3. Personal (Profile 2)
200
+
201
+ Select profile [1-3]:
202
+ ```
114
203
 
115
- | Feature | Tab Agent | Traditional Automation |
116
- |---------|--------------|----------------------|
117
- | **Access** | Only tabs you click to activate | Entire browser |
118
- | **Sessions** | Uses your cookies | Requires credentials |
119
- | **Visibility** | Green badge shows active tabs | Hidden/background |
120
- | **Control** | You choose what AI can access | Full access by default |
204
+ ### Non-Interactive Mode
121
205
 
122
- Your banking, email, and sensitive tabs stay completely isolated unless you explicitly activate them.
206
+ Skip the prompt with the `--profile` flag:
207
+
208
+ ```bash
209
+ npx tab-agent@latest start --profile="Profile 1"
210
+ npx tab-agent@latest start --profile="Work"
211
+ ```
212
+
213
+ The flag matches by profile name or directory name (case-insensitive). If Chrome is already running, the prompt is skipped entirely.
214
+
215
+ ## Experimental Safari Support
216
+
217
+ > **Note:** Safari support is experimental and requires source build steps. Relay + CLI routing work, but Safari packaging is still manual in Xcode.
218
+
219
+ ### Prerequisites
220
+
221
+ - macOS 14+ (Sonoma or later)
222
+ - Xcode 15+ (free from App Store)
223
+ - Node.js 18+
224
+
225
+ ### Setup Steps
226
+
227
+ 1. **Start the relay server** (in a terminal):
228
+ ```bash
229
+ npx tab-agent@latest start
230
+ ```
231
+
232
+ 2. **Open the Safari Swift package in Xcode**:
233
+ ```bash
234
+ open -a Xcode safari/Package.swift
235
+ ```
236
+
237
+ 3. **Create/configure a Safari Web Extension target in Xcode**:
238
+ - File → New → Target → Safari Web Extension
239
+ - Point it to the shared extension assets in `extension/`
240
+ - Use `extension/manifest.safari.json` settings as the Safari manifest baseline
241
+
242
+ 4. **Run the app target**:
243
+ - Click "Run" in Xcode (⌘R)
244
+
245
+ 5. **Enable the extension in Safari**:
246
+ - Safari → Settings → Extensions
247
+ - Check "Tab Agent"
248
+
249
+ 6. **Enable unsigned extensions** (required after each Safari restart):
250
+ - Safari → Develop → Allow Unsigned Extensions
251
+
252
+ 7. **Test the connection**:
253
+ ```bash
254
+ npx tab-agent tabs --browser=safari
255
+ ```
256
+
257
+ ### Using with Both Browsers
258
+
259
+ If you have both Chrome and Safari set up:
260
+
261
+ ```bash
262
+ # Auto-detect (uses whichever has activated tabs)
263
+ npx tab-agent snapshot
264
+
265
+ # Explicitly target a browser
266
+ npx tab-agent snapshot --browser=safari
267
+ npx tab-agent snapshot --browser=chrome
268
+
269
+ # See tabs from all browsers
270
+ npx tab-agent tabs
271
+ ```
123
272
 
124
273
  ## Supported Browsers
125
274
 
@@ -127,30 +276,37 @@ Your banking, email, and sensitive tabs stay completely isolated unless you expl
127
276
  - Brave
128
277
  - Microsoft Edge
129
278
  - Chromium
279
+ - Safari (experimental, manual source setup)
130
280
 
131
281
  ## Troubleshooting
132
282
 
133
283
  **Extension not detected?**
134
284
  - Make sure Developer mode is enabled in chrome://extensions
135
285
  - Reload the extension
286
+ - Run `npx tab-agent@latest setup` again after reloading so the native host is paired to the current extension ID
136
287
 
137
288
  **Commands not working?**
289
+ - Make sure relay is running: `npx tab-agent@latest start`
138
290
  - Click the extension icon — must show green "ON"
139
291
  - Run `npx tab-agent status` to check configuration
140
292
 
293
+ **Fresh machine install still acting like an older release?**
294
+ - Use `npx tab-agent@latest setup` and `npx tab-agent@latest start` to bypass stale cached `npx` installs
295
+ - Make sure the unpacked extension and the CLI/runtime come from the same release family
296
+
141
297
  **No active tabs?**
142
298
  - Activate at least one tab by clicking the extension icon
143
299
 
144
300
  ## How It Works
145
301
 
146
302
  1. **Chrome Extension** — Injects into activated tabs, captures DOM snapshots
147
- 2. **Relay Server** — Bridges AI ↔ Extension via Chrome Native Messaging (runs in background)
148
- 3. **CLI** — Simple commands that any LLM can execute
303
+ 2. **Relay Server** — Bridges AI ↔ Extension via native messaging (run with `npx tab-agent@latest start`)
304
+ 3. **CLI** — Simple commands for Claude Code and Codex
149
305
 
150
306
  ```
151
307
  You: "Find cheap flights to Tokyo"
152
308
 
153
- LLM → npx tab-agent navigate "google.com/flights"
309
+ Claude → npx tab-agent navigate "google.com/flights"
154
310
  → npx tab-agent snapshot
155
311
  → npx tab-agent type e5 "Tokyo"
156
312
  → npx tab-agent click e12
@@ -160,7 +316,3 @@ LLM → npx tab-agent navigate "google.com/flights"
160
316
  ## License
161
317
 
162
318
  MIT
163
-
164
- ---
165
-
166
- **Keywords:** browser agent, browser automation, AI browser control, Claude browser, ChatGPT browser, LLM web automation, Codex browser, puppeteer alternative, playwright alternative
package/bin/tab-agent.js CHANGED
@@ -1,8 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
  const command = process.argv[2];
3
+ const hasHelpFlag = process.argv.includes('--help') || process.argv.includes('-h');
3
4
 
4
5
  // Commands that go to the command module
5
- const BROWSER_COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate'];
6
+ const BROWSER_COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate', 'hover', 'select', 'drag', 'get', 'find', 'cookies', 'storage', 'pdf'];
6
7
 
7
8
  if (command === '-v' || command === '--version') {
8
9
  console.log(require('../package.json').version);
@@ -13,6 +14,9 @@ if (BROWSER_COMMANDS.includes(command)) {
13
14
  const { runCommand } = require('../cli/command.js');
14
15
  runCommand(process.argv.slice(2));
15
16
  } else {
17
+ if (hasHelpFlag) {
18
+ showHelp(0);
19
+ }
16
20
  switch (command) {
17
21
  case 'setup':
18
22
  require('../cli/setup.js');
@@ -28,9 +32,9 @@ if (BROWSER_COMMANDS.includes(command)) {
28
32
  }
29
33
  }
30
34
 
31
- function showHelp() {
35
+ function showHelp(exitCode = null) {
32
36
  console.log(`
33
- tabpilot - Give LLMs full control of your browser
37
+ tab-agent - Give LLMs full control of your browser
34
38
 
35
39
  Setup:
36
40
  setup Auto-detect extension, configure native messaging
@@ -48,18 +52,29 @@ Browser Control:
48
52
  tabs List active tabs
49
53
  wait <text|selector> Wait for text or element
50
54
  screenshot [--full] Capture page (fallback)
55
+ hover <ref> Hover over element
56
+ select <ref> <value> Select dropdown option
57
+ drag <from> <to> Drag element to another
58
+ get <prop> [ref] [attr] Get text, value, attr, url, title
59
+ find <by> <query> Find by text, role, label, placeholder, selector
60
+ cookies <get|clear> View or clear cookies
61
+ storage <get|set|remove|rm|clear> Manage localStorage/sessionStorage
62
+ pdf [filename.pdf] Save page as PDF
51
63
 
52
64
  Workflow: snapshot → click/type → snapshot → repeat
53
65
 
54
66
  Examples:
55
- npx tabpilot setup
56
- npx tabpilot snapshot
57
- npx tabpilot click e5
58
- npx tabpilot type e3 "hello world"
59
- npx tabpilot navigate "https://google.com"
67
+ npx tab-agent setup
68
+ npx tab-agent snapshot
69
+ npx tab-agent click e5
70
+ npx tab-agent type e3 "hello world"
71
+ npx tab-agent navigate "https://google.com"
60
72
 
61
73
  Version: ${require('../package.json').version}
62
74
  `);
75
+ if (typeof exitCode === 'number') {
76
+ process.exit(exitCode);
77
+ }
63
78
  if (command && command !== 'help' && command !== '--help' && command !== '-h') {
64
79
  process.exit(1);
65
80
  }
package/cli/command.js CHANGED
@@ -1,9 +1,23 @@
1
1
  // cli/command.js
2
2
  const WebSocket = require('ws');
3
3
 
4
- const COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate'];
4
+ const COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate', 'hover', 'select', 'drag', 'get', 'find', 'cookies', 'storage', 'pdf'];
5
5
 
6
6
  async function runCommand(args) {
7
+ // Extract --browser flag
8
+ let targetBrowser = null;
9
+ const browserFlagIndex = args.findIndex(a => a === '--browser' || a.startsWith('--browser='));
10
+ if (browserFlagIndex !== -1) {
11
+ const flag = args[browserFlagIndex];
12
+ if (flag.includes('=')) {
13
+ targetBrowser = flag.split('=')[1];
14
+ } else if (args[browserFlagIndex + 1]) {
15
+ targetBrowser = args[browserFlagIndex + 1];
16
+ args.splice(browserFlagIndex + 1, 1);
17
+ }
18
+ args.splice(browserFlagIndex, 1);
19
+ }
20
+
7
21
  const [command, ...params] = args;
8
22
 
9
23
  if (!command || command === 'help') {
@@ -37,10 +51,10 @@ async function runCommand(args) {
37
51
 
38
52
  // First get tabs to find tabId
39
53
  if (command === 'tabs') {
40
- ws.send(JSON.stringify({ id: 1, action: 'tabs' }));
54
+ ws.send(JSON.stringify({ id: 1, action: 'tabs', browser: targetBrowser }));
41
55
  } else {
42
56
  // Get active tab first, then run command
43
- ws.send(JSON.stringify({ id: 0, action: 'tabs' }));
57
+ ws.send(JSON.stringify({ id: 0, action: 'tabs', browser: targetBrowser }));
44
58
  }
45
59
  });
46
60
 
@@ -57,7 +71,7 @@ async function runCommand(args) {
57
71
 
58
72
  const tabId = msg.tabs[0].tabId;
59
73
  const payload = buildPayload(command, params, tabId);
60
- ws.send(JSON.stringify({ id: 1, ...payload }));
74
+ ws.send(JSON.stringify({ id: 1, ...payload, browser: targetBrowser }));
61
75
  return;
62
76
  }
63
77
 
@@ -69,6 +83,10 @@ async function runCommand(args) {
69
83
  printSnapshot(msg);
70
84
  } else if (command === 'screenshot') {
71
85
  printScreenshot(msg);
86
+ } else if (command === 'pdf') {
87
+ printPdf(msg);
88
+ } else if (command === 'find') {
89
+ printFind(msg);
72
90
  } else {
73
91
  printResult(msg);
74
92
  }
@@ -108,10 +126,14 @@ function buildPayload(command, params, tabId) {
108
126
  payload.url = params[0];
109
127
  break;
110
128
  case 'wait':
111
- if (params[0]?.startsWith('.') || params[0]?.startsWith('#')) {
129
+ if (params[0] === '--url') {
130
+ payload.urlPattern = params[1];
131
+ } else if (params[0] === '--visible') {
132
+ payload.visibleRef = params[1];
133
+ } else if (params[0]?.startsWith('.') || params[0]?.startsWith('#')) {
112
134
  payload.selector = params[0];
113
135
  } else {
114
- payload.text = params.join(' ');
136
+ payload.text = params.filter(p => !/^\d+$/.test(p)).join(' ');
115
137
  }
116
138
  payload.timeout = parseInt(params.find(p => /^\d+$/.test(p))) || 5000;
117
139
  break;
@@ -123,6 +145,39 @@ function buildPayload(command, params, tabId) {
123
145
  payload.fullPage = true;
124
146
  }
125
147
  break;
148
+ case 'hover':
149
+ payload.ref = params[0];
150
+ break;
151
+ case 'select':
152
+ payload.ref = params[0];
153
+ payload.value = params.slice(1).join(' ');
154
+ break;
155
+ case 'drag':
156
+ payload.fromRef = params[0];
157
+ payload.toRef = params[1];
158
+ break;
159
+ case 'get':
160
+ payload.subcommand = params[0]; // text, html, value, attr, url, title
161
+ payload.ref = params[1];
162
+ payload.attr = params[2]; // for get attr <ref> <name>
163
+ break;
164
+ case 'find':
165
+ payload.by = params[0]; // text, role, label, placeholder, selector
166
+ payload.query = params.slice(1).join(' ');
167
+ break;
168
+ case 'cookies':
169
+ payload.subcommand = params[0]; // get, clear
170
+ break;
171
+ case 'storage':
172
+ payload.subcommand = params[0] === 'rm' ? 'remove' : params[0]; // get, set, remove, clear
173
+ payload.storageType = params.includes('--session') ? 'session' : 'local';
174
+ // Filter out --session flag
175
+ const storageParams = params.filter(p => p !== '--session');
176
+ payload.key = storageParams[1];
177
+ payload.value = storageParams.slice(2).join(' ');
178
+ break;
179
+ case 'pdf':
180
+ break;
126
181
  }
127
182
 
128
183
  return payload;
@@ -146,14 +201,32 @@ Commands:
146
201
  wait <text|selector> Wait for text or element
147
202
  screenshot [--full] Capture page (fallback)
148
203
  evaluate <script> Run JavaScript
204
+ hover <ref> Hover over element
205
+ select <ref> <value> Select dropdown option
206
+ drag <from> <to> Drag element to another
207
+ get <prop> [ref] [attr] Get text, value, attr, url, title
208
+ find <by> <query> Find by text, role, label, placeholder, selector
209
+ cookies <get|clear> View or clear cookies
210
+ storage <get|set|remove|rm|clear> Manage localStorage/sessionStorage
211
+ pdf [filename.pdf] Save page as PDF
212
+
213
+ Options:
214
+ --browser=<chrome|safari> Target specific browser
149
215
 
150
216
  Workflow: snapshot → click/type → snapshot → repeat
151
217
 
152
218
  Examples:
153
219
  npx tab-agent snapshot
154
220
  npx tab-agent click e5
155
- npx tab-agent type e3 "hello world"
156
- npx tab-agent navigate "https://google.com"
221
+ npx tab-agent get text e3
222
+ npx tab-agent find text "Submit"
223
+ npx tab-agent find role button
224
+ npx tab-agent pdf page.pdf
225
+ npx tab-agent drag e1 e5
226
+ npx tab-agent hover e3
227
+ npx tab-agent select e7 "Option 2"
228
+ npx tab-agent cookies get
229
+ npx tab-agent storage get myKey
157
230
  `);
158
231
  }
159
232
 
@@ -164,7 +237,8 @@ function printTabs(msg) {
164
237
  }
165
238
  console.log('Active tabs:\n');
166
239
  msg.tabs.forEach((tab, i) => {
167
- console.log(` ${i + 1}. [${tab.tabId}] ${tab.title}`);
240
+ const browserTag = tab.browser ? `[${tab.browser}] ` : '';
241
+ console.log(` ${i + 1}. ${browserTag}[${tab.tabId}] ${tab.title}`);
168
242
  console.log(` ${tab.url}\n`);
169
243
  });
170
244
  }
@@ -186,6 +260,36 @@ function printScreenshot(msg) {
186
260
  console.log(msg.screenshot);
187
261
  }
188
262
 
263
+ function printPdf(msg) {
264
+ if (!msg.ok) {
265
+ console.error('Error:', msg.error);
266
+ return;
267
+ }
268
+ // Check if a filename was given via params
269
+ const outFile = process.argv.find(a => a.endsWith('.pdf'));
270
+ if (outFile) {
271
+ require('fs').writeFileSync(outFile, Buffer.from(msg.pdf, 'base64'));
272
+ console.log(`PDF saved to ${outFile}`);
273
+ } else {
274
+ console.log(msg.pdf);
275
+ }
276
+ }
277
+
278
+ function printFind(msg) {
279
+ if (!msg.ok) {
280
+ console.error('Error:', msg.error);
281
+ return;
282
+ }
283
+ if (msg.results.length === 0) {
284
+ console.log('No matches found.');
285
+ return;
286
+ }
287
+ console.log(`Found ${msg.count} match${msg.count !== 1 ? 'es' : ''}:\n`);
288
+ msg.results.forEach(r => {
289
+ console.log(` [${r.ref}] ${r.role} "${r.name}"`);
290
+ });
291
+ }
292
+
189
293
  function printResult(msg) {
190
294
  if (!msg.ok) {
191
295
  console.error('Error:', msg.error);