tab-agent 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46) hide show
  1. package/CHANGELOG.md +29 -0
  2. package/README.md +179 -25
  3. package/bin/tab-agent.js +23 -8
  4. package/cli/command.js +113 -9
  5. package/cli/detect-extension.js +96 -14
  6. package/cli/launch-chrome.js +150 -0
  7. package/cli/setup.js +57 -22
  8. package/cli/start.js +65 -13
  9. package/cli/status.js +41 -7
  10. package/extension/content-script.js +218 -17
  11. package/extension/manifest.json +4 -3
  12. package/extension/manifest.safari.json +45 -0
  13. package/extension/popup/popup.html +58 -1
  14. package/extension/popup/popup.js +18 -0
  15. package/extension/service-worker.js +106 -13
  16. package/package.json +14 -3
  17. package/relay/install-native-host.sh +2 -2
  18. package/relay/native-host-wrapper.sh +1 -1
  19. package/relay/native-host.js +3 -1
  20. package/relay/server.js +124 -17
  21. package/skills/claude-code/tab-agent/SKILL.md +92 -0
  22. package/skills/codex/tab-agent/SKILL.md +92 -0
  23. package/relay/node_modules/.package-lock.json +0 -29
  24. package/relay/node_modules/ws/LICENSE +0 -20
  25. package/relay/node_modules/ws/README.md +0 -548
  26. package/relay/node_modules/ws/browser.js +0 -8
  27. package/relay/node_modules/ws/index.js +0 -13
  28. package/relay/node_modules/ws/lib/buffer-util.js +0 -131
  29. package/relay/node_modules/ws/lib/constants.js +0 -19
  30. package/relay/node_modules/ws/lib/event-target.js +0 -292
  31. package/relay/node_modules/ws/lib/extension.js +0 -203
  32. package/relay/node_modules/ws/lib/limiter.js +0 -55
  33. package/relay/node_modules/ws/lib/permessage-deflate.js +0 -528
  34. package/relay/node_modules/ws/lib/receiver.js +0 -706
  35. package/relay/node_modules/ws/lib/sender.js +0 -602
  36. package/relay/node_modules/ws/lib/stream.js +0 -161
  37. package/relay/node_modules/ws/lib/subprotocol.js +0 -62
  38. package/relay/node_modules/ws/lib/validation.js +0 -152
  39. package/relay/node_modules/ws/lib/websocket-server.js +0 -554
  40. package/relay/node_modules/ws/lib/websocket.js +0 -1393
  41. package/relay/node_modules/ws/package.json +0 -69
  42. package/relay/node_modules/ws/wrapper.mjs +0 -8
  43. package/relay/package-lock.json +0 -36
  44. package/relay/package.json +0 -12
  45. package/skills/claude-code/tab-agent.md +0 -57
  46. package/skills/codex/tab-agent.md +0 -38
package/CHANGELOG.md ADDED
@@ -0,0 +1,29 @@
1
+ # Changelog
2
+
3
+ ## [0.4.0] - 2026-03-14
4
+
5
+ ### Added
6
+ - Auto-launch Chrome with profile detection, saved defaults, and `--profile`
7
+ - New browser commands: `hover`, `select`, `drag`, `get`, `find`, `cookies`, `storage`, and `pdf`
8
+ - Experimental Safari support with browser selection flags
9
+ - Auto-activate toggle and bulk activation support in the extension popup
10
+
11
+ ### Fixed
12
+ - Published packages now ship a real `extension/manifest.json` for unpacked Chrome installs
13
+ - Setup can auto-detect unpacked Tab Agent extensions from Chrome profile preferences
14
+ - Status output shows the detected extension path and native host path for easier debugging
15
+ - README install steps now call out `@latest` usage to avoid stale cached `npx` installs
16
+
17
+ ## [0.1.0] - 2026-01-30
18
+
19
+ ### Added
20
+ - Chrome extension (Manifest V3)
21
+ - WebSocket relay server
22
+ - Native messaging host
23
+ - AI-readable page snapshots
24
+ - DOM actions: click, type, fill, press, select, hover, scroll
25
+ - Screenshot capture
26
+ - Multi-tab support
27
+ - Audit logging
28
+ - Claude Code skill
29
+ - Codex skill
package/README.md CHANGED
@@ -1,9 +1,16 @@
1
+ ![Tab Agent](./banner-pixel-1.svg)
2
+
1
3
  # Tab Agent
2
4
 
3
5
  [![npm version](https://img.shields.io/npm/v/tab-agent.svg)](https://www.npmjs.com/package/tab-agent)
6
+ [![Chrome](https://img.shields.io/badge/Chrome-Extension-4285F4?logo=googlechrome&logoColor=white)](https://github.com/DrHB/tab-agent)
4
7
  [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
8
 
6
- **Browser control for Claude Code and Codex** — click-to-activate security.
9
+ **Let Claude & Codex browse as YOU** — uses your existing logins, click-to-activate security.
10
+
11
+ > No headless browser. No re-authenticating. Your AI uses your actual Chrome sessions.
12
+ >
13
+ > **Safari support is experimental** — [see below](#experimental-safari-support)
7
14
 
8
15
  ```
9
16
  ┌─────────────────┐ ┌─────────────────┐ ┌─────────────────┐
@@ -18,30 +25,65 @@
18
25
  └───────────────────┘
19
26
  ```
20
27
 
28
+ ## Why Tab Agent?
29
+
30
+ Most browser automation tools (Playwright, Puppeteer, agent-browser) spin up a **fresh headless browser** — you start logged out of everything, and many sites actively block them. These sites detect headless browsers through missing plugins, `navigator.webdriver` flags, and other fingerprints.
31
+
32
+ Tab Agent is different — it uses your real Chrome with your real cookies:
33
+
34
+ ```
35
+ +---------------+--------------------------------------+----------------------------------+
36
+ | | Tab Agent | Headless Browsers |
37
+ +---------------+--------------------------------------+----------------------------------+
38
+ | Your logins | (+) Uses existing sessions | (-) Must re-authenticate |
39
+ | Security | (+) Click to activate specific tabs | (-) Full browser access |
40
+ | Privacy | (+) Credentials never leave browser | (-) Agent sees everything |
41
+ | Detection | (+) Real browser, real cookies | (-) Often blocked by anti-bot |
42
+ | Setup | (+) Uses your Chrome | (-) Downloads separate browser |
43
+ | Visibility | (+) Watch in real browser | (-) Runs hidden/headless |
44
+ +---------------+--------------------------------------+----------------------------------+
45
+ ```
46
+
47
+ **Use Tab Agent when:** Claude or Codex needs to browse as "you" — shopping with your Prime account, checking your GitHub notifications, using sites you're already logged into.
48
+
49
+ **Use headless browsers when:** CI/CD automation, web scraping, or testing with fresh sessions.
50
+
21
51
  ## Features
22
52
 
23
- - **Full browser control** — navigate, click, type, scroll, screenshot, run JavaScript
53
+ - **Full browser control** — navigate, click, type, scroll, hover, drag, screenshot, PDF, run JavaScript
24
54
  - **Uses your login sessions** — access GitHub, Gmail, Amazon without sharing credentials
25
- - **Runs in background** — relay starts automatically, works while you do other things
55
+ - **Runs in background** — run `npx tab-agent@latest start` once, then use commands while it stays running
26
56
  - **Click-to-activate security** — only tabs you explicitly enable, others stay private
27
57
  - **AI-optimized snapshots** — pages converted to text with refs `[e1]`, `[e2]` for easy targeting
28
58
  - **Works with Claude Code & Codex** — installs skills automatically
59
+ - **Auto-launches Chrome** — starts Chrome with profile selection if not already running
29
60
 
30
61
  ## Quick Start
31
62
 
32
63
  ```bash
33
- # 1. Install extension
64
+ # 1. Get the extension files
34
65
  git clone https://github.com/DrHB/tab-agent
66
+ cd tab-agent
35
67
  # Chrome: chrome://extensions → Developer mode → Load unpacked → select extension/
68
+ # The extension/ folder must contain manifest.json
36
69
 
37
70
  # 2. Setup
38
- npx tab-agent setup
71
+ npx tab-agent@latest setup
39
72
 
40
- # 3. Activate & go
73
+ # 3. Start relay
74
+ npx tab-agent@latest start
75
+
76
+ # 4. Activate & go
41
77
  # Click extension icon on any tab (turns green)
42
- # Ask Claude: "Search Amazon for mechanical keyboards and find the best rated"
78
+ # Ask Claude/Codex: "Search Amazon for mechanical keyboards and find the best rated"
43
79
  ```
44
80
 
81
+ If you have an older cached `npx` install, keep using `@latest` for `setup` and `start` so the CLI/runtime matches the current extension release.
82
+
83
+ <p align="center">
84
+ <img src="assets/toggle-demo.gif" alt="Pin extension and toggle on/off" width="600">
85
+ </p>
86
+
45
87
  ## Example Tasks
46
88
 
47
89
  ```bash
@@ -70,15 +112,41 @@ npx tab-agent click <ref> # Click element (e.g., click e5)
70
112
  npx tab-agent type <ref> <text> # Type into element
71
113
  npx tab-agent fill <ref> <value> # Fill form field
72
114
 
73
- # Navigation
115
+ # Navigation & interaction
74
116
  npx tab-agent navigate <url> # Go to URL
75
117
  npx tab-agent scroll <dir> [amount] # Scroll up/down
76
118
  npx tab-agent press <key> # Press key (Enter, Escape, Tab)
119
+ npx tab-agent hover <ref> # Hover over element
120
+ npx tab-agent select <ref> <value> # Select dropdown option
121
+ npx tab-agent drag <from> <to> # Drag and drop between elements
122
+
123
+ # Data extraction
124
+ npx tab-agent get text <ref> # Get element text
125
+ npx tab-agent get value <ref> # Get input value
126
+ npx tab-agent get attr <ref> href # Get element attribute
127
+ npx tab-agent get url # Get current URL
128
+ npx tab-agent get title # Get page title
129
+
130
+ # Semantic locators
131
+ npx tab-agent find text "Submit" # Find elements by text
132
+ npx tab-agent find role button # Find by ARIA role
133
+ npx tab-agent find label "Email" # Find by label text
134
+ npx tab-agent find placeholder "Search" # Find by placeholder
77
135
 
78
136
  # Utilities
79
137
  npx tab-agent tabs # List active tabs
80
138
  npx tab-agent wait <text> # Wait for text to appear
81
- npx tab-agent screenshot # Capture page (fallback for complex UIs)
139
+ npx tab-agent wait --url <pattern> # Wait for URL to match
140
+ npx tab-agent wait --visible <ref> # Wait for element to be visible
141
+ npx tab-agent screenshot [--full] # Capture page (fallback for complex UIs)
142
+ npx tab-agent pdf [filename.pdf] # Save page as PDF
143
+ npx tab-agent evaluate <script> # Run JavaScript
144
+ npx tab-agent cookies get # View cookies
145
+ npx tab-agent cookies clear # Clear cookies
146
+ npx tab-agent storage get [key] # Read localStorage
147
+ npx tab-agent storage set <key> <val> # Write localStorage
148
+ npx tab-agent storage remove <key> # Remove localStorage key (or use rm)
149
+ npx tab-agent storage clear # Clear localStorage
82
150
  ```
83
151
 
84
152
  **Workflow:** `snapshot` → use refs → `click`/`type` → `snapshot` again → repeat
@@ -89,35 +157,118 @@ npx tab-agent screenshot # Capture page (fallback for complex UIs)
89
157
 
90
158
  ```bash
91
159
  git clone https://github.com/DrHB/tab-agent
160
+ cd tab-agent
92
161
  ```
93
162
 
94
163
  1. Open `chrome://extensions`
95
164
  2. Enable **Developer mode** (top right)
96
165
  3. Click **Load unpacked**
97
166
  4. Select the `extension/` folder
167
+ 5. Confirm the selected `extension/` folder contains `manifest.json`
98
168
 
99
169
  ### 2. Run Setup
100
170
 
101
171
  ```bash
102
- npx tab-agent setup
172
+ npx tab-agent@latest setup
103
173
  ```
104
174
 
105
- This auto-detects your extension and configures everything.
175
+ This auto-detects unpacked and installed Tab Agent extensions, installs the native host, and configures everything.
106
176
 
107
- ### 3. Activate Tabs
177
+ ### 3. Start Relay
178
+
179
+ ```bash
180
+ npx tab-agent@latest start
181
+ ```
182
+
183
+ Keep this running in a terminal while you use `tab-agent` commands.
184
+
185
+ ### 4. Activate Tabs
108
186
 
109
187
  Click the Tab Agent icon on any tab you want to control. Green = active.
110
188
 
111
- ## Security Model
189
+ ## Auto-Launch Chrome
190
+
191
+ When you run `npx tab-agent@latest start`, Chrome is automatically launched if it's not already running. You'll be prompted to select which Chrome profile to use:
192
+
193
+ ```
194
+ Chrome is not running.
195
+
196
+ Chrome Profiles:
197
+ 1. Person 1 (Default)
198
+ 2. Work (Profile 1)
199
+ 3. Personal (Profile 2)
200
+
201
+ Select profile [1-3]:
202
+ ```
203
+
204
+ ### Non-Interactive Mode
112
205
 
113
- | Feature | Tab Agent | Traditional Automation |
114
- |---------|--------------|----------------------|
115
- | **Access** | Only tabs you click to activate | Entire browser |
116
- | **Sessions** | Uses your cookies | Requires credentials |
117
- | **Visibility** | Green badge shows active tabs | Hidden/background |
118
- | **Control** | You choose what AI can access | Full access by default |
206
+ Skip the prompt with the `--profile` flag:
119
207
 
120
- Your banking, email, and sensitive tabs stay completely isolated unless you explicitly activate them.
208
+ ```bash
209
+ npx tab-agent@latest start --profile="Profile 1"
210
+ npx tab-agent@latest start --profile="Work"
211
+ ```
212
+
213
+ The flag matches by profile name or directory name (case-insensitive). If Chrome is already running, the prompt is skipped entirely.
214
+
215
+ ## Experimental Safari Support
216
+
217
+ > **Note:** Safari support is experimental and requires building from source. The relay and CLI routing work, but the Safari extension must still be packaged manually in Xcode.
218
+
219
+ ### Prerequisites
220
+
221
+ - macOS 14+ (Sonoma or later)
222
+ - Xcode 15+ (free from App Store)
223
+ - Node.js 18+
224
+
225
+ ### Setup Steps
226
+
227
+ 1. **Start the relay server** (in a terminal):
228
+ ```bash
229
+ npx tab-agent@latest start
230
+ ```
231
+
232
+ 2. **Open the Safari Swift package in Xcode**:
233
+ ```bash
234
+ open -a Xcode safari/Package.swift
235
+ ```
236
+
237
+ 3. **Create/configure a Safari Web Extension target in Xcode**:
238
+ - File → New → Target → Safari Web Extension
239
+ - Point it to the shared extension assets in `extension/`
240
+ - Use `extension/manifest.safari.json` settings as the Safari manifest baseline
241
+
242
+ 4. **Run the app target**:
243
+ - Click "Run" in Xcode (⌘R)
244
+
245
+ 5. **Enable the extension in Safari**:
246
+ - Safari → Settings → Extensions
247
+ - Check "Tab Agent"
248
+
249
+ 6. **Enable unsigned extensions** (required after each Safari restart):
250
+ - Safari → Develop → Allow Unsigned Extensions
251
+
252
+ 7. **Test the connection**:
253
+ ```bash
254
+ npx tab-agent tabs --browser=safari
255
+ ```
256
+
257
+ ### Using with Both Browsers
258
+
259
+ If you have both Chrome and Safari set up:
260
+
261
+ ```bash
262
+ # Auto-detect (uses whichever has activated tabs)
263
+ npx tab-agent snapshot
264
+
265
+ # Explicitly target a browser
266
+ npx tab-agent snapshot --browser=safari
267
+ npx tab-agent snapshot --browser=chrome
268
+
269
+ # See tabs from all browsers
270
+ npx tab-agent tabs
271
+ ```
121
272
 
122
273
  ## Supported Browsers
123
274
 
@@ -125,24 +276,31 @@ Your banking, email, and sensitive tabs stay completely isolated unless you expl
125
276
  - Brave
126
277
  - Microsoft Edge
127
278
  - Chromium
279
+ - Safari (experimental, manual source setup)
128
280
 
129
281
  ## Troubleshooting
130
282
 
131
283
  **Extension not detected?**
132
284
  - Make sure Developer mode is enabled in chrome://extensions
133
285
  - Reload the extension
286
+ - Run `npx tab-agent@latest setup` again after reloading so the native host is paired to the current extension ID
134
287
 
135
288
  **Commands not working?**
289
+ - Make sure relay is running: `npx tab-agent@latest start`
136
290
  - Click the extension icon — must show green "ON"
137
291
  - Run `npx tab-agent status` to check configuration
138
292
 
293
+ **Fresh machine install still acting like an older release?**
294
+ - Use `npx tab-agent@latest setup` and `npx tab-agent@latest start` to bypass stale cached `npx` installs
295
+ - Make sure the unpacked extension and the CLI/runtime come from the same release family
296
+
139
297
  **No active tabs?**
140
298
  - Activate at least one tab by clicking the extension icon
141
299
 
142
300
  ## How It Works
143
301
 
144
302
  1. **Chrome Extension** — Injects into activated tabs, captures DOM snapshots
145
- 2. **Relay Server** — Bridges AI ↔ Extension via Chrome Native Messaging (runs in background)
303
+ 2. **Relay Server** — Bridges AI ↔ Extension via native messaging (run with `npx tab-agent@latest start`)
146
304
  3. **CLI** — Simple commands for Claude Code and Codex
147
305
 
148
306
  ```
@@ -158,7 +316,3 @@ Claude → npx tab-agent navigate "google.com/flights"
158
316
  ## License
159
317
 
160
318
  MIT
161
-
162
- ---
163
-
164
- **Keywords:** browser automation, claude code, codex, AI browser control, web automation, puppeteer alternative, playwright alternative
package/bin/tab-agent.js CHANGED
@@ -1,8 +1,9 @@
1
1
  #!/usr/bin/env node
2
2
  const command = process.argv[2];
3
+ const hasHelpFlag = process.argv.includes('--help') || process.argv.includes('-h');
3
4
 
4
5
  // Commands that go to the command module
5
- const BROWSER_COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate'];
6
+ const BROWSER_COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate', 'hover', 'select', 'drag', 'get', 'find', 'cookies', 'storage', 'pdf'];
6
7
 
7
8
  if (command === '-v' || command === '--version') {
8
9
  console.log(require('../package.json').version);
@@ -13,6 +14,9 @@ if (BROWSER_COMMANDS.includes(command)) {
13
14
  const { runCommand } = require('../cli/command.js');
14
15
  runCommand(process.argv.slice(2));
15
16
  } else {
17
+ if (hasHelpFlag) {
18
+ showHelp(0);
19
+ }
16
20
  switch (command) {
17
21
  case 'setup':
18
22
  require('../cli/setup.js');
@@ -28,9 +32,9 @@ if (BROWSER_COMMANDS.includes(command)) {
28
32
  }
29
33
  }
30
34
 
31
- function showHelp() {
35
+ function showHelp(exitCode = null) {
32
36
  console.log(`
33
- tabpilot - Give LLMs full control of your browser
37
+ tab-agent - Give LLMs full control of your browser
34
38
 
35
39
  Setup:
36
40
  setup Auto-detect extension, configure native messaging
@@ -48,18 +52,29 @@ Browser Control:
48
52
  tabs List active tabs
49
53
  wait <text|selector> Wait for text or element
50
54
  screenshot [--full] Capture page (fallback)
55
+ hover <ref> Hover over element
56
+ select <ref> <value> Select dropdown option
57
+ drag <from> <to> Drag element to another
58
+ get <prop> [ref] [attr] Get text, value, attr, url, title
59
+ find <by> <query> Find by text, role, label, placeholder, selector
60
+ cookies <get|clear> View or clear cookies
61
+ storage <get|set|remove|rm|clear> Manage localStorage/sessionStorage
62
+ pdf [filename.pdf] Save page as PDF
51
63
 
52
64
  Workflow: snapshot → click/type → snapshot → repeat
53
65
 
54
66
  Examples:
55
- npx tabpilot setup
56
- npx tabpilot snapshot
57
- npx tabpilot click e5
58
- npx tabpilot type e3 "hello world"
59
- npx tabpilot navigate "https://google.com"
67
+ npx tab-agent setup
68
+ npx tab-agent snapshot
69
+ npx tab-agent click e5
70
+ npx tab-agent type e3 "hello world"
71
+ npx tab-agent navigate "https://google.com"
60
72
 
61
73
  Version: ${require('../package.json').version}
62
74
  `);
75
+ if (typeof exitCode === 'number') {
76
+ process.exit(exitCode);
77
+ }
63
78
  if (command && command !== 'help' && command !== '--help' && command !== '-h') {
64
79
  process.exit(1);
65
80
  }
package/cli/command.js CHANGED
@@ -1,9 +1,23 @@
1
1
  // cli/command.js
2
2
  const WebSocket = require('ws');
3
3
 
4
- const COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate'];
4
+ const COMMANDS = ['tabs', 'snapshot', 'screenshot', 'click', 'type', 'fill', 'press', 'scroll', 'navigate', 'wait', 'evaluate', 'hover', 'select', 'drag', 'get', 'find', 'cookies', 'storage', 'pdf'];
5
5
 
6
6
  async function runCommand(args) {
7
+ // Extract --browser flag
8
+ let targetBrowser = null;
9
+ const browserFlagIndex = args.findIndex(a => a === '--browser' || a.startsWith('--browser='));
10
+ if (browserFlagIndex !== -1) {
11
+ const flag = args[browserFlagIndex];
12
+ if (flag.includes('=')) {
13
+ targetBrowser = flag.split('=')[1];
14
+ } else if (args[browserFlagIndex + 1]) {
15
+ targetBrowser = args[browserFlagIndex + 1];
16
+ args.splice(browserFlagIndex + 1, 1);
17
+ }
18
+ args.splice(browserFlagIndex, 1);
19
+ }
20
+
7
21
  const [command, ...params] = args;
8
22
 
9
23
  if (!command || command === 'help') {
@@ -37,10 +51,10 @@ async function runCommand(args) {
37
51
 
38
52
  // First get tabs to find tabId
39
53
  if (command === 'tabs') {
40
- ws.send(JSON.stringify({ id: 1, action: 'tabs' }));
54
+ ws.send(JSON.stringify({ id: 1, action: 'tabs', browser: targetBrowser }));
41
55
  } else {
42
56
  // Get active tab first, then run command
43
- ws.send(JSON.stringify({ id: 0, action: 'tabs' }));
57
+ ws.send(JSON.stringify({ id: 0, action: 'tabs', browser: targetBrowser }));
44
58
  }
45
59
  });
46
60
 
@@ -57,7 +71,7 @@ async function runCommand(args) {
57
71
 
58
72
  const tabId = msg.tabs[0].tabId;
59
73
  const payload = buildPayload(command, params, tabId);
60
- ws.send(JSON.stringify({ id: 1, ...payload }));
74
+ ws.send(JSON.stringify({ id: 1, ...payload, browser: targetBrowser }));
61
75
  return;
62
76
  }
63
77
 
@@ -69,6 +83,10 @@ async function runCommand(args) {
69
83
  printSnapshot(msg);
70
84
  } else if (command === 'screenshot') {
71
85
  printScreenshot(msg);
86
+ } else if (command === 'pdf') {
87
+ printPdf(msg);
88
+ } else if (command === 'find') {
89
+ printFind(msg);
72
90
  } else {
73
91
  printResult(msg);
74
92
  }
@@ -108,10 +126,14 @@ function buildPayload(command, params, tabId) {
108
126
  payload.url = params[0];
109
127
  break;
110
128
  case 'wait':
111
- if (params[0]?.startsWith('.') || params[0]?.startsWith('#')) {
129
+ if (params[0] === '--url') {
130
+ payload.urlPattern = params[1];
131
+ } else if (params[0] === '--visible') {
132
+ payload.visibleRef = params[1];
133
+ } else if (params[0]?.startsWith('.') || params[0]?.startsWith('#')) {
112
134
  payload.selector = params[0];
113
135
  } else {
114
- payload.text = params.join(' ');
136
+ payload.text = params.filter(p => !/^\d+$/.test(p)).join(' ');
115
137
  }
116
138
  payload.timeout = parseInt(params.find(p => /^\d+$/.test(p))) || 5000;
117
139
  break;
@@ -123,6 +145,39 @@ function buildPayload(command, params, tabId) {
123
145
  payload.fullPage = true;
124
146
  }
125
147
  break;
148
+ case 'hover':
149
+ payload.ref = params[0];
150
+ break;
151
+ case 'select':
152
+ payload.ref = params[0];
153
+ payload.value = params.slice(1).join(' ');
154
+ break;
155
+ case 'drag':
156
+ payload.fromRef = params[0];
157
+ payload.toRef = params[1];
158
+ break;
159
+ case 'get':
160
+ payload.subcommand = params[0]; // text, html, value, attr, url, title
161
+ payload.ref = params[1];
162
+ payload.attr = params[2]; // for get attr <ref> <name>
163
+ break;
164
+ case 'find':
165
+ payload.by = params[0]; // text, role, label, placeholder, selector
166
+ payload.query = params.slice(1).join(' ');
167
+ break;
168
+ case 'cookies':
169
+ payload.subcommand = params[0]; // get, clear
170
+ break;
171
+ case 'storage':
172
+ payload.subcommand = params[0] === 'rm' ? 'remove' : params[0]; // get, set, remove, clear
173
+ payload.storageType = params.includes('--session') ? 'session' : 'local';
174
+ // Filter out --session flag
175
+ const storageParams = params.filter(p => p !== '--session');
176
+ payload.key = storageParams[1];
177
+ payload.value = storageParams.slice(2).join(' ');
178
+ break;
179
+ case 'pdf':
180
+ break;
126
181
  }
127
182
 
128
183
  return payload;
@@ -146,14 +201,32 @@ Commands:
146
201
  wait <text|selector> Wait for text or element
147
202
  screenshot [--full] Capture page (fallback)
148
203
  evaluate <script> Run JavaScript
204
+ hover <ref> Hover over element
205
+ select <ref> <value> Select dropdown option
206
+ drag <from> <to> Drag element to another
207
+ get <prop> [ref] [attr] Get text, value, attr, url, title
208
+ find <by> <query> Find by text, role, label, placeholder, selector
209
+ cookies <get|clear> View or clear cookies
210
+ storage <get|set|remove|rm|clear> Manage localStorage/sessionStorage
211
+ pdf [filename.pdf] Save page as PDF
212
+
213
+ Options:
214
+ --browser=<chrome|safari> Target specific browser
149
215
 
150
216
  Workflow: snapshot → click/type → snapshot → repeat
151
217
 
152
218
  Examples:
153
219
  npx tab-agent snapshot
154
220
  npx tab-agent click e5
155
- npx tab-agent type e3 "hello world"
156
- npx tab-agent navigate "https://google.com"
221
+ npx tab-agent get text e3
222
+ npx tab-agent find text "Submit"
223
+ npx tab-agent find role button
224
+ npx tab-agent pdf page.pdf
225
+ npx tab-agent drag e1 e5
226
+ npx tab-agent hover e3
227
+ npx tab-agent select e7 "Option 2"
228
+ npx tab-agent cookies get
229
+ npx tab-agent storage get myKey
157
230
  `);
158
231
  }
159
232
 
@@ -164,7 +237,8 @@ function printTabs(msg) {
164
237
  }
165
238
  console.log('Active tabs:\n');
166
239
  msg.tabs.forEach((tab, i) => {
167
- console.log(` ${i + 1}. [${tab.tabId}] ${tab.title}`);
240
+ const browserTag = tab.browser ? `[${tab.browser}] ` : '';
241
+ console.log(` ${i + 1}. ${browserTag}[${tab.tabId}] ${tab.title}`);
168
242
  console.log(` ${tab.url}\n`);
169
243
  });
170
244
  }
@@ -186,6 +260,36 @@ function printScreenshot(msg) {
186
260
  console.log(msg.screenshot);
187
261
  }
188
262
 
263
+ function printPdf(msg) {
264
+ if (!msg.ok) {
265
+ console.error('Error:', msg.error);
266
+ return;
267
+ }
268
+ // Check if a filename was given via params
269
+ const outFile = process.argv.find(a => a.endsWith('.pdf'));
270
+ if (outFile) {
271
+ require('fs').writeFileSync(outFile, Buffer.from(msg.pdf, 'base64'));
272
+ console.log(`PDF saved to ${outFile}`);
273
+ } else {
274
+ console.log(msg.pdf);
275
+ }
276
+ }
277
+
278
+ function printFind(msg) {
279
+ if (!msg.ok) {
280
+ console.error('Error:', msg.error);
281
+ return;
282
+ }
283
+ if (msg.results.length === 0) {
284
+ console.log('No matches found.');
285
+ return;
286
+ }
287
+ console.log(`Found ${msg.count} match${msg.count !== 1 ? 'es' : ''}:\n`);
288
+ msg.results.forEach(r => {
289
+ console.log(` [${r.ref}] ${r.role} "${r.name}"`);
290
+ });
291
+ }
292
+
189
293
  function printResult(msg) {
190
294
  if (!msg.ok) {
191
295
  console.error('Error:', msg.error);