tab-agent 0.3.4 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (46)
  1. package/CHANGELOG.md +29 -0
  2. package/README.md +179 -25
  3. package/bin/tab-agent.js +23 -8
  4. package/cli/command.js +113 -9
  5. package/cli/detect-extension.js +96 -14
  6. package/cli/launch-chrome.js +150 -0
  7. package/cli/setup.js +57 -22
  8. package/cli/start.js +65 -13
  9. package/cli/status.js +41 -7
  10. package/extension/content-script.js +218 -17
  11. package/extension/manifest.json +4 -3
  12. package/extension/manifest.safari.json +45 -0
  13. package/extension/popup/popup.html +58 -1
  14. package/extension/popup/popup.js +18 -0
  15. package/extension/service-worker.js +106 -13
  16. package/package.json +14 -3
  17. package/relay/install-native-host.sh +2 -2
  18. package/relay/native-host-wrapper.sh +1 -1
  19. package/relay/native-host.js +3 -1
  20. package/relay/server.js +124 -17
  21. package/skills/claude-code/tab-agent/SKILL.md +92 -0
  22. package/skills/codex/tab-agent/SKILL.md +92 -0
  23. package/relay/node_modules/.package-lock.json +0 -29
  24. package/relay/node_modules/ws/LICENSE +0 -20
  25. package/relay/node_modules/ws/README.md +0 -548
  26. package/relay/node_modules/ws/browser.js +0 -8
  27. package/relay/node_modules/ws/index.js +0 -13
  28. package/relay/node_modules/ws/lib/buffer-util.js +0 -131
  29. package/relay/node_modules/ws/lib/constants.js +0 -19
  30. package/relay/node_modules/ws/lib/event-target.js +0 -292
  31. package/relay/node_modules/ws/lib/extension.js +0 -203
  32. package/relay/node_modules/ws/lib/limiter.js +0 -55
  33. package/relay/node_modules/ws/lib/permessage-deflate.js +0 -528
  34. package/relay/node_modules/ws/lib/receiver.js +0 -706
  35. package/relay/node_modules/ws/lib/sender.js +0 -602
  36. package/relay/node_modules/ws/lib/stream.js +0 -161
  37. package/relay/node_modules/ws/lib/subprotocol.js +0 -62
  38. package/relay/node_modules/ws/lib/validation.js +0 -152
  39. package/relay/node_modules/ws/lib/websocket-server.js +0 -554
  40. package/relay/node_modules/ws/lib/websocket.js +0 -1393
  41. package/relay/node_modules/ws/package.json +0 -69
  42. package/relay/node_modules/ws/wrapper.mjs +0 -8
  43. package/relay/package-lock.json +0 -36
  44. package/relay/package.json +0 -12
  45. package/skills/claude-code/tab-agent.md +0 -57
  46. package/skills/codex/tab-agent.md +0 -38
package/relay/install-native-host.sh CHANGED
@@ -2,7 +2,7 @@
2
2
  set -e
3
3
 
4
4
  SCRIPT_DIR="$(cd "$(dirname "$0")" && pwd)"
5
- HOST_NAME="com.tabpilot.relay"
5
+ HOST_NAME="com.tabagent.relay"
6
6
  HOST_DIR="$HOME/Library/Application Support/TabAgent"
7
7
  WRAPPER_PATH="$HOST_DIR/native-host-wrapper.sh"
8
8
 
@@ -40,7 +40,7 @@ cp -R "$SCRIPT_DIR/node_modules" "$HOST_DIR/node_modules"
40
40
  cat > "$MANIFEST_DIR/$HOST_NAME.json" << EOF
41
41
  {
42
42
  "name": "$HOST_NAME",
43
- "description": "TabPilot Native Messaging Host",
43
+ "description": "Tab Agent Native Messaging Host",
44
44
  "path": "$WRAPPER_PATH",
45
45
  "type": "stdio",
46
46
  "allowed_origins": [
package/relay/native-host-wrapper.sh CHANGED
@@ -6,7 +6,7 @@ cd "$SCRIPT_DIR"
6
6
 
7
7
  LOG_FILE="$SCRIPT_DIR/wrapper.log"
8
8
  echo "$(date): Starting native host from $SCRIPT_DIR" >> "$LOG_FILE"
9
- export TAB_AGENT_LOG="/tmp/tabpilot-native-host.log"
9
+ export TAB_AGENT_LOG="/tmp/tab-agent-native-host.log"
10
10
 
11
11
  NODE_BIN="/opt/homebrew/bin/node"
12
12
  if [ ! -x "$NODE_BIN" ]; then
package/relay/native-host.js CHANGED
@@ -75,7 +75,9 @@ function scheduleReconnect() {
75
75
  function connectWebSocket() {
76
76
  try {
77
77
  ws = new WebSocket('ws://localhost:9876', {
78
- headers: { 'x-client-type': 'extension' }
78
+ headers: {
79
+ 'x-client-type': 'chrome'
80
+ }
79
81
  });
80
82
  } catch (error) {
81
83
  const now = Date.now();
package/relay/server.js CHANGED
@@ -7,7 +7,14 @@ const PORT = process.env.PORT || 9876;
7
7
  const httpServer = http.createServer((req, res) => {
8
8
  if (req.url === '/health') {
9
9
  res.writeHead(200, { 'Content-Type': 'application/json' });
10
- res.end(JSON.stringify({ ok: true, clients: wss.clients.size }));
10
+ res.end(JSON.stringify({
11
+ ok: true,
12
+ clients: wss.clients.size,
13
+ browsers: {
14
+ chrome: connections.chrome !== null,
15
+ safari: connections.safari !== null
16
+ }
17
+ }));
11
18
  } else {
12
19
  res.writeHead(404);
13
20
  res.end('Not found');
@@ -16,8 +23,8 @@ const httpServer = http.createServer((req, res) => {
16
23
 
17
24
  const wss = new WebSocket.Server({ server: httpServer });
18
25
 
19
- // Store extension connection
20
- let extensionConnection = null;
26
+ // Store extension connections by browser type
27
+ const connections = { chrome: null, safari: null };
21
28
  const pendingRequests = new Map();
22
29
 
23
30
  function safeParse(data, label) {
@@ -29,8 +36,50 @@ function safeParse(data, label) {
29
36
  }
30
37
  }
31
38
 
32
- function failPendingRequests(reason) {
39
+ /**
40
+ * Get the appropriate extension connection for routing.
41
+ * @param {string} targetBrowser - Optional: 'chrome' or 'safari'
42
+ * @returns {{ ws: WebSocket, browser: string } | null}
43
+ */
44
+ function getExtensionConnection(targetBrowser) {
45
+ // If specific browser requested, return it if connected
46
+ if (targetBrowser) {
47
+ const ws = connections[targetBrowser];
48
+ if (ws) {
49
+ return { ws, browser: targetBrowser };
50
+ }
51
+ return null;
52
+ }
53
+
54
+ // Auto-detect: if only one browser connected, use it
55
+ const chromeConnected = connections.chrome !== null;
56
+ const safariConnected = connections.safari !== null;
57
+
58
+ if (chromeConnected && !safariConnected) {
59
+ return { ws: connections.chrome, browser: 'chrome' };
60
+ }
61
+ if (safariConnected && !chromeConnected) {
62
+ return { ws: connections.safari, browser: 'safari' };
63
+ }
64
+ if (chromeConnected && safariConnected) {
65
+ // Default to chrome for backwards compatibility
66
+ return { ws: connections.chrome, browser: 'chrome' };
67
+ }
68
+
69
+ return null;
70
+ }
71
+
72
+ /**
73
+ * Fail pending requests, optionally filtered by browser.
74
+ * @param {string} reason - Error message
75
+ * @param {string} browser - Optional: only fail requests for this browser
76
+ */
77
+ function failPendingRequests(reason, browser) {
33
78
  for (const [id, pending] of pendingRequests.entries()) {
79
+ // If browser specified, only fail requests for that browser
80
+ if (browser && pending.browser !== browser) {
81
+ continue;
82
+ }
34
83
  try {
35
84
  pending.ws.send(JSON.stringify({ id: pending.clientId, ok: false, error: reason }));
36
85
  } catch (error) {
@@ -41,14 +90,17 @@ function failPendingRequests(reason) {
41
90
  }
42
91
 
43
92
  wss.on('connection', (ws, req) => {
44
- const isExtension = req.headers['x-client-type'] === 'extension';
93
+ const clientType = req.headers['x-client-type'];
94
+ // 'extension' means chrome for backwards compatibility, 'safari' means safari
95
+ const isExtension = clientType === 'extension' || clientType === 'safari';
96
+ const browser = clientType === 'safari' ? 'safari' : 'chrome';
45
97
 
46
98
  if (isExtension) {
47
- console.log('Extension connected');
48
- extensionConnection = ws;
99
+ console.log(`${browser} extension connected`);
100
+ connections[browser] = ws;
49
101
 
50
102
  ws.on('message', (data) => {
51
- const message = safeParse(data, 'extension');
103
+ const message = safeParse(data, `${browser} extension`);
52
104
  if (!message || typeof message.id === 'undefined') {
53
105
  return;
54
106
  }
@@ -62,9 +114,9 @@ wss.on('connection', (ws, req) => {
62
114
  });
63
115
 
64
116
  ws.on('close', () => {
65
- console.log('Extension disconnected');
66
- extensionConnection = null;
67
- failPendingRequests('Extension disconnected');
117
+ console.log(`${browser} extension disconnected`);
118
+ connections[browser] = null;
119
+ failPendingRequests(`${browser} extension disconnected`, browser);
68
120
  });
69
121
 
70
122
  } else {
@@ -75,19 +127,74 @@ wss.on('connection', (ws, req) => {
75
127
  if (!message || typeof message.id === 'undefined') {
76
128
  return;
77
129
  }
78
- const { id, ...command } = message;
130
+ const { id, browser: targetBrowser, ...command } = message;
79
131
 
80
132
  console.log(`Command: ${command.action}`, command);
81
133
 
82
- if (!extensionConnection) {
83
- ws.send(JSON.stringify({ id, ok: false, error: 'Extension not connected' }));
134
+ // Special handling for 'tabs' without specific browser - aggregate from all browsers
135
+ if (command.action === 'tabs' && !targetBrowser) {
136
+ const results = { chrome: null, safari: null };
137
+ let pending = 0;
138
+
139
+ const sendAggregated = () => {
140
+ const allTabs = [];
141
+ if (results.chrome?.tabs) {
142
+ results.chrome.tabs.forEach(t => allTabs.push({ ...t, browser: 'chrome' }));
143
+ }
144
+ if (results.safari?.tabs) {
145
+ results.safari.tabs.forEach(t => allTabs.push({ ...t, browser: 'safari' }));
146
+ }
147
+ ws.send(JSON.stringify({ id, ok: true, tabs: allTabs }));
148
+ };
149
+
150
+ for (const [browserName, browserWs] of Object.entries(connections)) {
151
+ if (browserWs) {
152
+ pending++;
153
+ const internalId = Date.now() + Math.random();
154
+
155
+ const handler = (data) => {
156
+ const msg = safeParse(data, browserName);
157
+ if (msg && msg.id === internalId) {
158
+ results[browserName] = msg;
159
+ pending--;
160
+ browserWs.removeListener('message', handler);
161
+ if (pending === 0) sendAggregated();
162
+ }
163
+ };
164
+ browserWs.on('message', handler);
165
+ browserWs.send(JSON.stringify({ id: internalId, action: 'tabs' }));
166
+
167
+ // Timeout after 2 seconds
168
+ setTimeout(() => {
169
+ if (results[browserName] === null) {
170
+ results[browserName] = { tabs: [] };
171
+ pending--;
172
+ browserWs.removeListener('message', handler);
173
+ if (pending === 0) sendAggregated();
174
+ }
175
+ }, 2000);
176
+ }
177
+ }
178
+
179
+ if (pending === 0) {
180
+ ws.send(JSON.stringify({ id, ok: false, error: 'No browsers connected' }));
181
+ }
182
+ return;
183
+ }
184
+
185
+ const connection = getExtensionConnection(targetBrowser);
186
+ if (!connection) {
187
+ const errorMsg = targetBrowser
188
+ ? `${targetBrowser} extension not connected`
189
+ : 'No extension connected';
190
+ ws.send(JSON.stringify({ id, ok: false, error: errorMsg }));
84
191
  return;
85
192
  }
86
193
 
87
194
  const internalId = Date.now() + Math.random();
88
- pendingRequests.set(internalId, { ws, clientId: id });
195
+ pendingRequests.set(internalId, { ws, clientId: id, browser: connection.browser });
89
196
 
90
- extensionConnection.send(JSON.stringify({ id: internalId, ...command }));
197
+ connection.ws.send(JSON.stringify({ id: internalId, ...command }));
91
198
  });
92
199
 
93
200
  ws.on('close', () => {
@@ -102,7 +209,7 @@ wss.on('connection', (ws, req) => {
102
209
  });
103
210
 
104
211
  httpServer.listen(PORT, () => {
105
- console.log(`TabPilot Relay running on ws://localhost:${PORT}`);
212
+ console.log(`Tab Agent Relay running on ws://localhost:${PORT}`);
106
213
  console.log(`Health check: http://localhost:${PORT}/health`);
107
214
  });
108
215
 
package/skills/claude-code/tab-agent/SKILL.md ADDED
@@ -0,0 +1,92 @@
1
+ ---
2
+ name: tab-agent
3
+ description: Browser control via CLI - snapshot, click, type, navigate, find, get, drag, pdf
4
+ ---
5
+
6
+ # Tab Agent
7
+
8
+ Control browser tabs via CLI. User activates tabs via extension icon (green = active).
9
+
10
+ ## Before First Command
11
+
12
+ ```bash
13
+ curl -s http://localhost:9876/health || (npx tab-agent start &)
14
+ sleep 2
15
+ ```
16
+
17
+ ## Commands
18
+
19
+ ```bash
20
+ npx tab-agent snapshot # Get page with refs [e1], [e2]...
21
+ npx tab-agent click <ref> # Click element
22
+ npx tab-agent type <ref> <text> # Type text
23
+ npx tab-agent fill <ref> <value> # Fill form field
24
+ npx tab-agent press <key> # Press key (Enter, Escape, Tab)
25
+ npx tab-agent scroll <dir> [amount] # Scroll up/down
26
+ npx tab-agent navigate <url> # Go to URL
27
+ npx tab-agent tabs # List active tabs
28
+ npx tab-agent wait <text|selector> # Wait for condition
29
+ npx tab-agent wait --url <pattern> # Wait for URL match
30
+ npx tab-agent wait --visible <ref> # Wait for element visible
31
+ npx tab-agent screenshot # Capture page (fallback only)
32
+ npx tab-agent evaluate <script> # Run JavaScript
33
+ npx tab-agent hover <ref> # Hover over element
34
+ npx tab-agent select <ref> <value> # Select dropdown option
35
+ npx tab-agent drag <from> <to> # Drag and drop
36
+ npx tab-agent get text <ref> # Get element text
37
+ npx tab-agent get value <ref> # Get input value
38
+ npx tab-agent get attr <ref> <name> # Get attribute
39
+ npx tab-agent get url # Get current URL
40
+ npx tab-agent get title # Get page title
41
+ npx tab-agent find text "Submit" # Find by text content
42
+ npx tab-agent find role button # Find by ARIA role
43
+ npx tab-agent find label "Email" # Find by label
44
+ npx tab-agent find placeholder "Search" # Find by placeholder
45
+ npx tab-agent cookies get # View cookies
46
+ npx tab-agent cookies clear # Clear cookies
47
+ npx tab-agent storage get [key] # Read localStorage
48
+ npx tab-agent storage set <key> <val> # Write localStorage
49
+ npx tab-agent pdf [file.pdf] # Save page as PDF
50
+ ```
51
+
52
+ ## Workflow
53
+
54
+ 1. `snapshot` first - always start here to get element refs
55
+ 2. Use refs [e1], [e2]... with `click`/`type`/`fill`
56
+ 3. `snapshot` again after actions to see results
57
+ 4. Use `find` to locate elements without a full snapshot
58
+ 5. Use `get` to extract specific data from elements
59
+ 6. **Only use `screenshot` if:**
60
+ - Snapshot is missing expected content
61
+ - Page has complex visuals (charts, images, canvas)
62
+ - Debugging why an action didn't work
63
+
64
+ ## Examples
65
+
66
+ ```bash
67
+ # Search Google
68
+ npx tab-agent navigate "https://google.com"
69
+ npx tab-agent snapshot
70
+ npx tab-agent type e1 "hello world"
71
+ npx tab-agent press Enter
72
+ npx tab-agent snapshot # See results
73
+
74
+ # Find and click a button by text
75
+ npx tab-agent find text "Sign In"
76
+ npx tab-agent click e1
77
+
78
+ # Extract data
79
+ npx tab-agent get text e5
80
+ npx tab-agent get attr e3 href
81
+
82
+ # Save page as PDF
83
+ npx tab-agent pdf report.pdf
84
+ ```
85
+
86
+ ## Notes
87
+
88
+ - Refs reset on each snapshot - always snapshot before interacting
89
+ - `find` assigns new refs without resetting existing ones
90
+ - Keys: Enter, Escape, Tab, Backspace, ArrowUp/Down/Left/Right
91
+ - Prefer snapshot over screenshot - faster and text-based
92
+ - Use `--browser=safari` or `--browser=chrome` to target specific browser
package/skills/codex/tab-agent/SKILL.md ADDED
@@ -0,0 +1,92 @@
1
+ ---
2
+ name: tab-agent
3
+ description: Browser control via CLI - snapshot, click, type, navigate, find, get, drag, pdf
4
+ ---
5
+
6
+ # Tab Agent
7
+
8
+ Control browser tabs via CLI. User activates tabs via extension icon (green = active).
9
+
10
+ ## Before First Command
11
+
12
+ ```bash
13
+ curl -s http://localhost:9876/health || (npx tab-agent start &)
14
+ sleep 2
15
+ ```
16
+
17
+ ## Commands
18
+
19
+ ```bash
20
+ npx tab-agent snapshot # Get page with refs [e1], [e2]...
21
+ npx tab-agent click <ref> # Click element
22
+ npx tab-agent type <ref> <text> # Type text
23
+ npx tab-agent fill <ref> <value> # Fill form field
24
+ npx tab-agent press <key> # Press key (Enter, Escape, Tab)
25
+ npx tab-agent scroll <dir> [amount] # Scroll up/down
26
+ npx tab-agent navigate <url> # Go to URL
27
+ npx tab-agent tabs # List active tabs
28
+ npx tab-agent wait <text|selector> # Wait for condition
29
+ npx tab-agent wait --url <pattern> # Wait for URL match
30
+ npx tab-agent wait --visible <ref> # Wait for element visible
31
+ npx tab-agent screenshot # Capture page (fallback only)
32
+ npx tab-agent evaluate <script> # Run JavaScript
33
+ npx tab-agent hover <ref> # Hover over element
34
+ npx tab-agent select <ref> <value> # Select dropdown option
35
+ npx tab-agent drag <from> <to> # Drag and drop
36
+ npx tab-agent get text <ref> # Get element text
37
+ npx tab-agent get value <ref> # Get input value
38
+ npx tab-agent get attr <ref> <name> # Get attribute
39
+ npx tab-agent get url # Get current URL
40
+ npx tab-agent get title # Get page title
41
+ npx tab-agent find text "Submit" # Find by text content
42
+ npx tab-agent find role button # Find by ARIA role
43
+ npx tab-agent find label "Email" # Find by label
44
+ npx tab-agent find placeholder "Search" # Find by placeholder
45
+ npx tab-agent cookies get # View cookies
46
+ npx tab-agent cookies clear # Clear cookies
47
+ npx tab-agent storage get [key] # Read localStorage
48
+ npx tab-agent storage set <key> <val> # Write localStorage
49
+ npx tab-agent pdf [file.pdf] # Save page as PDF
50
+ ```
51
+
52
+ ## Workflow
53
+
54
+ 1. `snapshot` first - always start here to get element refs
55
+ 2. Use refs [e1], [e2]... with `click`/`type`/`fill`
56
+ 3. `snapshot` again after actions to see results
57
+ 4. Use `find` to locate elements without a full snapshot
58
+ 5. Use `get` to extract specific data from elements
59
+ 6. **Only use `screenshot` if:**
60
+ - Snapshot is missing expected content
61
+ - Page has complex visuals (charts, images, canvas)
62
+ - Debugging why an action didn't work
63
+
64
+ ## Examples
65
+
66
+ ```bash
67
+ # Search Google
68
+ npx tab-agent navigate "https://google.com"
69
+ npx tab-agent snapshot
70
+ npx tab-agent type e1 "hello world"
71
+ npx tab-agent press Enter
72
+ npx tab-agent snapshot # See results
73
+
74
+ # Find and click a button by text
75
+ npx tab-agent find text "Sign In"
76
+ npx tab-agent click e1
77
+
78
+ # Extract data
79
+ npx tab-agent get text e5
80
+ npx tab-agent get attr e3 href
81
+
82
+ # Save page as PDF
83
+ npx tab-agent pdf report.pdf
84
+ ```
85
+
86
+ ## Notes
87
+
88
+ - Refs reset on each snapshot - always snapshot before interacting
89
+ - `find` assigns new refs without resetting existing ones
90
+ - Keys: Enter, Escape, Tab, Backspace, ArrowUp/Down/Left/Right
91
+ - Prefer snapshot over screenshot - faster and text-based
92
+ - Use `--browser=safari` or `--browser=chrome` to target specific browser
package/relay/node_modules/.package-lock.json DELETED
@@ -1,29 +0,0 @@
1
- {
2
- "name": "tab-agent-relay",
3
- "version": "0.1.0",
4
- "lockfileVersion": 3,
5
- "requires": true,
6
- "packages": {
7
- "node_modules/ws": {
8
- "version": "8.19.0",
9
- "resolved": "https://registry.npmjs.org/ws/-/ws-8.19.0.tgz",
10
- "integrity": "sha512-blAT2mjOEIi0ZzruJfIhb3nps74PRWTCz1IjglWEEpQl5XS/UNama6u2/rjFkDDouqr4L67ry+1aGIALViWjDg==",
11
- "license": "MIT",
12
- "engines": {
13
- "node": ">=10.0.0"
14
- },
15
- "peerDependencies": {
16
- "bufferutil": "^4.0.1",
17
- "utf-8-validate": ">=5.0.2"
18
- },
19
- "peerDependenciesMeta": {
20
- "bufferutil": {
21
- "optional": true
22
- },
23
- "utf-8-validate": {
24
- "optional": true
25
- }
26
- }
27
- }
28
- }
29
- }
package/relay/node_modules/ws/LICENSE DELETED
@@ -1,20 +0,0 @@
1
- Copyright (c) 2011 Einar Otto Stangvik <einaros@gmail.com>
2
- Copyright (c) 2013 Arnout Kazemier and contributors
3
- Copyright (c) 2016 Luigi Pinca and contributors
4
-
5
- Permission is hereby granted, free of charge, to any person obtaining a copy of
6
- this software and associated documentation files (the "Software"), to deal in
7
- the Software without restriction, including without limitation the rights to
8
- use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of
9
- the Software, and to permit persons to whom the Software is furnished to do so,
10
- subject to the following conditions:
11
-
12
- The above copyright notice and this permission notice shall be included in all
13
- copies or substantial portions of the Software.
14
-
15
- THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
- IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
17
- FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR
18
- COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER
19
- IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
20
- CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.