abu-browser-bridge 0.5.2 → 0.6.6

This diff shows the changes between two publicly available versions of this package, as released to one of the supported public registries. It is provided for informational purposes only and reflects the package versions exactly as they appear in their respective registries.
package/dist/index.js CHANGED
@@ -17,6 +17,7 @@ import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js';
17
17
  import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js';
18
18
  import { startWSServer, stopWSServer } from './wsServer.js';
19
19
  import { registerTools } from './tools.js';
20
+ import { PKG_VERSION } from './version.js';
20
21
  const DEFAULT_WS_PORT = 9876;
21
22
  const DISCOVERY_PORT = 9875;
22
23
  /**
@@ -123,7 +124,7 @@ async function main() {
123
124
  // 2. Create MCP server
124
125
  const mcpServer = new McpServer({
125
126
  name: 'abu-browser-bridge',
126
- version: '0.5.2',
127
+ version: PKG_VERSION,
127
128
  });
128
129
  // 3. Register browser tools
129
130
  registerTools(mcpServer);
package/dist/tools.js CHANGED
@@ -67,7 +67,7 @@ export function registerTools(server) {
67
67
  });
68
68
  // 2. browser_snapshot
69
69
  server.tool('snapshot', `Get a structured snapshot of all interactive elements on the page (buttons, inputs, links, selects, etc.). Returns each element with a short reference ID (e.g., "e1") that can be used in subsequent actions. This is the primary way to understand what's on a page before taking action.`, {
70
- tabId: z.number().describe('Tab ID from get_tabs'),
70
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
71
71
  selector: z.string().optional().describe('Optional CSS selector to scope the snapshot to a specific area of the page'),
72
72
  }, async ({ tabId, selector }) => {
73
73
  ensureConnected();
@@ -76,7 +76,7 @@ export function registerTools(server) {
76
76
  });
77
77
  // 3. browser_click
78
78
  server.tool('click', 'Click an element on the page. Returns the result of the click action.', {
79
- tabId: z.number().describe('Tab ID from get_tabs'),
79
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
80
80
  locator: z.string().describe(`JSON string of element locator. ${LocatorDescription}`),
81
81
  }, async ({ tabId, locator }) => {
82
82
  ensureConnected();
@@ -86,7 +86,7 @@ export function registerTools(server) {
86
86
  });
87
87
  // 4. browser_fill
88
88
  server.tool('fill', 'Fill in a text input, textarea, or other editable field. Clears existing content and types the new value, triggering proper input/change events for framework compatibility (React, Vue, etc.).', {
89
- tabId: z.number().describe('Tab ID from get_tabs'),
89
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
90
90
  locator: z.string().describe(`JSON string of element locator. ${LocatorDescription}`),
91
91
  value: z.string().describe('The text value to fill into the field'),
92
92
  }, async ({ tabId, locator, value }) => {
@@ -97,7 +97,7 @@ export function registerTools(server) {
97
97
  });
98
98
  // 5. browser_select
99
99
  server.tool('select', 'Select an option from a <select> dropdown element.', {
100
- tabId: z.number().describe('Tab ID from get_tabs'),
100
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
101
101
  locator: z.string().describe(`JSON string of element locator. ${LocatorDescription}`),
102
102
  value: z.string().describe('The option value or visible text to select'),
103
103
  }, async ({ tabId, locator, value }) => {
@@ -108,14 +108,14 @@ export function registerTools(server) {
108
108
  });
109
109
  // 6. browser_wait_for
110
110
  server.tool('wait_for', `Wait for a condition to be met on the page. Useful for waiting for elements to appear after a click, waiting for loading to complete, or waiting for page navigation. Returns when the condition is met or times out.`, {
111
- tabId: z.number().describe('Tab ID from get_tabs'),
111
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
112
112
  condition: z.string().describe(`JSON string of wait condition. Options:
113
113
  - { "type": "appear", "locator": { "text": "成功" } } — wait for element to appear
114
114
  - { "type": "disappear", "locator": { "css": ".loading" } } — wait for element to disappear
115
115
  - { "type": "enabled", "locator": { "text": "提交" } } — wait for element to become clickable
116
116
  - { "type": "textContains", "locator": { "css": "#status" }, "text": "完成" } — wait for text content
117
117
  - { "type": "urlContains", "pattern": "/success" } — wait for URL change`),
118
- timeout: z.number().optional().default(30000).describe('Maximum wait time in ms (default: 30000)'),
118
+ timeout: z.coerce.number().optional().default(30000).describe('Maximum wait time in ms (default: 30000)'),
119
119
  }, async ({ tabId, condition, timeout }) => {
120
120
  ensureConnected();
121
121
  const parsed = parseCondition(condition);
@@ -124,7 +124,7 @@ export function registerTools(server) {
124
124
  });
125
125
  // 7. browser_extract_text
126
126
  server.tool('extract_text', 'Extract text content from the page or a specific element. Useful for reading content, checking values, or verifying results.', {
127
- tabId: z.number().describe('Tab ID from get_tabs'),
127
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
128
128
  selector: z.string().optional().describe('CSS selector to extract text from. If omitted, extracts the full page text (may be large).'),
129
129
  }, async ({ tabId, selector }) => {
130
130
  ensureConnected();
@@ -133,7 +133,7 @@ export function registerTools(server) {
133
133
  });
134
134
  // 8. browser_extract_table
135
135
  server.tool('extract_table', 'Extract structured data from an HTML table on the page. Returns headers and rows as arrays.', {
136
- tabId: z.number().describe('Tab ID from get_tabs'),
136
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
137
137
  selector: z.string().optional().describe('CSS selector for the target table. If omitted, extracts the largest table on the page.'),
138
138
  }, async ({ tabId, selector }) => {
139
139
  ensureConnected();
@@ -142,9 +142,9 @@ export function registerTools(server) {
142
142
  });
143
143
  // 9. browser_scroll
144
144
  server.tool('scroll', 'Scroll the page or a specific element.', {
145
- tabId: z.number().describe('Tab ID from get_tabs'),
145
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
146
146
  direction: z.enum(['up', 'down', 'left', 'right']).describe('Scroll direction'),
147
- amount: z.number().optional().default(500).describe('Scroll amount in pixels (default: 500)'),
147
+ amount: z.coerce.number().optional().default(500).describe('Scroll amount in pixels (default: 500)'),
148
148
  selector: z.string().optional().describe('CSS selector for the scrollable element. If omitted, scrolls the whole page.'),
149
149
  }, async ({ tabId, direction, amount, selector }) => {
150
150
  ensureConnected();
@@ -153,7 +153,7 @@ export function registerTools(server) {
153
153
  });
154
154
  // 10. browser_navigate
155
155
  server.tool('navigate', 'Navigate a tab to a specific URL, or go back/forward in history.', {
156
- tabId: z.number().describe('Tab ID from get_tabs'),
156
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
157
157
  url: z.string().optional().describe('URL to navigate to. Omit for back/forward.'),
158
158
  action: z.enum(['goto', 'back', 'forward', 'reload']).optional().default('goto').describe('Navigation action (default: goto)'),
159
159
  }, async ({ tabId, url, action }) => {
@@ -163,7 +163,7 @@ export function registerTools(server) {
163
163
  });
164
164
  // 11. browser_keyboard
165
165
  server.tool('keyboard', 'Send keyboard events to the page. Supports key combinations.', {
166
- tabId: z.number().describe('Tab ID from get_tabs'),
166
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
167
167
  key: z.string().describe('Key to press (e.g., "Enter", "Tab", "Escape", "a", "ArrowDown")'),
168
168
  modifiers: z.array(z.enum(['ctrl', 'shift', 'alt', 'meta'])).optional().describe('Modifier keys to hold'),
169
169
  }, async ({ tabId, key, modifiers }) => {
@@ -173,7 +173,7 @@ export function registerTools(server) {
173
173
  });
174
174
  // 12. browser_execute_js
175
175
  server.tool('execute_js', 'Execute arbitrary JavaScript code in the context of the page. Use this as a fallback when other tools cannot achieve the desired result. Returns the result of the expression.', {
176
- tabId: z.number().describe('Tab ID from get_tabs'),
176
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
177
177
  code: z.string().describe('JavaScript code to execute. The last expression value is returned.'),
178
178
  }, async ({ tabId, code }) => {
179
179
  ensureConnected();
@@ -182,7 +182,7 @@ export function registerTools(server) {
182
182
  });
183
183
  // 13. browser_screenshot
184
184
  server.tool('screenshot', 'Take a screenshot of the visible area of a tab. Returns a base64-encoded PNG image. Useful for visual confirmation of actions.', {
185
- tabId: z.number().describe('Tab ID from get_tabs'),
185
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
186
186
  }, async ({ tabId }) => {
187
187
  ensureConnected();
188
188
  const res = await sendToExtension('screenshot', { tabId });
@@ -197,7 +197,25 @@ export function registerTools(server) {
197
197
  }
198
198
  return { content: [{ type: 'text', text: formatResult(res) }] };
199
199
  });
200
- // 14. browser_connection_status
200
+ // 14. browser_screenshot_full_page
201
+ server.tool('screenshot_full_page', 'Take a full-page screenshot by scrolling and stitching the entire page content. Returns a base64-encoded PNG image of the complete page. Use this when the user asks for a "long screenshot" or wants to capture content beyond the visible viewport. This is slower than a regular screenshot.', {
202
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
203
+ }, async ({ tabId }) => {
204
+ ensureConnected();
205
+ // Full-page capture needs more time: scroll + multiple captures + stitch
206
+ const res = await sendToExtension('screenshot_full_page', { tabId }, 120_000);
207
+ if (res.success && typeof res.data === 'string') {
208
+ return {
209
+ content: [{
210
+ type: 'image',
211
+ data: res.data.replace(/^data:image\/png;base64,/, ''),
212
+ mimeType: 'image/png',
213
+ }]
214
+ };
215
+ }
216
+ return { content: [{ type: 'text', text: formatResult(res) }] };
217
+ });
218
+ // 15. browser_connection_status
201
219
  server.tool('connection_status', 'Check whether the Chrome Extension is connected to this bridge. Use this to verify the extension is ready before performing browser actions.', async () => {
202
220
  const connected = isExtensionConnected();
203
221
  return {
@@ -217,7 +235,7 @@ export function registerTools(server) {
217
235
  });
218
236
  // 16. start_recording — record user interactions
219
237
  server.tool('start_recording', 'Start recording user interactions on a page (clicks, inputs, selects). The user performs actions manually, then call stop_recording to get a list of recorded steps that can be used as an automation template.', {
220
- tabId: z.number().describe('Tab ID from get_tabs'),
238
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
221
239
  }, async ({ tabId }) => {
222
240
  ensureConnected();
223
241
  const res = await sendToExtension('start_recording', { tabId });
@@ -225,7 +243,7 @@ export function registerTools(server) {
225
243
  });
226
244
  // 17. stop_recording — stop recording and return captured steps
227
245
  server.tool('stop_recording', 'Stop recording user interactions and return the captured steps. Each step includes the action type, element locator, and value. Use these steps as a template to replay the automation.', {
228
- tabId: z.number().describe('Tab ID from get_tabs'),
246
+ tabId: z.coerce.number().describe('Tab ID from get_tabs'),
229
247
  }, async ({ tabId }) => {
230
248
  ensureConnected();
231
249
  const res = await sendToExtension('stop_recording', { tabId });
@@ -0,0 +1 @@
1
+ export declare const PKG_VERSION: string;
@@ -0,0 +1,7 @@
1
+ import { readFileSync } from 'fs';
2
+ import { fileURLToPath } from 'url';
3
+ import { dirname, resolve } from 'path';
4
+ const __filename = fileURLToPath(import.meta.url);
5
+ const __dirname = dirname(__filename);
6
+ const pkg = JSON.parse(readFileSync(resolve(__dirname, '../package.json'), 'utf-8'));
7
+ export const PKG_VERSION = pkg.version;
package/dist/wsServer.js CHANGED
@@ -12,10 +12,10 @@
12
12
  import { WebSocketServer, WebSocket } from 'ws';
13
13
  import { createServer } from 'http';
14
14
  import { randomBytes } from 'crypto';
15
+ import { PKG_VERSION } from './version.js';
15
16
  const DEFAULT_WS_PORT = 9876;
16
17
  const DISCOVERY_PORT = 9875;
17
18
  const HEARTBEAT_INTERVAL = 15_000; // 15s
18
- const PONG_TIMEOUT = 5_000;
19
19
  let wss = null;
20
20
  let discoveryServer = null;
21
21
  let extensionSocket = null;
@@ -70,7 +70,7 @@ function startDiscoveryServer() {
70
70
  pid: process.pid,
71
71
  extensionConnected: isExtensionConnected(),
72
72
  uptime: Math.round((Date.now() - startTime) / 1000),
73
- version: '0.5.2',
73
+ version: PKG_VERSION,
74
74
  token: authToken,
75
75
  }));
76
76
  return;
package/package.json CHANGED
@@ -1,13 +1,13 @@
1
1
  {
2
2
  "name": "abu-browser-bridge",
3
- "version": "0.5.2",
3
+ "version": "0.6.6",
4
4
  "description": "MCP Server that bridges Abu AI assistant with Chrome Extension for browser automation",
5
5
  "type": "module",
6
- "license": "MIT",
6
+ "license": "SEE LICENSE IN LICENSE",
7
7
  "author": "pm-shawn",
8
8
  "repository": {
9
9
  "type": "git",
10
- "url": "https://github.com/anthropics/abu"
10
+ "url": "https://github.com/PM-Shawn/Abu-Cowork"
11
11
  },
12
12
  "keywords": ["mcp", "browser", "automation", "chrome-extension", "abu"],
13
13
  "bin": {