@browserbridge/bbx 1.0.0 → 1.0.1

This diff compares the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/README.md +3 -1
  2. package/docs/api-reference.md +33 -33
  3. package/docs/mcp-vs-cli.md +104 -104
  4. package/docs/publishing.md +1 -3
  5. package/docs/quickstart.md +6 -6
  6. package/docs/unpacked-extension.md +72 -0
  7. package/manifest.json +3 -17
  8. package/package.json +44 -42
  9. package/packages/agent-client/src/cli-helpers.js +10 -5
  10. package/packages/agent-client/src/cli.js +65 -135
  11. package/packages/agent-client/src/client.js +37 -17
  12. package/packages/agent-client/src/command-registry.js +101 -69
  13. package/packages/agent-client/src/detect.js +3 -6
  14. package/packages/agent-client/src/install.js +10 -27
  15. package/packages/agent-client/src/mcp-config.js +11 -30
  16. package/packages/agent-client/src/runtime.js +41 -20
  17. package/packages/agent-client/src/setup-status.js +13 -28
  18. package/packages/extension/src/background-helpers.js +51 -36
  19. package/packages/extension/src/background-routing.js +11 -13
  20. package/packages/extension/src/background.js +562 -299
  21. package/packages/extension/src/content-script-helpers.js +17 -16
  22. package/packages/extension/src/content-script.js +175 -109
  23. package/packages/extension/src/sidepanel-helpers.js +3 -1
  24. package/packages/extension/ui/popup.js +39 -20
  25. package/packages/extension/ui/sidepanel.js +108 -191
  26. package/packages/extension/ui/ui.css +2 -1
  27. package/packages/mcp-server/src/handlers.js +546 -250
  28. package/packages/mcp-server/src/server.js +558 -257
  29. package/packages/native-host/bin/bridge-daemon.js +6 -2
  30. package/packages/native-host/bin/install-manifest.js +2 -2
  31. package/packages/native-host/bin/postinstall.js +4 -2
  32. package/packages/native-host/src/config.js +11 -7
  33. package/packages/native-host/src/daemon.js +143 -92
  34. package/packages/native-host/src/install-manifest.js +73 -22
  35. package/packages/native-host/src/native-host.js +55 -40
  36. package/packages/protocol/src/budget.js +3 -7
  37. package/packages/protocol/src/capabilities.js +3 -3
  38. package/packages/protocol/src/errors.js +11 -11
  39. package/packages/protocol/src/protocol.js +104 -71
  40. package/packages/protocol/src/registry.js +300 -45
  41. package/packages/protocol/src/summary.js +249 -106
  42. package/packages/protocol/src/types.js +1 -1
  43. package/skills/browser-bridge/SKILL.md +1 -1
  44. package/skills/browser-bridge/agents/openai.yaml +3 -3
  45. package/skills/browser-bridge/references/interaction.md +33 -11
  46. package/skills/browser-bridge/references/patch-workflow.md +3 -0
  47. package/skills/browser-bridge/references/protocol.md +125 -70
  48. package/skills/browser-bridge/references/tailwind.md +12 -11
  49. package/skills/browser-bridge/references/token-efficiency.md +23 -22
  50. package/skills/browser-bridge/references/ui-workflows.md +8 -0
  51. package/packages/extension/ui/offscreen.html +0 -6
  52. package/packages/extension/ui/offscreen.js +0 -61
@@ -24,7 +24,7 @@ import {
24
24
  handleStatusTool,
25
25
  handleStylesLayoutTool,
26
26
  handleTabsTool,
27
- handleInvestigateTool
27
+ handleInvestigateTool,
28
28
  } from './handlers.js';
29
29
  import {
30
30
  BUDGET_PRESETS,
@@ -39,15 +39,17 @@ import {
39
39
  } from '../../protocol/src/index.js';
40
40
 
41
41
  export const BUDGET_PRESET_DESCRIPTION = `Budget preset: "quick", "normal", or "deep" (defaults: query ${BUDGET_PRESETS.normal.maxNodes} nodes / depth ${BUDGET_PRESETS.normal.maxDepth} / text ${BUDGET_PRESETS.normal.textBudget}). Numeric fields override the preset when both are provided.`;
42
- export const TAB_ID_DESCRIPTION = 'Target a specific tab instead of the active tab in the enabled window.';
42
+ export const TAB_ID_DESCRIPTION =
43
+ 'Target a specific tab instead of the active tab in the enabled window.';
43
44
 
44
45
  /** @type {readonly import('../../protocol/src/types.js').BridgeMethod[]} */
45
46
  const INVESTIGATE_SUBAGENT_BRIDGE_METHODS = Object.freeze(
46
- getMethodsByMaxComplexity('low').filter((method) =>
47
- method.startsWith('page.') ||
48
- method.startsWith('dom.') ||
49
- method.startsWith('styles.') ||
50
- method.startsWith('layout.')
47
+ getMethodsByMaxComplexity('low').filter(
48
+ (method) =>
49
+ method.startsWith('page.') ||
50
+ method.startsWith('dom.') ||
51
+ method.startsWith('styles.') ||
52
+ method.startsWith('layout.')
51
53
  )
52
54
  );
53
55
 
@@ -58,12 +60,7 @@ const INVESTIGATE_DELEGATION_HINT = Object.freeze({
58
60
  modelClass: 'small',
59
61
  reasoningEffort: 'low',
60
62
  },
61
- preferredTools: [
62
- 'browser_dom',
63
- 'browser_page',
64
- 'browser_styles_layout',
65
- 'browser_batch',
66
- ],
63
+ preferredTools: ['browser_dom', 'browser_page', 'browser_styles_layout', 'browser_batch'],
67
64
  escalationTools: ['browser_capture'],
68
65
  preferredBridgeMethods: INVESTIGATE_SUBAGENT_BRIDGE_METHODS,
69
66
  escalationTriggers: [
@@ -79,270 +76,574 @@ const INVESTIGATE_DELEGATION_HINT = Object.freeze({
79
76
  export function createBridgeMcpServer() {
80
77
  const server = new McpServer({
81
78
  name: 'browser-bridge',
82
- version: '1.0.0'
79
+ version: '1.0.0',
83
80
  });
84
81
 
85
- server.registerTool('browser_status', {
86
- title: 'Browser Bridge Status',
87
- description: 'Check bridge readiness: daemon connectivity, extension state, and window access. Call first to confirm the bridge is usable. If access is not enabled, ask the user to click Enable in the extension popup or side panel, then retry.',
88
- inputSchema: {}
89
- }, handleStatusTool);
90
-
91
- server.registerTool('browser_setup', {
92
- title: 'Browser Bridge Setup Status',
93
- description: 'Check MCP and CLI skill installation status for agent integration.',
94
- inputSchema: {
95
- global: z.boolean().optional().describe('Check global (true) or local (false) config (default: true)')
96
- }
97
- }, handleSetupTool);
98
-
99
- server.registerTool('browser_logs', {
100
- title: 'Browser Bridge Logs',
101
- description: 'Tail recent bridge request logs for debugging connection or routing issues.',
102
- inputSchema: {
103
- limit: z.number().optional().describe(`Maximum log entries to return (default: ${DEFAULT_CONSOLE_LIMIT})`),
104
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION)
105
- }
106
- }, handleLogTool);
82
+ server.registerTool(
83
+ 'browser_status',
84
+ {
85
+ title: 'Browser Bridge Status',
86
+ description:
87
+ 'Check bridge readiness: daemon connectivity, extension state, and window access. Call first to confirm the bridge is usable. If access is not enabled, ask the user to click Enable in the extension popup or side panel, then retry.',
88
+ inputSchema: {},
89
+ },
90
+ handleStatusTool
91
+ );
107
92
 
108
- server.registerTool('browser_health', {
109
- title: 'Browser Bridge Health',
110
- description: 'Ping the bridge to verify daemon and extension connectivity.',
111
- inputSchema: {}
112
- }, handleHealthTool);
93
+ server.registerTool(
94
+ 'browser_setup',
95
+ {
96
+ title: 'Browser Bridge Setup Status',
97
+ description: 'Check MCP and CLI skill installation status for agent integration.',
98
+ inputSchema: {
99
+ global: z
100
+ .boolean()
101
+ .optional()
102
+ .describe('Check global (true) or local (false) config (default: true)'),
103
+ },
104
+ },
105
+ handleSetupTool
106
+ );
113
107
 
114
- server.registerTool('browser_tabs', {
115
- title: 'Browser Tabs',
116
- description: 'List, create, or close browser tabs. Prefer "list" to work in existing tabs; only use "create" when the user explicitly requests a new page.',
117
- inputSchema: {
118
- action: z.enum(['list', 'create', 'close']).describe('"list" (preferred), "create" (only when needed), or "close"'),
119
- url: z.string().optional().describe('URL for create action'),
120
- active: z.boolean().optional().describe('Focus the new tab (default: true)'),
121
- tabId: z.number().optional().describe('Tab ID (required for close)')
122
- }
123
- }, handleTabsTool);
108
+ server.registerTool(
109
+ 'browser_logs',
110
+ {
111
+ title: 'Browser Bridge Logs',
112
+ description: 'Tail recent bridge request logs for debugging connection or routing issues.',
113
+ inputSchema: {
114
+ limit: z
115
+ .number()
116
+ .optional()
117
+ .describe(`Maximum log entries to return (default: ${DEFAULT_CONSOLE_LIMIT})`),
118
+ budgetPreset: z
119
+ .enum(['quick', 'normal', 'deep'])
120
+ .optional()
121
+ .describe(BUDGET_PRESET_DESCRIPTION),
122
+ },
123
+ },
124
+ handleLogTool
125
+ );
124
126
 
125
- server.registerTool('browser_dom', {
126
- title: 'Browser DOM',
127
- description: 'Query, describe, read, search, or wait for DOM elements. Reuse elementRef from prior results. For full-page text, use browser_page action "text". accessibility_tree is debugger-backed — use query/find first.',
128
- inputSchema: {
129
- action: z.enum(['query', 'describe', 'text', 'attributes', 'wait', 'find_text', 'find_role', 'html', 'accessibility_tree']).describe('DOM operation to perform'),
130
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
131
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION),
132
- selector: z.string().optional().describe('CSS selector (used if no elementRef; resolves to first match)'),
133
- elementRef: z.string().optional().describe('Element reference from prior result (preferred over selector)'),
134
- withinRef: z.string().optional().describe('Scope query to this elementRef subtree'),
135
- maxNodes: z.number().optional().describe(`Maximum nodes to return (default: ${DEFAULT_MAX_NODES})`),
136
- maxDepth: z.number().optional().describe(`Maximum tree depth (default: ${DEFAULT_MAX_DEPTH})`),
137
- textBudget: z.number().optional().describe(`Max chars of text content per node (default: ${DEFAULT_TEXT_BUDGET})`),
138
- includeBbox: z.boolean().optional().describe('Include bounding box (default: true, set false to save tokens)'),
139
- attributeAllowlist: z.array(z.string()).optional().describe('Only include these attributes (reduces tokens)'),
140
- attributes: z.array(z.string()).optional().describe('Attribute names to fetch (for attributes action)'),
141
- text: z.string().optional().describe('Text to search for (for find_text/wait actions)'),
142
- exact: z.boolean().optional().describe('Require exact text match (default: false, substring match)'),
143
- maxResults: z.number().optional().describe('Maximum search results (default: 10)'),
144
- role: z.string().optional().describe('ARIA role to search for (for find_role action)'),
145
- name: z.string().optional().describe('Accessible name to match with role'),
146
- state: z.enum(['attached', 'detached', 'visible', 'hidden']).optional().describe('Expected element state (for wait action)'),
147
- timeoutMs: z.number().optional().describe(`Timeout for wait operations (default: ${DEFAULT_WAIT_TIMEOUT_MS})`),
148
- outer: z.boolean().optional().describe('Return outerHTML instead of innerHTML (default: false)'),
149
- maxLength: z.number().optional().describe(`Max HTML chars to return (default: ${DEFAULT_MAX_HTML_LENGTH})`)
150
- }
151
- }, handleDomTool);
127
+ server.registerTool(
128
+ 'browser_health',
129
+ {
130
+ title: 'Browser Bridge Health',
131
+ description: 'Ping the bridge to verify daemon and extension connectivity.',
132
+ inputSchema: {},
133
+ },
134
+ handleHealthTool
135
+ );
152
136
 
153
- server.registerTool('browser_styles_layout', {
154
- title: 'Browser Styles And Layout',
155
- description: 'Read computed styles, matched CSS rules, box model, or hit-test a viewport point. Reuse elementRef from prior queries. For DOM structure, use browser_dom.',
156
- inputSchema: {
157
- action: z.enum(['computed', 'matched_rules', 'box_model', 'hit_test']).describe('Style/layout operation to perform'),
158
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
159
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION),
160
- elementRef: z.string().optional().describe('Element reference (preferred over selector)'),
161
- selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
162
- properties: z.array(z.string()).optional().describe('Style properties to fetch (omitting returns all - expensive)'),
163
- x: z.number().optional().describe('X coordinate for hit_test (viewport relative)'),
164
- y: z.number().optional().describe('Y coordinate for hit_test (viewport relative)')
165
- }
166
- }, handleStylesLayoutTool);
137
+ server.registerTool(
138
+ 'browser_tabs',
139
+ {
140
+ title: 'Browser Tabs',
141
+ description:
142
+ 'List, create, or close browser tabs. Prefer "list" to work in existing tabs; only use "create" when the user explicitly requests a new page.',
143
+ inputSchema: {
144
+ action: z
145
+ .enum(['list', 'create', 'close'])
146
+ .describe('"list" (preferred), "create" (only when needed), or "close"'),
147
+ url: z.string().optional().describe('URL for create action'),
148
+ active: z.boolean().optional().describe('Focus the new tab (default: true)'),
149
+ tabId: z.number().optional().describe('Tab ID (required for close)'),
150
+ },
151
+ },
152
+ handleTabsTool
153
+ );
167
154
 
168
- server.registerTool('browser_page', {
169
- title: 'Browser Page State',
170
- description: 'Read page-level data: state (URL/title), evaluate (JS), console, storage, text, network, or performance. For element-level reads, use browser_dom. evaluate and performance are debugger-backed — prefer lighter reads first.',
171
- inputSchema: {
172
- action: z.enum(['state', 'evaluate', 'console', 'wait_for_load', 'storage', 'text', 'network', 'performance']).describe('Page operation to perform'),
173
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
174
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION),
175
- expression: z.string().optional().describe('JavaScript expression to evaluate (for evaluate action)'),
176
- awaitPromise: z.boolean().optional().describe('Await returned promises (default: false)'),
177
- timeoutMs: z.number().optional().describe(`Timeout for evaluate/wait operations (default: ${DEFAULT_WAIT_TIMEOUT_MS})`),
178
- returnByValue: z.boolean().optional().describe('Return actual value vs JSON (default: true)'),
179
- level: z.string().optional().describe('Minimum console level: log, warn, error (default: all)'),
180
- clear: z.boolean().optional().describe('Clear buffer after reading (default: false)'),
181
- limit: z.number().optional().describe(`Maximum entries to return (default: ${DEFAULT_CONSOLE_LIMIT})`),
182
- type: z.enum(['local', 'session']).optional().describe('Storage type to read (default: local)'),
183
- keys: z.array(z.string()).optional().describe('Specific storage keys to fetch (omitting returns all)'),
184
- textBudget: z.number().optional().describe(`Max chars for page text (default: ${DEFAULT_PAGE_TEXT_BUDGET})`),
185
- urlPattern: z.string().optional().describe('Filter network entries by URL pattern')
186
- }
187
- }, handlePageTool);
155
+ server.registerTool(
156
+ 'browser_dom',
157
+ {
158
+ title: 'Browser DOM',
159
+ description:
160
+ 'Query, describe, read, search, or wait for DOM elements. Reuse elementRef from prior results. For full-page text, use browser_page action "text". accessibility_tree is debugger-backed — use query/find first.',
161
+ inputSchema: {
162
+ action: z
163
+ .enum([
164
+ 'query',
165
+ 'describe',
166
+ 'text',
167
+ 'attributes',
168
+ 'wait',
169
+ 'find_text',
170
+ 'find_role',
171
+ 'html',
172
+ 'accessibility_tree',
173
+ ])
174
+ .describe('DOM operation to perform'),
175
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
176
+ budgetPreset: z
177
+ .enum(['quick', 'normal', 'deep'])
178
+ .optional()
179
+ .describe(BUDGET_PRESET_DESCRIPTION),
180
+ selector: z
181
+ .string()
182
+ .optional()
183
+ .describe('CSS selector (used if no elementRef; resolves to first match)'),
184
+ elementRef: z
185
+ .string()
186
+ .optional()
187
+ .describe('Element reference from prior result (preferred over selector)'),
188
+ withinRef: z.string().optional().describe('Scope query to this elementRef subtree'),
189
+ maxNodes: z
190
+ .number()
191
+ .optional()
192
+ .describe(`Maximum nodes to return (default: ${DEFAULT_MAX_NODES})`),
193
+ maxDepth: z
194
+ .number()
195
+ .optional()
196
+ .describe(`Maximum tree depth (default: ${DEFAULT_MAX_DEPTH})`),
197
+ textBudget: z
198
+ .number()
199
+ .optional()
200
+ .describe(`Max chars of text content per node (default: ${DEFAULT_TEXT_BUDGET})`),
201
+ includeBbox: z
202
+ .boolean()
203
+ .optional()
204
+ .describe('Include bounding box (default: true, set false to save tokens)'),
205
+ attributeAllowlist: z
206
+ .array(z.string())
207
+ .optional()
208
+ .describe('Only include these attributes (reduces tokens)'),
209
+ attributes: z
210
+ .array(z.string())
211
+ .optional()
212
+ .describe('Attribute names to fetch (for attributes action)'),
213
+ text: z.string().optional().describe('Text to search for (for find_text/wait actions)'),
214
+ exact: z
215
+ .boolean()
216
+ .optional()
217
+ .describe('Require exact text match (default: false, substring match)'),
218
+ maxResults: z.number().optional().describe('Maximum search results (default: 10)'),
219
+ role: z.string().optional().describe('ARIA role to search for (for find_role action)'),
220
+ name: z.string().optional().describe('Accessible name to match with role'),
221
+ state: z
222
+ .enum(['attached', 'detached', 'visible', 'hidden'])
223
+ .optional()
224
+ .describe('Expected element state (for wait action)'),
225
+ timeoutMs: z
226
+ .number()
227
+ .optional()
228
+ .describe(`Timeout for wait operations (default: ${DEFAULT_WAIT_TIMEOUT_MS})`),
229
+ outer: z
230
+ .boolean()
231
+ .optional()
232
+ .describe('Return outerHTML instead of innerHTML (default: false)'),
233
+ maxLength: z
234
+ .number()
235
+ .optional()
236
+ .describe(`Max HTML chars to return (default: ${DEFAULT_MAX_HTML_LENGTH})`),
237
+ },
238
+ },
239
+ handleDomTool
240
+ );
188
241
 
189
- server.registerTool('browser_navigation', {
190
- title: 'Browser Navigation',
191
- description: 'Navigate to a URL, reload, go back/forward, scroll, or resize the viewport. resize is debugger-backed — use only for exact viewport overrides.',
192
- inputSchema: {
193
- action: z.enum(['navigate', 'reload', 'go_back', 'go_forward', 'scroll', 'resize']).describe('Navigation operation to perform'),
194
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
195
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION),
196
- url: z.string().optional().describe('URL to navigate to (for navigate action)'),
197
- waitForLoad: z.boolean().optional().describe('Wait for load event (default: true)'),
198
- timeoutMs: z.number().optional().describe('Timeout for navigation (default: 30000)'),
199
- top: z.number().optional().describe('Scroll target Y position (pixels)'),
200
- left: z.number().optional().describe('Scroll target X position (pixels)'),
201
- behavior: z.enum(['auto', 'smooth']).optional().describe('Scroll behavior (default: auto)'),
202
- relative: z.boolean().optional().describe('Scroll relative to current position (default: false)'),
203
- width: z.number().optional().describe('Viewport width in pixels'),
204
- height: z.number().optional().describe('Viewport height in pixels'),
205
- reset: z.boolean().optional().describe('Reset viewport to original size (for resize)')
206
- }
207
- }, handleNavigationTool);
242
+ server.registerTool(
243
+ 'browser_styles_layout',
244
+ {
245
+ title: 'Browser Styles And Layout',
246
+ description:
247
+ 'Read computed styles, matched CSS rules, box model, or hit-test a viewport point. Reuse elementRef from prior queries. For DOM structure, use browser_dom.',
248
+ inputSchema: {
249
+ action: z
250
+ .enum(['computed', 'matched_rules', 'box_model', 'hit_test'])
251
+ .describe('Style/layout operation to perform'),
252
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
253
+ budgetPreset: z
254
+ .enum(['quick', 'normal', 'deep'])
255
+ .optional()
256
+ .describe(BUDGET_PRESET_DESCRIPTION),
257
+ elementRef: z.string().optional().describe('Element reference (preferred over selector)'),
258
+ selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
259
+ properties: z
260
+ .array(z.string())
261
+ .optional()
262
+ .describe('Style properties to fetch (omitting returns all - expensive)'),
263
+ x: z.number().optional().describe('X coordinate for hit_test (viewport relative)'),
264
+ y: z.number().optional().describe('Y coordinate for hit_test (viewport relative)'),
265
+ },
266
+ },
267
+ handleStylesLayoutTool
268
+ );
208
269
 
209
- server.registerTool('browser_input', {
210
- title: 'Browser Input',
211
- description: 'Simulate user input: click, focus, type, press keys, set checked, select options, hover, drag, or scroll into view. Reuse elementRef from prior queries.',
212
- inputSchema: {
213
- action: z.enum(['click', 'focus', 'type', 'press_key', 'set_checked', 'select_option', 'hover', 'drag', 'scroll_into_view']).describe('Input operation to perform'),
214
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
215
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION),
216
- elementRef: z.string().optional().describe('Target element reference (preferred over selector)'),
217
- selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
218
- button: z.enum(['left', 'middle', 'right']).optional().describe('Mouse button for click (default: left)'),
219
- clickCount: z.number().optional().describe('Click count (1=single, 2=double)'),
220
- text: z.string().optional().describe('Text to type (for type action)'),
221
- clear: z.boolean().optional().describe('Clear field before typing (default: false)'),
222
- submit: z.boolean().optional().describe('Press Enter after typing (default: false)'),
223
- key: z.string().optional().describe('Key to press (e.g., "Enter", "Tab", "ArrowDown")'),
224
- modifiers: z.array(z.enum(['Alt', 'Control', 'Meta', 'Shift'])).optional().describe('Modifier keys'),
225
- checked: z.boolean().optional().describe('Checked state (for set_checked action)'),
226
- values: z.array(z.string()).optional().describe('Option values to select'),
227
- labels: z.array(z.string()).optional().describe('Option labels to select (alternative to values)'),
228
- indexes: z.array(z.number()).optional().describe('Option indexes to select (alternative to values/labels)'),
229
- duration: z.number().optional().describe('Hover duration in ms (default: 100)'),
230
- sourceElementRef: z.string().optional().describe('Drag source element (for drag action)'),
231
- sourceSelector: z.string().optional().describe('Drag source selector (alternative to sourceElementRef)'),
232
- destinationElementRef: z.string().optional().describe('Drag destination element (for drag action)'),
233
- destinationSelector: z.string().optional().describe('Drag destination selector (alternative to destinationElementRef)'),
234
- offsetX: z.number().optional().describe('Drag drop offset X (default: 0)'),
235
- offsetY: z.number().optional().describe('Drag drop offset Y (default: 0)')
236
- }
237
- }, handleInputTool);
270
+ server.registerTool(
271
+ 'browser_page',
272
+ {
273
+ title: 'Browser Page State',
274
+ description:
275
+ 'Read page-level data: state (URL/title), evaluate (JS), console, storage, text, network, or performance. For element-level reads, use browser_dom. evaluate and performance are debugger-backed — prefer lighter reads first.',
276
+ inputSchema: {
277
+ action: z
278
+ .enum([
279
+ 'state',
280
+ 'evaluate',
281
+ 'console',
282
+ 'wait_for_load',
283
+ 'storage',
284
+ 'text',
285
+ 'network',
286
+ 'performance',
287
+ ])
288
+ .describe('Page operation to perform'),
289
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
290
+ budgetPreset: z
291
+ .enum(['quick', 'normal', 'deep'])
292
+ .optional()
293
+ .describe(BUDGET_PRESET_DESCRIPTION),
294
+ expression: z
295
+ .string()
296
+ .optional()
297
+ .describe('JavaScript expression to evaluate (for evaluate action)'),
298
+ awaitPromise: z.boolean().optional().describe('Await returned promises (default: false)'),
299
+ timeoutMs: z
300
+ .number()
301
+ .optional()
302
+ .describe(`Timeout for evaluate/wait operations (default: ${DEFAULT_WAIT_TIMEOUT_MS})`),
303
+ returnByValue: z
304
+ .boolean()
305
+ .optional()
306
+ .describe('Return actual value vs JSON (default: true)'),
307
+ level: z
308
+ .string()
309
+ .optional()
310
+ .describe('Minimum console level: log, warn, error (default: all)'),
311
+ clear: z.boolean().optional().describe('Clear buffer after reading (default: false)'),
312
+ limit: z
313
+ .number()
314
+ .optional()
315
+ .describe(`Maximum entries to return (default: ${DEFAULT_CONSOLE_LIMIT})`),
316
+ type: z
317
+ .enum(['local', 'session'])
318
+ .optional()
319
+ .describe('Storage type to read (default: local)'),
320
+ keys: z
321
+ .array(z.string())
322
+ .optional()
323
+ .describe('Specific storage keys to fetch (omitting returns all)'),
324
+ textBudget: z
325
+ .number()
326
+ .optional()
327
+ .describe(`Max chars for page text (default: ${DEFAULT_PAGE_TEXT_BUDGET})`),
328
+ urlPattern: z.string().optional().describe('Filter network entries by URL pattern'),
329
+ },
330
+ },
331
+ handlePageTool
332
+ );
238
333
 
239
- server.registerTool('browser_patch', {
240
- title: 'Browser Patch',
241
- description: 'Apply or rollback reversible style and DOM patches for live prototyping before editing source. Set verify=true to get computed results inline without a follow-up query.',
242
- inputSchema: {
243
- action: z.enum(['apply_styles', 'apply_dom', 'list', 'rollback', 'commit_baseline']).describe('Patch operation to perform'),
244
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
245
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION),
246
- elementRef: z.string().optional().describe('Target element reference (preferred over selector)'),
247
- selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
248
- declarations: z.record(z.string(), z.string()).optional().describe('CSS property: value pairs (for apply_styles)'),
249
- important: z.boolean().optional().describe('Add !important flag (default: false)'),
250
- operation: z.enum(['setAttribute', 'removeAttribute', 'addClass', 'removeClass', 'setTextContent', 'setProperty']).optional().describe('DOM mutation type'),
251
- value: z.unknown().optional().describe('Value for the DOM operation'),
252
- name: z.string().optional().describe('Attribute/class/property name (for apply_dom)'),
253
- patchId: z.string().optional().describe('Patch ID to rollback (omit for most recent)'),
254
- verify: z.boolean().optional().describe('Return computed result inline after applying, eliminating a verification round-trip')
255
- }
256
- }, handlePatchTool);
334
+ server.registerTool(
335
+ 'browser_navigation',
336
+ {
337
+ title: 'Browser Navigation',
338
+ description:
339
+ 'Navigate to a URL, reload, go back/forward, scroll, or resize the viewport. resize is debugger-backed — use only for exact viewport overrides.',
340
+ inputSchema: {
341
+ action: z
342
+ .enum(['navigate', 'reload', 'go_back', 'go_forward', 'scroll', 'resize'])
343
+ .describe('Navigation operation to perform'),
344
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
345
+ budgetPreset: z
346
+ .enum(['quick', 'normal', 'deep'])
347
+ .optional()
348
+ .describe(BUDGET_PRESET_DESCRIPTION),
349
+ url: z.string().optional().describe('URL to navigate to (for navigate action)'),
350
+ waitForLoad: z.boolean().optional().describe('Wait for load event (default: true)'),
351
+ timeoutMs: z.number().optional().describe('Timeout for navigation (default: 30000)'),
352
+ top: z.number().optional().describe('Scroll target Y position (pixels)'),
353
+ left: z.number().optional().describe('Scroll target X position (pixels)'),
354
+ behavior: z.enum(['auto', 'smooth']).optional().describe('Scroll behavior (default: auto)'),
355
+ relative: z
356
+ .boolean()
357
+ .optional()
358
+ .describe('Scroll relative to current position (default: false)'),
359
+ width: z.number().optional().describe('Viewport width in pixels'),
360
+ height: z.number().optional().describe('Viewport height in pixels'),
361
+ reset: z.boolean().optional().describe('Reset viewport to original size (for resize)'),
362
+ },
363
+ },
364
+ handleNavigationTool
365
+ );
257
366
 
258
- server.registerTool('browser_capture', {
259
- title: 'Browser Capture',
260
- description: 'Capture screenshots or CDP snapshots. Debugger-backed and token-expensive — use only when structured reads (browser_dom, browser_styles_layout) are insufficient. Prefer element, then tight region; full_page only for document-level context.',
261
- inputSchema: {
262
- action: z.enum(['element', 'region', 'full_page', 'cdp_document', 'cdp_dom_snapshot', 'cdp_box_model', 'cdp_computed_styles']).describe('element (preferred), region (tight crop), full_page (document-level only), or cdp_* for low-level data'),
263
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
264
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION),
265
- elementRef: z.string().optional().describe('Element reference (for element action, preferred)'),
266
- selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
267
- rect: z.object({
268
- x: z.number().describe('Region left edge (viewport pixels)'),
269
- y: z.number().describe('Region top edge (viewport pixels)'),
270
- width: z.number().describe('Region width (pixels)'),
271
- height: z.number().describe('Region height (pixels)')
272
- }).optional().describe('Viewport region for region action (keep crop tight)')
273
- }
274
- }, handleCaptureTool);
367
+ server.registerTool(
368
+ 'browser_input',
369
+ {
370
+ title: 'Browser Input',
371
+ description:
372
+ 'Simulate user input: click, focus, type, press keys, set checked, select options, hover, drag, or scroll into view. Reuse elementRef from prior queries.',
373
+ inputSchema: {
374
+ action: z
375
+ .enum([
376
+ 'click',
377
+ 'focus',
378
+ 'type',
379
+ 'press_key',
380
+ 'set_checked',
381
+ 'select_option',
382
+ 'hover',
383
+ 'drag',
384
+ 'scroll_into_view',
385
+ ])
386
+ .describe('Input operation to perform'),
387
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
388
+ budgetPreset: z
389
+ .enum(['quick', 'normal', 'deep'])
390
+ .optional()
391
+ .describe(BUDGET_PRESET_DESCRIPTION),
392
+ elementRef: z
393
+ .string()
394
+ .optional()
395
+ .describe('Target element reference (preferred over selector)'),
396
+ selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
397
+ button: z
398
+ .enum(['left', 'middle', 'right'])
399
+ .optional()
400
+ .describe('Mouse button for click (default: left)'),
401
+ clickCount: z.number().optional().describe('Click count (1=single, 2=double)'),
402
+ text: z.string().optional().describe('Text to type (for type action)'),
403
+ clear: z.boolean().optional().describe('Clear field before typing (default: false)'),
404
+ submit: z.boolean().optional().describe('Press Enter after typing (default: false)'),
405
+ key: z.string().optional().describe('Key to press (e.g., "Enter", "Tab", "ArrowDown")'),
406
+ modifiers: z
407
+ .array(z.enum(['Alt', 'Control', 'Meta', 'Shift']))
408
+ .optional()
409
+ .describe('Modifier keys'),
410
+ checked: z.boolean().optional().describe('Checked state (for set_checked action)'),
411
+ values: z.array(z.string()).optional().describe('Option values to select'),
412
+ labels: z
413
+ .array(z.string())
414
+ .optional()
415
+ .describe('Option labels to select (alternative to values)'),
416
+ indexes: z
417
+ .array(z.number())
418
+ .optional()
419
+ .describe('Option indexes to select (alternative to values/labels)'),
420
+ duration: z.number().optional().describe('Hover duration in ms (default: 100)'),
421
+ sourceElementRef: z.string().optional().describe('Drag source element (for drag action)'),
422
+ sourceSelector: z
423
+ .string()
424
+ .optional()
425
+ .describe('Drag source selector (alternative to sourceElementRef)'),
426
+ destinationElementRef: z
427
+ .string()
428
+ .optional()
429
+ .describe('Drag destination element (for drag action)'),
430
+ destinationSelector: z
431
+ .string()
432
+ .optional()
433
+ .describe('Drag destination selector (alternative to destinationElementRef)'),
434
+ offsetX: z.number().optional().describe('Drag drop offset X (default: 0)'),
435
+ offsetY: z.number().optional().describe('Drag drop offset Y (default: 0)'),
436
+ },
437
+ },
438
+ handleInputTool
439
+ );
275
440
 
276
- server.registerTool('browser_batch', {
277
- title: 'Browser Bridge Batch',
278
- description: 'Execute multiple bridge calls in parallel to reduce round-trips. Combine independent reads (e.g., styles + text + console) in one call. Preserves call order in the response.',
279
- inputSchema: {
280
- calls: z.array(z.object({
281
- method: z.string().describe('Bridge method name (e.g. "dom.query", "page.get_text")'),
282
- params: z.record(z.string(), z.unknown()).optional().describe('Method params for this call'),
441
+ server.registerTool(
442
+ 'browser_patch',
443
+ {
444
+ title: 'Browser Patch',
445
+ description:
446
+ 'Apply or rollback reversible style and DOM patches for live prototyping before editing source. Set verify=true to get computed results inline without a follow-up query.',
447
+ inputSchema: {
448
+ action: z
449
+ .enum(['apply_styles', 'apply_dom', 'list', 'rollback', 'commit_baseline'])
450
+ .describe('Patch operation to perform'),
283
451
  tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
284
- budgetPreset: z.enum(['quick', 'normal', 'deep']).optional().describe(BUDGET_PRESET_DESCRIPTION)
285
- })).min(1).describe('Calls to execute in parallel')
286
- }
287
- }, handleBatchTool);
452
+ budgetPreset: z
453
+ .enum(['quick', 'normal', 'deep'])
454
+ .optional()
455
+ .describe(BUDGET_PRESET_DESCRIPTION),
456
+ elementRef: z
457
+ .string()
458
+ .optional()
459
+ .describe('Target element reference (preferred over selector)'),
460
+ selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
461
+ declarations: z
462
+ .record(z.string(), z.string())
463
+ .optional()
464
+ .describe('CSS property: value pairs (for apply_styles)'),
465
+ important: z.boolean().optional().describe('Add !important flag (default: false)'),
466
+ operation: z
467
+ .enum([
468
+ 'setAttribute',
469
+ 'removeAttribute',
470
+ 'addClass',
471
+ 'removeClass',
472
+ 'setTextContent',
473
+ 'setProperty',
474
+ ])
475
+ .optional()
476
+ .describe('DOM mutation type'),
477
+ value: z.unknown().optional().describe('Value for the DOM operation'),
478
+ name: z.string().optional().describe('Attribute/class/property name (for apply_dom)'),
479
+ patchId: z.string().optional().describe('Patch ID to rollback (omit for most recent)'),
480
+ verify: z
481
+ .boolean()
482
+ .optional()
483
+ .describe(
484
+ 'Return computed result inline after applying, eliminating a verification round-trip'
485
+ ),
486
+ },
487
+ },
488
+ handlePatchTool
489
+ );
288
490
 
289
- server.registerTool('browser_call', {
290
- title: 'Raw Browser Bridge Call',
291
- description: 'Call any bridge method directly by name. Escape hatch when grouped tools lack a needed parameter or method.',
292
- inputSchema: {
293
- method: z.string().describe('Bridge method name (e.g., "dom.query", "input.click")'),
294
- params: z.record(z.string(), z.unknown()).optional().describe('Method parameters as object'),
295
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION)
296
- }
297
- }, handleRawCallTool);
491
+ server.registerTool(
492
+ 'browser_capture',
493
+ {
494
+ title: 'Browser Capture',
495
+ description:
496
+ 'Capture screenshots or CDP snapshots. Debugger-backed and token-expensive — use only when structured reads (browser_dom, browser_styles_layout) are insufficient. Prefer element, then tight region; full_page only for document-level context.',
497
+ inputSchema: {
498
+ action: z
499
+ .enum([
500
+ 'element',
501
+ 'region',
502
+ 'full_page',
503
+ 'cdp_document',
504
+ 'cdp_dom_snapshot',
505
+ 'cdp_box_model',
506
+ 'cdp_computed_styles',
507
+ ])
508
+ .describe(
509
+ 'element (preferred), region (tight crop), full_page (document-level only), or cdp_* for low-level data'
510
+ ),
511
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
512
+ budgetPreset: z
513
+ .enum(['quick', 'normal', 'deep'])
514
+ .optional()
515
+ .describe(BUDGET_PRESET_DESCRIPTION),
516
+ elementRef: z
517
+ .string()
518
+ .optional()
519
+ .describe('Element reference (for element action, preferred)'),
520
+ selector: z.string().optional().describe('CSS selector (used if no elementRef)'),
521
+ rect: z
522
+ .object({
523
+ x: z.number().describe('Region left edge (viewport pixels)'),
524
+ y: z.number().describe('Region top edge (viewport pixels)'),
525
+ width: z.number().describe('Region width (pixels)'),
526
+ height: z.number().describe('Region height (pixels)'),
527
+ })
528
+ .optional()
529
+ .describe('Viewport region for region action (keep crop tight)'),
530
+ },
531
+ },
532
+ handleCaptureTool
533
+ );
298
534
 
299
- server.registerTool('browser_skill', {
300
- title: 'Browser Bridge Runtime Context',
301
- description: 'Return runtime context: budget presets, method groups, and active limits. Call to discover defaults before inspecting a page.',
302
- inputSchema: {}
303
- }, handleSkillTool);
535
+ server.registerTool(
536
+ 'browser_batch',
537
+ {
538
+ title: 'Browser Bridge Batch',
539
+ description:
540
+ 'Execute multiple bridge calls in parallel to reduce round-trips. Combine independent reads (e.g., styles + text + console) in one call. Preserves call order in the response.',
541
+ inputSchema: {
542
+ calls: z
543
+ .array(
544
+ z.object({
545
+ method: z.string().describe('Bridge method name (e.g. "dom.query", "page.get_text")'),
546
+ params: z
547
+ .record(z.string(), z.unknown())
548
+ .optional()
549
+ .describe('Method params for this call'),
550
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
551
+ budgetPreset: z
552
+ .enum(['quick', 'normal', 'deep'])
553
+ .optional()
554
+ .describe(BUDGET_PRESET_DESCRIPTION),
555
+ })
556
+ )
557
+ .min(1)
558
+ .describe('Calls to execute in parallel'),
559
+ },
560
+ },
561
+ handleBatchTool
562
+ );
304
563
 
305
- server.registerTool('browser_access', {
306
- title: 'Request Browser Bridge Access',
307
- description: 'Request window access for Browser Bridge. Surfaces an Enable prompt in the extension popup or side panel. Use once per window; if access is already pending, ask the user to enable that window instead of requesting again.',
308
- inputSchema: {}
309
- }, handleAccessTool);
564
+ server.registerTool(
565
+ 'browser_call',
566
+ {
567
+ title: 'Raw Browser Bridge Call',
568
+ description:
569
+ 'Call any bridge method directly by name. Escape hatch when grouped tools lack a needed parameter or method.',
570
+ inputSchema: {
571
+ method: z.string().describe('Bridge method name (e.g., "dom.query", "input.click")'),
572
+ params: z
573
+ .record(z.string(), z.unknown())
574
+ .optional()
575
+ .describe('Method parameters as object'),
576
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
577
+ },
578
+ },
579
+ handleRawCallTool
580
+ );
310
581
 
311
- server.registerTool('browser_investigate', {
312
- title: 'Browser Investigate',
313
- description:
314
- 'Investigate a page to answer a question or verify a condition. ' +
315
- 'Pass a natural-language objective and an optional scope (quick/normal/deep). ' +
316
- 'DELEGATION HINT: Prefer delegating this to a smaller, low-cost subagent ' +
317
- 'that starts with structured reads via browser_dom, browser_page, ' +
318
- 'browser_styles_layout, and browser_batch. Escalate to browser_capture only ' +
319
- 'when structured reads are insufficient. ' +
320
- 'If subagent delegation is not available, a deterministic heuristic fallback ' +
321
- 'runs a scripted inspection sequence and returns a best-effort summary.',
322
- annotations: {
323
- readOnlyHint: true,
324
- destructiveHint: false,
325
- idempotentHint: true,
326
- openWorldHint: true,
582
+ server.registerTool(
583
+ 'browser_skill',
584
+ {
585
+ title: 'Browser Bridge Runtime Context',
586
+ description:
587
+ 'Return runtime context: budget presets, method groups, and active limits. Call to discover defaults before inspecting a page.',
588
+ inputSchema: {},
327
589
  },
328
- _meta: {
329
- delegationHint: INVESTIGATE_DELEGATION_HINT,
590
+ handleSkillTool
591
+ );
592
+
593
+ server.registerTool(
594
+ 'browser_access',
595
+ {
596
+ title: 'Request Browser Bridge Access',
597
+ description:
598
+ 'Request window access for Browser Bridge. Surfaces an Enable prompt in the extension popup or side panel. Use once per window; if access is already pending, ask the user to enable that window instead of requesting again.',
599
+ inputSchema: {},
330
600
  },
331
- inputSchema: {
332
- objective: z.string().describe(
333
- 'What to find, verify, or extract from the current page (natural language).'
334
- ),
335
- scope: z.enum(['quick', 'normal', 'deep']).optional().describe(
336
- 'Investigation depth: "quick" (page state + one DOM query), ' +
337
- '"normal" (state + DOM + text, default), ' +
338
- '"deep" (state + DOM + text + console + network).'
339
- ),
340
- tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
341
- selector: z.string().optional().describe(
342
- 'Optional CSS selector to scope the investigation to a subtree.'
343
- ),
601
+ handleAccessTool
602
+ );
603
+
604
+ server.registerTool(
605
+ 'browser_investigate',
606
+ {
607
+ title: 'Browser Investigate',
608
+ description:
609
+ 'Investigate a page to answer a question or verify a condition. ' +
610
+ 'Pass a natural-language objective and an optional scope (quick/normal/deep). ' +
611
+ 'DELEGATION HINT: Prefer delegating this to a smaller, low-cost subagent ' +
612
+ 'that starts with structured reads via browser_dom, browser_page, ' +
613
+ 'browser_styles_layout, and browser_batch. Escalate to browser_capture only ' +
614
+ 'when structured reads are insufficient. ' +
615
+ 'If subagent delegation is not available, a deterministic heuristic fallback ' +
616
+ 'runs a scripted inspection sequence and returns a best-effort summary.',
617
+ annotations: {
618
+ readOnlyHint: true,
619
+ destructiveHint: false,
620
+ idempotentHint: true,
621
+ openWorldHint: true,
622
+ },
623
+ _meta: {
624
+ delegationHint: INVESTIGATE_DELEGATION_HINT,
625
+ },
626
+ inputSchema: {
627
+ objective: z
628
+ .string()
629
+ .describe('What to find, verify, or extract from the current page (natural language).'),
630
+ scope: z
631
+ .enum(['quick', 'normal', 'deep'])
632
+ .optional()
633
+ .describe(
634
+ 'Investigation depth: "quick" (page state + one DOM query), ' +
635
+ '"normal" (state + DOM + text, default), ' +
636
+ '"deep" (state + DOM + text + console + network).'
637
+ ),
638
+ tabId: z.number().optional().describe(TAB_ID_DESCRIPTION),
639
+ selector: z
640
+ .string()
641
+ .optional()
642
+ .describe('Optional CSS selector to scope the investigation to a subtree.'),
643
+ },
344
644
  },
345
- }, handleInvestigateTool);
645
+ handleInvestigateTool
646
+ );
346
647
 
347
648
  return server;
348
649
  }