btcp-browser-agent 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -234,7 +234,6 @@ export function createClient() {
234
234
  assertSuccess(response);
235
235
  return response.data.visible;
236
236
  },
237
- // Tabs
238
237
  async screenshot(options) {
239
238
  const response = await sendCommand({
240
239
  id: generateCommandId(),
@@ -245,136 +244,27 @@ export function createClient() {
245
244
  assertSuccess(response);
246
245
  return response.data.screenshot;
247
246
  },
248
- async tabNew(options) {
249
- const response = await sendCommand({
250
- id: generateCommandId(),
251
- action: 'tabNew',
252
- url: options?.url,
253
- active: options?.active,
254
- });
255
- assertSuccess(response);
256
- return response.data;
257
- },
258
- async tabClose(tabId) {
259
- return sendCommand({
260
- id: generateCommandId(),
261
- action: 'tabClose',
262
- tabId,
263
- });
264
- },
265
- async tabSwitch(tabId) {
266
- return sendCommand({
267
- id: generateCommandId(),
268
- action: 'tabSwitch',
269
- tabId,
270
- });
271
- },
272
- async tabList() {
273
- const response = await sendCommand({
274
- id: generateCommandId(),
275
- action: 'tabList',
276
- });
277
- assertSuccess(response);
278
- return response.data.tabs;
279
- },
280
- // Tab Groups & Sessions
281
- async groupCreate(options) {
282
- const response = await sendCommand({
283
- id: generateCommandId(),
284
- action: 'groupCreate',
285
- tabIds: options?.tabIds,
286
- title: options?.title,
287
- color: options?.color,
288
- collapsed: options?.collapsed,
289
- });
290
- assertSuccess(response);
291
- return response.data;
292
- },
293
- async groupUpdate(groupId, options) {
294
- const response = await sendCommand({
295
- id: generateCommandId(),
296
- action: 'groupUpdate',
297
- groupId,
298
- title: options.title,
299
- color: options.color,
300
- collapsed: options.collapsed,
301
- });
302
- assertSuccess(response);
303
- return response.data;
304
- },
305
- async groupDelete(groupId) {
306
- return sendCommand({
307
- id: generateCommandId(),
308
- action: 'groupDelete',
309
- groupId,
310
- });
311
- },
312
- async groupList() {
313
- const response = await sendCommand({
314
- id: generateCommandId(),
315
- action: 'groupList',
316
- });
317
- assertSuccess(response);
318
- return response.data.groups;
319
- },
320
- async groupAddTabs(groupId, tabIds) {
247
+ async wait(options) {
321
248
  return sendCommand({
322
249
  id: generateCommandId(),
323
- action: 'groupAddTabs',
324
- groupId,
325
- tabIds,
250
+ action: 'wait',
251
+ selector: options?.selector,
252
+ timeout: options?.timeout,
326
253
  });
327
254
  },
328
- async groupRemoveTabs(tabIds) {
255
+ async press(key, options) {
329
256
  return sendCommand({
330
257
  id: generateCommandId(),
331
- action: 'groupRemoveTabs',
332
- tabIds,
333
- });
334
- },
335
- async groupGet(groupId) {
336
- const response = await sendCommand({
337
- id: generateCommandId(),
338
- action: 'groupGet',
339
- groupId,
340
- });
341
- assertSuccess(response);
342
- return response.data;
343
- },
344
- async sessionGetCurrent() {
345
- const response = await sendCommand({
346
- id: generateCommandId(),
347
- action: 'sessionGetCurrent',
348
- });
349
- assertSuccess(response);
350
- return response.data;
351
- },
352
- async popupInitialize() {
353
- const response = await sendCommand({
354
- id: generateCommandId(),
355
- action: 'popupInitialize',
356
- });
357
- assertSuccess(response);
358
- return response.data;
359
- },
360
- // Script Injection
361
- async scriptInject(code, options) {
362
- const response = await sendCommand({
363
- id: generateCommandId(),
364
- action: 'scriptInject',
365
- code,
366
- scriptId: options?.scriptId,
258
+ action: 'press',
259
+ key,
260
+ selector: options?.selector,
367
261
  });
368
- assertSuccess(response);
369
- return response.data;
370
262
  },
371
- async scriptSend(payload, options) {
263
+ async evaluate(expression) {
372
264
  const response = await sendCommand({
373
265
  id: generateCommandId(),
374
- action: 'scriptSend',
375
- payload,
376
- scriptId: options?.scriptId,
377
- timeout: options?.timeout,
266
+ action: 'evaluate',
267
+ script: expression,
378
268
  });
379
269
  assertSuccess(response);
380
270
  return response.data.result;
@@ -77,6 +77,9 @@ export interface RemoteAgentEvents {
77
77
  }
78
78
  /**
79
79
  * Get all browser tool definitions for BTCP registration
80
+ *
81
+ * Minimal toolset following Unix philosophy - each tool does one thing well.
82
+ * Advanced operations can be done via browser_evaluate.
80
83
  */
81
84
  export declare function getBrowserToolDefinitions(): BTCPToolDefinition[];
82
85
  /**
@@ -23,308 +23,71 @@ import { getBackgroundAgent } from './background.js';
23
23
  // ============================================================================
24
24
  /**
25
25
  * Get all browser tool definitions for BTCP registration
26
+ *
27
+ * Minimal toolset following Unix philosophy - each tool does one thing well.
28
+ * Advanced operations can be done via browser_evaluate.
26
29
  */
27
30
  export function getBrowserToolDefinitions() {
28
31
  return [
29
- // Navigation tools
30
32
  {
31
33
  name: 'browser_navigate',
32
- description: 'Navigate to a URL in the current tab',
34
+ description: 'Navigate to a URL',
33
35
  inputSchema: {
34
36
  type: 'object',
35
37
  properties: {
36
38
  url: { type: 'string', description: 'The URL to navigate to' },
37
- waitUntil: {
38
- type: 'string',
39
- enum: ['load', 'domcontentloaded'],
40
- description: 'Wait until page load event (default: load)',
41
- },
42
39
  },
43
40
  required: ['url'],
44
41
  },
45
42
  },
46
- {
47
- name: 'browser_back',
48
- description: 'Go back in browser history',
49
- inputSchema: { type: 'object', properties: {} },
50
- },
51
- {
52
- name: 'browser_forward',
53
- description: 'Go forward in browser history',
54
- inputSchema: { type: 'object', properties: {} },
55
- },
56
- {
57
- name: 'browser_reload',
58
- description: 'Reload the current page',
59
- inputSchema: {
60
- type: 'object',
61
- properties: {
62
- bypassCache: { type: 'boolean', description: 'Bypass browser cache' },
63
- },
64
- },
65
- },
66
- // DOM interaction tools
67
43
  {
68
44
  name: 'browser_snapshot',
69
- description: 'Get accessibility tree snapshot of the page. Returns a text representation with element refs (@ref:N) that can be used in other commands.',
45
+ description: 'Get page snapshot as accessibility tree with element refs (@ref:N). Use refs in click/type commands.',
70
46
  inputSchema: {
71
47
  type: 'object',
72
- properties: {
73
- selector: { type: 'string', description: 'CSS selector to scope the snapshot' },
74
- maxDepth: { type: 'number', description: 'Maximum tree depth to traverse' },
75
- mode: {
76
- type: 'string',
77
- enum: ['interactive', 'outline', 'content'],
78
- description: 'Snapshot mode: interactive (actionable elements), outline (structure), content (text)',
79
- },
80
- },
48
+ properties: {},
81
49
  },
82
50
  },
83
51
  {
84
52
  name: 'browser_click',
85
- description: 'Click an element by CSS selector or element ref (@ref:N from snapshot)',
53
+ description: 'Click an element using @ref:N from snapshot',
86
54
  inputSchema: {
87
55
  type: 'object',
88
56
  properties: {
89
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
57
+ ref: { type: 'string', description: 'Element reference from snapshot (e.g., @ref:5)' },
90
58
  },
91
- required: ['selector'],
59
+ required: ['ref'],
92
60
  },
93
61
  },
94
62
  {
95
63
  name: 'browser_type',
96
- description: 'Type text into an input element (appends to existing value)',
64
+ description: 'Type text into an element',
97
65
  inputSchema: {
98
66
  type: 'object',
99
67
  properties: {
100
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
68
+ ref: { type: 'string', description: 'Element reference from snapshot' },
101
69
  text: { type: 'string', description: 'Text to type' },
102
- clear: { type: 'boolean', description: 'Clear existing value before typing' },
103
- },
104
- required: ['selector', 'text'],
105
- },
106
- },
107
- {
108
- name: 'browser_fill',
109
- description: 'Fill an input element (replaces existing value)',
110
- inputSchema: {
111
- type: 'object',
112
- properties: {
113
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
114
- value: { type: 'string', description: 'Value to fill' },
115
- },
116
- required: ['selector', 'value'],
117
- },
118
- },
119
- {
120
- name: 'browser_select',
121
- description: 'Select an option from a dropdown',
122
- inputSchema: {
123
- type: 'object',
124
- properties: {
125
- selector: { type: 'string', description: 'CSS selector or @ref:N of the select element' },
126
- value: { type: 'string', description: 'Option value to select' },
127
- },
128
- required: ['selector', 'value'],
129
- },
130
- },
131
- {
132
- name: 'browser_check',
133
- description: 'Check a checkbox or radio button',
134
- inputSchema: {
135
- type: 'object',
136
- properties: {
137
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
138
- },
139
- required: ['selector'],
140
- },
141
- },
142
- {
143
- name: 'browser_uncheck',
144
- description: 'Uncheck a checkbox',
145
- inputSchema: {
146
- type: 'object',
147
- properties: {
148
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
149
- },
150
- required: ['selector'],
151
- },
152
- },
153
- {
154
- name: 'browser_hover',
155
- description: 'Hover over an element',
156
- inputSchema: {
157
- type: 'object',
158
- properties: {
159
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
160
- },
161
- required: ['selector'],
162
- },
163
- },
164
- {
165
- name: 'browser_scroll',
166
- description: 'Scroll the page or an element',
167
- inputSchema: {
168
- type: 'object',
169
- properties: {
170
- selector: { type: 'string', description: 'CSS selector or @ref:N (optional, scrolls window if omitted)' },
171
- x: { type: 'number', description: 'Horizontal scroll amount in pixels' },
172
- y: { type: 'number', description: 'Vertical scroll amount in pixels' },
173
- },
174
- },
175
- },
176
- {
177
- name: 'browser_getText',
178
- description: 'Get text content of an element',
179
- inputSchema: {
180
- type: 'object',
181
- properties: {
182
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
183
- },
184
- required: ['selector'],
185
- },
186
- },
187
- {
188
- name: 'browser_getAttribute',
189
- description: 'Get an attribute value from an element',
190
- inputSchema: {
191
- type: 'object',
192
- properties: {
193
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
194
- attribute: { type: 'string', description: 'Attribute name to get' },
195
70
  },
196
- required: ['selector', 'attribute'],
71
+ required: ['ref', 'text'],
197
72
  },
198
73
  },
199
- {
200
- name: 'browser_isVisible',
201
- description: 'Check if an element is visible',
202
- inputSchema: {
203
- type: 'object',
204
- properties: {
205
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
206
- },
207
- required: ['selector'],
208
- },
209
- },
210
- // Screenshot tool
211
74
  {
212
75
  name: 'browser_screenshot',
213
- description: 'Capture a screenshot of the visible tab',
214
- inputSchema: {
215
- type: 'object',
216
- properties: {
217
- format: { type: 'string', enum: ['png', 'jpeg'], description: 'Image format' },
218
- quality: { type: 'number', description: 'JPEG quality (0-100)' },
219
- },
220
- },
221
- },
222
- // Tab management tools
223
- {
224
- name: 'browser_tab_new',
225
- description: 'Open a new tab',
226
- inputSchema: {
227
- type: 'object',
228
- properties: {
229
- url: { type: 'string', description: 'URL to open (optional)' },
230
- active: { type: 'boolean', description: 'Make the new tab active (default: true)' },
231
- },
232
- },
233
- },
234
- {
235
- name: 'browser_tab_close',
236
- description: 'Close a tab',
237
- inputSchema: {
238
- type: 'object',
239
- properties: {
240
- tabId: { type: 'number', description: 'Tab ID to close (optional, closes active tab if omitted)' },
241
- },
242
- },
243
- },
244
- {
245
- name: 'browser_tab_switch',
246
- description: 'Switch to a different tab',
247
- inputSchema: {
248
- type: 'object',
249
- properties: {
250
- tabId: { type: 'number', description: 'Tab ID to switch to' },
251
- },
252
- required: ['tabId'],
253
- },
254
- },
255
- {
256
- name: 'browser_tab_list',
257
- description: 'List all tabs in the current session',
258
- inputSchema: { type: 'object', properties: {} },
259
- },
260
- // Keyboard tools
261
- {
262
- name: 'browser_press',
263
- description: 'Press a keyboard key (e.g., Enter, Tab, Escape)',
264
- inputSchema: {
265
- type: 'object',
266
- properties: {
267
- key: { type: 'string', description: 'Key to press (e.g., "Enter", "Tab", "Escape", "ArrowDown")' },
268
- selector: { type: 'string', description: 'Optional element to focus before pressing' },
269
- },
270
- required: ['key'],
271
- },
272
- },
273
- // Script injection tools
274
- {
275
- name: 'browser_script_inject',
276
- description: "Inject JavaScript code into the page's main world. The script can listen for commands via btcp:script-command messages and respond with btcp:script-ack.",
76
+ description: 'Capture a screenshot of the page',
277
77
  inputSchema: {
278
78
  type: 'object',
279
- properties: {
280
- code: { type: 'string', description: 'JavaScript code to inject' },
281
- scriptId: {
282
- type: 'string',
283
- description: 'Unique identifier for this script (default: "default"). Used to target with script_send.',
284
- },
285
- },
286
- required: ['code'],
287
- },
288
- },
289
- {
290
- name: 'browser_script_send',
291
- description: 'Send a command to an injected script and wait for acknowledgment. The injected script should listen for btcp:script-command and respond with btcp:script-ack.',
292
- inputSchema: {
293
- type: 'object',
294
- properties: {
295
- payload: {
296
- type: 'object',
297
- description: 'Payload to send to the script. Typically includes an "action" field.',
298
- },
299
- scriptId: { type: 'string', description: 'Target script ID (default: "default")' },
300
- timeout: { type: 'number', description: 'Timeout in milliseconds (default: 30000)' },
301
- },
302
- required: ['payload'],
79
+ properties: {},
303
80
  },
304
81
  },
305
- // Wait tools
306
82
  {
307
- name: 'browser_wait',
308
- description: 'Wait for a specified duration or condition',
309
- inputSchema: {
310
- type: 'object',
311
- properties: {
312
- ms: { type: 'number', description: 'Milliseconds to wait' },
313
- selector: { type: 'string', description: 'Wait for this selector to appear' },
314
- timeout: { type: 'number', description: 'Max wait time for selector (default: 30000)' },
315
- },
316
- },
317
- },
318
- // Evaluate tool
319
- {
320
- name: 'browser_evaluate',
321
- description: 'Evaluate JavaScript expression in the page context and return the result',
83
+ name: 'browser_scroll',
84
+ description: 'Scroll the page',
322
85
  inputSchema: {
323
86
  type: 'object',
324
87
  properties: {
325
- expression: { type: 'string', description: 'JavaScript expression to evaluate' },
88
+ direction: { type: 'string', enum: ['up', 'down'], description: 'Scroll direction' },
326
89
  },
327
- required: ['expression'],
90
+ required: ['direction'],
328
91
  },
329
92
  },
330
93
  ];
@@ -336,40 +99,25 @@ export function getBrowserToolDefinitions() {
336
99
  * Map BTCP tool name and arguments to browser-agent Command
337
100
  */
338
101
  export function mapToolToCommand(toolName, args) {
339
- // Remove 'browser_' prefix and convert to action
340
- const actionMap = {
341
- browser_navigate: 'navigate',
342
- browser_back: 'back',
343
- browser_forward: 'forward',
344
- browser_reload: 'reload',
345
- browser_snapshot: 'snapshot',
346
- browser_click: 'click',
347
- browser_type: 'type',
348
- browser_fill: 'fill',
349
- browser_select: 'select',
350
- browser_check: 'check',
351
- browser_uncheck: 'uncheck',
352
- browser_hover: 'hover',
353
- browser_scroll: 'scroll',
354
- browser_getText: 'getText',
355
- browser_getAttribute: 'getAttribute',
356
- browser_isVisible: 'isVisible',
357
- browser_screenshot: 'screenshot',
358
- browser_tab_new: 'tabNew',
359
- browser_tab_close: 'tabClose',
360
- browser_tab_switch: 'tabSwitch',
361
- browser_tab_list: 'tabList',
362
- browser_press: 'press',
363
- browser_script_inject: 'scriptInject',
364
- browser_script_send: 'scriptSend',
365
- browser_wait: 'wait',
366
- browser_evaluate: 'evaluate',
367
- };
368
- const action = actionMap[toolName];
369
- if (!action) {
370
- throw new Error(`Unknown tool: ${toolName}`);
102
+ switch (toolName) {
103
+ case 'browser_navigate':
104
+ return { action: 'navigate', url: args.url };
105
+ case 'browser_snapshot':
106
+ return { action: 'snapshot' };
107
+ case 'browser_click':
108
+ return { action: 'click', selector: args.ref };
109
+ case 'browser_type':
110
+ return { action: 'type', selector: args.ref, text: args.text };
111
+ case 'browser_screenshot':
112
+ return { action: 'screenshot' };
113
+ case 'browser_scroll': {
114
+ const direction = args.direction;
115
+ const amount = direction === 'down' ? 500 : -500;
116
+ return { action: 'scroll', y: amount };
117
+ }
118
+ default:
119
+ throw new Error(`Unknown tool: ${toolName}`);
371
120
  }
372
- return { action, ...args };
373
121
  }
374
122
  /**
375
123
  * Format response for BTCP protocol
@@ -444,17 +192,53 @@ export function createRemoteAgent(config) {
444
192
  }
445
193
  /**
446
194
  * Ensure a session exists, creating one if needed
195
+ *
196
+ * This checks in order:
197
+ * 1. Current active session
198
+ * 2. Persistent session from storage (reconnects if found)
199
+ * 3. Existing BTCP tab groups (reconnects to first one found)
200
+ * 4. Creates a new session if none found (respects maxSession limit)
447
201
  */
448
202
  async function ensureSession() {
203
+ // 1. Check if there's an active session
449
204
  const sessionResult = await backgroundAgent.execute({ action: 'sessionGetCurrent' });
450
205
  if (sessionResult.success && sessionResult.data) {
451
206
  const session = sessionResult.data.session;
452
207
  if (session?.groupId) {
208
+ log('Active session found:', session.groupId);
453
209
  return; // Session already exists
454
210
  }
455
211
  }
456
- // Create a new session with a tab
457
- log('No active session, creating one automatically...');
212
+ // 2. Try to reconnect via popup initialize (handles persistent session check)
213
+ log('No active session, trying to reconnect to existing session...');
214
+ const initResult = await backgroundAgent.execute({ action: 'popupInitialize' });
215
+ if (initResult.success && initResult.data) {
216
+ const initData = initResult.data;
217
+ if (initData.reconnected) {
218
+ log('Reconnected to existing session');
219
+ return;
220
+ }
221
+ }
222
+ // 3. Check for existing BTCP tab groups and try to use one
223
+ const groupsResult = await backgroundAgent.execute({ action: 'groupList' });
224
+ if (groupsResult.success && groupsResult.data) {
225
+ const groups = groupsResult.data;
226
+ const btcpGroup = groups.find(g => g.title?.startsWith('BTCP'));
227
+ if (btcpGroup) {
228
+ log('Found existing BTCP tab group, setting it as active session:', btcpGroup.id);
229
+ const useResult = await backgroundAgent.execute({
230
+ action: 'sessionUseGroup',
231
+ groupId: btcpGroup.id,
232
+ });
233
+ if (useResult.success) {
234
+ log('Successfully using existing BTCP group as session');
235
+ return;
236
+ }
237
+ log('Failed to use existing BTCP group:', useResult.error);
238
+ }
239
+ }
240
+ // 4. Create a new session (will fail if maxSession limit reached)
241
+ log('No existing session found, creating one automatically...');
458
242
  const groupResult = await backgroundAgent.execute({
459
243
  action: 'groupCreate',
460
244
  title: 'BTCP Session',
@@ -473,19 +257,8 @@ export function createRemoteAgent(config) {
473
257
  log('Tool call:', name, args);
474
258
  emit('toolCall', name, args);
475
259
  try {
476
- // Auto-create session if needed for commands that require it
477
- const sessionRequiredTools = [
478
- 'browser_navigate', 'browser_tab_new', 'browser_tab_close',
479
- 'browser_tab_switch', 'browser_tab_list', 'browser_snapshot',
480
- 'browser_click', 'browser_type', 'browser_fill', 'browser_select',
481
- 'browser_check', 'browser_uncheck', 'browser_hover', 'browser_scroll',
482
- 'browser_getText', 'browser_getAttribute', 'browser_isVisible',
483
- 'browser_press', 'browser_wait', 'browser_evaluate',
484
- 'browser_script_inject', 'browser_script_send',
485
- ];
486
- if (sessionRequiredTools.includes(name)) {
487
- await ensureSession();
488
- }
260
+ // Auto-ensure session for all browser tools (session management is internal)
261
+ await ensureSession();
489
262
  // Map tool to command and execute
490
263
  const command = mapToolToCommand(name, args);
491
264
  const response = await backgroundAgent.execute(command);