btcp-browser-agent 0.1.10 → 0.1.11

This diff shows the changes between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the differences between package versions as they appear in their respective public registries.
@@ -23,308 +23,71 @@ import { getBackgroundAgent } from './background.js';
23
23
  // ============================================================================
24
24
  /**
25
25
  * Get all browser tool definitions for BTCP registration
26
+ *
27
+ * Minimal toolset following Unix philosophy - each tool does one thing well.
28
+ * Note: browser_evaluate is not part of this toolset, so advanced operations are not available through it.
26
29
  */
27
30
  export function getBrowserToolDefinitions() {
28
31
  return [
29
- // Navigation tools
30
32
  {
31
33
  name: 'browser_navigate',
32
- description: 'Navigate to a URL in the current tab',
34
+ description: 'Navigate to a URL',
33
35
  inputSchema: {
34
36
  type: 'object',
35
37
  properties: {
36
38
  url: { type: 'string', description: 'The URL to navigate to' },
37
- waitUntil: {
38
- type: 'string',
39
- enum: ['load', 'domcontentloaded'],
40
- description: 'Wait until page load event (default: load)',
41
- },
42
39
  },
43
40
  required: ['url'],
44
41
  },
45
42
  },
46
- {
47
- name: 'browser_back',
48
- description: 'Go back in browser history',
49
- inputSchema: { type: 'object', properties: {} },
50
- },
51
- {
52
- name: 'browser_forward',
53
- description: 'Go forward in browser history',
54
- inputSchema: { type: 'object', properties: {} },
55
- },
56
- {
57
- name: 'browser_reload',
58
- description: 'Reload the current page',
59
- inputSchema: {
60
- type: 'object',
61
- properties: {
62
- bypassCache: { type: 'boolean', description: 'Bypass browser cache' },
63
- },
64
- },
65
- },
66
- // DOM interaction tools
67
43
  {
68
44
  name: 'browser_snapshot',
69
- description: 'Get accessibility tree snapshot of the page. Returns a text representation with element refs (@ref:N) that can be used in other commands.',
45
+ description: 'Get page snapshot as accessibility tree with element refs (@ref:N). Use refs in click/type commands.',
70
46
  inputSchema: {
71
47
  type: 'object',
72
- properties: {
73
- selector: { type: 'string', description: 'CSS selector to scope the snapshot' },
74
- maxDepth: { type: 'number', description: 'Maximum tree depth to traverse' },
75
- mode: {
76
- type: 'string',
77
- enum: ['interactive', 'outline', 'content'],
78
- description: 'Snapshot mode: interactive (actionable elements), outline (structure), content (text)',
79
- },
80
- },
48
+ properties: {},
81
49
  },
82
50
  },
83
51
  {
84
52
  name: 'browser_click',
85
- description: 'Click an element by CSS selector or element ref (@ref:N from snapshot)',
53
+ description: 'Click an element using @ref:N from snapshot',
86
54
  inputSchema: {
87
55
  type: 'object',
88
56
  properties: {
89
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
57
+ ref: { type: 'string', description: 'Element reference from snapshot (e.g., @ref:5)' },
90
58
  },
91
- required: ['selector'],
59
+ required: ['ref'],
92
60
  },
93
61
  },
94
62
  {
95
63
  name: 'browser_type',
96
- description: 'Type text into an input element (appends to existing value)',
64
+ description: 'Type text into an element',
97
65
  inputSchema: {
98
66
  type: 'object',
99
67
  properties: {
100
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
68
+ ref: { type: 'string', description: 'Element reference from snapshot' },
101
69
  text: { type: 'string', description: 'Text to type' },
102
- clear: { type: 'boolean', description: 'Clear existing value before typing' },
103
- },
104
- required: ['selector', 'text'],
105
- },
106
- },
107
- {
108
- name: 'browser_fill',
109
- description: 'Fill an input element (replaces existing value)',
110
- inputSchema: {
111
- type: 'object',
112
- properties: {
113
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
114
- value: { type: 'string', description: 'Value to fill' },
115
- },
116
- required: ['selector', 'value'],
117
- },
118
- },
119
- {
120
- name: 'browser_select',
121
- description: 'Select an option from a dropdown',
122
- inputSchema: {
123
- type: 'object',
124
- properties: {
125
- selector: { type: 'string', description: 'CSS selector or @ref:N of the select element' },
126
- value: { type: 'string', description: 'Option value to select' },
127
- },
128
- required: ['selector', 'value'],
129
- },
130
- },
131
- {
132
- name: 'browser_check',
133
- description: 'Check a checkbox or radio button',
134
- inputSchema: {
135
- type: 'object',
136
- properties: {
137
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
138
- },
139
- required: ['selector'],
140
- },
141
- },
142
- {
143
- name: 'browser_uncheck',
144
- description: 'Uncheck a checkbox',
145
- inputSchema: {
146
- type: 'object',
147
- properties: {
148
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
149
- },
150
- required: ['selector'],
151
- },
152
- },
153
- {
154
- name: 'browser_hover',
155
- description: 'Hover over an element',
156
- inputSchema: {
157
- type: 'object',
158
- properties: {
159
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
160
- },
161
- required: ['selector'],
162
- },
163
- },
164
- {
165
- name: 'browser_scroll',
166
- description: 'Scroll the page or an element',
167
- inputSchema: {
168
- type: 'object',
169
- properties: {
170
- selector: { type: 'string', description: 'CSS selector or @ref:N (optional, scrolls window if omitted)' },
171
- x: { type: 'number', description: 'Horizontal scroll amount in pixels' },
172
- y: { type: 'number', description: 'Vertical scroll amount in pixels' },
173
- },
174
- },
175
- },
176
- {
177
- name: 'browser_getText',
178
- description: 'Get text content of an element',
179
- inputSchema: {
180
- type: 'object',
181
- properties: {
182
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
183
- },
184
- required: ['selector'],
185
- },
186
- },
187
- {
188
- name: 'browser_getAttribute',
189
- description: 'Get an attribute value from an element',
190
- inputSchema: {
191
- type: 'object',
192
- properties: {
193
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
194
- attribute: { type: 'string', description: 'Attribute name to get' },
195
70
  },
196
- required: ['selector', 'attribute'],
71
+ required: ['ref', 'text'],
197
72
  },
198
73
  },
199
- {
200
- name: 'browser_isVisible',
201
- description: 'Check if an element is visible',
202
- inputSchema: {
203
- type: 'object',
204
- properties: {
205
- selector: { type: 'string', description: 'CSS selector or @ref:N' },
206
- },
207
- required: ['selector'],
208
- },
209
- },
210
- // Screenshot tool
211
74
  {
212
75
  name: 'browser_screenshot',
213
- description: 'Capture a screenshot of the visible tab',
214
- inputSchema: {
215
- type: 'object',
216
- properties: {
217
- format: { type: 'string', enum: ['png', 'jpeg'], description: 'Image format' },
218
- quality: { type: 'number', description: 'JPEG quality (0-100)' },
219
- },
220
- },
221
- },
222
- // Tab management tools
223
- {
224
- name: 'browser_tab_new',
225
- description: 'Open a new tab',
226
- inputSchema: {
227
- type: 'object',
228
- properties: {
229
- url: { type: 'string', description: 'URL to open (optional)' },
230
- active: { type: 'boolean', description: 'Make the new tab active (default: true)' },
231
- },
232
- },
233
- },
234
- {
235
- name: 'browser_tab_close',
236
- description: 'Close a tab',
237
- inputSchema: {
238
- type: 'object',
239
- properties: {
240
- tabId: { type: 'number', description: 'Tab ID to close (optional, closes active tab if omitted)' },
241
- },
242
- },
243
- },
244
- {
245
- name: 'browser_tab_switch',
246
- description: 'Switch to a different tab',
247
- inputSchema: {
248
- type: 'object',
249
- properties: {
250
- tabId: { type: 'number', description: 'Tab ID to switch to' },
251
- },
252
- required: ['tabId'],
253
- },
254
- },
255
- {
256
- name: 'browser_tab_list',
257
- description: 'List all tabs in the current session',
258
- inputSchema: { type: 'object', properties: {} },
259
- },
260
- // Keyboard tools
261
- {
262
- name: 'browser_press',
263
- description: 'Press a keyboard key (e.g., Enter, Tab, Escape)',
264
- inputSchema: {
265
- type: 'object',
266
- properties: {
267
- key: { type: 'string', description: 'Key to press (e.g., "Enter", "Tab", "Escape", "ArrowDown")' },
268
- selector: { type: 'string', description: 'Optional element to focus before pressing' },
269
- },
270
- required: ['key'],
271
- },
272
- },
273
- // Script injection tools
274
- {
275
- name: 'browser_script_inject',
276
- description: "Inject JavaScript code into the page's main world. The script can listen for commands via btcp:script-command messages and respond with btcp:script-ack.",
76
+ description: 'Capture a screenshot of the page',
277
77
  inputSchema: {
278
78
  type: 'object',
279
- properties: {
280
- code: { type: 'string', description: 'JavaScript code to inject' },
281
- scriptId: {
282
- type: 'string',
283
- description: 'Unique identifier for this script (default: "default"). Used to target with script_send.',
284
- },
285
- },
286
- required: ['code'],
287
- },
288
- },
289
- {
290
- name: 'browser_script_send',
291
- description: 'Send a command to an injected script and wait for acknowledgment. The injected script should listen for btcp:script-command and respond with btcp:script-ack.',
292
- inputSchema: {
293
- type: 'object',
294
- properties: {
295
- payload: {
296
- type: 'object',
297
- description: 'Payload to send to the script. Typically includes an "action" field.',
298
- },
299
- scriptId: { type: 'string', description: 'Target script ID (default: "default")' },
300
- timeout: { type: 'number', description: 'Timeout in milliseconds (default: 30000)' },
301
- },
302
- required: ['payload'],
79
+ properties: {},
303
80
  },
304
81
  },
305
- // Wait tools
306
82
  {
307
- name: 'browser_wait',
308
- description: 'Wait for a specified duration or condition',
309
- inputSchema: {
310
- type: 'object',
311
- properties: {
312
- ms: { type: 'number', description: 'Milliseconds to wait' },
313
- selector: { type: 'string', description: 'Wait for this selector to appear' },
314
- timeout: { type: 'number', description: 'Max wait time for selector (default: 30000)' },
315
- },
316
- },
317
- },
318
- // Evaluate tool
319
- {
320
- name: 'browser_evaluate',
321
- description: 'Evaluate JavaScript expression in the page context and return the result',
83
+ name: 'browser_scroll',
84
+ description: 'Scroll the page',
322
85
  inputSchema: {
323
86
  type: 'object',
324
87
  properties: {
325
- expression: { type: 'string', description: 'JavaScript expression to evaluate' },
88
+ direction: { type: 'string', enum: ['up', 'down'], description: 'Scroll direction' },
326
89
  },
327
- required: ['expression'],
90
+ required: ['direction'],
328
91
  },
329
92
  },
330
93
  ];
@@ -336,40 +99,25 @@ export function getBrowserToolDefinitions() {
336
99
  * Map BTCP tool name and arguments to browser-agent Command
337
100
  */
338
101
  export function mapToolToCommand(toolName, args) {
339
- // Remove 'browser_' prefix and convert to action
340
- const actionMap = {
341
- browser_navigate: 'navigate',
342
- browser_back: 'back',
343
- browser_forward: 'forward',
344
- browser_reload: 'reload',
345
- browser_snapshot: 'snapshot',
346
- browser_click: 'click',
347
- browser_type: 'type',
348
- browser_fill: 'fill',
349
- browser_select: 'select',
350
- browser_check: 'check',
351
- browser_uncheck: 'uncheck',
352
- browser_hover: 'hover',
353
- browser_scroll: 'scroll',
354
- browser_getText: 'getText',
355
- browser_getAttribute: 'getAttribute',
356
- browser_isVisible: 'isVisible',
357
- browser_screenshot: 'screenshot',
358
- browser_tab_new: 'tabNew',
359
- browser_tab_close: 'tabClose',
360
- browser_tab_switch: 'tabSwitch',
361
- browser_tab_list: 'tabList',
362
- browser_press: 'press',
363
- browser_script_inject: 'scriptInject',
364
- browser_script_send: 'scriptSend',
365
- browser_wait: 'wait',
366
- browser_evaluate: 'evaluate',
367
- };
368
- const action = actionMap[toolName];
369
- if (!action) {
370
- throw new Error(`Unknown tool: ${toolName}`);
102
+ switch (toolName) {
103
+ case 'browser_navigate':
104
+ return { action: 'navigate', url: args.url };
105
+ case 'browser_snapshot':
106
+ return { action: 'snapshot' };
107
+ case 'browser_click':
108
+ return { action: 'click', selector: args.ref };
109
+ case 'browser_type':
110
+ return { action: 'type', selector: args.ref, text: args.text };
111
+ case 'browser_screenshot':
112
+ return { action: 'screenshot' };
113
+ case 'browser_scroll': {
114
+ const direction = args.direction;
115
+ const amount = direction === 'down' ? 500 : -500;
116
+ return { action: 'scroll', y: amount };
117
+ }
118
+ default:
119
+ throw new Error(`Unknown tool: ${toolName}`);
371
120
  }
372
- return { action, ...args };
373
121
  }
374
122
  /**
375
123
  * Format response for BTCP protocol
@@ -444,17 +192,53 @@ export function createRemoteAgent(config) {
444
192
  }
445
193
  /**
446
194
  * Ensure a session exists, creating one if needed
195
+ *
196
+ * This checks in order:
197
+ * 1. Current active session
198
+ * 2. Persistent session from storage (reconnects if found)
199
+ * 3. Existing BTCP tab groups (reconnects to first one found)
200
+ * 4. Creates a new session if none found (respects maxSession limit)
447
201
  */
448
202
  async function ensureSession() {
203
+ // 1. Check if there's an active session
449
204
  const sessionResult = await backgroundAgent.execute({ action: 'sessionGetCurrent' });
450
205
  if (sessionResult.success && sessionResult.data) {
451
206
  const session = sessionResult.data.session;
452
207
  if (session?.groupId) {
208
+ log('Active session found:', session.groupId);
453
209
  return; // Session already exists
454
210
  }
455
211
  }
456
- // Create a new session with a tab
457
- log('No active session, creating one automatically...');
212
+ // 2. Try to reconnect via popup initialize (handles persistent session check)
213
+ log('No active session, trying to reconnect to existing session...');
214
+ const initResult = await backgroundAgent.execute({ action: 'popupInitialize' });
215
+ if (initResult.success && initResult.data) {
216
+ const initData = initResult.data;
217
+ if (initData.reconnected) {
218
+ log('Reconnected to existing session');
219
+ return;
220
+ }
221
+ }
222
+ // 3. Check for existing BTCP tab groups and try to use one
223
+ const groupsResult = await backgroundAgent.execute({ action: 'groupList' });
224
+ if (groupsResult.success && groupsResult.data) {
225
+ const groups = groupsResult.data;
226
+ const btcpGroup = groups.find(g => g.title?.startsWith('BTCP'));
227
+ if (btcpGroup) {
228
+ log('Found existing BTCP tab group, setting it as active session:', btcpGroup.id);
229
+ const useResult = await backgroundAgent.execute({
230
+ action: 'sessionUseGroup',
231
+ groupId: btcpGroup.id,
232
+ });
233
+ if (useResult.success) {
234
+ log('Successfully using existing BTCP group as session');
235
+ return;
236
+ }
237
+ log('Failed to use existing BTCP group:', useResult.error);
238
+ }
239
+ }
240
+ // 4. Create a new session (will fail if maxSession limit reached)
241
+ log('No existing session found, creating one automatically...');
458
242
  const groupResult = await backgroundAgent.execute({
459
243
  action: 'groupCreate',
460
244
  title: 'BTCP Session',
@@ -473,19 +257,8 @@ export function createRemoteAgent(config) {
473
257
  log('Tool call:', name, args);
474
258
  emit('toolCall', name, args);
475
259
  try {
476
- // Auto-create session if needed for commands that require it
477
- const sessionRequiredTools = [
478
- 'browser_navigate', 'browser_tab_new', 'browser_tab_close',
479
- 'browser_tab_switch', 'browser_tab_list', 'browser_snapshot',
480
- 'browser_click', 'browser_type', 'browser_fill', 'browser_select',
481
- 'browser_check', 'browser_uncheck', 'browser_hover', 'browser_scroll',
482
- 'browser_getText', 'browser_getAttribute', 'browser_isVisible',
483
- 'browser_press', 'browser_wait', 'browser_evaluate',
484
- 'browser_script_inject', 'browser_script_send',
485
- ];
486
- if (sessionRequiredTools.includes(name)) {
487
- await ensureSession();
488
- }
260
+ // Auto-ensure session for all browser tools (session management is internal)
261
+ await ensureSession();
489
262
  // Map tool to command and execute
490
263
  const command = mapToolToCommand(name, args);
491
264
  const response = await backgroundAgent.execute(command);
@@ -2,6 +2,21 @@
2
2
  * SessionManager - Manages tab groups and sessions for BTCP Browser Agent
3
3
  */
4
4
  import type { GroupInfo, SessionInfo, GroupCreateOptions, GroupUpdateOptions } from './session-types.js';
5
+ /**
6
+ * Options for SessionManager
7
+ */
8
+ export interface SessionManagerOptions {
9
+ /**
10
+ * Maximum number of sessions allowed (default: 1)
11
+ * When limit is reached, new session creation will fail
12
+ */
13
+ maxSession?: number;
14
+ /**
15
+ * Maximum number of open tabs per session (default: 1)
16
+ * When limit is reached, oldest tabs will be closed
17
+ */
18
+ maxOpenTab?: number;
19
+ }
5
20
  /**
6
21
  * SessionManager handles Chrome tab group operations and session state
7
22
  */
@@ -9,7 +24,9 @@ export declare class SessionManager {
9
24
  private activeSessionGroupId;
10
25
  private sessionCounter;
11
26
  private initialized;
12
- constructor();
27
+ private maxSession;
28
+ private maxOpenTab;
29
+ constructor(options?: SessionManagerOptions);
13
30
  /**
14
31
  * Restore session from storage
15
32
  */
@@ -63,12 +80,42 @@ export declare class SessionManager {
63
80
  * Get the active session group ID
64
81
  */
65
82
  getActiveSessionGroupId(): number | null;
83
+ /**
84
+ * Get the maximum number of sessions allowed
85
+ */
86
+ getMaxSession(): number;
87
+ /**
88
+ * Get the maximum number of open tabs per session
89
+ */
90
+ getMaxOpenTab(): number;
91
+ /**
92
+ * Enforce the tab limit in the active session
93
+ * Closes oldest tabs if the limit is exceeded
94
+ */
95
+ enforceTabLimit(): Promise<void>;
96
+ /**
97
+ * Get the count of existing BTCP sessions by checking:
98
+ * 1. Persistent session from storage
99
+ * 2. Current active session
100
+ * 3. Existing tab groups (BTCP prefixed)
101
+ */
102
+ getSessionCount(): Promise<number>;
103
+ /**
104
+ * Check if a new session can be created based on maxSession limit
105
+ */
106
+ canCreateSession(): Promise<boolean>;
66
107
  /**
67
108
  * Set the active session group ID
68
109
  */
69
110
  setActiveSessionGroupId(groupId: number | null): void;
111
+ /**
112
+ * Use an existing tab group as the active session
113
+ * This validates the group exists and sets it as active with persistence
114
+ */
115
+ useExistingGroupAsSession(groupId: number): Promise<boolean>;
70
116
  /**
71
117
  * Add a tab to the active session (if one exists)
118
+ * Automatically enforces the tab limit after adding
72
119
  */
73
120
  addTabToActiveSession(tabId: number): Promise<boolean>;
74
121
  /**
@@ -82,6 +129,7 @@ export declare class SessionManager {
82
129
  }
83
130
  /**
84
131
  * Get the singleton SessionManager instance
132
+ * @param options Options for the SessionManager (only used on first call)
85
133
  */
86
- export declare function getSessionManager(): SessionManager;
134
+ export declare function getSessionManager(options?: SessionManagerOptions): SessionManager;
87
135
  //# sourceMappingURL=session-manager.d.ts.map