btcp-browser-agent 0.1.7 → 0.1.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,668 @@
1
+ /**
2
+ * @btcp/extension - Remote Control via BTCP Protocol
3
+ *
4
+ * Enables remote AI agents to control the browser via the Browser Tool Calling Protocol.
5
+ * Uses SSE for receiving commands and HTTP POST for sending results.
6
+ *
7
+ * @example
8
+ * ```typescript
9
+ * import { createRemoteAgent } from '@btcp/browser-agent/extension';
10
+ *
11
+ * const remote = createRemoteAgent({
12
+ * serverUrl: 'http://localhost:8080',
13
+ * sessionId: 'my-session',
14
+ * });
15
+ *
16
+ * await remote.connect();
17
+ * // Browser is now controllable by the BTCP server
18
+ * ```
19
+ */
20
+ import { getBackgroundAgent } from './background.js';
21
+ // ============================================================================
22
+ // Browser Tool Definitions
23
+ // ============================================================================
24
/**
 * Build the catalogue of browser tools advertised to a BTCP server.
 *
 * Every call returns a freshly built array; no objects are shared between
 * calls or between individual tool entries.
 *
 * @returns {Array<{name: string, description: string, inputSchema: object}>}
 *   One entry per tool, each with a JSON-Schema style `inputSchema`.
 */
export function getBrowserToolDefinitions() {
    // Property-schema factories; each invocation yields a new object.
    const str = (description) => ({ type: 'string', description });
    const num = (description) => ({ type: 'number', description });
    const bool = (description) => ({ type: 'boolean', description });
    const oneOf = (options, description) => ({ type: 'string', enum: options, description });
    const target = () => str('CSS selector or @ref:N');
    // Assemble one tool entry; `required` is only attached when provided.
    const tool = (name, description, properties = {}, required) => {
        const inputSchema = { type: 'object', properties };
        if (required) {
            inputSchema.required = required;
        }
        return { name, description, inputSchema };
    };
    const navigation = [
        tool('browser_navigate', 'Navigate to a URL in the current tab', {
            url: str('The URL to navigate to'),
            waitUntil: oneOf(['load', 'domcontentloaded'], 'Wait until page load event (default: load)'),
        }, ['url']),
        tool('browser_back', 'Go back in browser history'),
        tool('browser_forward', 'Go forward in browser history'),
        tool('browser_reload', 'Reload the current page', {
            bypassCache: bool('Bypass browser cache'),
        }),
    ];
    const domInteraction = [
        tool('browser_snapshot', 'Get accessibility tree snapshot of the page. Returns a text representation with element refs (@ref:N) that can be used in other commands.', {
            selector: str('CSS selector to scope the snapshot'),
            maxDepth: num('Maximum tree depth to traverse'),
            mode: oneOf(['interactive', 'outline', 'content'], 'Snapshot mode: interactive (actionable elements), outline (structure), content (text)'),
        }),
        tool('browser_click', 'Click an element by CSS selector or element ref (@ref:N from snapshot)', {
            selector: target(),
        }, ['selector']),
        tool('browser_type', 'Type text into an input element (appends to existing value)', {
            selector: target(),
            text: str('Text to type'),
            clear: bool('Clear existing value before typing'),
        }, ['selector', 'text']),
        tool('browser_fill', 'Fill an input element (replaces existing value)', {
            selector: target(),
            value: str('Value to fill'),
        }, ['selector', 'value']),
        tool('browser_select', 'Select an option from a dropdown', {
            selector: str('CSS selector or @ref:N of the select element'),
            value: str('Option value to select'),
        }, ['selector', 'value']),
        tool('browser_check', 'Check a checkbox or radio button', {
            selector: target(),
        }, ['selector']),
        tool('browser_uncheck', 'Uncheck a checkbox', {
            selector: target(),
        }, ['selector']),
        tool('browser_hover', 'Hover over an element', {
            selector: target(),
        }, ['selector']),
        tool('browser_scroll', 'Scroll the page or an element', {
            selector: str('CSS selector or @ref:N (optional, scrolls window if omitted)'),
            x: num('Horizontal scroll amount in pixels'),
            y: num('Vertical scroll amount in pixels'),
        }),
        tool('browser_getText', 'Get text content of an element', {
            selector: target(),
        }, ['selector']),
        tool('browser_getAttribute', 'Get an attribute value from an element', {
            selector: target(),
            attribute: str('Attribute name to get'),
        }, ['selector', 'attribute']),
        tool('browser_isVisible', 'Check if an element is visible', {
            selector: target(),
        }, ['selector']),
    ];
    const screenshot = [
        tool('browser_screenshot', 'Capture a screenshot of the visible tab', {
            format: oneOf(['png', 'jpeg'], 'Image format'),
            quality: num('JPEG quality (0-100)'),
        }),
    ];
    const tabManagement = [
        tool('browser_tab_new', 'Open a new tab', {
            url: str('URL to open (optional)'),
            active: bool('Make the new tab active (default: true)'),
        }),
        tool('browser_tab_close', 'Close a tab', {
            tabId: num('Tab ID to close (optional, closes active tab if omitted)'),
        }),
        tool('browser_tab_switch', 'Switch to a different tab', {
            tabId: num('Tab ID to switch to'),
        }, ['tabId']),
        tool('browser_tab_list', 'List all tabs in the current session'),
    ];
    const keyboard = [
        tool('browser_press', 'Press a keyboard key (e.g., Enter, Tab, Escape)', {
            key: str('Key to press (e.g., "Enter", "Tab", "Escape", "ArrowDown")'),
            selector: str('Optional element to focus before pressing'),
        }, ['key']),
    ];
    const scriptInjection = [
        tool('browser_script_inject', "Inject JavaScript code into the page's main world. The script can listen for commands via btcp:script-command messages and respond with btcp:script-ack.", {
            code: str('JavaScript code to inject'),
            scriptId: str('Unique identifier for this script (default: "default"). Used to target with script_send.'),
        }, ['code']),
        tool('browser_script_send', 'Send a command to an injected script and wait for acknowledgment. The injected script should listen for btcp:script-command and respond with btcp:script-ack.', {
            payload: {
                type: 'object',
                description: 'Payload to send to the script. Typically includes an "action" field.',
            },
            scriptId: str('Target script ID (default: "default")'),
            timeout: num('Timeout in milliseconds (default: 30000)'),
        }, ['payload']),
    ];
    const waiting = [
        tool('browser_wait', 'Wait for a specified duration or condition', {
            ms: num('Milliseconds to wait'),
            selector: str('Wait for this selector to appear'),
            timeout: num('Max wait time for selector (default: 30000)'),
        }),
    ];
    const evaluation = [
        tool('browser_evaluate', 'Evaluate JavaScript expression in the page context and return the result', {
            expression: str('JavaScript expression to evaluate'),
        }, ['expression']),
    ];
    // Order matches the advertised catalogue: navigation first, evaluate last.
    return [
        ...navigation,
        ...domInteraction,
        ...screenshot,
        ...tabManagement,
        ...keyboard,
        ...scriptInjection,
        ...waiting,
        ...evaluation,
    ];
}
332
+ // ============================================================================
333
+ // Tool Name to Command Mapping
334
+ // ============================================================================
335
/**
 * Map a BTCP tool name and its arguments to a browser-agent command.
 *
 * The returned command is the caller's arguments plus an `action` field
 * derived from the tool name. The mapped action always wins: an `action`
 * key inside `args` (which originates from the remote server) cannot
 * redirect the call to a different command.
 *
 * @param {string} toolName - BTCP tool name, e.g. `browser_click`.
 * @param {object} [args] - Tool arguments; copied shallowly into the command.
 * @returns {object} Command of the form `{ action, ...args }`.
 * @throws {Error} If `toolName` is not one of the known browser tools.
 */
export function mapToolToCommand(toolName, args) {
    const actionMap = {
        browser_navigate: 'navigate',
        browser_back: 'back',
        browser_forward: 'forward',
        browser_reload: 'reload',
        browser_snapshot: 'snapshot',
        browser_click: 'click',
        browser_type: 'type',
        browser_fill: 'fill',
        browser_select: 'select',
        browser_check: 'check',
        browser_uncheck: 'uncheck',
        browser_hover: 'hover',
        browser_scroll: 'scroll',
        browser_getText: 'getText',
        browser_getAttribute: 'getAttribute',
        browser_isVisible: 'isVisible',
        browser_screenshot: 'screenshot',
        browser_tab_new: 'tabNew',
        browser_tab_close: 'tabClose',
        browser_tab_switch: 'tabSwitch',
        browser_tab_list: 'tabList',
        browser_press: 'press',
        browser_script_inject: 'scriptInject',
        browser_script_send: 'scriptSend',
        browser_wait: 'wait',
        browser_evaluate: 'evaluate',
    };
    // Own-property check: a plain lookup would accept inherited names such as
    // "constructor" or "__proto__" because they resolve to truthy values.
    if (!Object.prototype.hasOwnProperty.call(actionMap, toolName)) {
        throw new Error(`Unknown tool: ${toolName}`);
    }
    const action = actionMap[toolName];
    // `action` is applied first (keeps it as the leading key) and again last
    // so that an `action` entry in `args` can never override the mapping.
    return Object.assign({ action }, args, { action });
}
374
/**
 * Convert a browser-agent response into a BTCP content array.
 *
 * - Failed (or missing) response: one text item `Error: <message>`.
 * - Data containing `screenshot`: one image item; MIME type is
 *   `image/<data.format>` and defaults to `image/png`.
 * - Data containing `snapshot`: one text item with the snapshot as-is.
 * - Anything else: one text item with the data pretty-printed as JSON.
 *
 * @param {{success: boolean, error?: *, data?: *}} response - Agent response.
 * @returns {Array<object>} Content items (`{type: 'text', text}` or
 *   `{type: 'image', data, mimeType}`).
 */
export function formatResponseForBTCP(response) {
    if (!response || !response.success) {
        // Fall back to a readable message instead of "Error: undefined".
        const reason = response?.error ?? 'Unknown error';
        return [{ type: 'text', text: `Error: ${reason}` }];
    }
    const data = response.data;
    const isRecord = Boolean(data) && typeof data === 'object';
    // Handle screenshot - return as image
    if (isRecord && 'screenshot' in data) {
        const format = data.format || 'png';
        return [
            {
                type: 'image',
                data: data.screenshot,
                mimeType: `image/${format}`,
            },
        ];
    }
    // Handle snapshot - return as text (accessibility tree)
    if (isRecord && 'snapshot' in data) {
        return [{ type: 'text', text: data.snapshot }];
    }
    // JSON.stringify yields undefined (not a string) for undefined, functions
    // and symbols; substitute 'null' so the text item always carries a string.
    const text = JSON.stringify(data, null, 2) ?? 'null';
    return [{ type: 'text', text }];
}
400
/**
 * Create a remote agent that connects the browser to a BTCP server.
 *
 * Tools are registered with an HTTP POST to `<serverUrl>/register`, tool
 * calls arrive as `request` events on the SSE stream
 * `<serverUrl>/events?sessionId=...`, and results are POSTed to
 * `<serverUrl>/response`.
 *
 * Events (subscribe with `on`, unsubscribe with `off`):
 * - `connect` ()                 - SSE stream opened.
 * - `disconnect` (code, reason)  - stream lost (`code` undefined) or
 *                                  `disconnect()` called (`code` 1000).
 * - `error` (Error)              - an established stream failed.
 * - `toolCall` (name, args)      - a tool call was received.
 *
 * @param {object} config
 * @param {string} config.serverUrl - Base URL of the BTCP server.
 * @param {string} [config.sessionId] - Defaults to `browser-<timestamp>`.
 * @param {boolean} [config.autoReconnect=true] - Reconnect after a lost stream.
 * @param {number} [config.reconnectDelay=1000] - Base delay in ms; doubled per attempt.
 * @param {number} [config.maxReconnectAttempts=10] - Retry budget per outage.
 * @param {number} [config.connectionTimeout=30000] - Ms to wait for the stream to open.
 * @param {boolean} [config.debug=false] - Log diagnostics to the console.
 * @returns {object} Agent handle with `connect`, `disconnect`, `isConnected`,
 *   `getState`, `on`, `off`, `getAgent` and `getTools`.
 *
 * @example
 * ```typescript
 * const remote = createRemoteAgent({
 *   serverUrl: 'http://localhost:8080',
 *   sessionId: 'browser-1',
 * });
 *
 * remote.on('connect', () => console.log('Connected!'));
 * remote.on('toolCall', (name, args) => console.log('Tool called:', name, args));
 *
 * await remote.connect();
 * ```
 */
export function createRemoteAgent(config) {
    const { serverUrl, sessionId = `browser-${Date.now()}`, autoReconnect = true, reconnectDelay = 1000, maxReconnectAttempts = 10, connectionTimeout = 30000, debug = false, } = config;
    // Drop trailing slashes so endpoint URLs never contain "//".
    const baseUrl = String(serverUrl).replace(/\/+$/, '');
    // State
    let state = 'disconnected';
    let eventSource = null;
    let reconnectAttempts = 0;
    let reconnectTimer = null;
    // Fails the in-flight connectSSE() attempt (if any) so disconnect() can abort it.
    let abortPendingConnect = null;
    const backgroundAgent = getBackgroundAgent();
    const tools = getBrowserToolDefinitions();
    // Event handlers
    const eventHandlers = new Map();
    // Tools for which a session is auto-created before execution.
    // NOTE(review): browser_back / browser_forward / browser_reload /
    // browser_screenshot are not listed - confirm whether that is intentional.
    const sessionRequiredTools = new Set([
        'browser_navigate', 'browser_tab_new', 'browser_tab_close',
        'browser_tab_switch', 'browser_tab_list', 'browser_snapshot',
        'browser_click', 'browser_type', 'browser_fill', 'browser_select',
        'browser_check', 'browser_uncheck', 'browser_hover', 'browser_scroll',
        'browser_getText', 'browser_getAttribute', 'browser_isVisible',
        'browser_press', 'browser_wait', 'browser_evaluate',
        'browser_script_inject', 'browser_script_send',
    ]);
    function log(...args) {
        if (debug) {
            console.log('[RemoteAgent]', ...args);
        }
    }
    /**
     * Invoke every handler registered for `event`; a throwing handler is
     * reported and does not prevent the remaining handlers from running.
     */
    function emit(event, ...args) {
        const handlers = eventHandlers.get(event);
        if (handlers) {
            handlers.forEach((handler) => {
                try {
                    handler(...args);
                }
                catch (error) {
                    console.error(`[RemoteAgent] Error in ${event} handler:`, error);
                }
            });
        }
    }
    /**
     * Ensure a session exists, creating one if needed
     */
    async function ensureSession() {
        const sessionResult = await backgroundAgent.execute({ action: 'sessionGetCurrent' });
        if (sessionResult.success && sessionResult.data) {
            const session = sessionResult.data.session;
            if (session?.groupId) {
                return; // Session already exists
            }
        }
        // Create a new session with a tab
        log('No active session, creating one automatically...');
        const groupResult = await backgroundAgent.execute({
            action: 'groupCreate',
            title: 'BTCP Session',
            color: 'blue',
        });
        if (!groupResult.success) {
            throw new Error(`Failed to create session: ${groupResult.error}`);
        }
        log('Session created:', groupResult.data);
    }
    /**
     * Handle incoming tool call request
     */
    async function handleToolCall(request) {
        // Read defensively: a malformed request must end in an error response,
        // not in an exception thrown before the try block.
        const params = request?.params;
        const name = params?.name;
        const args = params?.arguments;
        log('Tool call:', name, args);
        emit('toolCall', name, args);
        try {
            // Auto-create session if needed for commands that require it
            if (sessionRequiredTools.has(name)) {
                await ensureSession();
            }
            // Map tool to command and execute
            const command = mapToolToCommand(name, args);
            const response = await backgroundAgent.execute(command);
            // Send response back to server
            await sendResponse(request.id, formatResponseForBTCP(response));
        }
        catch (error) {
            const errorMessage = error instanceof Error ? error.message : String(error);
            log('Tool call error:', errorMessage);
            await sendResponse(request?.id, [{ type: 'text', text: `Error: ${errorMessage}` }], true);
        }
    }
    /**
     * Send response back to BTCP server
     */
    async function sendResponse(requestId, content, isError = false) {
        const responseUrl = `${baseUrl}/response`;
        const body = {
            jsonrpc: '2.0',
            id: requestId,
            ...(isError
                ? { error: { code: -32000, message: content[0]?.type === 'text' ? content[0].text : 'Unknown error' } }
                : { result: { content } }),
        };
        // Serialize outside the try: a non-serializable result should surface
        // to handleToolCall (which reports it) rather than be dropped here.
        const payload = JSON.stringify(body);
        try {
            const reply = await fetch(responseUrl, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: payload,
            });
            if (!reply.ok) {
                // fetch only rejects on network failure; report HTTP-level rejections too.
                log(`Server rejected response for request ${requestId}: ${reply.status}`);
            }
        }
        catch (error) {
            // Best effort: delivery failures are logged, never thrown.
            log('Failed to send response:', error);
        }
    }
    /**
     * Register tools with the server
     */
    async function registerTools() {
        const registerUrl = `${baseUrl}/register`;
        const body = {
            jsonrpc: '2.0',
            id: `register-${Date.now()}`,
            method: 'tools/register',
            params: {
                sessionId,
                tools,
            },
        };
        try {
            const response = await fetch(registerUrl, {
                method: 'POST',
                headers: { 'Content-Type': 'application/json' },
                body: JSON.stringify(body),
            });
            if (!response.ok) {
                throw new Error(`Registration failed: ${response.status}`);
            }
            log('Tools registered successfully');
        }
        catch (error) {
            log('Failed to register tools:', error);
            throw error;
        }
    }
    /**
     * Schedule the next reconnect attempt with exponential backoff.
     * Does nothing when auto-reconnect is off or the attempt budget is spent.
     */
    function scheduleReconnect() {
        if (!autoReconnect || reconnectAttempts >= maxReconnectAttempts) {
            return;
        }
        const delay = reconnectDelay * Math.pow(2, reconnectAttempts);
        reconnectAttempts++;
        log(`Reconnecting in ${delay}ms (attempt ${reconnectAttempts}/${maxReconnectAttempts})`);
        reconnectTimer = setTimeout(() => {
            reconnectTimer = null;
            // NOTE(review): tools are not re-registered here; confirm the
            // server keeps registrations across a dropped stream.
            connectSSE().catch((error) => {
                log('Reconnect failed:', error);
                // Keep retrying until the budget is used up; a single failed
                // attempt must not end the reconnect sequence.
                scheduleReconnect();
            });
        }, delay);
    }
    /**
     * Connect to SSE endpoint
     *
     * Resolves once the stream opens. Rejects on timeout, on an error before
     * the stream opened, or when disconnect() aborts the attempt; in each of
     * those cases the EventSource is closed and state returns to 'disconnected'.
     */
    function connectSSE() {
        return new Promise((resolve, reject) => {
            const sseUrl = `${baseUrl}/events?sessionId=${encodeURIComponent(sessionId)}`;
            log('Connecting to SSE:', sseUrl);
            state = 'connecting';
            let source;
            try {
                source = new EventSource(sseUrl);
            }
            catch (error) {
                // e.g. an unparsable URL: do not leave state stuck at 'connecting'.
                state = 'disconnected';
                reject(error);
                return;
            }
            eventSource = source;
            const timeout = setTimeout(() => {
                if (state === 'connecting' && eventSource === source) {
                    failConnect(new Error('Connection timeout'));
                }
            }, connectionTimeout);
            // Abort this attempt: release the stream and reset state so that
            // connect() can be called again.
            const failConnect = (error) => {
                clearTimeout(timeout);
                abortPendingConnect = null;
                // Close explicitly; an open EventSource may keep retrying on its own.
                source.close();
                if (eventSource === source) {
                    eventSource = null;
                }
                state = 'disconnected';
                reject(error);
            };
            abortPendingConnect = failConnect;
            source.onopen = () => {
                clearTimeout(timeout);
                abortPendingConnect = null;
                state = 'connected';
                reconnectAttempts = 0;
                log('SSE connected');
                emit('connect');
                resolve();
            };
            source.onerror = (event) => {
                clearTimeout(timeout);
                log('SSE error:', event);
                if (eventSource !== source) {
                    return; // Stale stream that was already released
                }
                if (state === 'connecting') {
                    failConnect(new Error('Connection failed'));
                    return;
                }
                // Handle disconnect
                state = 'disconnected';
                source.close();
                eventSource = null;
                emit('disconnect', undefined, 'Connection lost');
                emit('error', new Error('SSE connection error'));
                // Attempt reconnect if enabled
                scheduleReconnect();
            };
            source.addEventListener('request', (event) => {
                let data;
                try {
                    data = JSON.parse(event.data);
                }
                catch (error) {
                    log('Failed to parse SSE message:', error);
                    return;
                }
                if (data?.method === 'tools/call') {
                    // Fire-and-forget, but never leave a rejection unhandled.
                    handleToolCall(data).catch((error) => {
                        log('Unhandled tool call failure:', error);
                    });
                }
            });
            // Handle ping/pong for keepalive
            source.addEventListener('ping', () => {
                log('Received ping');
            });
        });
    }
    return {
        async connect() {
            if (state !== 'disconnected') {
                throw new Error(`Cannot connect: current state is ${state}`);
            }
            // A pending auto-reconnect would otherwise open a second stream.
            if (reconnectTimer) {
                clearTimeout(reconnectTimer);
                reconnectTimer = null;
            }
            // Claim the state before the first await so overlapping connect()
            // calls are rejected by the guard above.
            state = 'connecting';
            // First register tools
            try {
                await registerTools();
            }
            catch (error) {
                state = 'disconnected';
                throw error;
            }
            if (state !== 'connecting') {
                // disconnect() was called while registration was in flight.
                throw new Error('Client disconnected');
            }
            // Then connect to SSE
            await connectSSE();
        },
        disconnect() {
            if (reconnectTimer) {
                clearTimeout(reconnectTimer);
                reconnectTimer = null;
            }
            reconnectAttempts = maxReconnectAttempts; // Prevent auto-reconnect
            const wasActive = state !== 'disconnected';
            if (abortPendingConnect) {
                // Settle the pending connect() promise and clear its timer.
                abortPendingConnect(new Error('Client disconnected'));
            }
            if (eventSource) {
                eventSource.close();
                eventSource = null;
            }
            if (wasActive) {
                state = 'disconnected';
                emit('disconnect', 1000, 'Client disconnected');
            }
            log('Disconnected');
        },
        isConnected() {
            return state === 'connected';
        },
        getState() {
            return state;
        },
        on(event, handler) {
            if (!eventHandlers.has(event)) {
                eventHandlers.set(event, new Set());
            }
            eventHandlers.get(event).add(handler);
        },
        off(event, handler) {
            eventHandlers.get(event)?.delete(handler);
        },
        getAgent() {
            return backgroundAgent;
        },
        getTools() {
            return tools;
        },
    };
}
668
+ //# sourceMappingURL=remote.js.map