btcp-browser-agent 0.1.9 → 0.1.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,69 +1,69 @@
1
- {
2
- "name": "btcp-browser-agent",
3
- "version": "0.1.9",
4
- "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
- "type": "module",
6
- "main": "dist/index.js",
7
- "types": "dist/index.d.ts",
8
- "exports": {
9
- ".": {
10
- "types": "./dist/index.d.ts",
11
- "import": "./dist/index.js",
12
- "default": "./dist/index.js"
13
- },
14
- "./core": {
15
- "types": "./packages/core/dist/index.d.ts",
16
- "import": "./packages/core/dist/index.js",
17
- "default": "./packages/core/dist/index.js"
18
- },
19
- "./extension": {
20
- "types": "./packages/extension/dist/index.d.ts",
21
- "import": "./packages/extension/dist/index.js",
22
- "default": "./packages/extension/dist/index.js"
23
- },
24
- "./extension/content": {
25
- "types": "./packages/extension/dist/content.d.ts",
26
- "import": "./packages/extension/dist/content.js",
27
- "default": "./packages/extension/dist/content.js"
28
- },
29
- "./extension/background": {
30
- "types": "./packages/extension/dist/background.d.ts",
31
- "import": "./packages/extension/dist/background.js",
32
- "default": "./packages/extension/dist/background.js"
33
- }
34
- },
35
- "scripts": {
36
- "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
- "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
- "clean": "rm -rf dist packages/*/dist",
39
- "prepare": "npm run build",
40
- "test": "vitest run",
41
- "test:watch": "vitest",
42
- "typecheck": "tsc --noEmit"
43
- },
44
- "workspaces": [
45
- "packages/core",
46
- "packages/extension",
47
- "packages/cli"
48
- ],
49
- "files": [
50
- "dist",
51
- "packages/core/dist",
52
- "packages/extension/dist",
53
- "!**/__tests__",
54
- "!**/*.map"
55
- ],
56
- "license": "Apache-2.0",
57
- "repository": {
58
- "type": "git",
59
- "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
- },
61
- "dependencies": {},
62
- "devDependencies": {
63
- "@types/chrome": "^0.0.268",
64
- "@types/node": "^20.10.0",
65
- "jsdom": "^24.0.0",
66
- "typescript": "^5.3.0",
67
- "vitest": "^2.0.0"
68
- }
69
- }
1
+ {
2
+ "name": "btcp-browser-agent",
3
+ "version": "0.1.11",
4
+ "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js",
12
+ "default": "./dist/index.js"
13
+ },
14
+ "./core": {
15
+ "types": "./packages/core/dist/index.d.ts",
16
+ "import": "./packages/core/dist/index.js",
17
+ "default": "./packages/core/dist/index.js"
18
+ },
19
+ "./extension": {
20
+ "types": "./packages/extension/dist/index.d.ts",
21
+ "import": "./packages/extension/dist/index.js",
22
+ "default": "./packages/extension/dist/index.js"
23
+ },
24
+ "./extension/content": {
25
+ "types": "./packages/extension/dist/content.d.ts",
26
+ "import": "./packages/extension/dist/content.js",
27
+ "default": "./packages/extension/dist/content.js"
28
+ },
29
+ "./extension/background": {
30
+ "types": "./packages/extension/dist/background.d.ts",
31
+ "import": "./packages/extension/dist/background.js",
32
+ "default": "./packages/extension/dist/background.js"
33
+ }
34
+ },
35
+ "scripts": {
36
+ "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
+ "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
+ "clean": "rm -rf dist packages/*/dist",
39
+ "prepare": "npm run build",
40
+ "test": "vitest run",
41
+ "test:watch": "vitest",
42
+ "typecheck": "tsc --noEmit"
43
+ },
44
+ "workspaces": [
45
+ "packages/core",
46
+ "packages/extension",
47
+ "packages/cli"
48
+ ],
49
+ "files": [
50
+ "dist",
51
+ "packages/core/dist",
52
+ "packages/extension/dist",
53
+ "!**/__tests__",
54
+ "!**/*.map"
55
+ ],
56
+ "license": "Apache-2.0",
57
+ "repository": {
58
+ "type": "git",
59
+ "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
+ },
61
+ "dependencies": {},
62
+ "devDependencies": {
63
+ "@types/chrome": "^0.0.268",
64
+ "@types/node": "^20.10.0",
65
+ "jsdom": "^24.0.0",
66
+ "typescript": "^5.3.0",
67
+ "vitest": "^2.0.0"
68
+ }
69
+ }
@@ -796,15 +796,15 @@ export class DOMActions {
796
796
  // Create overlay container with absolute positioning covering entire document
797
797
  this.overlayContainer = this.document.createElement('div');
798
798
  this.overlayContainer.id = 'btcp-highlight-overlay';
799
- this.overlayContainer.style.cssText = `
800
- position: absolute;
801
- top: 0;
802
- left: 0;
803
- width: ${this.document.documentElement.scrollWidth}px;
804
- height: ${this.document.documentElement.scrollHeight}px;
805
- pointer-events: none;
806
- z-index: 999999;
807
- contain: layout style paint;
799
+ this.overlayContainer.style.cssText = `
800
+ position: absolute;
801
+ top: 0;
802
+ left: 0;
803
+ width: ${this.document.documentElement.scrollWidth}px;
804
+ height: ${this.document.documentElement.scrollHeight}px;
805
+ pointer-events: none;
806
+ z-index: 999999;
807
+ contain: layout style paint;
808
808
  `;
809
809
  let highlightedCount = 0;
810
810
  // Create border overlays and labels for each ref
@@ -825,17 +825,17 @@ export class DOMActions {
825
825
  const border = this.document.createElement('div');
826
826
  border.className = 'btcp-ref-border';
827
827
  border.dataset.ref = ref;
828
- border.style.cssText = `
829
- position: absolute;
830
- width: ${bbox.width}px;
831
- height: ${bbox.height}px;
832
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
833
- border: 2px solid rgba(59, 130, 246, 0.8);
834
- border-radius: 2px;
835
- box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
836
- pointer-events: none;
837
- will-change: transform;
838
- contain: layout style paint;
828
+ border.style.cssText = `
829
+ position: absolute;
830
+ width: ${bbox.width}px;
831
+ height: ${bbox.height}px;
832
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
833
+ border: 2px solid rgba(59, 130, 246, 0.8);
834
+ border-radius: 2px;
835
+ box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
836
+ pointer-events: none;
837
+ will-change: transform;
838
+ contain: layout style paint;
839
839
  `;
840
840
  // Create label
841
841
  const label = this.document.createElement('div');
@@ -843,21 +843,21 @@ export class DOMActions {
843
843
  label.dataset.ref = ref;
844
844
  // Extract number from ref (e.g., "@ref:5" -> "5")
845
845
  label.textContent = ref.replace('@ref:', '');
846
- label.style.cssText = `
847
- position: absolute;
848
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
849
- background: rgba(59, 130, 246, 0.9);
850
- color: white;
851
- padding: 2px 6px;
852
- border-radius: 3px;
853
- font-family: monospace;
854
- font-size: 11px;
855
- font-weight: bold;
856
- box-shadow: 0 2px 4px rgba(0,0,0,0.3);
857
- pointer-events: none;
858
- white-space: nowrap;
859
- will-change: transform;
860
- contain: layout style paint;
846
+ label.style.cssText = `
847
+ position: absolute;
848
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
849
+ background: rgba(59, 130, 246, 0.9);
850
+ color: white;
851
+ padding: 2px 6px;
852
+ border-radius: 3px;
853
+ font-family: monospace;
854
+ font-size: 11px;
855
+ font-weight: bold;
856
+ box-shadow: 0 2px 4px rgba(0,0,0,0.3);
857
+ pointer-events: none;
858
+ white-space: nowrap;
859
+ will-change: transform;
860
+ contain: layout style paint;
861
861
  `;
862
862
  this.overlayContainer.appendChild(border);
863
863
  this.overlayContainer.appendChild(label);
@@ -491,6 +491,15 @@ export class BackgroundAgent {
491
491
  error: chrome.runtime.lastError.message || 'Failed to send message to tab',
492
492
  });
493
493
  }
494
+ else if (!response) {
495
+ // Response is undefined but no lastError - return success with empty data
496
+ // so the agent can continue gracefully
497
+ resolve({
498
+ id: command.id,
499
+ success: true,
500
+ data: { message: 'No response from content script - page may be unresponsive' },
501
+ });
502
+ }
494
503
  else {
495
504
  const resp = response;
496
505
  if (resp.type === 'btcp:response') {
@@ -585,7 +594,7 @@ export class BackgroundAgent {
585
594
  'tabNew', 'tabClose', 'tabSwitch', 'tabList',
586
595
  'groupCreate', 'groupUpdate', 'groupDelete', 'groupList',
587
596
  'groupAddTabs', 'groupRemoveTabs', 'groupGet',
588
- 'sessionGetCurrent', 'popupInitialize',
597
+ 'sessionGetCurrent', 'sessionUseGroup', 'popupInitialize',
589
598
  ];
590
599
  return extensionActions.includes(command.action);
591
600
  }
@@ -670,6 +679,17 @@ export class BackgroundAgent {
670
679
  const session = await this.sessionManager.getCurrentSession();
671
680
  return { id: command.id, success: true, data: { session } };
672
681
  }
682
+ case 'sessionUseGroup': {
683
+ const used = await this.sessionManager.useExistingGroupAsSession(command.groupId);
684
+ if (!used) {
685
+ return {
686
+ id: command.id,
687
+ success: false,
688
+ error: `Failed to use group ${command.groupId} as session. Group may not exist.`,
689
+ };
690
+ }
691
+ return { id: command.id, success: true, data: { groupId: command.groupId, used: true } };
692
+ }
673
693
  case 'popupInitialize': {
674
694
  console.log('[BackgroundAgent] Popup initializing, checking for session reconnection...');
675
695
  // Check if we have a stored session but no active connection
@@ -39,7 +39,7 @@
39
39
  * await client.click('@ref:5');
40
40
  * ```
41
41
  */
42
- import type { Command, Response, TabInfo } from './types.js';
42
+ import type { Command, Response } from './types.js';
43
43
  import { BackgroundAgent as _BackgroundAgent, getBackgroundAgent as _getBackgroundAgent, setupMessageListener as _setupMessageListener, BrowserAgent as _BrowserAgent, getBrowserAgent as _getBrowserAgent } from './background.js';
44
44
  export * from './types.js';
45
45
  export { createScriptMessenger, createMethodMessenger, type MessageDefinitions, type ScriptMessenger, type MethodMessenger, type ScriptMessengerOptions, type PayloadOf, type ResultOf, } from './script-messenger.js';
@@ -130,124 +130,22 @@ export interface Client {
130
130
  quality?: number;
131
131
  }): Promise<string>;
132
132
  /**
133
- * Open a new tab
133
+ * Wait for a selector to appear
134
134
  */
135
- tabNew(options?: {
136
- url?: string;
137
- active?: boolean;
138
- }): Promise<{
139
- tabId: number;
140
- url?: string;
141
- }>;
142
- /**
143
- * Close a tab
144
- */
145
- tabClose(tabId?: number): Promise<Response>;
146
- /**
147
- * Switch to a tab
148
- */
149
- tabSwitch(tabId: number): Promise<Response>;
150
- /**
151
- * List all tabs
152
- */
153
- tabList(): Promise<TabInfo[]>;
154
- /**
155
- * Create a new tab group
156
- */
157
- groupCreate(options?: {
158
- tabIds?: number[];
159
- title?: string;
160
- color?: string;
161
- collapsed?: boolean;
162
- }): Promise<{
163
- group: import('./session-types.js').GroupInfo;
164
- }>;
165
- /**
166
- * Update a tab group
167
- */
168
- groupUpdate(groupId: number, options: {
169
- title?: string;
170
- color?: string;
171
- collapsed?: boolean;
172
- }): Promise<{
173
- group: import('./session-types.js').GroupInfo;
174
- }>;
175
- /**
176
- * Delete a tab group (closes all tabs)
177
- */
178
- groupDelete(groupId: number): Promise<Response>;
179
- /**
180
- * List all tab groups
181
- */
182
- groupList(): Promise<import('./session-types.js').GroupInfo[]>;
183
- /**
184
- * Add tabs to a group
185
- */
186
- groupAddTabs(groupId: number, tabIds: number[]): Promise<Response>;
187
- /**
188
- * Remove tabs from their group
189
- */
190
- groupRemoveTabs(tabIds: number[]): Promise<Response>;
191
- /**
192
- * Get a specific tab group
193
- */
194
- groupGet(groupId: number): Promise<{
195
- group: import('./session-types.js').GroupInfo;
196
- }>;
197
- /**
198
- * Get current active session
199
- */
200
- sessionGetCurrent(): Promise<{
201
- session: import('./session-types.js').SessionInfo | null;
202
- }>;
203
- /**
204
- * Initialize popup (triggers session reconnection check)
205
- */
206
- popupInitialize(): Promise<{
207
- initialized: boolean;
208
- reconnected: boolean;
209
- }>;
135
+ wait(options?: {
136
+ selector?: string;
137
+ timeout?: number;
138
+ }): Promise<Response>;
210
139
  /**
211
- * Inject a script into the page's main world
212
- *
213
- * The script runs in the page context (not the content script isolated world),
214
- * allowing access to page-level APIs like window, fetch interceptors, etc.
215
- *
216
- * @example
217
- * ```typescript
218
- * await client.scriptInject(`
219
- * window.addEventListener('message', (event) => {
220
- * if (event.data?.type !== 'btcp:script-command') return;
221
- * if (event.data.scriptId !== 'helper') return;
222
- * const { commandId, payload } = event.data;
223
- * // Handle and ack
224
- * window.postMessage({ type: 'btcp:script-ack', commandId, result: { ok: true } }, '*');
225
- * });
226
- * `, { scriptId: 'helper' });
227
- * ```
140
+ * Press a key
228
141
  */
229
- scriptInject(code: string, options?: {
230
- scriptId?: string;
231
- }): Promise<{
232
- scriptId: string;
233
- injected: boolean;
234
- }>;
142
+ press(key: string, options?: {
143
+ selector?: string;
144
+ }): Promise<Response>;
235
145
  /**
236
- * Send a command to an injected script and wait for acknowledgment
237
- *
238
- * @example
239
- * ```typescript
240
- * const result = await client.scriptSend(
241
- * { action: 'getData', selector: '.items' },
242
- * { scriptId: 'helper', timeout: 5000 }
243
- * );
244
- * console.log(result); // { items: [...] }
245
- * ```
146
+ * Evaluate JavaScript in the page context
246
147
  */
247
- scriptSend(payload: unknown, options?: {
248
- scriptId?: string;
249
- timeout?: number;
250
- }): Promise<unknown>;
148
+ evaluate(expression: string): Promise<unknown>;
251
149
  }
252
150
  /**
253
151
  * Generate a unique command ID for BTCP commands
@@ -234,7 +234,6 @@ export function createClient() {
234
234
  assertSuccess(response);
235
235
  return response.data.visible;
236
236
  },
237
- // Tabs
238
237
  async screenshot(options) {
239
238
  const response = await sendCommand({
240
239
  id: generateCommandId(),
@@ -245,136 +244,27 @@ export function createClient() {
245
244
  assertSuccess(response);
246
245
  return response.data.screenshot;
247
246
  },
248
- async tabNew(options) {
249
- const response = await sendCommand({
250
- id: generateCommandId(),
251
- action: 'tabNew',
252
- url: options?.url,
253
- active: options?.active,
254
- });
255
- assertSuccess(response);
256
- return response.data;
257
- },
258
- async tabClose(tabId) {
259
- return sendCommand({
260
- id: generateCommandId(),
261
- action: 'tabClose',
262
- tabId,
263
- });
264
- },
265
- async tabSwitch(tabId) {
266
- return sendCommand({
267
- id: generateCommandId(),
268
- action: 'tabSwitch',
269
- tabId,
270
- });
271
- },
272
- async tabList() {
273
- const response = await sendCommand({
274
- id: generateCommandId(),
275
- action: 'tabList',
276
- });
277
- assertSuccess(response);
278
- return response.data.tabs;
279
- },
280
- // Tab Groups & Sessions
281
- async groupCreate(options) {
282
- const response = await sendCommand({
283
- id: generateCommandId(),
284
- action: 'groupCreate',
285
- tabIds: options?.tabIds,
286
- title: options?.title,
287
- color: options?.color,
288
- collapsed: options?.collapsed,
289
- });
290
- assertSuccess(response);
291
- return response.data;
292
- },
293
- async groupUpdate(groupId, options) {
294
- const response = await sendCommand({
295
- id: generateCommandId(),
296
- action: 'groupUpdate',
297
- groupId,
298
- title: options.title,
299
- color: options.color,
300
- collapsed: options.collapsed,
301
- });
302
- assertSuccess(response);
303
- return response.data;
304
- },
305
- async groupDelete(groupId) {
306
- return sendCommand({
307
- id: generateCommandId(),
308
- action: 'groupDelete',
309
- groupId,
310
- });
311
- },
312
- async groupList() {
313
- const response = await sendCommand({
314
- id: generateCommandId(),
315
- action: 'groupList',
316
- });
317
- assertSuccess(response);
318
- return response.data.groups;
319
- },
320
- async groupAddTabs(groupId, tabIds) {
247
+ async wait(options) {
321
248
  return sendCommand({
322
249
  id: generateCommandId(),
323
- action: 'groupAddTabs',
324
- groupId,
325
- tabIds,
250
+ action: 'wait',
251
+ selector: options?.selector,
252
+ timeout: options?.timeout,
326
253
  });
327
254
  },
328
- async groupRemoveTabs(tabIds) {
255
+ async press(key, options) {
329
256
  return sendCommand({
330
257
  id: generateCommandId(),
331
- action: 'groupRemoveTabs',
332
- tabIds,
333
- });
334
- },
335
- async groupGet(groupId) {
336
- const response = await sendCommand({
337
- id: generateCommandId(),
338
- action: 'groupGet',
339
- groupId,
340
- });
341
- assertSuccess(response);
342
- return response.data;
343
- },
344
- async sessionGetCurrent() {
345
- const response = await sendCommand({
346
- id: generateCommandId(),
347
- action: 'sessionGetCurrent',
348
- });
349
- assertSuccess(response);
350
- return response.data;
351
- },
352
- async popupInitialize() {
353
- const response = await sendCommand({
354
- id: generateCommandId(),
355
- action: 'popupInitialize',
356
- });
357
- assertSuccess(response);
358
- return response.data;
359
- },
360
- // Script Injection
361
- async scriptInject(code, options) {
362
- const response = await sendCommand({
363
- id: generateCommandId(),
364
- action: 'scriptInject',
365
- code,
366
- scriptId: options?.scriptId,
258
+ action: 'press',
259
+ key,
260
+ selector: options?.selector,
367
261
  });
368
- assertSuccess(response);
369
- return response.data;
370
262
  },
371
- async scriptSend(payload, options) {
263
+ async evaluate(expression) {
372
264
  const response = await sendCommand({
373
265
  id: generateCommandId(),
374
- action: 'scriptSend',
375
- payload,
376
- scriptId: options?.scriptId,
377
- timeout: options?.timeout,
266
+ action: 'evaluate',
267
+ script: expression,
378
268
  });
379
269
  assertSuccess(response);
380
270
  return response.data.result;
@@ -77,6 +77,9 @@ export interface RemoteAgentEvents {
77
77
  }
78
78
  /**
79
79
  * Get all browser tool definitions for BTCP registration
80
+ *
81
+ * Minimal toolset following Unix philosophy - each tool does one thing well.
82
+ * Advanced operations can be done via browser_evaluate.
80
83
  */
81
84
  export declare function getBrowserToolDefinitions(): BTCPToolDefinition[];
82
85
  /**