btcp-browser-agent 0.1.10 → 0.1.12

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,69 +1,69 @@
1
- {
2
- "name": "btcp-browser-agent",
3
- "version": "0.1.10",
4
- "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
- "type": "module",
6
- "main": "dist/index.js",
7
- "types": "dist/index.d.ts",
8
- "exports": {
9
- ".": {
10
- "types": "./dist/index.d.ts",
11
- "import": "./dist/index.js",
12
- "default": "./dist/index.js"
13
- },
14
- "./core": {
15
- "types": "./packages/core/dist/index.d.ts",
16
- "import": "./packages/core/dist/index.js",
17
- "default": "./packages/core/dist/index.js"
18
- },
19
- "./extension": {
20
- "types": "./packages/extension/dist/index.d.ts",
21
- "import": "./packages/extension/dist/index.js",
22
- "default": "./packages/extension/dist/index.js"
23
- },
24
- "./extension/content": {
25
- "types": "./packages/extension/dist/content.d.ts",
26
- "import": "./packages/extension/dist/content.js",
27
- "default": "./packages/extension/dist/content.js"
28
- },
29
- "./extension/background": {
30
- "types": "./packages/extension/dist/background.d.ts",
31
- "import": "./packages/extension/dist/background.js",
32
- "default": "./packages/extension/dist/background.js"
33
- }
34
- },
35
- "scripts": {
36
- "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
- "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
- "clean": "rm -rf dist packages/*/dist",
39
- "prepare": "npm run build",
40
- "test": "vitest run",
41
- "test:watch": "vitest",
42
- "typecheck": "tsc --noEmit"
43
- },
44
- "workspaces": [
45
- "packages/core",
46
- "packages/extension",
47
- "packages/cli"
48
- ],
49
- "files": [
50
- "dist",
51
- "packages/core/dist",
52
- "packages/extension/dist",
53
- "!**/__tests__",
54
- "!**/*.map"
55
- ],
56
- "license": "Apache-2.0",
57
- "repository": {
58
- "type": "git",
59
- "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
- },
61
- "dependencies": {},
62
- "devDependencies": {
63
- "@types/chrome": "^0.0.268",
64
- "@types/node": "^20.10.0",
65
- "jsdom": "^24.0.0",
66
- "typescript": "^5.3.0",
67
- "vitest": "^2.0.0"
68
- }
69
- }
1
+ {
2
+ "name": "btcp-browser-agent",
3
+ "version": "0.1.12",
4
+ "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js",
12
+ "default": "./dist/index.js"
13
+ },
14
+ "./core": {
15
+ "types": "./packages/core/dist/index.d.ts",
16
+ "import": "./packages/core/dist/index.js",
17
+ "default": "./packages/core/dist/index.js"
18
+ },
19
+ "./extension": {
20
+ "types": "./packages/extension/dist/index.d.ts",
21
+ "import": "./packages/extension/dist/index.js",
22
+ "default": "./packages/extension/dist/index.js"
23
+ },
24
+ "./extension/content": {
25
+ "types": "./packages/extension/dist/content.d.ts",
26
+ "import": "./packages/extension/dist/content.js",
27
+ "default": "./packages/extension/dist/content.js"
28
+ },
29
+ "./extension/background": {
30
+ "types": "./packages/extension/dist/background.d.ts",
31
+ "import": "./packages/extension/dist/background.js",
32
+ "default": "./packages/extension/dist/background.js"
33
+ }
34
+ },
35
+ "scripts": {
36
+ "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
+ "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
+ "clean": "rm -rf dist packages/*/dist",
39
+ "prepare": "npm run build",
40
+ "test": "vitest run",
41
+ "test:watch": "vitest",
42
+ "typecheck": "tsc --noEmit"
43
+ },
44
+ "workspaces": [
45
+ "packages/core",
46
+ "packages/extension",
47
+ "packages/cli"
48
+ ],
49
+ "files": [
50
+ "dist",
51
+ "packages/core/dist",
52
+ "packages/extension/dist",
53
+ "!**/__tests__",
54
+ "!**/*.map"
55
+ ],
56
+ "license": "Apache-2.0",
57
+ "repository": {
58
+ "type": "git",
59
+ "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
+ },
61
+ "dependencies": {},
62
+ "devDependencies": {
63
+ "@types/chrome": "^0.0.268",
64
+ "@types/node": "^20.10.0",
65
+ "jsdom": "^24.0.0",
66
+ "typescript": "^5.3.0",
67
+ "vitest": "^2.0.0"
68
+ }
69
+ }
@@ -349,31 +349,99 @@ export class DOMActions {
349
349
  }
350
350
  async type(selector, text, options = {}) {
351
351
  const element = this.getElement(selector);
352
- if (!(element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement)) {
352
+ // Check if element is contenteditable
353
+ const isContentEditable = element.getAttribute('contenteditable') === 'true' ||
354
+ element.getAttribute('contenteditable') === '';
355
+ if (!(element instanceof HTMLInputElement || element instanceof HTMLTextAreaElement || isContentEditable)) {
353
356
  const actualType = element.tagName.toLowerCase();
354
357
  const availableActions = this.getAvailableActionsForElement(element);
355
- throw createElementNotCompatibleError(selector, 'type', actualType, ['input', 'textarea'], availableActions);
358
+ throw createElementNotCompatibleError(selector, 'type', actualType, ['input', 'textarea', 'contenteditable'], availableActions);
356
359
  }
357
- element.focus();
358
- if (options.clear) {
359
- element.value = '';
360
- element.dispatchEvent(new Event('input', { bubbles: true }));
360
+ // Focus the element (cast to HTMLElement for contenteditable)
361
+ if (element instanceof HTMLElement) {
362
+ element.focus();
361
363
  }
362
- for (const char of text) {
363
- element.dispatchEvent(new KeyboardEvent('keydown', { key: char, bubbles: true }));
364
- element.dispatchEvent(new KeyboardEvent('keypress', { key: char, bubbles: true }));
365
- element.value += char;
366
- element.dispatchEvent(new Event('input', { bubbles: true }));
367
- element.dispatchEvent(new KeyboardEvent('keyup', { key: char, bubbles: true }));
368
- if (options.delay) {
369
- await this.sleep(options.delay);
364
+ // Handle contenteditable elements differently
365
+ if (isContentEditable) {
366
+ const htmlElement = element;
367
+ if (options.clear) {
368
+ htmlElement.textContent = '';
369
+ htmlElement.dispatchEvent(new Event('input', { bubbles: true }));
370
+ }
371
+ for (const char of text) {
372
+ htmlElement.dispatchEvent(new KeyboardEvent('keydown', { key: char, bubbles: true }));
373
+ htmlElement.dispatchEvent(new KeyboardEvent('keypress', { key: char, bubbles: true }));
374
+ // Insert text at cursor position or append
375
+ const selection = this.window.getSelection();
376
+ if (selection && selection.rangeCount > 0) {
377
+ const range = selection.getRangeAt(0);
378
+ range.deleteContents();
379
+ const textNode = this.document.createTextNode(char);
380
+ range.insertNode(textNode);
381
+ range.setStartAfter(textNode);
382
+ range.setEndAfter(textNode);
383
+ selection.removeAllRanges();
384
+ selection.addRange(range);
385
+ }
386
+ else {
387
+ htmlElement.textContent += char;
388
+ }
389
+ htmlElement.dispatchEvent(new Event('input', { bubbles: true }));
390
+ htmlElement.dispatchEvent(new KeyboardEvent('keyup', { key: char, bubbles: true }));
391
+ if (options.delay) {
392
+ await this.sleep(options.delay);
393
+ }
394
+ }
395
+ htmlElement.dispatchEvent(new Event('change', { bubbles: true }));
396
+ // Wait for verification that textContent contains typed text
397
+ const result = await waitForAssertion(() => {
398
+ const content = htmlElement.textContent || '';
399
+ const expected = text;
400
+ const actual = content;
401
+ if (!content.includes(text)) {
402
+ return {
403
+ success: false,
404
+ error: `Expected textContent to contain "${text}"`,
405
+ description: 'textContent check',
406
+ expected,
407
+ actual
408
+ };
409
+ }
410
+ return {
411
+ success: true,
412
+ error: null,
413
+ description: 'textContent check',
414
+ expected,
415
+ actual
416
+ };
417
+ }, { timeout: 1000, interval: 50 });
418
+ if (!result.success) {
419
+ throw createVerificationError('type', result, selector);
370
420
  }
371
421
  }
372
- element.dispatchEvent(new Event('change', { bubbles: true }));
373
- // Wait for verification that value contains typed text
374
- const result = await waitForAssertion(() => assertValueContains(element, text), { timeout: 1000, interval: 50 });
375
- if (!result.success) {
376
- throw createVerificationError('type', result, selector);
422
+ else {
423
+ // Handle regular input/textarea elements
424
+ const inputElement = element;
425
+ if (options.clear) {
426
+ inputElement.value = '';
427
+ inputElement.dispatchEvent(new Event('input', { bubbles: true }));
428
+ }
429
+ for (const char of text) {
430
+ inputElement.dispatchEvent(new KeyboardEvent('keydown', { key: char, bubbles: true }));
431
+ inputElement.dispatchEvent(new KeyboardEvent('keypress', { key: char, bubbles: true }));
432
+ inputElement.value += char;
433
+ inputElement.dispatchEvent(new Event('input', { bubbles: true }));
434
+ inputElement.dispatchEvent(new KeyboardEvent('keyup', { key: char, bubbles: true }));
435
+ if (options.delay) {
436
+ await this.sleep(options.delay);
437
+ }
438
+ }
439
+ inputElement.dispatchEvent(new Event('change', { bubbles: true }));
440
+ // Wait for verification that value contains typed text
441
+ const result = await waitForAssertion(() => assertValueContains(inputElement, text), { timeout: 1000, interval: 50 });
442
+ if (!result.success) {
443
+ throw createVerificationError('type', result, selector);
444
+ }
377
445
  }
378
446
  return { success: true, error: null };
379
447
  }
@@ -796,15 +864,15 @@ export class DOMActions {
796
864
  // Create overlay container with absolute positioning covering entire document
797
865
  this.overlayContainer = this.document.createElement('div');
798
866
  this.overlayContainer.id = 'btcp-highlight-overlay';
799
- this.overlayContainer.style.cssText = `
800
- position: absolute;
801
- top: 0;
802
- left: 0;
803
- width: ${this.document.documentElement.scrollWidth}px;
804
- height: ${this.document.documentElement.scrollHeight}px;
805
- pointer-events: none;
806
- z-index: 999999;
807
- contain: layout style paint;
867
+ this.overlayContainer.style.cssText = `
868
+ position: absolute;
869
+ top: 0;
870
+ left: 0;
871
+ width: ${this.document.documentElement.scrollWidth}px;
872
+ height: ${this.document.documentElement.scrollHeight}px;
873
+ pointer-events: none;
874
+ z-index: 999999;
875
+ contain: layout style paint;
808
876
  `;
809
877
  let highlightedCount = 0;
810
878
  // Create border overlays and labels for each ref
@@ -825,17 +893,17 @@ export class DOMActions {
825
893
  const border = this.document.createElement('div');
826
894
  border.className = 'btcp-ref-border';
827
895
  border.dataset.ref = ref;
828
- border.style.cssText = `
829
- position: absolute;
830
- width: ${bbox.width}px;
831
- height: ${bbox.height}px;
832
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
833
- border: 2px solid rgba(59, 130, 246, 0.8);
834
- border-radius: 2px;
835
- box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
836
- pointer-events: none;
837
- will-change: transform;
838
- contain: layout style paint;
896
+ border.style.cssText = `
897
+ position: absolute;
898
+ width: ${bbox.width}px;
899
+ height: ${bbox.height}px;
900
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
901
+ border: 2px solid rgba(59, 130, 246, 0.8);
902
+ border-radius: 2px;
903
+ box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
904
+ pointer-events: none;
905
+ will-change: transform;
906
+ contain: layout style paint;
839
907
  `;
840
908
  // Create label
841
909
  const label = this.document.createElement('div');
@@ -843,21 +911,21 @@ export class DOMActions {
843
911
  label.dataset.ref = ref;
844
912
  // Extract number from ref (e.g., "@ref:5" -> "5")
845
913
  label.textContent = ref.replace('@ref:', '');
846
- label.style.cssText = `
847
- position: absolute;
848
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
849
- background: rgba(59, 130, 246, 0.9);
850
- color: white;
851
- padding: 2px 6px;
852
- border-radius: 3px;
853
- font-family: monospace;
854
- font-size: 11px;
855
- font-weight: bold;
856
- box-shadow: 0 2px 4px rgba(0,0,0,0.3);
857
- pointer-events: none;
858
- white-space: nowrap;
859
- will-change: transform;
860
- contain: layout style paint;
914
+ label.style.cssText = `
915
+ position: absolute;
916
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
917
+ background: rgba(59, 130, 246, 0.9);
918
+ color: white;
919
+ padding: 2px 6px;
920
+ border-radius: 3px;
921
+ font-family: monospace;
922
+ font-size: 11px;
923
+ font-weight: bold;
924
+ box-shadow: 0 2px 4px rgba(0,0,0,0.3);
925
+ pointer-events: none;
926
+ white-space: nowrap;
927
+ will-change: transform;
928
+ contain: layout style paint;
861
929
  `;
862
930
  this.overlayContainer.appendChild(border);
863
931
  this.overlayContainer.appendChild(label);
@@ -110,6 +110,11 @@ function getRole(element) {
110
110
  const type = element.type || 'text';
111
111
  return INPUT_ROLES[type] || 'textbox';
112
112
  }
113
+ // Detect contenteditable elements (ProseMirror, Quill, TinyMCE, etc.)
114
+ const contentEditable = element.getAttribute('contenteditable');
115
+ if (contentEditable === 'true' || contentEditable === '') {
116
+ return 'textbox';
117
+ }
113
118
  return IMPLICIT_ROLES[tagName] || null;
114
119
  }
115
120
  /**
@@ -320,6 +325,21 @@ function getAccessibleName(element) {
320
325
  if (isImage) {
321
326
  return getImageLabel(element);
322
327
  }
328
+ // Handle contenteditable elements (ProseMirror, Quill, TinyMCE, etc.)
329
+ const contentEditable = element.getAttribute('contenteditable');
330
+ if (contentEditable === 'true' || contentEditable === '') {
331
+ // Try data-placeholder attribute (common in rich text editors)
332
+ const placeholder = element.getAttribute('data-placeholder');
333
+ if (placeholder)
334
+ return placeholder.trim();
335
+ // Try finding placeholder in child paragraph element
336
+ const placeholderEl = element.querySelector('[data-placeholder]');
337
+ if (placeholderEl) {
338
+ const placeholderText = placeholderEl.getAttribute('data-placeholder');
339
+ if (placeholderText)
340
+ return placeholderText.trim();
341
+ }
342
+ }
323
343
  const ariaLabel = element.getAttribute('aria-label');
324
344
  if (ariaLabel)
325
345
  return ariaLabel.trim();
@@ -594,7 +594,7 @@ export class BackgroundAgent {
594
594
  'tabNew', 'tabClose', 'tabSwitch', 'tabList',
595
595
  'groupCreate', 'groupUpdate', 'groupDelete', 'groupList',
596
596
  'groupAddTabs', 'groupRemoveTabs', 'groupGet',
597
- 'sessionGetCurrent', 'popupInitialize',
597
+ 'sessionGetCurrent', 'sessionUseGroup', 'popupInitialize',
598
598
  ];
599
599
  return extensionActions.includes(command.action);
600
600
  }
@@ -679,6 +679,17 @@ export class BackgroundAgent {
679
679
  const session = await this.sessionManager.getCurrentSession();
680
680
  return { id: command.id, success: true, data: { session } };
681
681
  }
682
+ case 'sessionUseGroup': {
683
+ const used = await this.sessionManager.useExistingGroupAsSession(command.groupId);
684
+ if (!used) {
685
+ return {
686
+ id: command.id,
687
+ success: false,
688
+ error: `Failed to use group ${command.groupId} as session. Group may not exist.`,
689
+ };
690
+ }
691
+ return { id: command.id, success: true, data: { groupId: command.groupId, used: true } };
692
+ }
682
693
  case 'popupInitialize': {
683
694
  console.log('[BackgroundAgent] Popup initializing, checking for session reconnection...');
684
695
  // Check if we have a stored session but no active connection
@@ -39,7 +39,7 @@
39
39
  * await client.click('@ref:5');
40
40
  * ```
41
41
  */
42
- import type { Command, Response, TabInfo } from './types.js';
42
+ import type { Command, Response } from './types.js';
43
43
  import { BackgroundAgent as _BackgroundAgent, getBackgroundAgent as _getBackgroundAgent, setupMessageListener as _setupMessageListener, BrowserAgent as _BrowserAgent, getBrowserAgent as _getBrowserAgent } from './background.js';
44
44
  export * from './types.js';
45
45
  export { createScriptMessenger, createMethodMessenger, type MessageDefinitions, type ScriptMessenger, type MethodMessenger, type ScriptMessengerOptions, type PayloadOf, type ResultOf, } from './script-messenger.js';
@@ -130,124 +130,22 @@ export interface Client {
130
130
  quality?: number;
131
131
  }): Promise<string>;
132
132
  /**
133
- * Open a new tab
133
+ * Wait for a selector to appear
134
134
  */
135
- tabNew(options?: {
136
- url?: string;
137
- active?: boolean;
138
- }): Promise<{
139
- tabId: number;
140
- url?: string;
141
- }>;
142
- /**
143
- * Close a tab
144
- */
145
- tabClose(tabId?: number): Promise<Response>;
146
- /**
147
- * Switch to a tab
148
- */
149
- tabSwitch(tabId: number): Promise<Response>;
150
- /**
151
- * List all tabs
152
- */
153
- tabList(): Promise<TabInfo[]>;
154
- /**
155
- * Create a new tab group
156
- */
157
- groupCreate(options?: {
158
- tabIds?: number[];
159
- title?: string;
160
- color?: string;
161
- collapsed?: boolean;
162
- }): Promise<{
163
- group: import('./session-types.js').GroupInfo;
164
- }>;
165
- /**
166
- * Update a tab group
167
- */
168
- groupUpdate(groupId: number, options: {
169
- title?: string;
170
- color?: string;
171
- collapsed?: boolean;
172
- }): Promise<{
173
- group: import('./session-types.js').GroupInfo;
174
- }>;
175
- /**
176
- * Delete a tab group (closes all tabs)
177
- */
178
- groupDelete(groupId: number): Promise<Response>;
179
- /**
180
- * List all tab groups
181
- */
182
- groupList(): Promise<import('./session-types.js').GroupInfo[]>;
183
- /**
184
- * Add tabs to a group
185
- */
186
- groupAddTabs(groupId: number, tabIds: number[]): Promise<Response>;
187
- /**
188
- * Remove tabs from their group
189
- */
190
- groupRemoveTabs(tabIds: number[]): Promise<Response>;
191
- /**
192
- * Get a specific tab group
193
- */
194
- groupGet(groupId: number): Promise<{
195
- group: import('./session-types.js').GroupInfo;
196
- }>;
197
- /**
198
- * Get current active session
199
- */
200
- sessionGetCurrent(): Promise<{
201
- session: import('./session-types.js').SessionInfo | null;
202
- }>;
203
- /**
204
- * Initialize popup (triggers session reconnection check)
205
- */
206
- popupInitialize(): Promise<{
207
- initialized: boolean;
208
- reconnected: boolean;
209
- }>;
135
+ wait(options?: {
136
+ selector?: string;
137
+ timeout?: number;
138
+ }): Promise<Response>;
210
139
  /**
211
- * Inject a script into the page's main world
212
- *
213
- * The script runs in the page context (not the content script isolated world),
214
- * allowing access to page-level APIs like window, fetch interceptors, etc.
215
- *
216
- * @example
217
- * ```typescript
218
- * await client.scriptInject(`
219
- * window.addEventListener('message', (event) => {
220
- * if (event.data?.type !== 'btcp:script-command') return;
221
- * if (event.data.scriptId !== 'helper') return;
222
- * const { commandId, payload } = event.data;
223
- * // Handle and ack
224
- * window.postMessage({ type: 'btcp:script-ack', commandId, result: { ok: true } }, '*');
225
- * });
226
- * `, { scriptId: 'helper' });
227
- * ```
140
+ * Press a key
228
141
  */
229
- scriptInject(code: string, options?: {
230
- scriptId?: string;
231
- }): Promise<{
232
- scriptId: string;
233
- injected: boolean;
234
- }>;
142
+ press(key: string, options?: {
143
+ selector?: string;
144
+ }): Promise<Response>;
235
145
  /**
236
- * Send a command to an injected script and wait for acknowledgment
237
- *
238
- * @example
239
- * ```typescript
240
- * const result = await client.scriptSend(
241
- * { action: 'getData', selector: '.items' },
242
- * { scriptId: 'helper', timeout: 5000 }
243
- * );
244
- * console.log(result); // { items: [...] }
245
- * ```
146
+ * Evaluate JavaScript in the page context
246
147
  */
247
- scriptSend(payload: unknown, options?: {
248
- scriptId?: string;
249
- timeout?: number;
250
- }): Promise<unknown>;
148
+ evaluate(expression: string): Promise<unknown>;
251
149
  }
252
150
  /**
253
151
  * Generate a unique command ID for BTCP commands