btcp-browser-agent 0.1.14 → 0.1.17

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/package.json CHANGED
@@ -1,69 +1,69 @@
1
- {
2
- "name": "btcp-browser-agent",
3
- "version": "0.1.14",
4
- "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
- "type": "module",
6
- "main": "dist/index.js",
7
- "types": "dist/index.d.ts",
8
- "exports": {
9
- ".": {
10
- "types": "./dist/index.d.ts",
11
- "import": "./dist/index.js",
12
- "default": "./dist/index.js"
13
- },
14
- "./core": {
15
- "types": "./packages/core/dist/index.d.ts",
16
- "import": "./packages/core/dist/index.js",
17
- "default": "./packages/core/dist/index.js"
18
- },
19
- "./extension": {
20
- "types": "./packages/extension/dist/index.d.ts",
21
- "import": "./packages/extension/dist/index.js",
22
- "default": "./packages/extension/dist/index.js"
23
- },
24
- "./extension/content": {
25
- "types": "./packages/extension/dist/content.d.ts",
26
- "import": "./packages/extension/dist/content.js",
27
- "default": "./packages/extension/dist/content.js"
28
- },
29
- "./extension/background": {
30
- "types": "./packages/extension/dist/background.d.ts",
31
- "import": "./packages/extension/dist/background.js",
32
- "default": "./packages/extension/dist/background.js"
33
- }
34
- },
35
- "scripts": {
36
- "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
- "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
- "clean": "rm -rf dist packages/*/dist",
39
- "prepare": "npm run build",
40
- "test": "vitest run",
41
- "test:watch": "vitest",
42
- "typecheck": "tsc --noEmit"
43
- },
44
- "workspaces": [
45
- "packages/core",
46
- "packages/extension",
47
- "packages/cli"
48
- ],
49
- "files": [
50
- "dist",
51
- "packages/core/dist",
52
- "packages/extension/dist",
53
- "!**/__tests__",
54
- "!**/*.map"
55
- ],
56
- "license": "Apache-2.0",
57
- "repository": {
58
- "type": "git",
59
- "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
- },
61
- "dependencies": {},
62
- "devDependencies": {
63
- "@types/chrome": "^0.0.268",
64
- "@types/node": "^20.10.0",
65
- "jsdom": "^24.0.0",
66
- "typescript": "^5.3.0",
67
- "vitest": "^2.0.0"
68
- }
69
- }
1
+ {
2
+ "name": "btcp-browser-agent",
3
+ "version": "0.1.17",
4
+ "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js",
12
+ "default": "./dist/index.js"
13
+ },
14
+ "./core": {
15
+ "types": "./packages/core/dist/index.d.ts",
16
+ "import": "./packages/core/dist/index.js",
17
+ "default": "./packages/core/dist/index.js"
18
+ },
19
+ "./extension": {
20
+ "types": "./packages/extension/dist/index.d.ts",
21
+ "import": "./packages/extension/dist/index.js",
22
+ "default": "./packages/extension/dist/index.js"
23
+ },
24
+ "./extension/content": {
25
+ "types": "./packages/extension/dist/content.d.ts",
26
+ "import": "./packages/extension/dist/content.js",
27
+ "default": "./packages/extension/dist/content.js"
28
+ },
29
+ "./extension/background": {
30
+ "types": "./packages/extension/dist/background.d.ts",
31
+ "import": "./packages/extension/dist/background.js",
32
+ "default": "./packages/extension/dist/background.js"
33
+ }
34
+ },
35
+ "scripts": {
36
+ "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
+ "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
+ "clean": "rm -rf dist packages/*/dist",
39
+ "prepare": "npm run build",
40
+ "test": "vitest run",
41
+ "test:watch": "vitest",
42
+ "typecheck": "tsc --noEmit"
43
+ },
44
+ "workspaces": [
45
+ "packages/core",
46
+ "packages/extension",
47
+ "packages/cli"
48
+ ],
49
+ "files": [
50
+ "dist",
51
+ "packages/core/dist",
52
+ "packages/extension/dist",
53
+ "!**/__tests__",
54
+ "!**/*.map"
55
+ ],
56
+ "license": "Apache-2.0",
57
+ "repository": {
58
+ "type": "git",
59
+ "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
+ },
61
+ "dependencies": {},
62
+ "devDependencies": {
63
+ "@types/chrome": "^0.0.268",
64
+ "@types/node": "^20.10.0",
65
+ "jsdom": "^24.0.0",
66
+ "typescript": "^5.3.0",
67
+ "vitest": "^2.0.0"
68
+ }
69
+ }
@@ -864,15 +864,15 @@ export class DOMActions {
864
864
  // Create overlay container with absolute positioning covering entire document
865
865
  this.overlayContainer = this.document.createElement('div');
866
866
  this.overlayContainer.id = 'btcp-highlight-overlay';
867
- this.overlayContainer.style.cssText = `
868
- position: absolute;
869
- top: 0;
870
- left: 0;
871
- width: ${this.document.documentElement.scrollWidth}px;
872
- height: ${this.document.documentElement.scrollHeight}px;
873
- pointer-events: none;
874
- z-index: 999999;
875
- contain: layout style paint;
867
+ this.overlayContainer.style.cssText = `
868
+ position: absolute;
869
+ top: 0;
870
+ left: 0;
871
+ width: ${this.document.documentElement.scrollWidth}px;
872
+ height: ${this.document.documentElement.scrollHeight}px;
873
+ pointer-events: none;
874
+ z-index: 999999;
875
+ contain: layout style paint;
876
876
  `;
877
877
  let highlightedCount = 0;
878
878
  // Create border overlays and labels for each ref
@@ -893,17 +893,17 @@ export class DOMActions {
893
893
  const border = this.document.createElement('div');
894
894
  border.className = 'btcp-ref-border';
895
895
  border.dataset.ref = ref;
896
- border.style.cssText = `
897
- position: absolute;
898
- width: ${bbox.width}px;
899
- height: ${bbox.height}px;
900
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
901
- border: 2px solid rgba(59, 130, 246, 0.8);
902
- border-radius: 2px;
903
- box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
904
- pointer-events: none;
905
- will-change: transform;
906
- contain: layout style paint;
896
+ border.style.cssText = `
897
+ position: absolute;
898
+ width: ${bbox.width}px;
899
+ height: ${bbox.height}px;
900
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
901
+ border: 2px solid rgba(59, 130, 246, 0.8);
902
+ border-radius: 2px;
903
+ box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
904
+ pointer-events: none;
905
+ will-change: transform;
906
+ contain: layout style paint;
907
907
  `;
908
908
  // Create label
909
909
  const label = this.document.createElement('div');
@@ -911,21 +911,21 @@ export class DOMActions {
911
911
  label.dataset.ref = ref;
912
912
  // Extract number from ref (e.g., "@ref:5" -> "5")
913
913
  label.textContent = ref.replace('@ref:', '');
914
- label.style.cssText = `
915
- position: absolute;
916
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
917
- background: rgba(59, 130, 246, 0.9);
918
- color: white;
919
- padding: 2px 6px;
920
- border-radius: 3px;
921
- font-family: monospace;
922
- font-size: 11px;
923
- font-weight: bold;
924
- box-shadow: 0 2px 4px rgba(0,0,0,0.3);
925
- pointer-events: none;
926
- white-space: nowrap;
927
- will-change: transform;
928
- contain: layout style paint;
914
+ label.style.cssText = `
915
+ position: absolute;
916
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
917
+ background: rgba(59, 130, 246, 0.9);
918
+ color: white;
919
+ padding: 2px 6px;
920
+ border-radius: 3px;
921
+ font-family: monospace;
922
+ font-size: 11px;
923
+ font-weight: bold;
924
+ box-shadow: 0 2px 4px rgba(0,0,0,0.3);
925
+ pointer-events: none;
926
+ white-space: nowrap;
927
+ will-change: transform;
928
+ contain: layout style paint;
929
929
  `;
930
930
  this.overlayContainer.appendChild(border);
931
931
  this.overlayContainer.appendChild(label);
@@ -723,6 +723,53 @@ function getSectionName(element) {
723
723
  }
724
724
  return '';
725
725
  }
726
+ /**
727
+ * Create head snapshot - lightweight HTTP HEAD-style page overview
728
+ * Returns page metadata without DOM traversal for fast verification
729
+ */
730
+ function createHeadSnapshot(document, _refMap, options) {
731
+ const { root = document.body } = options;
732
+ const win = document.defaultView || window;
733
+ // Count elements (lightweight - no deep traversal)
734
+ const allElements = root.querySelectorAll('*');
735
+ const interactiveSelector = 'button, a[href], input, textarea, select, [role="button"], [tabindex]:not([tabindex="-1"])';
736
+ const interactiveElements = root.querySelectorAll(interactiveSelector);
737
+ // Page status detection
738
+ const viewportArea = win.innerWidth * win.innerHeight;
739
+ const hasInteractive = interactiveElements.length > 0;
740
+ const isComplete = document.readyState === 'complete';
741
+ let status = 'loading';
742
+ if (viewportArea === 0) {
743
+ status = 'loading';
744
+ }
745
+ else if (!hasInteractive) {
746
+ status = 'empty';
747
+ }
748
+ else if (isComplete) {
749
+ status = 'ready';
750
+ }
751
+ else {
752
+ status = 'interactive';
753
+ }
754
+ // Build output
755
+ const output = [
756
+ `URL: ${document.location?.href || 'about:blank'}`,
757
+ `TITLE: ${document.title || 'Untitled'}`,
758
+ `VIEWPORT: ${win.innerWidth}x${win.innerHeight}`,
759
+ `STATUS: ${status}`,
760
+ `ELEMENTS: total=${allElements.length} interactive=${interactiveElements.length}`,
761
+ `READY_STATE: ${document.readyState}`
762
+ ].join('\n');
763
+ return {
764
+ tree: output,
765
+ refs: {}, // No refs in head mode
766
+ metadata: {
767
+ totalInteractiveElements: interactiveElements.length,
768
+ capturedElements: 0,
769
+ quality: 'high'
770
+ }
771
+ };
772
+ }
726
773
  /**
727
774
  * Create outline snapshot - structural overview with metadata
728
775
  */
@@ -1219,6 +1266,9 @@ export function createSnapshot(document, refMap, options = {}) {
1219
1266
  const { root = document.body, maxDepth = 50, includeHidden = false, mode = 'interactive', format = 'tree', grep: grepPattern } = options;
1220
1267
  // Dispatch based on mode
1221
1268
  const effectiveMode = mode;
1269
+ if (effectiveMode === 'head') {
1270
+ return createHeadSnapshot(document, refMap, { ...options, root });
1271
+ }
1222
1272
  if (effectiveMode === 'outline') {
1223
1273
  return createOutlineSnapshot(document, refMap, { ...options, root });
1224
1274
  }
@@ -96,7 +96,7 @@ export interface GrepOptions {
96
96
  /**
97
97
  * Snapshot mode determines what content to capture
98
98
  */
99
- export type SnapshotMode = 'interactive' | 'outline' | 'content';
99
+ export type SnapshotMode = 'interactive' | 'outline' | 'content' | 'head';
100
100
  /**
101
101
  * Snapshot output format
102
102
  */
@@ -115,6 +115,7 @@ export interface SnapshotCommand extends BaseCommand {
115
115
  baseSnapshot?: SnapshotData;
116
116
  /**
117
117
  * Snapshot mode:
118
+ * - 'head': Lightweight page overview (URL, title, element counts, status)
118
119
  * - 'interactive': Find clickable elements (default)
119
120
  * - 'outline': Understand page structure with xpaths + metadata
120
121
  * - 'content': Extract text content from sections
@@ -446,7 +446,7 @@ export class BackgroundAgent {
446
446
  resolve({
447
447
  id: command.id,
448
448
  success: false,
449
- error: chrome.runtime.lastError.message || 'Failed to send message to tab',
449
+ error: chrome.runtime.lastError?.message || 'Failed to send message to tab',
450
450
  });
451
451
  }
452
452
  else if (!response) {
@@ -40,6 +40,7 @@
40
40
  * ```
41
41
  */
42
42
  import type { Command, Response } from './types.js';
43
+ import type { Transport } from './transport/types.js';
43
44
  import { BackgroundAgent as _BackgroundAgent, getBackgroundAgent as _getBackgroundAgent, setupMessageListener as _setupMessageListener, BrowserAgent as _BrowserAgent, getBrowserAgent as _getBrowserAgent } from './background.js';
44
45
  export * from './types.js';
45
46
  export { createScriptMessenger, createMethodMessenger, type MessageDefinitions, type ScriptMessenger, type MethodMessenger, type ScriptMessengerOptions, type PayloadOf, type ResultOf, } from './script-messenger.js';
@@ -47,6 +48,7 @@ export { createRemoteAgent, getBrowserToolDefinitions, mapToolToCommand, formatR
47
48
  export { _BackgroundAgent as BackgroundAgent, _getBackgroundAgent as getBackgroundAgent, _setupMessageListener as setupMessageListener, _BrowserAgent as BrowserAgent, _getBrowserAgent as getBrowserAgent, };
48
49
  export { createContentAgent, type ContentAgent } from '../../core/dist/index.js';
49
50
  export type { SnapshotData, BoundingBox, Modifier, } from '../../core/dist/index.js';
51
+ export * from './transport/index.js';
50
52
  /**
51
53
  * Client for sending commands to the extension background script
52
54
  */
@@ -147,6 +149,24 @@ export interface Client {
147
149
  */
148
150
  evaluate(expression: string): Promise<unknown>;
149
151
  }
152
+ /**
153
+ * Options for creating a client
154
+ */
155
+ export interface CreateClientOptions {
156
+ /**
157
+ * Transport to use for sending commands.
158
+ * Defaults to ChromeExtensionTransport.
159
+ *
160
+ * @example Using direct transport in background script:
161
+ * ```typescript
162
+ * import { createClient, createDirectTransport, getBackgroundAgent } from '@btcp/browser-agent/extension';
163
+ *
164
+ * const transport = createDirectTransport({ agent: getBackgroundAgent() });
165
+ * const client = createClient({ transport });
166
+ * ```
167
+ */
168
+ transport?: Transport;
169
+ }
150
170
  /**
151
171
  * Generate a unique command ID for BTCP commands
152
172
  */
@@ -154,24 +174,31 @@ export declare function generateCommandId(): string;
154
174
  /**
155
175
  * Create a client for communicating with the extension
156
176
  *
157
- * This function works in both popup/content scripts and background scripts:
158
- * - In popup/content scripts: Uses chrome.runtime.sendMessage to communicate with background
159
- * - In background scripts: Uses BackgroundAgent directly for better performance
177
+ * By default uses ChromeExtensionTransport for popup/content script contexts.
178
+ * Pass a custom transport for different communication mechanisms.
160
179
  *
161
- * @example Popup usage:
180
+ * @example Default (Chrome Extension):
162
181
  * ```typescript
163
182
  * import { createClient } from '@btcp/browser-agent/extension';
164
183
  * const client = createClient();
165
184
  * await client.navigate('https://example.com');
166
185
  * ```
167
186
  *
168
- * @example Background script usage:
187
+ * @example With explicit transport:
169
188
  * ```typescript
170
- * import { createClient } from '@btcp/browser-agent/extension';
171
- * const client = createClient();
172
- * // Works the same way - commands go directly to BackgroundAgent
173
- * await client.navigate('https://example.com');
189
+ * import { createClient, createChromeExtensionTransport } from '@btcp/browser-agent/extension';
190
+ *
191
+ * const transport = createChromeExtensionTransport({ debug: true });
192
+ * const client = createClient({ transport });
193
+ * ```
194
+ *
195
+ * @example Direct transport (background script):
196
+ * ```typescript
197
+ * import { createClient, createDirectTransport, getBackgroundAgent } from '@btcp/browser-agent/extension';
198
+ *
199
+ * const transport = createDirectTransport({ agent: getBackgroundAgent() });
200
+ * const client = createClient({ transport });
174
201
  * ```
175
202
  */
176
- export declare function createClient(): Client;
203
+ export declare function createClient(options?: CreateClientOptions): Client;
177
204
  //# sourceMappingURL=index.d.ts.map
@@ -39,6 +39,7 @@
39
39
  * await client.click('@ref:5');
40
40
  * ```
41
41
  */
42
+ import { ChromeExtensionTransport } from './transport/chrome-extension.js';
42
43
  // Import for local use (and re-export below)
43
44
  import { BackgroundAgent as _BackgroundAgent, getBackgroundAgent as _getBackgroundAgent, setupMessageListener as _setupMessageListener, BrowserAgent as _BrowserAgent, getBrowserAgent as _getBrowserAgent, } from './background.js';
44
45
  export * from './types.js';
@@ -52,6 +53,8 @@ export { _BackgroundAgent as BackgroundAgent, _getBackgroundAgent as getBackgrou
52
53
  _BrowserAgent as BrowserAgent, _getBrowserAgent as getBrowserAgent, };
53
54
  // Re-export ContentAgent for content script usage
54
55
  export { createContentAgent } from '../../core/dist/index.js';
56
+ // Re-export transport module
57
+ export * from './transport/index.js';
55
58
  let commandIdCounter = 0;
56
59
  /**
57
60
  * Generate a unique command ID for BTCP commands
@@ -59,79 +62,41 @@ let commandIdCounter = 0;
59
62
  export function generateCommandId() {
60
63
  return `cmd_${Date.now()}_${commandIdCounter++}`;
61
64
  }
62
- /**
63
- * Check if we're running in a background/service worker context
64
- */
65
- function isBackgroundContext() {
66
- // In Manifest V3, background scripts run as service workers
67
- return typeof ServiceWorkerGlobalScope !== 'undefined' && self instanceof ServiceWorkerGlobalScope;
68
- }
69
65
  /**
70
66
  * Create a client for communicating with the extension
71
67
  *
72
- * This function works in both popup/content scripts and background scripts:
73
- * - In popup/content scripts: Uses chrome.runtime.sendMessage to communicate with background
74
- * - In background scripts: Uses BackgroundAgent directly for better performance
68
+ * By default uses ChromeExtensionTransport for popup/content script contexts.
69
+ * Pass a custom transport for different communication mechanisms.
75
70
  *
76
- * @example Popup usage:
71
+ * @example Default (Chrome Extension):
77
72
  * ```typescript
78
73
  * import { createClient } from '@btcp/browser-agent/extension';
79
74
  * const client = createClient();
80
75
  * await client.navigate('https://example.com');
81
76
  * ```
82
77
  *
83
- * @example Background script usage:
78
+ * @example With explicit transport:
84
79
  * ```typescript
85
- * import { createClient } from '@btcp/browser-agent/extension';
86
- * const client = createClient();
87
- * // Works the same way - commands go directly to BackgroundAgent
88
- * await client.navigate('https://example.com');
80
+ * import { createClient, createChromeExtensionTransport } from '@btcp/browser-agent/extension';
81
+ *
82
+ * const transport = createChromeExtensionTransport({ debug: true });
83
+ * const client = createClient({ transport });
84
+ * ```
85
+ *
86
+ * @example Direct transport (background script):
87
+ * ```typescript
88
+ * import { createClient, createDirectTransport, getBackgroundAgent } from '@btcp/browser-agent/extension';
89
+ *
90
+ * const transport = createDirectTransport({ agent: getBackgroundAgent() });
91
+ * const client = createClient({ transport });
89
92
  * ```
90
93
  */
91
- export function createClient() {
92
- // Detect if we're in background context
93
- const inBackground = isBackgroundContext();
94
- // Lazily get the background agent to avoid circular dependency issues
95
- let bgAgent = null;
96
- function getAgent() {
97
- if (!bgAgent) {
98
- // Use the singleton getter from background.js
99
- bgAgent = _getBackgroundAgent();
100
- }
101
- return bgAgent;
102
- }
94
+ export function createClient(options = {}) {
95
+ // Default to Chrome extension transport
96
+ const transport = options.transport ?? new ChromeExtensionTransport();
103
97
  async function sendCommand(command) {
104
- // In background context, use BackgroundAgent directly
105
- if (inBackground) {
106
- return getAgent().execute(command);
107
- }
108
- // In popup/content context, use message passing
109
98
  const id = command.id || generateCommandId();
110
- return new Promise((resolve) => {
111
- chrome.runtime.sendMessage({ type: 'btcp:command', command: { ...command, id } }, (response) => {
112
- if (chrome.runtime.lastError) {
113
- resolve({
114
- id,
115
- success: false,
116
- error: chrome.runtime.lastError.message || 'Unknown error',
117
- });
118
- }
119
- else {
120
- const resp = response;
121
- if (resp.type === 'btcp:response') {
122
- resolve(resp.response);
123
- }
124
- else {
125
- // Unexpected pong response
126
- resolve({
127
- id,
128
- success: false,
129
- error: 'Unexpected response type',
130
- });
131
- }
132
- }
133
- });
134
- });
99
+ return transport.send({ ...command, id });
135
100
  }
136
101
  function assertSuccess(response) {
137
102
  if (!response.success) {
@@ -27,6 +27,7 @@ export declare class SessionManager {
27
27
  private initializationPromise;
28
28
  private maxSession;
29
29
  private maxOpenTab;
30
+ private ensureSessionPromise;
30
31
  constructor(options?: SessionManagerOptions);
31
32
  /**
32
33
  * Wait for SessionManager to finish initialization
@@ -62,8 +63,10 @@ export declare class SessionManager {
62
63
  reconnectSession(groupId: number): Promise<boolean>;
63
64
  /**
64
65
  * Create a new tab group
66
+ * @param options Group creation options
67
+ * @param internal If true, bypasses session limit check (used by ensureSession)
65
68
  */
66
- createGroup(options?: GroupCreateOptions): Promise<GroupInfo>;
69
+ createGroup(options?: GroupCreateOptions, internal?: boolean): Promise<GroupInfo>;
67
70
  /**
68
71
  * Update an existing tab group
69
72
  */
@@ -136,8 +139,14 @@ export declare class SessionManager {
136
139
  /**
137
140
  * Ensure a session exists - restore from storage, use existing, or create new
138
141
  * Returns the session group ID (creates if needed)
142
+ *
143
+ * This method is atomic - concurrent calls will wait for the same promise
139
144
  */
140
145
  ensureSession(): Promise<number>;
146
+ /**
147
+ * Internal implementation of ensureSession
148
+ */
149
+ private _doEnsureSession;
141
150
  /**
142
151
  * Get the primary tab in session (ensures session exists first)
143
152
  * Returns the first tab in the session group