btcp-browser-agent 0.1.4 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
package/dist/index.d.ts CHANGED
@@ -25,10 +25,10 @@
25
25
  * ```typescript
26
26
  * import { createContentAgent } from '@btcp/browser-agent';
27
27
  * const agent = createContentAgent();
28
- * await agent.execute({ id: '1', action: 'snapshot' });
28
+ * await agent.execute({ action: 'snapshot' });
29
29
  * ```
30
30
  */
31
31
  export { createContentAgent, type ContentAgent, DOMActions, createSnapshot, createRefMap, createSimpleRefMap, type Command, type Response, type SnapshotData, type BoundingBox, type RefMap, type Modifier, } from '../packages/core/dist/index.js';
32
32
  export type { ExtensionMessage, ExtensionResponse, TabInfo, ChromeTab, ExtensionCommand, } from '../packages/extension/dist/index.js';
33
- export { BackgroundAgent, getBackgroundAgent, setupMessageListener, createClient, type Client, } from '../packages/extension/dist/index.js';
33
+ export { BackgroundAgent, getBackgroundAgent, setupMessageListener, createClient, generateCommandId, type Client, BrowserAgent, getBrowserAgent, } from '../packages/extension/dist/index.js';
34
34
  //# sourceMappingURL=index.d.ts.map
package/dist/index.js CHANGED
@@ -25,11 +25,13 @@
25
25
  * ```typescript
26
26
  * import { createContentAgent } from '@btcp/browser-agent';
27
27
  * const agent = createContentAgent();
28
- * await agent.execute({ id: '1', action: 'snapshot' });
28
+ * await agent.execute({ action: 'snapshot' });
29
29
  * ```
30
30
  */
31
31
  // Re-export everything from core (for standalone usage)
32
32
  export { createContentAgent, DOMActions, createSnapshot, createRefMap, createSimpleRefMap, } from '../packages/core/dist/index.js';
33
33
  // Re-export extension functions
34
- export { BackgroundAgent, getBackgroundAgent, setupMessageListener, createClient, } from '../packages/extension/dist/index.js';
34
+ export { BackgroundAgent, getBackgroundAgent, setupMessageListener, createClient, generateCommandId,
35
+ // Deprecated aliases for backwards compatibility
36
+ BrowserAgent, getBrowserAgent, } from '../packages/extension/dist/index.js';
35
37
  //# sourceMappingURL=index.js.map
package/package.json CHANGED
@@ -1,69 +1,69 @@
1
- {
2
- "name": "btcp-browser-agent",
3
- "version": "0.1.4",
4
- "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
- "type": "module",
6
- "main": "dist/index.js",
7
- "types": "dist/index.d.ts",
8
- "exports": {
9
- ".": {
10
- "types": "./dist/index.d.ts",
11
- "import": "./dist/index.js",
12
- "default": "./dist/index.js"
13
- },
14
- "./core": {
15
- "types": "./packages/core/dist/index.d.ts",
16
- "import": "./packages/core/dist/index.js",
17
- "default": "./packages/core/dist/index.js"
18
- },
19
- "./extension": {
20
- "types": "./packages/extension/dist/index.d.ts",
21
- "import": "./packages/extension/dist/index.js",
22
- "default": "./packages/extension/dist/index.js"
23
- },
24
- "./extension/content": {
25
- "types": "./packages/extension/dist/content.d.ts",
26
- "import": "./packages/extension/dist/content.js",
27
- "default": "./packages/extension/dist/content.js"
28
- },
29
- "./extension/background": {
30
- "types": "./packages/extension/dist/background.d.ts",
31
- "import": "./packages/extension/dist/background.js",
32
- "default": "./packages/extension/dist/background.js"
33
- }
34
- },
35
- "scripts": {
36
- "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
- "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
- "clean": "rm -rf dist packages/*/dist",
39
- "prepare": "npm run build",
40
- "test": "vitest run",
41
- "test:watch": "vitest",
42
- "typecheck": "tsc --noEmit"
43
- },
44
- "workspaces": [
45
- "packages/core",
46
- "packages/extension",
47
- "packages/cli"
48
- ],
49
- "files": [
50
- "dist",
51
- "packages/core/dist",
52
- "packages/extension/dist",
53
- "!**/__tests__",
54
- "!**/*.map"
55
- ],
56
- "license": "Apache-2.0",
57
- "repository": {
58
- "type": "git",
59
- "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
- },
61
- "dependencies": {},
62
- "devDependencies": {
63
- "@types/chrome": "^0.0.268",
64
- "@types/node": "^20.10.0",
65
- "jsdom": "^24.0.0",
66
- "typescript": "^5.3.0",
67
- "vitest": "^2.0.0"
68
- }
69
- }
1
+ {
2
+ "name": "btcp-browser-agent",
3
+ "version": "0.1.7",
4
+ "description": "Give AI agents the power to control browsers. A foundation for building agentic systems with smart DOM snapshots and stable element references.",
5
+ "type": "module",
6
+ "main": "dist/index.js",
7
+ "types": "dist/index.d.ts",
8
+ "exports": {
9
+ ".": {
10
+ "types": "./dist/index.d.ts",
11
+ "import": "./dist/index.js",
12
+ "default": "./dist/index.js"
13
+ },
14
+ "./core": {
15
+ "types": "./packages/core/dist/index.d.ts",
16
+ "import": "./packages/core/dist/index.js",
17
+ "default": "./packages/core/dist/index.js"
18
+ },
19
+ "./extension": {
20
+ "types": "./packages/extension/dist/index.d.ts",
21
+ "import": "./packages/extension/dist/index.js",
22
+ "default": "./packages/extension/dist/index.js"
23
+ },
24
+ "./extension/content": {
25
+ "types": "./packages/extension/dist/content.d.ts",
26
+ "import": "./packages/extension/dist/content.js",
27
+ "default": "./packages/extension/dist/content.js"
28
+ },
29
+ "./extension/background": {
30
+ "types": "./packages/extension/dist/background.d.ts",
31
+ "import": "./packages/extension/dist/background.js",
32
+ "default": "./packages/extension/dist/background.js"
33
+ }
34
+ },
35
+ "scripts": {
36
+ "build": "npm run build:packages && tsc -p tsconfig.build.json",
37
+ "build:packages": "tsc -p packages/core/tsconfig.json && tsc -p packages/extension/tsconfig.json && tsc -p packages/cli/tsconfig.json",
38
+ "clean": "rm -rf dist packages/*/dist",
39
+ "prepare": "npm run build",
40
+ "test": "vitest run",
41
+ "test:watch": "vitest",
42
+ "typecheck": "tsc --noEmit"
43
+ },
44
+ "workspaces": [
45
+ "packages/core",
46
+ "packages/extension",
47
+ "packages/cli"
48
+ ],
49
+ "files": [
50
+ "dist",
51
+ "packages/core/dist",
52
+ "packages/extension/dist",
53
+ "!**/__tests__",
54
+ "!**/*.map"
55
+ ],
56
+ "license": "Apache-2.0",
57
+ "repository": {
58
+ "type": "git",
59
+ "url": "git+https://github.com/browser-tool-calling-protocol/btcp-browser-agent.git"
60
+ },
61
+ "dependencies": {},
62
+ "devDependencies": {
63
+ "@types/chrome": "^0.0.268",
64
+ "@types/node": "^20.10.0",
65
+ "jsdom": "^24.0.0",
66
+ "typescript": "^5.3.0",
67
+ "vitest": "^2.0.0"
68
+ }
69
+ }
@@ -4,6 +4,10 @@
4
4
  * Element interaction handlers using native browser APIs.
5
5
  */
6
6
  import type { Command, Response, RefMap } from './types.js';
7
+ /**
8
+ * Generate a command ID of the form `cmd_<epoch-ms>_<counter>`, unique within one loaded copy of this module
9
+ */
10
+ export declare function generateCommandId(): string;
7
11
  /**
8
12
  * DOM Actions executor
9
13
  */
@@ -18,6 +22,8 @@ export declare class DOMActions {
18
22
  constructor(doc: Document, win: Window, refMap: RefMap);
19
23
  /**
20
24
  * Execute a command and return a response
25
+ *
26
+ * If the command carries no ID, one is auto-generated; a caller-supplied ID is kept and echoed in the response.
21
27
  */
22
28
  execute(command: Command): Promise<Response>;
23
29
  private dispatch;
@@ -5,6 +5,14 @@
5
5
  */
6
6
  import { createSnapshot } from './snapshot.js';
7
7
  import { DetailedError, createElementNotFoundError, createElementNotCompatibleError, createTimeoutError, createInvalidParametersError, } from './errors.js';
8
+ // Command ID counter for auto-generated IDs
9
+ let commandIdCounter = 0;
10
+ /**
11
+ * Generate a command ID of the form `cmd_<epoch-ms>_<counter>`, unique within one loaded copy of this module
12
+ */
13
+ export function generateCommandId() {
14
+ return `cmd_${Date.now()}_${commandIdCounter++}`;
15
+ }
8
16
  /**
9
17
  * DOM Actions executor
10
18
  */
@@ -23,18 +31,22 @@ export class DOMActions {
23
31
  }
24
32
  /**
25
33
  * Execute a command and return a response
34
+ *
35
+ * If the command carries no ID, one is auto-generated; a caller-supplied ID is kept and echoed in the response.
26
36
  */
27
37
  async execute(command) {
38
+ // Auto-generate ID if not provided
39
+ const id = command.id || generateCommandId();
28
40
  try {
29
41
  const data = await this.dispatch(command);
30
- return { id: command.id, success: true, data };
42
+ return { id, success: true, data };
31
43
  }
32
44
  catch (error) {
33
45
  const message = error instanceof Error ? error.message : String(error);
34
46
  // Include structured error data if available
35
47
  if (error instanceof DetailedError) {
36
48
  return {
37
- id: command.id,
49
+ id,
38
50
  success: false,
39
51
  error: message,
40
52
  errorCode: error.code,
@@ -42,7 +54,7 @@ export class DOMActions {
42
54
  suggestions: error.suggestions,
43
55
  };
44
56
  }
45
- return { id: command.id, success: false, error: message };
57
+ return { id, success: false, error: message };
46
58
  }
47
59
  }
48
60
  async dispatch(command) {
@@ -90,11 +102,13 @@ export class DOMActions {
90
102
  selector: command.selector,
91
103
  maxDepth: command.maxDepth,
92
104
  includeHidden: command.includeHidden,
93
- interactive: command.interactive,
94
105
  compact: command.compact,
95
- all: command.all,
106
+ mode: command.mode,
96
107
  format: command.format,
97
108
  grep: command.grep,
109
+ maxLength: command.maxLength,
110
+ includeLinks: command.includeLinks,
111
+ includeImages: command.includeImages,
98
112
  });
99
113
  case 'querySelector':
100
114
  return this.querySelector(command.selector);
@@ -482,11 +496,13 @@ export class DOMActions {
482
496
  root,
483
497
  maxDepth: options.maxDepth,
484
498
  includeHidden: options.includeHidden,
485
- interactive: options.interactive,
486
499
  compact: options.compact,
487
- all: options.all,
500
+ mode: options.mode,
488
501
  format: options.format,
489
502
  grep: options.grep,
503
+ maxLength: options.maxLength,
504
+ includeLinks: options.includeLinks,
505
+ includeImages: options.includeImages,
490
506
  });
491
507
  // Store snapshot data for highlight command (preserve refs internally)
492
508
  this.lastSnapshotData = snapshotData;
@@ -749,15 +765,15 @@ export class DOMActions {
749
765
  // Create overlay container with absolute positioning covering entire document
750
766
  this.overlayContainer = this.document.createElement('div');
751
767
  this.overlayContainer.id = 'btcp-highlight-overlay';
752
- this.overlayContainer.style.cssText = `
753
- position: absolute;
754
- top: 0;
755
- left: 0;
756
- width: ${this.document.documentElement.scrollWidth}px;
757
- height: ${this.document.documentElement.scrollHeight}px;
758
- pointer-events: none;
759
- z-index: 999999;
760
- contain: layout style paint;
768
+ this.overlayContainer.style.cssText = `
769
+ position: absolute;
770
+ top: 0;
771
+ left: 0;
772
+ width: ${this.document.documentElement.scrollWidth}px;
773
+ height: ${this.document.documentElement.scrollHeight}px;
774
+ pointer-events: none;
775
+ z-index: 999999;
776
+ contain: layout style paint;
761
777
  `;
762
778
  let highlightedCount = 0;
763
779
  // Create border overlays and labels for each ref
@@ -778,17 +794,17 @@ export class DOMActions {
778
794
  const border = this.document.createElement('div');
779
795
  border.className = 'btcp-ref-border';
780
796
  border.dataset.ref = ref;
781
- border.style.cssText = `
782
- position: absolute;
783
- width: ${bbox.width}px;
784
- height: ${bbox.height}px;
785
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
786
- border: 2px solid rgba(59, 130, 246, 0.8);
787
- border-radius: 2px;
788
- box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
789
- pointer-events: none;
790
- will-change: transform;
791
- contain: layout style paint;
797
+ border.style.cssText = `
798
+ position: absolute;
799
+ width: ${bbox.width}px;
800
+ height: ${bbox.height}px;
801
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
802
+ border: 2px solid rgba(59, 130, 246, 0.8);
803
+ border-radius: 2px;
804
+ box-shadow: 0 0 0 1px rgba(59, 130, 246, 0.2);
805
+ pointer-events: none;
806
+ will-change: transform;
807
+ contain: layout style paint;
792
808
  `;
793
809
  // Create label
794
810
  const label = this.document.createElement('div');
@@ -796,21 +812,21 @@ export class DOMActions {
796
812
  label.dataset.ref = ref;
797
813
  // Extract number from ref (e.g., "@ref:5" -> "5")
798
814
  label.textContent = ref.replace('@ref:', '');
799
- label.style.cssText = `
800
- position: absolute;
801
- transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
802
- background: rgba(59, 130, 246, 0.9);
803
- color: white;
804
- padding: 2px 6px;
805
- border-radius: 3px;
806
- font-family: monospace;
807
- font-size: 11px;
808
- font-weight: bold;
809
- box-shadow: 0 2px 4px rgba(0,0,0,0.3);
810
- pointer-events: none;
811
- white-space: nowrap;
812
- will-change: transform;
813
- contain: layout style paint;
815
+ label.style.cssText = `
816
+ position: absolute;
817
+ transform: translate3d(${bbox.left + this.window.scrollX}px, ${bbox.top + this.window.scrollY}px, 0);
818
+ background: rgba(59, 130, 246, 0.9);
819
+ color: white;
820
+ padding: 2px 6px;
821
+ border-radius: 3px;
822
+ font-family: monospace;
823
+ font-size: 11px;
824
+ font-weight: bold;
825
+ box-shadow: 0 2px 4px rgba(0,0,0,0.3);
826
+ pointer-events: none;
827
+ white-space: nowrap;
828
+ will-change: transform;
829
+ contain: layout style paint;
814
830
  `;
815
831
  this.overlayContainer.appendChild(border);
816
832
  this.overlayContainer.appendChild(label);
@@ -11,17 +11,10 @@
11
11
  * const agent = createContentAgent(document, window);
12
12
  *
13
13
  * // Take a snapshot
14
- * const snapshot = await agent.execute({
15
- * id: '1',
16
- * action: 'snapshot'
17
- * });
14
+ * const snapshot = await agent.execute({ action: 'snapshot' });
18
15
  *
19
16
  * // Click an element
20
- * await agent.execute({
21
- * id: '2',
22
- * action: 'click',
23
- * selector: '@ref:5' // From snapshot
24
- * });
17
+ * await agent.execute({ action: 'click', selector: '@ref:5' });
25
18
  * ```
26
19
  */
27
20
  import type { Command, Response, RefMap } from './types.js';
@@ -29,7 +22,7 @@ export * from './types.js';
29
22
  export * from './errors.js';
30
23
  export { createSnapshot } from './snapshot.js';
31
24
  export { createRefMap, createSimpleRefMap } from './ref-map.js';
32
- export { DOMActions } from './actions.js';
25
+ export { DOMActions, generateCommandId } from './actions.js';
33
26
  /**
34
27
  * ContentAgent - DOM automation agent that runs in content script context
35
28
  *
@@ -83,10 +76,10 @@ export interface ContentAgent {
83
76
  * const agent = createContentAgent();
84
77
  *
85
78
  * // Take a snapshot of the page
86
- * const { data } = await agent.execute({ id: '1', action: 'snapshot' });
79
+ * const { data } = await agent.execute({ action: 'snapshot' });
87
80
  *
88
81
  * // Click an element using ref from snapshot
89
- * await agent.execute({ id: '2', action: 'click', selector: '@ref:5' });
82
+ * await agent.execute({ action: 'click', selector: '@ref:5' });
90
83
  * ```
91
84
  */
92
85
  export declare function createContentAgent(doc?: Document, win?: Window): ContentAgent;
@@ -11,17 +11,10 @@
11
11
  * const agent = createContentAgent(document, window);
12
12
  *
13
13
  * // Take a snapshot
14
- * const snapshot = await agent.execute({
15
- * id: '1',
16
- * action: 'snapshot'
17
- * });
14
+ * const snapshot = await agent.execute({ action: 'snapshot' });
18
15
  *
19
16
  * // Click an element
20
- * await agent.execute({
21
- * id: '2',
22
- * action: 'click',
23
- * selector: '@ref:5' // From snapshot
24
- * });
17
+ * await agent.execute({ action: 'click', selector: '@ref:5' });
25
18
  * ```
26
19
  */
27
20
  import { DOMActions } from './actions.js';
@@ -30,7 +23,7 @@ export * from './types.js';
30
23
  export * from './errors.js';
31
24
  export { createSnapshot } from './snapshot.js';
32
25
  export { createRefMap, createSimpleRefMap } from './ref-map.js';
33
- export { DOMActions } from './actions.js';
26
+ export { DOMActions, generateCommandId } from './actions.js';
34
27
  /**
35
28
  * Create a ContentAgent for DOM automation
36
29
  *
@@ -44,10 +37,10 @@ export { DOMActions } from './actions.js';
44
37
  * const agent = createContentAgent();
45
38
  *
46
39
  * // Take a snapshot of the page
47
- * const { data } = await agent.execute({ id: '1', action: 'snapshot' });
40
+ * const { data } = await agent.execute({ action: 'snapshot' });
48
41
  *
49
42
  * // Click an element using ref from snapshot
50
- * await agent.execute({ id: '2', action: 'click', selector: '@ref:5' });
43
+ * await agent.execute({ action: 'click', selector: '@ref:5' });
51
44
  * ```
52
45
  */
53
46
  export function createContentAgent(doc = document, win = window) {
@@ -3,8 +3,13 @@
3
3
  *
4
4
  * Generates a flat accessibility snapshot of the DOM.
5
5
  * Produces a compact, AI-friendly list of interactive elements.
6
+ *
7
+ * Supports three modes:
8
+ * - 'interactive': Find clickable elements (default)
9
+ * - 'outline': Understand page structure with xpaths + metadata
10
+ * - 'content': Extract text content from sections
6
11
  */
7
- import type { SnapshotData, RefMap } from './types.js';
12
+ import type { SnapshotData, RefMap, SnapshotMode, SnapshotFormat } from './types.js';
8
13
  /**
9
14
  * Grep options (mirrors Unix grep flags)
10
15
  */
@@ -22,15 +27,37 @@ interface SnapshotOptions {
22
27
  root?: Element;
23
28
  maxDepth?: number;
24
29
  includeHidden?: boolean;
25
- interactive?: boolean;
26
30
  compact?: boolean;
27
- all?: boolean;
28
- format?: 'tree' | 'html';
31
+ /**
32
+ * Snapshot mode:
33
+ * - 'interactive': Find clickable elements (default)
34
+ * - 'outline': Understand page structure with xpaths + metadata
35
+ * - 'content': Extract text content from sections
36
+ */
37
+ mode?: SnapshotMode;
38
+ /**
39
+ * Output format:
40
+ * - 'tree': Flat accessibility tree (default)
41
+ * - 'html': Raw HTML
42
+ * - 'markdown': Markdown formatted content
43
+ */
44
+ format?: SnapshotFormat;
29
45
  /** Grep filter - string pattern or options object */
30
46
  grep?: string | GrepOptions;
47
+ /** Max chars per section in content mode */
48
+ maxLength?: number;
49
+ /** Include links as [text](url) in markdown format */
50
+ includeLinks?: boolean;
51
+ /** Include images as ![alt](src) in markdown format */
52
+ includeImages?: boolean;
31
53
  }
32
54
  /**
33
55
  * Generate flat snapshot of the DOM
56
+ *
57
+ * Supports three modes:
58
+ * - 'interactive' (default): Find clickable elements with @ref markers
59
+ * - 'outline': Structural overview with xpaths and metadata
60
+ * - 'content': Extract text content from sections
34
61
  */
35
62
  export declare function createSnapshot(document: Document, refMap: RefMap, options?: SnapshotOptions): SnapshotData;
36
63
  export {};