@kritchoff/agent-browser 0.9.52 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,7 +57,8 @@ async function main() {
57
57
  default:
58
58
  // Pass through arbitrary commands to the agent daemon
59
59
  // e.g. "agent-browser open https://google.com"
60
- await agent.command(...filteredArgs);
60
+ const result = await agent.command(...filteredArgs);
61
+ console.log(result);
61
62
  break;
62
63
  }
63
64
  } catch (error) {
package/package.json CHANGED
@@ -1,6 +1,6 @@
1
1
  {
2
2
  "name": "@kritchoff/agent-browser",
3
- "version": "0.9.52",
3
+ "version": "1.0.1",
4
4
  "description": "Headless browser automation CLI for AI agents",
5
5
  "type": "module",
6
6
  "main": "dist/index.js",
@@ -10,8 +10,6 @@
10
10
  "bin",
11
11
  "scripts",
12
12
  "skills",
13
- "sdk.sh",
14
- "start.sh",
15
13
  "docker-compose.sdk.yml"
16
14
  ],
17
15
  "bin": {
@@ -20,18 +18,10 @@
20
18
  "scripts": {
21
19
  "prepare": "husky",
22
20
  "version:sync": "node scripts/sync-version.js",
23
- "version": "npm run version:sync && git add cli/Cargo.toml",
21
+ "version": "npm run version:sync",
24
22
  "build": "tsc",
25
- "build:native": "npm run version:sync && cargo build --release --manifest-path cli/Cargo.toml && node scripts/copy-native.js",
26
- "build:linux": "npm run version:sync && docker compose -f docker/docker-compose.yml run --rm build-linux",
27
- "build:macos": "npm run version:sync && (cargo build --release --manifest-path cli/Cargo.toml --target aarch64-apple-darwin & cargo build --release --manifest-path cli/Cargo.toml --target x86_64-apple-darwin & wait) && cp cli/target/aarch64-apple-darwin/release/agent-browser bin/agent-browser-darwin-arm64 && cp cli/target/x86_64-apple-darwin/release/agent-browser bin/agent-browser-darwin-x64",
28
- "build:windows": "npm run version:sync && docker compose -f docker/docker-compose.yml run --rm build-windows",
29
- "build:all-platforms": "npm run version:sync && (npm run build:linux & npm run build:windows & wait) && npm run build:macos",
30
23
  "build:docker": "docker build -t agent-browser-builder -f docker/Dockerfile.build .",
31
- "snapshot": "./scripts/snapshot_manager.sh",
32
- "fast-reset": "./scripts/fast_reset.sh",
33
- "sdk": "./sdk.sh",
34
- "release": "npm run version:sync && npm run build && npm run build:all-platforms && npm publish",
24
+ "release": "npm run version:sync && npm run build && npm publish",
35
25
  "prepublishOnly": "npm run build",
36
26
  "start": "node dist/daemon.js",
37
27
  "dev": "tsx src/daemon.ts",
@@ -40,7 +30,6 @@
40
30
  "format:check": "prettier --check 'src/**/*.ts'",
41
31
  "test": "vitest run",
42
32
  "test:watch": "vitest",
43
- "postinstall": "node scripts/postinstall.js",
44
33
  "changeset": "changeset",
45
34
  "ci:version": "changeset version && pnpm run version:sync && pnpm install --no-frozen-lockfile",
46
35
  "ci:publish": "pnpm run version:sync && pnpm run build && changeset publish"
package/README.sdk.md DELETED
@@ -1,129 +0,0 @@
1
- # @wootzapp/agent-browser SDK
2
-
3
- The official Node.js SDK for controlling the WootzApp Agent Browser environment.
4
-
5
- This SDK provides a **Real Android Browser** (WootzApp) wrapped in a Docker container, controlled by a high-speed Playwright daemon. It is specifically designed for AI Agents to navigate the mobile web, bypassing bot detection, and generating LLM-friendly semantic trees (AXTree).
6
-
7
- ## Features
8
-
9
- - **Real Mobile Environment**: Full Android 14 OS with Touch Events and Mobile Viewports.
10
- - **Zero-Config Setup**: The SDK automatically downloads and orchestrates the required Docker containers.
11
- - **Hyper-Speed Warm Boots**: Uses advanced VDI Volume Mounting to boot the environment in **< 5 seconds** after the first run.
12
- - **Fast Resets**: Cleans the browser state via Android userspace reboot in **~15 seconds**.
13
- - **Playwright Parity**: Control the mobile browser using standard Playwright commands (`click`, `type`, `waitForSelector`).
14
- - **Semantic AXTree**: Built-in `snapshot()` method generates a clean, text-based UI tree optimized for LLM reasoning.
15
-
16
- ---
17
-
18
- ## Prerequisites
19
-
20
- 1. **Docker Engine**: Must be installed and running.
21
- - *Linux Users*: Ensure your user is in the `docker` group (`sudo usermod -aG docker $USER`).
22
- 2. **Node.js**: v18+ is required.
23
-
24
- ---
25
-
26
- ## Installation
27
-
28
- Install the SDK in your project:
29
-
30
- ```bash
31
- npm install @kritchoff/agent-browser
32
- ```
33
-
34
- *(Optional but recommended)* Install `tsx` to run TypeScript files natively:
35
- ```bash
36
- npm install -D tsx
37
- ```
38
-
39
- ---
40
-
41
- ## Quick Start Guide
42
-
43
- Create a file named `agent.ts`:
44
-
45
- ```typescript
46
- import { WootzAgent } from '@kritchoff/agent-browser';
47
-
48
- async function main() {
49
- // 1. Initialize the controller
50
- const agent = new WootzAgent();
51
-
52
- console.log('🚀 Booting Environment...');
53
- // First run: Downloads 3GB image and cold boots (~90s).
54
- // Next run: Instant Hyper-Speed Warm Boot (~5s).
55
- await agent.start();
56
-
57
- console.log('🌐 Navigating to Google...');
58
- await agent.navigate('https://google.com');
59
-
60
- console.log('📸 Capturing Semantic Tree for LLM...');
61
- const uiTree = await agent.snapshot();
62
- console.log(uiTree);
63
-
64
- console.log('⌨️ Typing and Searching...');
65
- await agent.type('textarea[name="q"]', 'WootzApp AI');
66
- await agent.press('Enter');
67
-
68
- console.log('🧹 Fast Reset for next task...');
69
- // Wipes all tabs, cookies, and cache in ~15s
70
- await agent.reset();
71
-
72
- console.log('🛑 Shutting down...');
73
- // Completely destroys containers and releases ports
74
- await agent.stop();
75
- }
76
-
77
- main().catch(console.error);
78
- ```
79
-
80
- Run your agent:
81
- ```bash
82
- npx tsx agent.ts
83
- ```
84
-
85
- ---
86
-
87
- ## CLI Usage (Global Install)
88
-
89
- You can also use the SDK directly from your terminal to debug or control the browser manually.
90
-
91
- ```bash
92
- npm install -g @kritchoff/agent-browser
93
-
94
- # Start the environment
95
- agent-browser start
96
-
97
- # Run commands
98
- agent-browser navigate https://news.ycombinator.com
99
- agent-browser click ".titleline a"
100
- agent-browser snapshot
101
-
102
- # Clean the browser
103
- agent-browser reset
104
-
105
- # Stop
106
- agent-browser stop
107
- ```
108
-
109
- ---
110
-
111
- ## Troubleshooting
112
-
113
- ### `Error: Failed to connect to agent daemon (ECONNREFUSED)`
114
- - **Cause**: The container failed to bind port `32001` to your host machine.
115
- - **Fix**: Run `agent.stop()` or `docker rm -f $(docker ps -aq)` to clear old/stuck containers, then run `agent.start()` again. The SDK has built-in self-healing, but a manual hard reset always works.
116
-
117
- ### `net::ERR_NAME_NOT_RESOLVED`
118
- - **Cause**: The Android Emulator temporarily lost its internet connection after a Warm Boot.
119
- - **Fix**: The SDK automatically toggles Airplane Mode to fix this, but if it persists, ensure your host machine has a stable internet connection before starting the agent.
120
-
121
- ### `Selector "..." matched X elements (Strict Mode Violation)`
122
- - **Cause**: Playwright requires selectors to point to exactly one element.
123
- - **Fix**: Use more specific selectors, or use Playwright's `>> nth=0` pseudo-selector to pick the first match (e.g., `agent.click('a >> nth=0')`).
124
-
125
- ---
126
-
127
- ## Next Steps
128
-
129
- For a complete list of all available commands (clicking, typing, tabbing, network interception), please read the [COMMANDS.md](./COMMANDS.md) file.
Binary file
package/scripts/copy-native.js DELETED
@@ -1,36 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * Copies the compiled Rust binary to bin/ with platform-specific naming
5
- */
6
-
7
- import { copyFileSync, existsSync, mkdirSync } from 'fs';
8
- import { dirname, join } from 'path';
9
- import { fileURLToPath } from 'url';
10
- import { platform, arch } from 'os';
11
-
12
- const __dirname = dirname(fileURLToPath(import.meta.url));
13
- const projectRoot = join(__dirname, '..');
14
-
15
- const sourceExt = platform() === 'win32' ? '.exe' : '';
16
- const sourcePath = join(projectRoot, `cli/target/release/agent-browser${sourceExt}`);
17
- const binDir = join(projectRoot, 'bin');
18
-
19
- // Determine platform suffix
20
- const platformKey = `${platform()}-${arch()}`;
21
- const ext = platform() === 'win32' ? '.exe' : '';
22
- const targetName = `agent-browser-${platformKey}${ext}`;
23
- const targetPath = join(binDir, targetName);
24
-
25
- if (!existsSync(sourcePath)) {
26
- console.error(`Error: Native binary not found at ${sourcePath}`);
27
- console.error('Run "cargo build --release --manifest-path cli/Cargo.toml" first');
28
- process.exit(1);
29
- }
30
-
31
- if (!existsSync(binDir)) {
32
- mkdirSync(binDir, { recursive: true });
33
- }
34
-
35
- copyFileSync(sourcePath, targetPath);
36
- console.log(`✓ Copied native binary to ${targetPath}`);
package/scripts/fast_reset.sh DELETED
@@ -1,117 +0,0 @@
1
- #!/bin/bash
2
- # Fast Android environment reset using userspace reboot.
3
- #
4
- # This script resets the Android emulator state much faster (~15s) than
5
- # a full container restart (~60s). It uses 'adb reboot userspace' to
6
- # restart the Android framework while keeping the kernel running.
7
- #
8
- # Usage:
9
- # ./scripts/fast_reset.sh
10
-
11
- set -e
12
-
13
- SCRIPT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
14
- PROJECT_DIR="$(dirname "$SCRIPT_DIR")"
15
-
16
- cd "$PROJECT_DIR"
17
-
18
- # Colors for output
19
- RED='\033[0;31m'
20
- GREEN='\033[0;32m'
21
- YELLOW='\033[1;33m'
22
- BLUE='\033[0;34m'
23
- NC='\033[0m' # No Color
24
-
25
- log_info() {
26
- echo -e "${BLUE}[INFO]${NC} $1"
27
- }
28
-
29
- log_success() {
30
- echo -e "${GREEN}[OK]${NC} $1"
31
- }
32
-
33
- log_warn() {
34
- echo -e "${YELLOW}[WARN]${NC} $1"
35
- }
36
-
37
- log_error() {
38
- echo -e "${RED}[ERROR]${NC} $1"
39
- }
40
-
41
- # Respect COMPOSE_FILE from environment, or auto-detect
42
- if [ -z "$COMPOSE_FILE" ]; then
43
- if [ -f "$PROJECT_DIR/docker-compose.sdk.yml" ]; then
44
- COMPOSE_FILE="$PROJECT_DIR/docker-compose.sdk.yml"
45
- else
46
- COMPOSE_FILE="$PROJECT_DIR/docker-compose.prod.yml"
47
- fi
48
- fi
49
-
50
- # Detect container
51
- CONTAINER=$(docker compose -f "$COMPOSE_FILE" ps -q android-service)
52
- if [ -z "$CONTAINER" ]; then
53
- log_error "android-service container not running."
54
- exit 1
55
- fi
56
-
57
- ADB_CMD="docker exec $CONTAINER adb"
58
-
59
- log_info "Initiating fast reset (userspace reboot)..."
60
-
61
- # 1. Trigger userspace reboot
62
- # This command returns immediately and the device goes offline
63
- $ADB_CMD shell reboot userspace || true
64
-
65
- # 2. Wait for device to come back online
66
- log_info "Waiting for device to come online..."
67
- start_time=$(date +%s)
68
- timeout=30
69
-
70
- while true; do
71
- current_time=$(date +%s)
72
- elapsed=$((current_time - start_time))
73
-
74
- if [ $elapsed -gt $timeout ]; then
75
- log_error "Timeout waiting for device after ${timeout}s"
76
- exit 1
77
- fi
78
-
79
- # Check if device is visible to ADB and state is 'device'
80
- if $ADB_CMD get-state 2>/dev/null | grep -q "device"; then
81
- # Verify shell is responsive
82
- if $ADB_CMD shell echo ok 2>/dev/null | grep -q "ok"; then
83
- break
84
- fi
85
- fi
86
-
87
- sleep 1
88
- done
89
-
90
- log_success "Device online (${elapsed}s)"
91
-
92
- # 3. Wait for CDP (Chrome DevTools Protocol)
93
- log_info "Waiting for browser CDP..."
94
- cdp_timeout=30
95
- cdp_start_time=$(date +%s)
96
-
97
- while true; do
98
- current_time=$(date +%s)
99
- elapsed=$((current_time - cdp_start_time))
100
-
101
- if [ $elapsed -gt $cdp_timeout ]; then
102
- log_warn "Timeout waiting for CDP. Browser might not have autostarted."
103
- log_info "Attempting to start browser manually..."
104
- $ADB_CMD shell am start -n com.wootzapp.web/com.aspect.chromium.ChromiumMain -a android.intent.action.VIEW -d 'about:blank'
105
- sleep 2
106
- fi
107
-
108
- # Check CDP version endpoint
109
- if docker exec $CONTAINER curl -s --connect-timeout 2 http://localhost:9224/json/version >/dev/null; then
110
- break
111
- fi
112
-
113
- sleep 1
114
- done
115
-
116
- log_success "Browser CDP ready"
117
- log_success "Fast reset complete!"
package/scripts/postinstall.js DELETED
@@ -1,235 +0,0 @@
1
- #!/usr/bin/env node
2
-
3
- /**
4
- * Postinstall script for agent-browser
5
- *
6
- * Downloads the platform-specific native binary if not present.
7
- * On global installs, patches npm's bin entry to use the native binary directly:
8
- * - Windows: Overwrites .cmd/.ps1 shims
9
- * - Mac/Linux: Replaces symlink to point to native binary
10
- */
11
-
12
- import { existsSync, mkdirSync, chmodSync, createWriteStream, unlinkSync, writeFileSync, symlinkSync, lstatSync } from 'fs';
13
- import { dirname, join } from 'path';
14
- import { fileURLToPath } from 'url';
15
- import { platform, arch } from 'os';
16
- import { get } from 'https';
17
- import { execSync } from 'child_process';
18
-
19
- const __dirname = dirname(fileURLToPath(import.meta.url));
20
- const projectRoot = join(__dirname, '..');
21
- const binDir = join(projectRoot, 'bin');
22
-
23
- // Platform detection
24
- const platformKey = `${platform()}-${arch()}`;
25
- const ext = platform() === 'win32' ? '.exe' : '';
26
- const binaryName = `agent-browser-${platformKey}${ext}`;
27
- const binaryPath = join(binDir, binaryName);
28
-
29
- // Package info
30
- const packageJson = JSON.parse(
31
- (await import('fs')).readFileSync(join(projectRoot, 'package.json'), 'utf8')
32
- );
33
- const version = packageJson.version;
34
-
35
- // GitHub release URL
36
- const GITHUB_REPO = 'vercel-labs/agent-browser';
37
- const DOWNLOAD_URL = `https://github.com/${GITHUB_REPO}/releases/download/v${version}/${binaryName}`;
38
-
39
- async function downloadFile(url, dest) {
40
- return new Promise((resolve, reject) => {
41
- const file = createWriteStream(dest);
42
-
43
- const request = (url) => {
44
- get(url, (response) => {
45
- // Handle redirects
46
- if (response.statusCode === 301 || response.statusCode === 302) {
47
- request(response.headers.location);
48
- return;
49
- }
50
-
51
- if (response.statusCode !== 200) {
52
- reject(new Error(`Failed to download: HTTP ${response.statusCode}`));
53
- return;
54
- }
55
-
56
- response.pipe(file);
57
- file.on('finish', () => {
58
- file.close();
59
- resolve();
60
- });
61
- }).on('error', (err) => {
62
- unlinkSync(dest);
63
- reject(err);
64
- });
65
- };
66
-
67
- request(url);
68
- });
69
- }
70
-
71
- async function main() {
72
- // Check if binary already exists
73
- if (existsSync(binaryPath)) {
74
- // Ensure binary is executable (npm doesn't preserve execute bit)
75
- if (platform() !== 'win32') {
76
- chmodSync(binaryPath, 0o755);
77
- }
78
- console.log(`✓ Native binary ready: ${binaryName}`);
79
-
80
- // On global installs, fix npm's bin entry to use native binary directly
81
- await fixGlobalInstallBin();
82
-
83
- showPlaywrightReminder();
84
- return;
85
- }
86
-
87
- // Ensure bin directory exists
88
- if (!existsSync(binDir)) {
89
- mkdirSync(binDir, { recursive: true });
90
- }
91
-
92
- console.log(`Downloading native binary for ${platformKey}...`);
93
- console.log(`URL: ${DOWNLOAD_URL}`);
94
-
95
- try {
96
- await downloadFile(DOWNLOAD_URL, binaryPath);
97
-
98
- // Make executable on Unix
99
- if (platform() !== 'win32') {
100
- chmodSync(binaryPath, 0o755);
101
- }
102
-
103
- console.log(`✓ Downloaded native binary: ${binaryName}`);
104
- } catch (err) {
105
- console.log(`⚠ Could not download native binary: ${err.message}`);
106
- console.log(` The CLI will use Node.js fallback (slightly slower startup)`);
107
- console.log('');
108
- console.log('To build the native binary locally:');
109
- console.log(' 1. Install Rust: https://rustup.rs');
110
- console.log(' 2. Run: npm run build:native');
111
- }
112
-
113
- // On global installs, fix npm's bin entry to use native binary directly
114
- // This avoids the /bin/sh error on Windows and provides zero-overhead execution
115
- await fixGlobalInstallBin();
116
-
117
- showPlaywrightReminder();
118
- }
119
-
120
- function showPlaywrightReminder() {
121
- console.log('');
122
- console.log('╔═══════════════════════════════════════════════════════════════════════════╗');
123
- console.log('║ To download browser binaries, run: ║');
124
- console.log('║ ║');
125
- console.log('║ npx playwright install chromium ║');
126
- console.log('║ ║');
127
- console.log('║ On Linux, include system dependencies with: ║');
128
- console.log('║ ║');
129
- console.log('║ npx playwright install --with-deps chromium ║');
130
- console.log('║ ║');
131
- console.log('╚═══════════════════════════════════════════════════════════════════════════╝');
132
- }
133
-
134
- /**
135
- * Fix npm's bin entry on global installs to use the native binary directly.
136
- * This provides zero-overhead CLI execution for global installs.
137
- */
138
- async function fixGlobalInstallBin() {
139
- if (platform() === 'win32') {
140
- await fixWindowsShims();
141
- } else {
142
- await fixUnixSymlink();
143
- }
144
- }
145
-
146
- /**
147
- * Fix npm symlink on Mac/Linux global installs.
148
- * Replace the symlink to the JS wrapper with a symlink to the native binary.
149
- */
150
- async function fixUnixSymlink() {
151
- // Get npm's global bin directory (npm prefix -g + /bin)
152
- let npmBinDir;
153
- try {
154
- const prefix = execSync('npm prefix -g', { encoding: 'utf8' }).trim();
155
- npmBinDir = join(prefix, 'bin');
156
- } catch {
157
- return; // npm not available
158
- }
159
-
160
- const symlinkPath = join(npmBinDir, 'agent-browser');
161
-
162
- // Check if symlink exists (indicates global install)
163
- try {
164
- const stat = lstatSync(symlinkPath);
165
- if (!stat.isSymbolicLink()) {
166
- return; // Not a symlink, don't touch it
167
- }
168
- } catch {
169
- return; // Symlink doesn't exist, not a global install
170
- }
171
-
172
- // Replace symlink to point directly to native binary
173
- try {
174
- unlinkSync(symlinkPath);
175
- symlinkSync(binaryPath, symlinkPath);
176
- console.log('✓ Optimized: symlink points to native binary (zero overhead)');
177
- } catch (err) {
178
- // Permission error or other issue - not critical, JS wrapper still works
179
- console.log(`⚠ Could not optimize symlink: ${err.message}`);
180
- console.log(' CLI will work via Node.js wrapper (slightly slower startup)');
181
- }
182
- }
183
-
184
- /**
185
- * Fix npm-generated shims on Windows global installs.
186
- * npm generates shims that try to run /bin/sh, which doesn't exist on Windows.
187
- * We overwrite them to invoke the native .exe directly.
188
- */
189
- async function fixWindowsShims() {
190
- // Check if this is a global install by looking for npm's global prefix
191
- let npmBinDir;
192
- try {
193
- npmBinDir = execSync('npm prefix -g', { encoding: 'utf8' }).trim();
194
- } catch {
195
- return; // Not a global install or npm not available
196
- }
197
-
198
- // The shims are in the npm prefix directory (not prefix/bin on Windows)
199
- const cmdShim = join(npmBinDir, 'agent-browser.cmd');
200
- const ps1Shim = join(npmBinDir, 'agent-browser.ps1');
201
-
202
- // Only fix if shims exist (indicates global install)
203
- if (!existsSync(cmdShim)) {
204
- return;
205
- }
206
-
207
- // Path to native binary relative to npm prefix
208
- const relativeBinaryPath = 'node_modules\\agent-browser\\bin\\agent-browser-win32-x64.exe';
209
-
210
- try {
211
- // Overwrite .cmd shim
212
- const cmdContent = `@ECHO off\r\n"%~dp0${relativeBinaryPath}" %*\r\n`;
213
- writeFileSync(cmdShim, cmdContent);
214
-
215
- // Overwrite .ps1 shim
216
- const ps1Content = `#!/usr/bin/env pwsh
217
- $basedir = Split-Path $MyInvocation.MyCommand.Definition -Parent
218
- $exe = ""
219
- if ($PSVersionTable.PSVersion -lt "6.0" -or $IsWindows) {
220
- $exe = ".exe"
221
- }
222
- & "$basedir/${relativeBinaryPath.replace(/\\/g, '/')}" $args
223
- exit $LASTEXITCODE
224
- `;
225
- writeFileSync(ps1Shim, ps1Content);
226
-
227
- console.log('✓ Optimized: shims point to native binary (zero overhead)');
228
- } catch (err) {
229
- // Permission error or other issue - not critical, JS wrapper still works
230
- console.log(`⚠ Could not optimize shims: ${err.message}`);
231
- console.log(' CLI will work via Node.js wrapper (slightly slower startup)');
232
- }
233
- }
234
-
235
- main().catch(console.error);