agent-browser 0.0.0 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. package/.prettierrc +7 -0
  2. package/README.md +271 -1
  3. package/bin/agent-browser +2 -0
  4. package/dist/actions.d.ts +7 -0
  5. package/dist/actions.d.ts.map +1 -0
  6. package/dist/actions.js +1138 -0
  7. package/dist/actions.js.map +1 -0
  8. package/dist/browser.d.ts +232 -0
  9. package/dist/browser.d.ts.map +1 -0
  10. package/dist/browser.js +477 -0
  11. package/dist/browser.js.map +1 -0
  12. package/dist/browser.test.d.ts +2 -0
  13. package/dist/browser.test.d.ts.map +1 -0
  14. package/dist/browser.test.js +136 -0
  15. package/dist/browser.test.js.map +1 -0
  16. package/dist/client.d.ts +17 -0
  17. package/dist/client.d.ts.map +1 -0
  18. package/dist/client.js +133 -0
  19. package/dist/client.js.map +1 -0
  20. package/dist/daemon.d.ts +29 -0
  21. package/dist/daemon.d.ts.map +1 -0
  22. package/dist/daemon.js +165 -0
  23. package/dist/daemon.js.map +1 -0
  24. package/dist/index.d.ts +3 -0
  25. package/dist/index.d.ts.map +1 -0
  26. package/dist/index.js +972 -0
  27. package/dist/index.js.map +1 -0
  28. package/dist/protocol.d.ts +26 -0
  29. package/dist/protocol.d.ts.map +1 -0
  30. package/dist/protocol.js +717 -0
  31. package/dist/protocol.js.map +1 -0
  32. package/dist/protocol.test.d.ts +2 -0
  33. package/dist/protocol.test.d.ts.map +1 -0
  34. package/dist/protocol.test.js +176 -0
  35. package/dist/protocol.test.js.map +1 -0
  36. package/dist/types.d.ts +604 -0
  37. package/dist/types.d.ts.map +1 -0
  38. package/dist/types.js +2 -0
  39. package/dist/types.js.map +1 -0
  40. package/package.json +36 -7
  41. package/src/actions.ts +1658 -0
  42. package/src/browser.test.ts +157 -0
  43. package/src/browser.ts +586 -0
  44. package/src/client.ts +150 -0
  45. package/src/daemon.ts +187 -0
  46. package/src/index.ts +984 -0
  47. package/src/protocol.test.ts +216 -0
  48. package/src/protocol.ts +848 -0
  49. package/src/types.ts +913 -0
  50. package/tsconfig.json +19 -0
  51. package/vitest.config.ts +9 -0
  52. package/index.js +0 -2
package/.prettierrc ADDED
@@ -0,0 +1,7 @@
1
+ {
2
+ "semi": true,
3
+ "singleQuote": true,
4
+ "trailingComma": "es5",
5
+ "printWidth": 100,
6
+ "tabWidth": 2
7
+ }
package/README.md CHANGED
@@ -1,3 +1,273 @@
1
1
  # agent-browser
2
2
 
3
- Coming soon.
3
+ Headless browser automation CLI for AI agents.
4
+
5
+ ## Installation
6
+
7
+ ```bash
8
+ pnpm install
9
+ npx playwright install chromium
10
+ pnpm build
11
+ ```
12
+
13
+ ## Quick Start
14
+
15
+ ```bash
16
+ agent-browser open example.com
17
+ agent-browser click "#submit"
18
+ agent-browser fill "#email" "test@example.com"
19
+ agent-browser get text "h1"
20
+ agent-browser screenshot page.png
21
+ agent-browser close
22
+ ```
23
+
24
+ ## Commands
25
+
26
+ ### Core Commands
27
+
28
+ ```bash
29
+ agent-browser open <url> # Navigate to URL
30
+ agent-browser click <sel> # Click element
31
+ agent-browser type <sel> <text> # Type into element
32
+ agent-browser fill <sel> <text> # Clear and fill
33
+ agent-browser press <key> # Press key (Enter, Tab, Control+a)
34
+ agent-browser keydown <key> # Hold key down
35
+ agent-browser keyup <key> # Release key
36
+ agent-browser insert <text> # Insert text (no key events)
37
+ agent-browser hover <sel> # Hover element
38
+ agent-browser select <sel> <val> # Select dropdown option
39
+ agent-browser multiselect <sel> <v1> <v2> # Multi-select
40
+ agent-browser check <sel> # Check checkbox
41
+ agent-browser uncheck <sel> # Uncheck checkbox
42
+ agent-browser scroll <dir> [px] # Scroll (up/down/left/right)
43
+ agent-browser scrollinto <sel> # Scroll element into view
44
+ agent-browser drag <src> <tgt> # Drag and drop
45
+ agent-browser upload <sel> <files> # Upload files
46
+ agent-browser download [path] # Wait for download
47
+ agent-browser screenshot [path] # Take screenshot (--full for full page)
48
+ agent-browser pdf <path> # Save as PDF
49
+ agent-browser snapshot # Accessibility tree (best for AI)
50
+ agent-browser eval <js> # Run JavaScript
51
+ agent-browser close # Close browser
52
+ ```
53
+
54
+ ### Get Info
55
+
56
+ ```bash
57
+ agent-browser get text <sel> # Get text content
58
+ agent-browser get html <sel> # Get innerHTML
59
+ agent-browser get value <sel> # Get input value
60
+ agent-browser get attr <sel> <attr> # Get attribute
61
+ agent-browser get title # Get page title
62
+ agent-browser get url # Get current URL
63
+ agent-browser get count <sel> # Count matching elements
64
+ agent-browser get box <sel> # Get bounding box
65
+ ```
66
+
67
+ ### Check State
68
+
69
+ ```bash
70
+ agent-browser is visible <sel> # Check if visible
71
+ agent-browser is enabled <sel> # Check if enabled
72
+ agent-browser is checked <sel> # Check if checked
73
+ ```
74
+
75
+ ### Find Elements (Semantic Locators)
76
+
77
+ ```bash
78
+ agent-browser find role <role> <action> [value] # By ARIA role
79
+ agent-browser find text <text> <action> # By text content
80
+ agent-browser find label <label> <action> [value] # By label
81
+ agent-browser find placeholder <ph> <action> [value] # By placeholder
82
+ agent-browser find alt <text> <action> # By alt text
83
+ agent-browser find title <text> <action> # By title attr
84
+ agent-browser find testid <id> <action> [value] # By data-testid
85
+ agent-browser find first <sel> <action> [value] # First match
86
+ agent-browser find last <sel> <action> [value] # Last match
87
+ agent-browser find nth <n> <sel> <action> [value] # Nth match
88
+ ```
89
+
90
+ **Actions:** `click`, `fill`, `check`, `hover`, `text`
91
+
92
+ **Examples:**
93
+ ```bash
94
+ agent-browser find role button click --name "Submit"
95
+ agent-browser find text "Sign In" click
96
+ agent-browser find label "Email" fill "test@test.com"
97
+ agent-browser find first ".item" click
98
+ agent-browser find nth 2 "a" text
99
+ ```
100
+
101
+ ### Wait
102
+
103
+ ```bash
104
+ agent-browser wait <selector> # Wait for element
105
+ agent-browser wait <ms> # Wait for a fixed time in milliseconds
106
+ agent-browser wait --text "Welcome" # Wait for text
107
+ agent-browser wait --url "**/dash" # Wait for URL pattern
108
+ agent-browser wait --load networkidle # Wait for load state
109
+ agent-browser wait --fn "window.ready === true" # Wait for JS condition
110
+ ```
111
+
112
+ **Load states:** `load`, `domcontentloaded`, `networkidle`
113
+
114
+ ### Mouse Control
115
+
116
+ ```bash
117
+ agent-browser mouse move <x> <y> # Move mouse
118
+ agent-browser mouse down [button] # Press button (left/right/middle)
119
+ agent-browser mouse up [button] # Release button
120
+ agent-browser mouse wheel <dy> [dx] # Scroll wheel
121
+ ```
122
+
123
+ ### Browser Settings
124
+
125
+ ```bash
126
+ agent-browser set viewport <w> <h> # Set viewport size
127
+ agent-browser set device <name> # Emulate device ("iPhone 14")
128
+ agent-browser set geo <lat> <lng> # Set geolocation
129
+ agent-browser set offline [on|off] # Toggle offline mode
130
+ agent-browser set headers <json> # Extra HTTP headers
131
+ agent-browser set credentials <u> <p> # HTTP basic auth
132
+ agent-browser set media [dark|light|print] # Emulate media
133
+ ```
134
+
135
+ ### Cookies & Storage
136
+
137
+ ```bash
138
+ agent-browser cookies # Get all cookies
139
+ agent-browser cookies set <json> # Set cookies
140
+ agent-browser cookies clear # Clear cookies
141
+
142
+ agent-browser storage local # Get all localStorage
143
+ agent-browser storage local <key> # Get specific key
144
+ agent-browser storage local set <k> <v> # Set value
145
+ agent-browser storage local clear # Clear all
146
+
147
+ agent-browser storage session # Same for sessionStorage
148
+ ```
149
+
150
+ ### Network
151
+
152
+ ```bash
153
+ agent-browser network route <url> # Intercept requests
154
+ agent-browser network route <url> --abort # Block requests
155
+ agent-browser network route <url> --body <json> # Mock response
156
+ agent-browser network unroute [url] # Remove routes
157
+ agent-browser network requests # View tracked requests
158
+ agent-browser network requests --filter api # Filter requests
159
+ agent-browser response <url> # Get response body (waits for matching request)
160
+ ```
161
+
162
+ ### Tabs & Windows
163
+
164
+ ```bash
165
+ agent-browser tab # List tabs
166
+ agent-browser tab new # New tab
167
+ agent-browser tab <n> # Switch to tab n
168
+ agent-browser tab close [n] # Close tab
169
+ agent-browser window new # New window
170
+ ```
171
+
172
+ ### Frames
173
+
174
+ ```bash
175
+ agent-browser frame <sel> # Switch to iframe
176
+ agent-browser frame main # Back to main frame
177
+ ```
178
+
179
+ ### Dialogs
180
+
181
+ ```bash
182
+ agent-browser dialog accept [text] # Accept (with optional prompt text)
183
+ agent-browser dialog dismiss # Dismiss
184
+ ```
185
+
186
+ ### Debug
187
+
188
+ ```bash
189
+ agent-browser trace start # Start recording trace
190
+ agent-browser trace stop <path> # Stop and save trace
191
+ agent-browser console # View console messages
192
+ agent-browser console --clear # Clear console
193
+ agent-browser errors # View page errors
194
+ agent-browser highlight <sel> # Highlight element
195
+ agent-browser state save <path> # Save auth state
196
+ agent-browser state load <path> # Load auth state
197
+ agent-browser initscript <js> # Run JS on every page load
198
+ ```
199
+
200
+ ### Navigation
201
+
202
+ ```bash
203
+ agent-browser back # Go back
204
+ agent-browser forward # Go forward
205
+ agent-browser reload # Reload page
206
+ ```
207
+
208
+ ### Sessions
209
+
210
+ ```bash
211
+ agent-browser session # Show current session
212
+ agent-browser session list # List active sessions
213
+ ```
214
+
215
+ ## Options
216
+
217
+ | Option | Description |
218
+ |--------|-------------|
219
+ | `--session <name>` | Use an isolated session (or set the `AGENT_BROWSER_SESSION` environment variable) |
220
+ | `--json` | JSON output (for agents) |
221
+ | `--full, -f` | Full page screenshot |
222
+ | `--name, -n` | Locator name filter |
223
+ | `--exact` | Exact text match |
224
+ | `--debug` | Debug output |
225
+
226
+ ## Sessions
227
+
228
+ Run multiple isolated browser instances:
229
+
230
+ ```bash
231
+ # Different sessions
232
+ agent-browser --session agent1 open site-a.com
233
+ agent-browser --session agent2 open site-b.com
234
+
235
+ # Or via environment
236
+ AGENT_BROWSER_SESSION=agent1 agent-browser click "#btn"
237
+
238
+ # List all
239
+ agent-browser session list
240
+ ```
241
+
242
+ ## Selectors
243
+
244
+ ```bash
245
+ # CSS
246
+ agent-browser click "#id"
247
+ agent-browser click ".class"
248
+ agent-browser click "div > button"
249
+
250
+ # Text
251
+ agent-browser click "text=Submit"
252
+
253
+ # XPath
254
+ agent-browser click "xpath=//button"
255
+
256
+ # Semantic (recommended)
257
+ agent-browser find role button click --name "Submit"
258
+ agent-browser find label "Email" fill "test@test.com"
259
+ ```
260
+
261
+ ## Agent Mode
262
+
263
+ Use `--json` for machine-readable output:
264
+
265
+ ```bash
266
+ agent-browser snapshot --json
267
+ agent-browser get text "h1" --json
268
+ agent-browser is visible ".modal" --json
269
+ ```
270
+
271
+ ## License
272
+
273
+ MIT
package/bin/agent-browser ADDED
@@ -0,0 +1,2 @@
1
+ #!/usr/bin/env node
2
+ import '../dist/index.js';
package/dist/actions.d.ts ADDED
@@ -0,0 +1,7 @@
1
+ import type { BrowserManager } from './browser.js';
2
+ import type { Command, Response } from './types.js';
3
+ /**
4
+ * Execute a command and return a response
5
+ */
6
+ export declare function executeCommand(command: Command, browser: BrowserManager): Promise<Response>;
7
+ //# sourceMappingURL=actions.d.ts.map
package/dist/actions.d.ts.map ADDED
@@ -0,0 +1 @@
1
+ {"version":3,"file":"actions.d.ts","sourceRoot":"","sources":["../src/actions.ts"],"names":[],"mappings":"AACA,OAAO,KAAK,EAAE,cAAc,EAAE,MAAM,cAAc,CAAC;AACnD,OAAO,KAAK,EACV,OAAO,EACP,QAAQ,EAoGT,MAAM,YAAY,CAAC;AAQpB;;GAEG;AACH,wBAAsB,cAAc,CAAC,OAAO,EAAE,OAAO,EAAE,OAAO,EAAE,cAAc,GAAG,OAAO,CAAC,QAAQ,CAAC,CAiPjG"}