nstbrowser-ai-agent 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (119) hide show
  1. package/LICENSE +201 -0
  2. package/README.md +1321 -0
  3. package/bin/nstbrowser-ai-agent-darwin-arm64 +0 -0
  4. package/bin/nstbrowser-ai-agent-darwin-x64 +0 -0
  5. package/bin/nstbrowser-ai-agent-linux-arm64 +0 -0
  6. package/bin/nstbrowser-ai-agent-linux-x64 +0 -0
  7. package/bin/nstbrowser-ai-agent-win32-x64.exe +0 -0
  8. package/bin/nstbrowser-ai-agent.js +109 -0
  9. package/dist/action-policy.d.ts +14 -0
  10. package/dist/action-policy.d.ts.map +1 -0
  11. package/dist/action-policy.js +253 -0
  12. package/dist/action-policy.js.map +1 -0
  13. package/dist/actions.d.ts +18 -0
  14. package/dist/actions.d.ts.map +1 -0
  15. package/dist/actions.js +2037 -0
  16. package/dist/actions.js.map +1 -0
  17. package/dist/auth-cli.d.ts +2 -0
  18. package/dist/auth-cli.d.ts.map +1 -0
  19. package/dist/auth-cli.js +97 -0
  20. package/dist/auth-cli.js.map +1 -0
  21. package/dist/auth-vault.d.ts +36 -0
  22. package/dist/auth-vault.d.ts.map +1 -0
  23. package/dist/auth-vault.js +125 -0
  24. package/dist/auth-vault.js.map +1 -0
  25. package/dist/browser.d.ts +573 -0
  26. package/dist/browser.d.ts.map +1 -0
  27. package/dist/browser.js +2036 -0
  28. package/dist/browser.js.map +1 -0
  29. package/dist/confirmation.d.ts +8 -0
  30. package/dist/confirmation.d.ts.map +1 -0
  31. package/dist/confirmation.js +30 -0
  32. package/dist/confirmation.js.map +1 -0
  33. package/dist/daemon.d.ts +65 -0
  34. package/dist/daemon.d.ts.map +1 -0
  35. package/dist/daemon.js +589 -0
  36. package/dist/daemon.js.map +1 -0
  37. package/dist/diff.d.ts +18 -0
  38. package/dist/diff.d.ts.map +1 -0
  39. package/dist/diff.js +271 -0
  40. package/dist/diff.js.map +1 -0
  41. package/dist/domain-filter.d.ts +28 -0
  42. package/dist/domain-filter.d.ts.map +1 -0
  43. package/dist/domain-filter.js +149 -0
  44. package/dist/domain-filter.js.map +1 -0
  45. package/dist/encryption.d.ts +73 -0
  46. package/dist/encryption.d.ts.map +1 -0
  47. package/dist/encryption.js +171 -0
  48. package/dist/encryption.js.map +1 -0
  49. package/dist/ios-actions.d.ts +11 -0
  50. package/dist/ios-actions.d.ts.map +1 -0
  51. package/dist/ios-actions.js +228 -0
  52. package/dist/ios-actions.js.map +1 -0
  53. package/dist/ios-manager.d.ts +266 -0
  54. package/dist/ios-manager.d.ts.map +1 -0
  55. package/dist/ios-manager.js +1073 -0
  56. package/dist/ios-manager.js.map +1 -0
  57. package/dist/nstbrowser-actions.d.ts +10 -0
  58. package/dist/nstbrowser-actions.d.ts.map +1 -0
  59. package/dist/nstbrowser-actions.js +277 -0
  60. package/dist/nstbrowser-actions.js.map +1 -0
  61. package/dist/nstbrowser-client.d.ts +197 -0
  62. package/dist/nstbrowser-client.d.ts.map +1 -0
  63. package/dist/nstbrowser-client.js +454 -0
  64. package/dist/nstbrowser-client.js.map +1 -0
  65. package/dist/nstbrowser-errors.d.ts +28 -0
  66. package/dist/nstbrowser-errors.d.ts.map +1 -0
  67. package/dist/nstbrowser-errors.js +59 -0
  68. package/dist/nstbrowser-errors.js.map +1 -0
  69. package/dist/nstbrowser-profile-resolver.d.ts +89 -0
  70. package/dist/nstbrowser-profile-resolver.d.ts.map +1 -0
  71. package/dist/nstbrowser-profile-resolver.js +227 -0
  72. package/dist/nstbrowser-profile-resolver.js.map +1 -0
  73. package/dist/nstbrowser-types.d.ts +151 -0
  74. package/dist/nstbrowser-types.d.ts.map +1 -0
  75. package/dist/nstbrowser-types.js +5 -0
  76. package/dist/nstbrowser-types.js.map +1 -0
  77. package/dist/nstbrowser-utils.d.ts +71 -0
  78. package/dist/nstbrowser-utils.d.ts.map +1 -0
  79. package/dist/nstbrowser-utils.js +174 -0
  80. package/dist/nstbrowser-utils.js.map +1 -0
  81. package/dist/protocol.d.ts +26 -0
  82. package/dist/protocol.d.ts.map +1 -0
  83. package/dist/protocol.js +1245 -0
  84. package/dist/protocol.js.map +1 -0
  85. package/dist/snapshot.d.ts +67 -0
  86. package/dist/snapshot.d.ts.map +1 -0
  87. package/dist/snapshot.js +514 -0
  88. package/dist/snapshot.js.map +1 -0
  89. package/dist/state-utils.d.ts +77 -0
  90. package/dist/state-utils.d.ts.map +1 -0
  91. package/dist/state-utils.js +178 -0
  92. package/dist/state-utils.js.map +1 -0
  93. package/dist/stream-server.d.ts +117 -0
  94. package/dist/stream-server.d.ts.map +1 -0
  95. package/dist/stream-server.js +309 -0
  96. package/dist/stream-server.js.map +1 -0
  97. package/dist/types.d.ts +1121 -0
  98. package/dist/types.d.ts.map +1 -0
  99. package/dist/types.js +2 -0
  100. package/dist/types.js.map +1 -0
  101. package/package.json +83 -0
  102. package/scripts/analyze-api-coverage.js +205 -0
  103. package/scripts/analyze-cli-coverage.js +239 -0
  104. package/scripts/build-all-platforms.sh +68 -0
  105. package/scripts/check-version-sync.js +39 -0
  106. package/scripts/copy-native.js +36 -0
  107. package/scripts/download-nstbrowser-docs.js +152 -0
  108. package/scripts/generate-skills.sh +218 -0
  109. package/scripts/postinstall.js +231 -0
  110. package/scripts/sync-version.js +69 -0
  111. package/skills/nstbrowser-ai-agent/SKILL.md +759 -0
  112. package/skills/nstbrowser-ai-agent/references/batch-operations.md +414 -0
  113. package/skills/nstbrowser-ai-agent/references/nst-api-reference.md +960 -0
  114. package/skills/nstbrowser-ai-agent/references/profile-management.md +672 -0
  115. package/skills/nstbrowser-ai-agent/references/proxy-configuration.md +460 -0
  116. package/skills/nstbrowser-ai-agent/references/troubleshooting.md +773 -0
  117. package/skills/nstbrowser-ai-agent/templates/automated-workflow.sh +248 -0
  118. package/skills/nstbrowser-ai-agent/templates/batch-proxy-update.sh +257 -0
  119. package/skills/nstbrowser-ai-agent/templates/profile-setup.sh +248 -0
package/README.md ADDED
@@ -0,0 +1,1321 @@
1
+ # nstbrowser-ai-agent
2
+
3
+ Headless browser automation CLI for AI agents. Fast Rust CLI with Node.js fallback.
4
+
5
+ ## Installation
6
+
7
+ ### npm (Recommended)
8
+
9
+ Install globally via npm to get the native Rust binary for maximum performance:
10
+
11
+ ```bash
12
+ npm install -g nstbrowser-ai-agent
13
+ ```
14
+
15
+ This installs the pre-compiled native binary for your platform (Linux, macOS, Windows).
16
+
17
+ ### Quick Start (no install)
18
+
19
+ Run directly with `npx` if you want to try it without installing globally:
20
+
21
+ ```bash
22
+ npx nstbrowser-ai-agent open example.com
23
+ ```
24
+
25
+ > **Note:** `npx` routes through Node.js before reaching the Rust CLI, so it is noticeably slower than a global install. For regular use, install globally.
26
+
27
+ ### Project Installation (local dependency)
28
+
29
+ For projects that want to pin the version in `package.json`:
30
+
31
+ ```bash
32
+ npm install nstbrowser-ai-agent
33
+ npx nstbrowser-ai-agent install
34
+ ```
35
+
36
+ Then use via `npx` or `package.json` scripts:
37
+
38
+ ```bash
39
+ npx nstbrowser-ai-agent open example.com
40
+ ```
41
+
42
+ ### Download from GitHub Releases
43
+
44
+ You can also download pre-built binaries directly from [GitHub Releases](https://github.com/nstbrowser/nstbrowser-ai-agent/releases):
45
+
46
+ 1. Download the binary for your platform:
47
+ - `nstbrowser-ai-agent-linux-x64` (Linux x64)
48
+ - `nstbrowser-ai-agent-linux-arm64` (Linux ARM64)
49
+ - `nstbrowser-ai-agent-darwin-x64` (macOS Intel)
50
+ - `nstbrowser-ai-agent-darwin-arm64` (macOS Apple Silicon)
51
+ - `nstbrowser-ai-agent-win32-x64.exe` (Windows x64)
52
+
53
+ 2. Make it executable (Linux/macOS):
54
+ ```bash
55
+ chmod +x nstbrowser-ai-agent-*
56
+ ```
57
+
58
+ 3. Move to a directory in your PATH:
59
+ ```bash
60
+ sudo mv nstbrowser-ai-agent-* /usr/local/bin/nstbrowser-ai-agent
61
+ ```
62
+
63
+ 4. Download Chromium:
64
+ ```bash
65
+ nstbrowser-ai-agent install
66
+ ```
67
+
68
+ ### From Source
69
+
70
+ ```bash
71
+ git clone https://github.com/nstbrowser/nstbrowser-ai-agent
72
+ cd nstbrowser-ai-agent
73
+ pnpm install
74
+ pnpm build
75
+ pnpm build:native # Requires Rust (https://rustup.rs)
76
+ pnpm link --global # Makes nstbrowser-ai-agent available globally
77
+ nstbrowser-ai-agent install
78
+ ```
79
+
80
+ ### Linux Dependencies
81
+
82
+ On Linux, install system dependencies:
83
+
84
+ ```bash
85
+ nstbrowser-ai-agent install --with-deps
86
+ # or manually: npx playwright install-deps chromium
87
+ ```
88
+
89
+ ## Default Provider
90
+
91
+ By default, nstbrowser-ai-agent uses **Nstbrowser** as the browser provider. This means you don't need to specify `-p nst` every time - it's automatic.
92
+
93
+ ### Using Nstbrowser (Default)
94
+
95
+ ```bash
96
+ # Set your API key (required for Nstbrowser)
97
+ export NST_API_KEY="your-api-key"
98
+
99
+ # Launch browser (uses Nstbrowser by default)
100
+ nstbrowser-ai-agent open example.com
101
+ nstbrowser-ai-agent snapshot # Get accessibility tree with refs
102
+ nstbrowser-ai-agent click @e2 # Click by ref from snapshot
103
+ nstbrowser-ai-agent fill @e3 "test@example.com" # Fill by ref
104
+ nstbrowser-ai-agent get text @e1 # Get text by ref
105
+ nstbrowser-ai-agent screenshot page.png
106
+ nstbrowser-ai-agent close
107
+
108
+ # Nstbrowser management (no 'nst' prefix needed with default provider)
109
+ nstbrowser-ai-agent profile list # List profiles
110
+ nstbrowser-ai-agent profile create my-profile # Create profile
111
+ nstbrowser-ai-agent browser list # List running browsers
112
+ nstbrowser-ai-agent browser start profile-id # Start browser with profile
113
+ ```
114
+
115
+ ### Using Local Browser Mode
116
+
117
+ If you want to use a local browser instead of Nstbrowser, use the `--local` flag:
118
+
119
+ ```bash
120
+ # Use local browser (no API key needed)
121
+ nstbrowser-ai-agent --local open example.com
122
+ nstbrowser-ai-agent --headed open example.com # Visual browser (also uses local)
123
+ ```
124
+
125
+ ### Provider Selection Logic
126
+
127
+ The provider is selected automatically based on the following priority (highest to lowest):
128
+
129
+ 1. Explicit `--provider` flag
130
+ 2. `--local` flag (uses local browser)
131
+ 3. `--headed` flag (implies local)
132
+ 4. `--cdp` flag (implies local)
133
+ 5. `--auto-connect` flag (implies local)
134
+ 6. `NST_API_KEY` environment variable (uses nst)
135
+ 7. **Default: nst (Nstbrowser)**
136
+
137
+ This means if you have `NST_API_KEY` set, Nstbrowser will be used automatically. To override this, use `--local` or any other flag that implies local mode.
138
+
139
+ ### Traditional Selectors (also supported)
140
+
141
+ ```bash
142
+ nstbrowser-ai-agent click "#submit"
143
+ nstbrowser-ai-agent fill "#email" "test@example.com"
144
+ nstbrowser-ai-agent find role button click --name "Submit"
145
+ ```
146
+
147
+ ## Commands
148
+
149
+ ### Core Commands
150
+
151
+ ```bash
152
+ nstbrowser-ai-agent open <url> # Navigate to URL (aliases: goto, navigate)
153
+ nstbrowser-ai-agent click <sel> # Click element (--new-tab to open in new tab)
154
+ nstbrowser-ai-agent dblclick <sel> # Double-click element
155
+ nstbrowser-ai-agent focus <sel> # Focus element
156
+ nstbrowser-ai-agent type <sel> <text> # Type into element
157
+ nstbrowser-ai-agent fill <sel> <text> # Clear and fill
158
+ nstbrowser-ai-agent press <key> # Press key (Enter, Tab, Control+a) (alias: key)
159
+ nstbrowser-ai-agent keyboard type <text> # Type with real keystrokes (no selector, current focus)
160
+ nstbrowser-ai-agent keyboard inserttext <text> # Insert text without key events (no selector)
161
+ nstbrowser-ai-agent keydown <key> # Hold key down
162
+ nstbrowser-ai-agent keyup <key> # Release key
163
+ nstbrowser-ai-agent hover <sel> # Hover element
164
+ nstbrowser-ai-agent select <sel> <val> # Select dropdown option
165
+ nstbrowser-ai-agent check <sel> # Check checkbox
166
+ nstbrowser-ai-agent uncheck <sel> # Uncheck checkbox
167
+ nstbrowser-ai-agent scroll <dir> [px] # Scroll (up/down/left/right, --selector <sel>)
168
+ nstbrowser-ai-agent scrollintoview <sel> # Scroll element into view (alias: scrollinto)
169
+ nstbrowser-ai-agent drag <src> <tgt> # Drag and drop
170
+ nstbrowser-ai-agent upload <sel> <files> # Upload files
171
+ nstbrowser-ai-agent screenshot [path] # Take screenshot (--full for full page, saves to a temporary directory if no path)
172
+ nstbrowser-ai-agent screenshot --annotate # Annotated screenshot with numbered element labels
173
+ nstbrowser-ai-agent pdf <path> # Save as PDF
174
+ nstbrowser-ai-agent snapshot # Accessibility tree with refs (best for AI)
175
+ nstbrowser-ai-agent eval <js> # Run JavaScript (-b for base64, --stdin for piped input)
176
+ nstbrowser-ai-agent connect <port> # Connect to browser via CDP
177
+ nstbrowser-ai-agent close # Close browser (aliases: quit, exit)
178
+ ```
179
+
180
+ ### Get Info
181
+
182
+ ```bash
183
+ nstbrowser-ai-agent get text <sel> # Get text content
184
+ nstbrowser-ai-agent get html <sel> # Get innerHTML
185
+ nstbrowser-ai-agent get value <sel> # Get input value
186
+ nstbrowser-ai-agent get attr <sel> <attr> # Get attribute
187
+ nstbrowser-ai-agent get title # Get page title
188
+ nstbrowser-ai-agent get url # Get current URL
189
+ nstbrowser-ai-agent get count <sel> # Count matching elements
190
+ nstbrowser-ai-agent get box <sel> # Get bounding box
191
+ nstbrowser-ai-agent get styles <sel> # Get computed styles
192
+ ```
193
+
194
+ ### Check State
195
+
196
+ ```bash
197
+ nstbrowser-ai-agent is visible <sel> # Check if visible
198
+ nstbrowser-ai-agent is enabled <sel> # Check if enabled
199
+ nstbrowser-ai-agent is checked <sel> # Check if checked
200
+ ```
201
+
202
+ ### Find Elements (Semantic Locators)
203
+
204
+ ```bash
205
+ nstbrowser-ai-agent find role <role> <action> [value] # By ARIA role
206
+ nstbrowser-ai-agent find text <text> <action> # By text content
207
+ nstbrowser-ai-agent find label <label> <action> [value] # By label
208
+ nstbrowser-ai-agent find placeholder <ph> <action> [value] # By placeholder
209
+ nstbrowser-ai-agent find alt <text> <action> # By alt text
210
+ nstbrowser-ai-agent find title <text> <action> # By title attr
211
+ nstbrowser-ai-agent find testid <id> <action> [value] # By data-testid
212
+ nstbrowser-ai-agent find first <sel> <action> [value] # First match
213
+ nstbrowser-ai-agent find last <sel> <action> [value] # Last match
214
+ nstbrowser-ai-agent find nth <n> <sel> <action> [value] # Nth match
215
+ ```
216
+
217
+ **Actions:** `click`, `fill`, `type`, `hover`, `focus`, `check`, `uncheck`, `text`
218
+
219
+ **Options:** `--name <name>` (filter role by accessible name), `--exact` (require exact text match)
220
+
221
+ **Examples:**
222
+ ```bash
223
+ nstbrowser-ai-agent find role button click --name "Submit"
224
+ nstbrowser-ai-agent find text "Sign In" click
225
+ nstbrowser-ai-agent find label "Email" fill "test@test.com"
226
+ nstbrowser-ai-agent find first ".item" click
227
+ nstbrowser-ai-agent find nth 2 "a" text
228
+ ```
229
+
230
+ ### Wait
231
+
232
+ ```bash
233
+ nstbrowser-ai-agent wait <selector> # Wait for element to be visible
234
+ nstbrowser-ai-agent wait <ms> # Wait for time (milliseconds)
235
+ nstbrowser-ai-agent wait --text "Welcome" # Wait for text to appear
236
+ nstbrowser-ai-agent wait --url "**/dash" # Wait for URL pattern
237
+ nstbrowser-ai-agent wait --load networkidle # Wait for load state
238
+ nstbrowser-ai-agent wait --fn "window.ready === true" # Wait for JS condition
239
+ ```
240
+
241
+ **Load states:** `load`, `domcontentloaded`, `networkidle`
242
+
243
+ ### Mouse Control
244
+
245
+ ```bash
246
+ nstbrowser-ai-agent mouse move <x> <y> # Move mouse
247
+ nstbrowser-ai-agent mouse down [button] # Press button (left/right/middle)
248
+ nstbrowser-ai-agent mouse up [button] # Release button
249
+ nstbrowser-ai-agent mouse wheel <dy> [dx] # Scroll wheel
250
+ ```
251
+
252
+ ### Browser Settings
253
+
254
+ ```bash
255
+ nstbrowser-ai-agent set viewport <w> <h> # Set viewport size
256
+ nstbrowser-ai-agent set device <name> # Emulate device ("iPhone 14")
257
+ nstbrowser-ai-agent set geo <lat> <lng> # Set geolocation
258
+ nstbrowser-ai-agent set offline [on|off] # Toggle offline mode
259
+ nstbrowser-ai-agent set headers <json> # Extra HTTP headers
260
+ nstbrowser-ai-agent set credentials <u> <p> # HTTP basic auth
261
+ nstbrowser-ai-agent set media [dark|light] # Emulate color scheme
262
+ ```
263
+
264
+ ### Cookies & Storage
265
+
266
+ ```bash
267
+ nstbrowser-ai-agent cookies # Get all cookies
268
+ nstbrowser-ai-agent cookies set <name> <val> # Set cookie
269
+ nstbrowser-ai-agent cookies clear # Clear cookies
270
+
271
+ nstbrowser-ai-agent storage local # Get all localStorage
272
+ nstbrowser-ai-agent storage local <key> # Get specific key
273
+ nstbrowser-ai-agent storage local set <k> <v> # Set value
274
+ nstbrowser-ai-agent storage local clear # Clear all
275
+
276
+ nstbrowser-ai-agent storage session # Same for sessionStorage
277
+ ```
278
+
279
+ ### Network
280
+
281
+ ```bash
282
+ nstbrowser-ai-agent network route <url> # Intercept requests
283
+ nstbrowser-ai-agent network route <url> --abort # Block requests
284
+ nstbrowser-ai-agent network route <url> --body <json> # Mock response
285
+ nstbrowser-ai-agent network unroute [url] # Remove routes
286
+ nstbrowser-ai-agent network requests # View tracked requests
287
+ nstbrowser-ai-agent network requests --filter api # Filter requests
288
+ ```
289
+
290
+ ### Tabs & Windows
291
+
292
+ ```bash
293
+ nstbrowser-ai-agent tab # List tabs
294
+ nstbrowser-ai-agent tab new [url] # New tab (optionally with URL)
295
+ nstbrowser-ai-agent tab <n> # Switch to tab n
296
+ nstbrowser-ai-agent tab close [n] # Close tab
297
+ nstbrowser-ai-agent window new # New window
298
+ ```
299
+
300
+ ### Frames
301
+
302
+ ```bash
303
+ nstbrowser-ai-agent frame <sel> # Switch to iframe
304
+ nstbrowser-ai-agent frame main # Back to main frame
305
+ ```
306
+
307
+ ### Dialogs
308
+
309
+ ```bash
310
+ nstbrowser-ai-agent dialog accept [text] # Accept (with optional prompt text)
311
+ nstbrowser-ai-agent dialog dismiss # Dismiss
312
+ ```
313
+
314
+ ### Diff
315
+
316
+ ```bash
317
+ nstbrowser-ai-agent diff snapshot # Compare current vs last snapshot
318
+ nstbrowser-ai-agent diff snapshot --baseline before.txt # Compare current vs saved snapshot file
319
+ nstbrowser-ai-agent diff snapshot --selector "#main" --compact # Scoped snapshot diff
320
+ nstbrowser-ai-agent diff screenshot --baseline before.png # Visual pixel diff against baseline
321
+ nstbrowser-ai-agent diff screenshot --baseline b.png -o d.png # Save diff image to custom path
322
+ nstbrowser-ai-agent diff screenshot --baseline b.png -t 0.2 # Adjust color threshold (0-1)
323
+ nstbrowser-ai-agent diff url https://v1.com https://v2.com # Compare two URLs (snapshot diff)
324
+ nstbrowser-ai-agent diff url https://v1.com https://v2.com --screenshot # Also visual diff
325
+ nstbrowser-ai-agent diff url https://v1.com https://v2.com --wait-until networkidle # Custom wait strategy
326
+ nstbrowser-ai-agent diff url https://v1.com https://v2.com --selector "#main" # Scope to element
327
+ ```
328
+
329
+ ### Debug
330
+
331
+ ```bash
332
+ nstbrowser-ai-agent trace start [path] # Start recording trace
333
+ nstbrowser-ai-agent trace stop [path] # Stop and save trace
334
+ nstbrowser-ai-agent profiler start # Start Chrome DevTools profiling
335
+ nstbrowser-ai-agent profiler stop [path] # Stop and save profile (.json)
336
+ nstbrowser-ai-agent console # View console messages (log, error, warn, info)
337
+ nstbrowser-ai-agent console --clear # Clear console
338
+ nstbrowser-ai-agent errors # View page errors (uncaught JavaScript exceptions)
339
+ nstbrowser-ai-agent errors --clear # Clear errors
340
+ nstbrowser-ai-agent highlight <sel> # Highlight element
341
+ nstbrowser-ai-agent state save <path> # Save auth state
342
+ nstbrowser-ai-agent state load <path> # Load auth state
343
+ nstbrowser-ai-agent state list # List saved state files
344
+ nstbrowser-ai-agent state show <file> # Show state summary
345
+ nstbrowser-ai-agent state rename <old> <new> # Rename state file
346
+ nstbrowser-ai-agent state clear [name] # Clear states for session
347
+ nstbrowser-ai-agent state clear --all # Clear all saved states
348
+ nstbrowser-ai-agent state clean --older-than <days> # Delete old states
349
+ ```
350
+
351
+ ### Navigation
352
+
353
+ ```bash
354
+ nstbrowser-ai-agent back # Go back
355
+ nstbrowser-ai-agent forward # Go forward
356
+ nstbrowser-ai-agent reload # Reload page
357
+ ```
358
+
359
+ ### Setup
360
+
361
+ ```bash
362
+ nstbrowser-ai-agent install # Download Chromium browser
363
+ nstbrowser-ai-agent install --with-deps # Also install system deps (Linux)
364
+ ```
365
+
366
+ ## Sessions
367
+
368
+ Run multiple isolated browser instances:
369
+
370
+ ```bash
371
+ # Different sessions
372
+ nstbrowser-ai-agent --session agent1 open site-a.com
373
+ nstbrowser-ai-agent --session agent2 open site-b.com
374
+
375
+ # Or via environment variable
376
+ NSTBROWSER_AI_AGENT_SESSION=agent1 nstbrowser-ai-agent click "#btn"
377
+
378
+ # List active sessions
379
+ nstbrowser-ai-agent session list
380
+ # Output:
381
+ # Active sessions:
382
+ # -> default
383
+ # agent1
384
+
385
+ # Show current session
386
+ nstbrowser-ai-agent session
387
+ ```
388
+
389
+ Each session has its own:
390
+ - Browser instance
391
+ - Cookies and storage
392
+ - Navigation history
393
+ - Authentication state
394
+
395
+ ## Configuration
396
+
397
+ Configure your NST API credentials once; they are saved and reused for all subsequent commands:
398
+
399
+ ```bash
400
+ # Set API key (required)
401
+ nstbrowser-ai-agent config set key <your-api-key>
402
+
403
+ # Set custom host (optional, default: 127.0.0.1)
404
+ nstbrowser-ai-agent config set host api.example.com
405
+
406
+ # Set custom port (optional, default: 8848)
407
+ nstbrowser-ai-agent config set port 9000
408
+
409
+ # View current configuration
410
+ nstbrowser-ai-agent config show
411
+
412
+ # Get specific value
413
+ nstbrowser-ai-agent config get key
414
+
415
+ # Remove configuration
416
+ nstbrowser-ai-agent config unset key
417
+ ```
418
+
419
+ Configuration is stored in `~/.nst-ai-agent/config.json` and takes priority over environment variables.
420
+
421
+ **Priority order:** Config file > Environment variables > Defaults
422
+
423
+ ## Persistent Profiles
424
+
425
+ By default, browser state (cookies, localStorage, login sessions) is ephemeral and lost when the browser closes. Use `--profile` to persist state across browser restarts:
426
+
427
+ ```bash
428
+ # Use a persistent profile directory
429
+ nstbrowser-ai-agent --profile ~/.myapp-profile open myapp.com
430
+
431
+ # Login once, then reuse the authenticated session
432
+ nstbrowser-ai-agent --profile ~/.myapp-profile open myapp.com/dashboard
433
+
434
+ # Or via environment variable
435
+ NSTBROWSER_AI_AGENT_PROFILE=~/.myapp-profile nstbrowser-ai-agent open myapp.com
436
+ ```
437
+
438
+ The profile directory stores:
439
+ - Cookies and localStorage
440
+ - IndexedDB data
441
+ - Service workers
442
+ - Browser cache
443
+ - Login sessions
444
+
445
+ **Tip**: Use different profile paths for different projects to keep their browser state isolated.
446
+
447
+ ## Session Persistence
448
+
449
+ Alternatively, use `--session-name` to automatically save and restore cookies and localStorage across browser restarts:
450
+
451
+ ```bash
452
+ # Auto-save/load state for "twitter" session
453
+ nstbrowser-ai-agent --session-name twitter open twitter.com
454
+
455
+ # Login once, then state persists automatically
456
+ # State files stored in ~/.nstbrowser-ai-agent/sessions/
457
+
458
+ # Or via environment variable
459
+ export NSTBROWSER_AI_AGENT_SESSION_NAME=twitter
460
+ nstbrowser-ai-agent open twitter.com
461
+ ```
462
+
463
+ ### State Encryption
464
+
465
+ Encrypt saved session data at rest with AES-256-GCM:
466
+
467
+ ```bash
468
+ # Generate key: openssl rand -hex 32
469
+ export NSTBROWSER_AI_AGENT_ENCRYPTION_KEY=<64-char-hex-key>
470
+
471
+ # State files are now encrypted automatically
472
+ nstbrowser-ai-agent --session-name secure open example.com
473
+ ```
474
+
475
+ | Variable | Description |
476
+ |----------|-------------|
477
+ | `NSTBROWSER_AI_AGENT_SESSION_NAME` | Auto-save/load state persistence name |
478
+ | `NSTBROWSER_AI_AGENT_ENCRYPTION_KEY` | 64-char hex key for AES-256-GCM encryption |
479
+ | `NSTBROWSER_AI_AGENT_STATE_EXPIRE_DAYS` | Auto-delete states older than N days (default: 30) |
480
+ | `NSTBROWSER_AI_AGENT_PROVIDER` | Browser provider (default: nst) |
481
+ | `NSTBROWSER_AI_AGENT_LOCAL` | Use local browser instead of Nstbrowser |
482
+ | `NST_API_KEY` | Nstbrowser API key (required for the `nst` provider, which is the default) |
483
+ | `NST_HOST` | Nstbrowser API host (default: localhost) |
484
+ | `NST_PORT` | Nstbrowser API port (default: 8848) |
485
+
486
+ ## Security
487
+
488
+ nstbrowser-ai-agent includes security features for safe AI agent deployments. All features are opt-in -- existing workflows are unaffected until you explicitly enable a feature:
489
+
490
+ - **Authentication Vault** -- Store credentials locally (always encrypted) and reference them by name, so the LLM never sees passwords. A key is auto-generated at `~/.nstbrowser-ai-agent/.encryption-key` if `NSTBROWSER_AI_AGENT_ENCRYPTION_KEY` is not set. Example: `echo "pass" | nstbrowser-ai-agent auth save github --url https://github.com/login --username user --password-stdin`, then `nstbrowser-ai-agent auth login github`.
491
+ - **Content Boundary Markers** -- Wrap page output in delimiters so LLMs can distinguish tool output from untrusted content: `--content-boundaries`
492
+ - **Domain Allowlist** -- Restrict navigation to trusted domains (wildcards like `*.example.com` also match the bare domain): `--allowed-domains "example.com,*.example.com"`. Sub-resource requests (scripts, images, fetch) and WebSocket/EventSource connections to non-allowed domains are also blocked. Include any CDN domains your target pages depend on (e.g., `*.cdn.example.com`).
493
+ - **Action Policy** -- Gate destructive actions with a static policy file: `--action-policy ./policy.json`
494
+ - **Action Confirmation** -- Require explicit approval for sensitive action categories: `--confirm-actions eval,download`
495
+ - **Output Length Limits** -- Prevent context flooding: `--max-output 50000`
496
+
497
+ | Variable | Description |
498
+ |----------|-------------|
499
+ | `NSTBROWSER_AI_AGENT_CONTENT_BOUNDARIES` | Wrap page output in boundary markers |
500
+ | `NSTBROWSER_AI_AGENT_MAX_OUTPUT` | Max characters for page output |
501
+ | `NSTBROWSER_AI_AGENT_ALLOWED_DOMAINS` | Comma-separated allowed domain patterns |
502
+ | `NSTBROWSER_AI_AGENT_ACTION_POLICY` | Path to action policy JSON file |
503
+ | `NSTBROWSER_AI_AGENT_CONFIRM_ACTIONS` | Action categories requiring confirmation |
504
+ | `NSTBROWSER_AI_AGENT_CONFIRM_INTERACTIVE` | Enable interactive confirmation prompts |
505
+ | `NSTBROWSER_AI_AGENT_PROVIDER` | Browser provider (default: nst) |
506
+ | `NSTBROWSER_AI_AGENT_LOCAL` | Use local browser instead of Nstbrowser |
507
+ | `NST_API_KEY` | Nstbrowser API key (required for the `nst` provider, which is the default) |
508
+ | `NST_HOST` | Nstbrowser API host (default: localhost) |
509
+ | `NST_PORT` | Nstbrowser API port (default: 8848) |
510
+
511
+ See the Options section below for the equivalent command-line flags.
512
+
513
+ ## Snapshot Options
514
+
515
+ The `snapshot` command supports filtering to reduce output size:
516
+
517
+ ```bash
518
+ nstbrowser-ai-agent snapshot # Full accessibility tree
519
+ nstbrowser-ai-agent snapshot -i # Interactive elements only (buttons, inputs, links)
520
+ nstbrowser-ai-agent snapshot -i -C # Include cursor-interactive elements (divs with onclick, etc.)
521
+ nstbrowser-ai-agent snapshot -c # Compact (remove empty structural elements)
522
+ nstbrowser-ai-agent snapshot -d 3 # Limit depth to 3 levels
523
+ nstbrowser-ai-agent snapshot -s "#main" # Scope to CSS selector
524
+ nstbrowser-ai-agent snapshot -i -c -d 5 # Combine options
525
+ ```
526
+
527
+ | Option | Description |
528
+ |--------|-------------|
529
+ | `-i, --interactive` | Only show interactive elements (buttons, links, inputs) |
530
+ | `-C, --cursor` | Include cursor-interactive elements (cursor:pointer, onclick, tabindex) |
531
+ | `-c, --compact` | Remove empty structural elements |
532
+ | `-d, --depth <n>` | Limit tree depth |
533
+ | `-s, --selector <sel>` | Scope to CSS selector |
534
+
535
+ The `-C` flag is useful for modern web apps that use custom clickable elements (divs, spans) instead of standard buttons/links.
536
+
537
+ ## Annotated Screenshots
538
+
539
+ The `--annotate` flag overlays numbered labels on interactive elements in the screenshot. Each label `[N]` corresponds to ref `@eN`, so the same refs work for both visual and text-based workflows.
540
+
541
+ ```bash
542
+ nstbrowser-ai-agent screenshot --annotate
543
+ # -> Screenshot saved to /tmp/screenshot-2026-02-17T12-00-00-abc123.png
544
+ # [1] @e1 button "Submit"
545
+ # [2] @e2 link "Home"
546
+ # [3] @e3 textbox "Email"
547
+ ```
548
+
549
+ After an annotated screenshot, refs are cached so you can immediately interact with elements:
550
+
551
+ ```bash
552
+ nstbrowser-ai-agent screenshot --annotate ./page.png
553
+ nstbrowser-ai-agent click @e2 # Click the "Home" link labeled [2]
554
+ ```
555
+
556
+ This is useful for multimodal AI models that can reason about visual layout, unlabeled icon buttons, canvas elements, or visual state that the text accessibility tree cannot capture.
557
+
558
+ ## Options
559
+
560
+ | Option | Description |
561
+ |--------|------------------------------------------------------------------------------------------------------------------|
562
+ | `--session <name>` | Use isolated session (or `NSTBROWSER_AI_AGENT_SESSION` env) |
563
+ | `--session-name <name>` | Auto-save/restore session state (or `NSTBROWSER_AI_AGENT_SESSION_NAME` env) |
564
+ | `--profile <path>` | Persistent browser profile directory (or `NSTBROWSER_AI_AGENT_PROFILE` env) |
565
+ | `--state <path>` | Load storage state from JSON file (or `NSTBROWSER_AI_AGENT_STATE` env) |
566
+ | `--headers <json>` | Set HTTP headers scoped to the URL's origin |
567
+ | `--executable-path <path>` | Custom browser executable (or `NSTBROWSER_AI_AGENT_EXECUTABLE_PATH` env) |
568
+ | `--extension <path>` | Load browser extension (repeatable; or `NSTBROWSER_AI_AGENT_EXTENSIONS` env) |
569
+ | `--args <args>` | Browser launch args, comma or newline separated (or `NSTBROWSER_AI_AGENT_ARGS` env) |
570
+ | `--user-agent <ua>` | Custom User-Agent string (or `NSTBROWSER_AI_AGENT_USER_AGENT` env) |
571
+ | `--proxy <url>` | Proxy server URL with optional auth (or `NSTBROWSER_AI_AGENT_PROXY` env) |
572
+ | `--proxy-bypass <hosts>` | Hosts to bypass proxy (or `NSTBROWSER_AI_AGENT_PROXY_BYPASS` env) |
573
+ | `--ignore-https-errors` | Ignore HTTPS certificate errors (useful for self-signed certs) |
574
+ | `--allow-file-access` | Allow file:// URLs to access local files (Chromium only) |
575
+ | `-p, --provider <name>` | Browser provider: `nst` (default), `local` (or `NSTBROWSER_AI_AGENT_PROVIDER` env) |
576
+ | `--local` | Use local browser instead of Nstbrowser (or `NSTBROWSER_AI_AGENT_LOCAL` env) |
577
+ | `--json` | JSON output (for agents) |
578
+ | `--full, -f` | Full page screenshot |
579
+ | `--annotate` | Annotated screenshot with numbered element labels (or `NSTBROWSER_AI_AGENT_ANNOTATE` env) |
580
+ | `--headed` | Show browser window (not headless, implies local mode) |
581
+ | `--cdp <port\|url>` | Connect via Chrome DevTools Protocol (port or WebSocket URL, implies local mode) |
582
+ | `--auto-connect` | Auto-discover and connect to running Chrome (implies local mode) (or `NSTBROWSER_AI_AGENT_AUTO_CONNECT` env) |
583
+ | `--color-scheme <scheme>` | Color scheme: `dark`, `light`, `no-preference` (or `NSTBROWSER_AI_AGENT_COLOR_SCHEME` env) |
584
+ | `--download-path <path>` | Default download directory (or `NSTBROWSER_AI_AGENT_DOWNLOAD_PATH` env) |
585
+ | `--content-boundaries` | Wrap page output in boundary markers for LLM safety (or `NSTBROWSER_AI_AGENT_CONTENT_BOUNDARIES` env) |
586
+ | `--max-output <chars>` | Truncate page output to N characters (or `NSTBROWSER_AI_AGENT_MAX_OUTPUT` env) |
587
+ | `--allowed-domains <list>` | Comma-separated allowed domain patterns (or `NSTBROWSER_AI_AGENT_ALLOWED_DOMAINS` env) |
588
+ | `--action-policy <path>` | Path to action policy JSON file (or `NSTBROWSER_AI_AGENT_ACTION_POLICY` env) |
589
+ | `--confirm-actions <list>` | Action categories requiring confirmation (or `NSTBROWSER_AI_AGENT_CONFIRM_ACTIONS` env) |
590
+ | `--confirm-interactive` | Interactive confirmation prompts; auto-denies if stdin is not a TTY (or `NSTBROWSER_AI_AGENT_CONFIRM_INTERACTIVE` env) |
591
+ | `--native` | [Experimental] Use native Rust daemon instead of Node.js (or `NSTBROWSER_AI_AGENT_NATIVE` env) |
592
+ | `--config <path>` | Use a custom config file (or `NSTBROWSER_AI_AGENT_CONFIG` env) |
593
+ | `--debug` | Debug output |
594
+
595
+ ## Configuration
596
+
597
+ ### Configuration Files
598
+
599
+ Create an `nstbrowser-ai-agent.json` file to set persistent defaults instead of repeating flags on every command.
600
+
601
+ **Locations (lowest to highest priority):**
602
+
603
+ 1. `~/.nstbrowser-ai-agent/config.json` -- user-level defaults
604
+ 2. `./nstbrowser-ai-agent.json` -- project-level overrides (in working directory)
605
+ 3. `NSTBROWSER_AI_AGENT_*` environment variables override config file values
606
+ 4. CLI flags override everything
607
+
608
+ **Example `nstbrowser-ai-agent.json`:**
609
+
610
+ ```json
611
+ {
612
+ "headed": true,
613
+ "proxy": "http://localhost:8080",
614
+ "profile": "./browser-data",
615
+ "userAgent": "my-agent/1.0",
616
+ "ignoreHttpsErrors": true
617
+ }
618
+ ```
619
+
620
+ Use `--config <path>` or `NSTBROWSER_AI_AGENT_CONFIG` to load a specific config file instead of the defaults:
621
+
622
+ ```bash
623
+ nstbrowser-ai-agent --config ./ci-config.json open example.com
624
+ NSTBROWSER_AI_AGENT_CONFIG=./ci-config.json nstbrowser-ai-agent open example.com
625
+ ```
626
+
627
+ All options from the table above can be set in the config file using camelCase keys (e.g., `--executable-path` becomes `"executablePath"`, `--proxy-bypass` becomes `"proxyBypass"`). Unknown keys are ignored for forward compatibility.
628
+
629
+ Boolean flags accept an optional `true`/`false` value to override config settings. For example, `--headed false` disables `"headed": true` from config. A bare `--headed` is equivalent to `--headed true`.
630
+
631
+ Auto-discovered config files that are missing are silently ignored. If `--config <path>` points to a missing or invalid file, nstbrowser-ai-agent exits with an error. Extensions from user and project configs are merged (concatenated), not replaced.
632
+
633
+ > **Tip:** If your project-level `nstbrowser-ai-agent.json` contains environment-specific values (paths, proxies), consider adding it to `.gitignore`.
634
+
635
+ ### Environment Variables via .env Files
636
+
637
+ You can store environment variables in `.env` files for easier configuration management:
638
+
639
+ **Supported files (in priority order):**
640
+
641
+ 1. `.nstbrowser-ai-agent.env` -- project-specific configuration (highest priority)
642
+ 2. `.env` -- standard environment file
643
+
644
+ **Example `.nstbrowser-ai-agent.env`:**
645
+
646
+ ```bash
647
+ # Nstbrowser configuration
648
+ NST_API_KEY=your-api-key-here
649
+ NST_HOST=localhost
649
+ NST_PORT=8848
651
+
652
+ # Agent configuration
653
+ NSTBROWSER_AI_AGENT_DEBUG=1
654
+ NSTBROWSER_AI_AGENT_DEFAULT_TIMEOUT=30000
655
+ ```
656
+
657
+ The `.env` files are loaded automatically when you run any command. Variables set in `.nstbrowser-ai-agent.env` take priority over `.env`.
658
+
659
+ > **Security Note:** Never commit `.env` files containing API keys to version control. Add them to `.gitignore`.
660
+
661
+ **Example `.gitignore`:**
662
+
663
+ ```
664
+ .env
665
+ .nstbrowser-ai-agent.env
666
+ nstbrowser-ai-agent.json
667
+ ```
668
+
669
+ ## Default Timeout
670
+
671
+ The default Playwright timeout for standard operations (clicks, waits, fills, etc.) is 25 seconds. This is intentionally below the CLI's 30-second IPC read timeout so that Playwright returns a proper error instead of the CLI timing out with EAGAIN.
672
+
673
+ Override the default timeout via environment variable:
674
+
675
+ ```bash
676
+ # Set a longer timeout for slow pages (in milliseconds)
677
+ export NSTBROWSER_AI_AGENT_DEFAULT_TIMEOUT=45000
678
+ ```
679
+
680
+ > **Note:** Setting this above 30000 (30s) may cause EAGAIN errors on slow operations because the CLI's read timeout will expire before Playwright responds. The CLI retries transient errors automatically, but response times will increase.
681
+
682
+ | Variable | Description |
683
+ |----------|-------------|
684
+ | `NSTBROWSER_AI_AGENT_DEFAULT_TIMEOUT` | Default Playwright timeout in ms (default: 25000) |
685
+
686
+ ## Selectors
687
+
688
+ ### Refs (Recommended for AI)
689
+
690
+ Refs provide deterministic element selection from snapshots:
691
+
692
+ ```bash
693
+ # 1. Get snapshot with refs
694
+ nstbrowser-ai-agent snapshot
695
+ # Output:
696
+ # - heading "Example Domain" [ref=e1] [level=1]
697
+ # - button "Submit" [ref=e2]
698
+ # - textbox "Email" [ref=e3]
699
+ # - link "Learn more" [ref=e4]
700
+
701
+ # 2. Use refs to interact
702
+ nstbrowser-ai-agent click @e2 # Click the button
703
+ nstbrowser-ai-agent fill @e3 "test@example.com" # Fill the textbox
704
+ nstbrowser-ai-agent get text @e1 # Get heading text
705
+ nstbrowser-ai-agent hover @e4 # Hover the link
706
+ ```
707
+
708
+ **Why use refs?**
709
+ - **Deterministic**: Each ref points to the exact element captured in the snapshot
710
+ - **Fast**: No DOM re-query needed
711
+ - **AI-friendly**: Snapshot + ref workflow is optimal for LLMs
712
+
713
+ ### CSS Selectors
714
+
715
+ ```bash
716
+ nstbrowser-ai-agent click "#id"
717
+ nstbrowser-ai-agent click ".class"
718
+ nstbrowser-ai-agent click "div > button"
719
+ ```
720
+
721
+ ### Text & XPath
722
+
723
+ ```bash
724
+ nstbrowser-ai-agent click "text=Submit"
725
+ nstbrowser-ai-agent click "xpath=//button"
726
+ ```
727
+
728
+ ### Semantic Locators
729
+
730
+ ```bash
731
+ nstbrowser-ai-agent find role button click --name "Submit"
732
+ nstbrowser-ai-agent find label "Email" fill "test@test.com"
733
+ ```
734
+
735
+ ## Agent Mode
736
+
737
+ Use `--json` for machine-readable output:
738
+
739
+ ```bash
740
+ nstbrowser-ai-agent snapshot --json
741
+ # Returns: {"success":true,"data":{"snapshot":"...","refs":{"e1":{"role":"heading","name":"Title"},...}}}
742
+
743
+ nstbrowser-ai-agent get text @e1 --json
744
+ nstbrowser-ai-agent is visible @e2 --json
745
+ ```
746
+
747
+ ### Optimal AI Workflow
748
+
749
+ ```bash
750
+ # 1. Navigate and get snapshot
751
+ nstbrowser-ai-agent open example.com
752
+ nstbrowser-ai-agent snapshot -i --json # AI parses tree and refs
753
+
754
+ # 2. AI identifies target refs from snapshot
755
+ # 3. Execute actions using refs
756
+ nstbrowser-ai-agent click @e2
757
+ nstbrowser-ai-agent fill @e3 "input text"
758
+
759
+ # 4. Get new snapshot if page changed
760
+ nstbrowser-ai-agent snapshot -i --json
761
+ ```
762
+
763
+ ### Command Chaining
764
+
765
+ Commands can be chained with `&&` in a single shell invocation. The browser persists via a background daemon, so chaining is safe and more efficient:
766
+
767
+ ```bash
768
+ # Open, wait for load, and snapshot in one call
769
+ nstbrowser-ai-agent open example.com && nstbrowser-ai-agent wait --load networkidle && nstbrowser-ai-agent snapshot -i
770
+
771
+ # Chain multiple interactions
772
+ nstbrowser-ai-agent fill @e1 "user@example.com" && nstbrowser-ai-agent fill @e2 "pass" && nstbrowser-ai-agent click @e3
773
+
774
+ # Navigate and screenshot
775
+ nstbrowser-ai-agent open example.com && nstbrowser-ai-agent wait --load networkidle && nstbrowser-ai-agent screenshot page.png
776
+ ```
777
+
778
+ Use `&&` when you don't need intermediate output. Run commands separately when you need to parse output first (e.g., snapshot to discover refs before interacting).
779
+
780
+ ## Headed Mode
781
+
782
+ Show the browser window for debugging:
783
+
784
+ ```bash
785
+ nstbrowser-ai-agent open example.com --headed
786
+ ```
787
+
788
+ This opens a visible browser window instead of running headless.
789
+
790
+ ## Authenticated Sessions
791
+
792
+ Use `--headers` to set HTTP headers for a specific origin, enabling authentication without login flows:
793
+
794
+ ```bash
795
+ # Headers are scoped to api.example.com only
796
+ nstbrowser-ai-agent open api.example.com --headers '{"Authorization": "Bearer <token>"}'
797
+
798
+ # Requests to api.example.com include the auth header
799
+ nstbrowser-ai-agent snapshot -i --json
800
+ nstbrowser-ai-agent click @e2
801
+
802
+ # Navigate to another domain - headers are NOT sent (safe!)
803
+ nstbrowser-ai-agent open other-site.com
804
+ ```
805
+
806
+ This is useful for:
807
+ - **Skipping login flows** - Authenticate via headers instead of UI
808
+ - **Switching users** - Start new sessions with different auth tokens
809
+ - **API testing** - Access protected endpoints directly
810
+ - **Security** - Headers are scoped to the origin, not leaked to other domains
811
+
812
+ To set headers for multiple origins, use `--headers` with each `open` command:
813
+
814
+ ```bash
815
+ nstbrowser-ai-agent open api.example.com --headers '{"Authorization": "Bearer token1"}'
816
+ nstbrowser-ai-agent open api.acme.com --headers '{"Authorization": "Bearer token2"}'
817
+ ```
818
+
819
+ For global headers (all domains), use `set headers`:
820
+
821
+ ```bash
822
+ nstbrowser-ai-agent set headers '{"X-Custom-Header": "value"}'
823
+ ```
824
+
825
+ ## Custom Browser Executable
826
+
827
+ Use a custom browser executable instead of the bundled Chromium. This is useful for:
828
+ - **Serverless deployment**: Use lightweight Chromium builds like `@sparticuz/chromium` (~50MB vs ~684MB)
829
+ - **System browsers**: Use an existing Chrome/Chromium installation
830
+ - **Custom builds**: Use modified browser builds
831
+
832
+ ### CLI Usage
833
+
834
+ ```bash
835
+ # Via flag
836
+ nstbrowser-ai-agent --executable-path /path/to/chromium open example.com
837
+
838
+ # Via environment variable
839
+ NSTBROWSER_AI_AGENT_EXECUTABLE_PATH=/path/to/chromium nstbrowser-ai-agent open example.com
840
+ ```
841
+
842
+ ### Serverless Example (AWS Lambda)
843
+
844
+ ```typescript
845
+ import chromium from '@sparticuz/chromium';
846
+ import { BrowserManager } from 'nstbrowser-ai-agent';
847
+
848
+ export async function handler() {
849
+ const browser = new BrowserManager();
850
+ await browser.launch({
851
+ executablePath: await chromium.executablePath(),
852
+ headless: true,
853
+ });
854
+ // ... use browser
855
+ }
856
+ ```
857
+
858
+ ## Local Files
859
+
860
+ Open and interact with local files (PDFs, HTML, etc.) using `file://` URLs:
861
+
862
+ ```bash
863
+ # Enable file access (required for JavaScript to access local files)
864
+ nstbrowser-ai-agent --allow-file-access open file:///path/to/document.pdf
865
+ nstbrowser-ai-agent --allow-file-access open file:///path/to/page.html
866
+
867
+ # Take screenshot of a local PDF
868
+ nstbrowser-ai-agent --allow-file-access open file:///Users/me/report.pdf
869
+ nstbrowser-ai-agent screenshot report.png
870
+ ```
871
+
872
+ The `--allow-file-access` flag adds Chromium flags (`--allow-file-access-from-files`, `--allow-file-access`) that allow `file://` URLs to:
873
+ - Load and render local files
874
+ - Access other local files via JavaScript (XHR, fetch)
875
+ - Load local resources (images, scripts, stylesheets)
876
+
877
+ **Note:** This flag only works with Chromium. For security, it's disabled by default.
878
+
879
+ ## CDP Mode
880
+
881
+ Connect to an existing browser via Chrome DevTools Protocol:
882
+
883
+ ```bash
884
+ # Start Chrome with: google-chrome --remote-debugging-port=9222
885
+
886
+ # Connect once, then run commands without --cdp
887
+ nstbrowser-ai-agent connect 9222
888
+ nstbrowser-ai-agent snapshot
889
+ nstbrowser-ai-agent tab
890
+ nstbrowser-ai-agent close
891
+
892
+ # Or pass --cdp on each command
893
+ nstbrowser-ai-agent --cdp 9222 snapshot
894
+
895
+ # Connect to remote browser via WebSocket URL
896
+ nstbrowser-ai-agent --cdp "wss://your-browser-service.com/cdp?token=..." snapshot
897
+ ```
898
+
899
+ The `--cdp` flag accepts either:
900
+ - A port number (e.g., `9222`) for local connections via `http://localhost:{port}`
901
+ - A full WebSocket URL (e.g., `wss://...` or `ws://...`) for remote browser services
902
+
903
+ This enables control of:
904
+ - Electron apps
905
+ - Chrome/Chromium instances with remote debugging
906
+ - WebView2 applications
907
+ - Any browser exposing a CDP endpoint
908
+
909
+ ### Auto-Connect
910
+
911
+ Use `--auto-connect` to automatically discover and connect to a running Chrome instance without specifying a port:
912
+
913
+ ```bash
914
+ # Auto-discover running Chrome with remote debugging
915
+ nstbrowser-ai-agent --auto-connect open example.com
916
+ nstbrowser-ai-agent --auto-connect snapshot
917
+
918
+ # Or via environment variable
919
+ NSTBROWSER_AI_AGENT_AUTO_CONNECT=1 nstbrowser-ai-agent snapshot
920
+ ```
921
+
922
+ Auto-connect discovers Chrome by:
923
+ 1. Reading Chrome's `DevToolsActivePort` file from the default user data directory
924
+ 2. Falling back to probing common debugging ports (9222, 9229)
925
+
926
+ This is useful when:
927
+ - Chrome 144+ has remote debugging enabled via `chrome://inspect/#remote-debugging` (which uses a dynamic port)
928
+ - You want a zero-configuration connection to your existing browser
929
+ - You don't want to track which port Chrome is using
930
+
931
+ ## Streaming (Browser Preview)
932
+
933
+ Stream the browser viewport via WebSocket for live preview or "pair browsing" where a human can watch and interact alongside an AI agent.
934
+
935
+ ### Enable Streaming
936
+
937
+ Set the `NSTBROWSER_AI_AGENT_STREAM_PORT` environment variable:
938
+
939
+ ```bash
940
+ NSTBROWSER_AI_AGENT_STREAM_PORT=9223 nstbrowser-ai-agent open example.com
941
+ ```
942
+
943
+ This starts a WebSocket server on the specified port that streams the browser viewport and accepts input events.
944
+
945
+ ### WebSocket Protocol
946
+
947
+ Connect to `ws://localhost:9223` to receive frames and send input:
948
+
949
+ **Receive frames:**
950
+ ```json
951
+ {
952
+ "type": "frame",
953
+ "data": "<base64-encoded-jpeg>",
954
+ "metadata": {
955
+ "deviceWidth": 1280,
956
+ "deviceHeight": 720,
957
+ "pageScaleFactor": 1,
958
+ "offsetTop": 0,
959
+ "scrollOffsetX": 0,
960
+ "scrollOffsetY": 0
961
+ }
962
+ }
963
+ ```
964
+
965
+ **Send mouse events:**
966
+ ```json
967
+ {
968
+ "type": "input_mouse",
969
+ "eventType": "mousePressed",
970
+ "x": 100,
971
+ "y": 200,
972
+ "button": "left",
973
+ "clickCount": 1
974
+ }
975
+ ```
976
+
977
+ **Send keyboard events:**
978
+ ```json
979
+ {
980
+ "type": "input_keyboard",
981
+ "eventType": "keyDown",
982
+ "key": "Enter",
983
+ "code": "Enter"
984
+ }
985
+ ```
986
+
987
+ **Send touch events:**
988
+ ```json
989
+ {
990
+ "type": "input_touch",
991
+ "eventType": "touchStart",
992
+ "touchPoints": [{ "x": 100, "y": 200 }]
993
+ }
994
+ ```
995
+
996
+ ### Programmatic API
997
+
998
+ For advanced use, control streaming programmatically through the `BrowserManager` API:
999
+
1000
+ ```typescript
1001
+ import { BrowserManager } from 'nstbrowser-ai-agent';
1002
+
1003
+ const browser = new BrowserManager();
1004
+ await browser.launch({ headless: true });
1005
+ await browser.navigate('https://example.com');
1006
+
1007
+ // Start screencast
1008
+ await browser.startScreencast((frame) => {
1009
+ // frame.data is base64-encoded image
1010
+ // frame.metadata contains viewport info
1011
+ console.log('Frame received:', frame.metadata.deviceWidth, 'x', frame.metadata.deviceHeight);
1012
+ }, {
1013
+ format: 'jpeg',
1014
+ quality: 80,
1015
+ maxWidth: 1280,
1016
+ maxHeight: 720,
1017
+ });
1018
+
1019
+ // Inject mouse events
1020
+ await browser.injectMouseEvent({
1021
+ type: 'mousePressed',
1022
+ x: 100,
1023
+ y: 200,
1024
+ button: 'left',
1025
+ });
1026
+
1027
+ // Inject keyboard events
1028
+ await browser.injectKeyboardEvent({
1029
+ type: 'keyDown',
1030
+ key: 'Enter',
1031
+ code: 'Enter',
1032
+ });
1033
+
1034
+ // Stop when done
1035
+ await browser.stopScreencast();
1036
+ ```
1037
+
1038
+ ## Architecture
1039
+
1040
+ nstbrowser-ai-agent uses a client-daemon architecture:
1041
+
1042
+ 1. **Rust CLI** (fast native binary) - Parses commands, communicates with daemon
1043
+ 2. **Node.js Daemon** (default) - Manages Playwright browser instance
1044
+ 3. **Native Daemon** (experimental, `--native`) - Pure Rust daemon using direct CDP, no Node.js required
1045
+ 4. **Fallback** - If the native binary is unavailable, the CLI falls back to running on Node.js directly
1046
+
1047
+ The daemon starts automatically on first command and persists between commands for fast subsequent operations.
1048
+
1049
+ **Browser Engine:** Uses Chromium by default. The default Node.js daemon also supports Firefox and WebKit via Playwright. The experimental native daemon speaks Chrome DevTools Protocol (CDP) directly and supports Chromium-based browsers and Safari (via WebDriver).
1050
+
1051
+ ## Experimental: Native Mode
1052
+
1053
+ The native daemon is a pure Rust implementation that communicates with Chrome directly via CDP, eliminating the Node.js and Playwright dependencies. It is currently **experimental** and opt-in.
1054
+
1055
+ ### Enabling Native Mode
1056
+
1057
+ ```bash
1058
+ # Via flag
1059
+ nstbrowser-ai-agent --native open example.com
1060
+
1061
+ # Via environment variable (recommended for persistent use)
1062
+ export NSTBROWSER_AI_AGENT_NATIVE=1
1063
+ nstbrowser-ai-agent open example.com
1064
+ ```
1065
+
1066
+ Or add to your config file (`nstbrowser-ai-agent.json`):
1067
+
1068
+ ```json
1069
+ {"native": true}
1070
+ ```
1071
+
1072
+ ### What's Different
1073
+
1074
+ | | Default (Node.js) | Native (`--native`) |
1075
+ |---|---|---|
1076
+ | **Runtime** | Node.js + Playwright | Pure Rust binary |
1077
+ | **Protocol** | Playwright protocol | Direct CDP / WebDriver |
1078
+ | **Install size** | Larger (Node.js + npm deps) | Smaller (single binary) |
1079
+ | **Browser support** | Chromium, Firefox, WebKit | Chromium, Safari (via WebDriver) |
1080
+ | **Stability** | Stable | Experimental |
1081
+
1082
+ ### Known Limitations
1083
+
1084
+ - Firefox and WebKit are not yet supported (Chromium and Safari only)
1085
+ - Some Playwright-specific features (tracing format, HAR export) are not available
1086
+ - The native daemon and Node.js daemon share the same session socket, so you cannot run both simultaneously for the same session. Use `nstbrowser-ai-agent close` before switching modes.
1087
+
1088
+ ## Platforms
1089
+
1090
+ | Platform | Binary | Fallback |
1091
+ |----------|--------|----------|
1092
+ | macOS ARM64 | Native Rust | Node.js |
1093
+ | macOS x64 | Native Rust | Node.js |
1094
+ | Linux ARM64 | Native Rust | Node.js |
1095
+ | Linux x64 | Native Rust | Node.js |
1096
+ | Windows x64 | Native Rust | Node.js |
1097
+
1098
+ ## Usage with AI Agents
1099
+
1100
+ ### Just ask the agent
1101
+
1102
+ The simplest approach -- just tell your agent to use it:
1103
+
1104
+ ```
1105
+ Use nstbrowser-ai-agent to test the login flow. Run nstbrowser-ai-agent --help to see available commands.
1106
+ ```
1107
+
1108
+ The `--help` output is comprehensive and most agents can figure it out from there.
1109
+
1110
+ ### AI Coding Assistants (recommended)
1111
+
1112
+ Add the skill to your AI coding assistant for richer context:
1113
+
1114
+ ```bash
1115
+ npx skills add nstbrowser/nstbrowser-ai-agent
1116
+ ```
1117
+
1118
+ This works with Claude Code, Codex, Cursor, Gemini CLI, GitHub Copilot, Goose, OpenCode, and Windsurf. The skill is fetched from the repository, so it stays up to date automatically -- do not copy `SKILL.md` from `node_modules` as it will become stale.
1119
+
1120
+ ### Claude Code
1121
+
1122
+ Install as a Claude Code skill:
1123
+
1124
+ ```bash
1125
+ npx skills add nstbrowser/nstbrowser-ai-agent
1126
+ ```
1127
+
1128
+ This adds the skill to `.claude/skills/nstbrowser-ai-agent/SKILL.md` in your project. The skill teaches Claude Code the full nstbrowser-ai-agent workflow, including the snapshot-ref interaction pattern, session management, and timeout handling.
1129
+
1130
+ ### AGENTS.md / CLAUDE.md
1131
+
1132
+ For more consistent results, add to your project or global instructions file:
1133
+
1134
+ ```markdown
1135
+ ## Browser Automation
1136
+
1137
+ Use `nstbrowser-ai-agent` for web automation. Run `nstbrowser-ai-agent --help` for all commands.
1138
+
1139
+ Core workflow:
1140
+ 1. `nstbrowser-ai-agent open <url>` - Navigate to page
1141
+ 2. `nstbrowser-ai-agent snapshot -i` - Get interactive elements with refs (@e1, @e2)
1142
+ 3. `nstbrowser-ai-agent click @e1` / `fill @e2 "text"` - Interact using refs
1143
+ 4. Re-snapshot after page changes
1144
+ ```
1145
+
1146
+ ## Nstbrowser Integration
1147
+
1148
+ [Nstbrowser](https://www.nstbrowser.io) provides advanced browser fingerprinting and anti-detection capabilities for web automation. It offers local browser instances with customizable fingerprints, proxy management, and profile persistence.
1149
+
1150
+ **Nstbrowser is the default provider** - you don't need to specify `-p nst` unless you want to be explicit.
1151
+
1152
+ **Setup:**
1153
+
1154
+ 1. Download and install the Nstbrowser client from [nstbrowser.io](https://www.nstbrowser.io)
1155
+ 2. Start the Nstbrowser client application
1156
+ 3. Get your API key from the Nstbrowser dashboard
1157
+
1158
+ **Usage:**
1159
+
1160
+ ```bash
1161
+ # Set environment variables
1162
+ export NST_API_KEY="your-api-key"
1163
+ export NST_HOST="localhost" # Optional, default: localhost
1164
+ export NST_PORT="8848" # Optional, default: 8848
1165
+
1166
+ # Launch browser (uses Nstbrowser by default)
1167
+ nstbrowser-ai-agent open https://example.com
1168
+
1169
+ # Or use a named profile
1170
+ export NST_PROFILE="my-profile"
1171
+ nstbrowser-ai-agent open https://example.com
1172
+
1173
+ # Or be explicit with -p nst
1174
+ nstbrowser-ai-agent -p nst open https://example.com
1175
+ ```
1176
+
1177
+ Or use environment variables for persistent configuration:
1178
+
1179
+ ```bash
1180
+ export NSTBROWSER_AI_AGENT_PROVIDER=nst # Optional, nst is default
1181
+ export NST_API_KEY="your-api-key"
1182
+ export NST_PROFILE="my-profile"
1183
+ nstbrowser-ai-agent open https://example.com
1184
+ ```
1185
+
1186
+ **Profile Management:**
1187
+
1188
+ ```bash
1189
+ # With default NST provider (NST_API_KEY set), no 'nst' prefix needed:
1190
+ nstbrowser-ai-agent profile list # List all profiles
1191
+ nstbrowser-ai-agent profile create myprofile \
1192
+ --proxy-host 127.0.0.1 --proxy-port 1080 --proxy-enabled # Create new profile
1193
+
1194
+ # Traditional explicit syntax still works:
1195
+ nstbrowser-ai-agent nst profile list
1196
+ nstbrowser-ai-agent nst profile create myprofile \
1197
+ --proxy-host 127.0.0.1 --proxy-port 1080 --proxy-enabled
1198
+
1199
+ # Update proxy settings
1200
+ nstbrowser-ai-agent profile proxy update profile-123 \
1201
+ --host 127.0.0.1 --port 1080 --type http
1202
+
1203
+ # Manage tags
1204
+ nstbrowser-ai-agent profile tags create profile-123 "production"
1205
+ nstbrowser-ai-agent profile tags list
1206
+
1207
+ # Manage groups
1208
+ nstbrowser-ai-agent profile groups list
1209
+ nstbrowser-ai-agent profile groups change group-id profile-123
1210
+
1211
+ # List profiles with cursor pagination (for large datasets)
1212
+ nstbrowser-ai-agent profile list-cursor --page-size 50
1213
+ nstbrowser-ai-agent profile list-cursor --cursor "token" --page-size 50
1214
+
1215
+ # Clear cache and cookies
1216
+ nstbrowser-ai-agent profile cache clear profile-123
1217
+ nstbrowser-ai-agent profile cookies clear profile-123
1218
+
1219
+ # Delete profiles (supports batch operations)
1220
+ nstbrowser-ai-agent profile delete profile-1 profile-2 profile-3
1221
+ ```
1222
+
1223
+ **Profile Selection (Name or ID):**
1224
+
1225
+ The `--profile` flag accepts either a profile name or profile ID (UUID). The system automatically detects UUID patterns:
1226
+
1227
+ ```bash
1228
+ # By profile name
1229
+ nstbrowser-ai-agent --profile my-profile open example.com
1230
+ nstbrowser-ai-agent browser start proxy_ph
1231
+
1232
+ # By profile ID (UUID format auto-detected)
1233
+ nstbrowser-ai-agent --profile ef2b083a-8f77-4a7f-8441-a8d56bbd832b open example.com
1234
+ nstbrowser-ai-agent browser start ef2b083a-8f77-4a7f-8441-a8d56bbd832b
1235
+
1236
+ # Both work the same way - no need to remember which flag to use
1237
+ # The system automatically detects if you're using a UUID or a name
1238
+ ```
1239
+
1240
+ You can still use `--profile-id` for explicit ID specification if preferred, but `--profile` now handles both formats automatically.
1241
+
1242
+ **Browser Instance Management:**
1243
+
1244
+ ```bash
1245
+ # With default NST provider (NST_API_KEY set), no 'nst' prefix needed:
1246
+ nstbrowser-ai-agent browser list # List running instances
1247
+ nstbrowser-ai-agent browser start profile-123 # Start browser for profile
1248
+ nstbrowser-ai-agent browser start-batch p1 p2 p3 # Start multiple browsers
1249
+ nstbrowser-ai-agent browser start-once # Start temporary browser
1250
+ nstbrowser-ai-agent browser stop profile-123 # Stop browser instance
1251
+ nstbrowser-ai-agent browser stop-all # Stop all instances
1252
+ nstbrowser-ai-agent browser pages profile-123 # Get browser pages/tabs
1253
+ nstbrowser-ai-agent browser debugger profile-123 # Get debugger URL
1254
+ nstbrowser-ai-agent browser cdp-url profile-123 # Get CDP WebSocket URL
1255
+ nstbrowser-ai-agent browser cdp-url-once # Get CDP URL for temp browser
1256
+ nstbrowser-ai-agent browser connect profile-123 # Connect and get CDP URL
1257
+ nstbrowser-ai-agent browser connect-once # Connect to temp browser
1258
+
1259
+ # Traditional explicit syntax still works:
1260
+ nstbrowser-ai-agent nst browser list
1261
+ nstbrowser-ai-agent nst browser start profile-123
1262
+ nstbrowser-ai-agent nst browser start-batch p1 p2 p3
1263
+ nstbrowser-ai-agent nst browser stop profile-123
1264
+ nstbrowser-ai-agent nst browser stop-all
1265
+ ```
1266
+
1267
+ **Environment Variables:**
1268
+
1269
+ <table>
1270
+ <thead>
1271
+ <tr>
1272
+ <th>Variable</th>
1273
+ <th>Description</th>
1274
+ <th>Default</th>
1275
+ </tr>
1276
+ </thead>
1277
+ <tbody>
1278
+ <tr>
1279
+ <td><code>NST_API_KEY</code></td>
1280
+ <td>Nstbrowser API key (required)</td>
1281
+ <td>(none)</td>
1282
+ </tr>
1283
+ <tr>
1284
+ <td><code>NST_HOST</code></td>
1285
+ <td>Nstbrowser API host</td>
1286
+ <td><code>localhost</code></td>
1287
+ </tr>
1288
+ <tr>
1289
+ <td><code>NST_PORT</code></td>
1290
+ <td>Nstbrowser API port</td>
1291
+ <td><code>8848</code></td>
1292
+ </tr>
1293
+ <tr>
1294
+ <td><code>NST_PROFILE</code></td>
1295
+ <td>Profile name for provider=nst launch</td>
1296
+ <td>(none)</td>
1297
+ </tr>
1298
+ </tbody>
1299
+ </table>
1300
+
1301
+ **Features:**
1302
+
1303
+ - **Advanced Fingerprinting**: Customize browser fingerprints to avoid detection
1304
+ - **Profile Management**: Create and manage multiple browser profiles with different configurations
1305
+ - **Proxy Support**: Configure proxies per profile with authentication
1306
+ - **Tag System**: Organize profiles with tags for easy management
1307
+ - **Group Management**: Organize profiles into groups
1308
+ - **Local Execution**: Runs locally on your machine, no cloud dependency
1309
+ - **Batch Operations**: Perform operations on multiple profiles simultaneously
1310
+
1311
+ **Requirements:**
1312
+
1313
+ - Nstbrowser client must be installed and running
1314
+ - API key from Nstbrowser dashboard
1315
+ - Local network access to Nstbrowser API (default: localhost:8848)
1316
+
1317
+ When enabled, nstbrowser-ai-agent connects to your local Nstbrowser instance via CDP. All standard nstbrowser-ai-agent commands work identically, with the added benefit of Nstbrowser's anti-detection features.
1318
+
1319
+ ## License
1320
+
1321
+ Apache-2.0