@next-open-ai/openclawx 0.6.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (198) hide show
  1. package/README.md +523 -0
  2. package/apps/desktop/README.md +210 -0
  3. package/apps/desktop/renderer/dist/assets/index-CYkSfhcp.css +10 -0
  4. package/apps/desktop/renderer/dist/assets/index-FI6O25Ms.js +89 -0
  5. package/apps/desktop/renderer/dist/index.html +22 -0
  6. package/dist/cli/cli.d.ts +2 -0
  7. package/dist/cli/cli.js +198 -0
  8. package/dist/cli/service.d.ts +13 -0
  9. package/dist/cli/service.js +243 -0
  10. package/dist/cli.d.ts +5 -0
  11. package/dist/cli.js +5 -0
  12. package/dist/core/agent/agent-dir.d.ts +14 -0
  13. package/dist/core/agent/agent-dir.js +75 -0
  14. package/dist/core/agent/agent-manager.d.ts +64 -0
  15. package/dist/core/agent/agent-manager.js +278 -0
  16. package/dist/core/agent/config-manager.d.ts +25 -0
  17. package/dist/core/agent/config-manager.js +84 -0
  18. package/dist/core/agent/run.d.ts +26 -0
  19. package/dist/core/agent/run.js +65 -0
  20. package/dist/core/agent/skills.d.ts +20 -0
  21. package/dist/core/agent/skills.js +86 -0
  22. package/dist/core/config/desktop-config.d.ts +90 -0
  23. package/dist/core/config/desktop-config.js +521 -0
  24. package/dist/core/config/provider-support-default.d.ts +21 -0
  25. package/dist/core/config/provider-support-default.js +57 -0
  26. package/dist/core/installer/index.d.ts +1 -0
  27. package/dist/core/installer/index.js +1 -0
  28. package/dist/core/installer/skill-installer.d.ts +39 -0
  29. package/dist/core/installer/skill-installer.js +215 -0
  30. package/dist/core/mcp/adapter.d.ts +17 -0
  31. package/dist/core/mcp/adapter.js +49 -0
  32. package/dist/core/mcp/client.d.ts +24 -0
  33. package/dist/core/mcp/client.js +70 -0
  34. package/dist/core/mcp/config.d.ts +22 -0
  35. package/dist/core/mcp/config.js +69 -0
  36. package/dist/core/mcp/index.d.ts +18 -0
  37. package/dist/core/mcp/index.js +20 -0
  38. package/dist/core/mcp/operator.d.ts +15 -0
  39. package/dist/core/mcp/operator.js +72 -0
  40. package/dist/core/mcp/transport/index.d.ts +11 -0
  41. package/dist/core/mcp/transport/index.js +16 -0
  42. package/dist/core/mcp/transport/sse.d.ts +20 -0
  43. package/dist/core/mcp/transport/sse.js +82 -0
  44. package/dist/core/mcp/transport/stdio.d.ts +32 -0
  45. package/dist/core/mcp/transport/stdio.js +132 -0
  46. package/dist/core/mcp/types.d.ts +72 -0
  47. package/dist/core/mcp/types.js +5 -0
  48. package/dist/core/memory/build-summary.d.ts +6 -0
  49. package/dist/core/memory/build-summary.js +27 -0
  50. package/dist/core/memory/compaction-extension.d.ts +6 -0
  51. package/dist/core/memory/compaction-extension.js +23 -0
  52. package/dist/core/memory/embedding.d.ts +4 -0
  53. package/dist/core/memory/embedding.js +15 -0
  54. package/dist/core/memory/index.d.ts +29 -0
  55. package/dist/core/memory/index.js +70 -0
  56. package/dist/core/memory/remote-embedding.d.ts +10 -0
  57. package/dist/core/memory/remote-embedding.js +36 -0
  58. package/dist/core/memory/types.d.ts +16 -0
  59. package/dist/core/memory/types.js +1 -0
  60. package/dist/core/memory/vector-store.d.ts +15 -0
  61. package/dist/core/memory/vector-store.js +65 -0
  62. package/dist/core/tools/bookmark-tool.d.ts +9 -0
  63. package/dist/core/tools/bookmark-tool.js +118 -0
  64. package/dist/core/tools/browser-tool.d.ts +10 -0
  65. package/dist/core/tools/browser-tool.js +362 -0
  66. package/dist/core/tools/index.d.ts +4 -0
  67. package/dist/core/tools/index.js +4 -0
  68. package/dist/core/tools/install-skill-tool.d.ts +6 -0
  69. package/dist/core/tools/install-skill-tool.js +53 -0
  70. package/dist/core/tools/save-experience-tool.d.ts +5 -0
  71. package/dist/core/tools/save-experience-tool.js +54 -0
  72. package/dist/gateway/auth-hooks.d.ts +17 -0
  73. package/dist/gateway/auth-hooks.js +19 -0
  74. package/dist/gateway/backend-url.d.ts +2 -0
  75. package/dist/gateway/backend-url.js +11 -0
  76. package/dist/gateway/channel-handler.d.ts +6 -0
  77. package/dist/gateway/channel-handler.js +3 -0
  78. package/dist/gateway/clients.d.ts +5 -0
  79. package/dist/gateway/clients.js +4 -0
  80. package/dist/gateway/connection-handler.d.ts +6 -0
  81. package/dist/gateway/connection-handler.js +48 -0
  82. package/dist/gateway/index.d.ts +3 -0
  83. package/dist/gateway/index.js +2 -0
  84. package/dist/gateway/message-handler.d.ts +5 -0
  85. package/dist/gateway/message-handler.js +65 -0
  86. package/dist/gateway/methods/agent-cancel.d.ts +10 -0
  87. package/dist/gateway/methods/agent-cancel.js +17 -0
  88. package/dist/gateway/methods/agent-chat.d.ts +8 -0
  89. package/dist/gateway/methods/agent-chat.js +148 -0
  90. package/dist/gateway/methods/connect.d.ts +9 -0
  91. package/dist/gateway/methods/connect.js +18 -0
  92. package/dist/gateway/methods/install-skill-from-path.d.ts +13 -0
  93. package/dist/gateway/methods/install-skill-from-path.js +15 -0
  94. package/dist/gateway/methods/install-skill-from-upload.d.ts +14 -0
  95. package/dist/gateway/methods/install-skill-from-upload.js +13 -0
  96. package/dist/gateway/methods/run-scheduled-task.d.ts +15 -0
  97. package/dist/gateway/methods/run-scheduled-task.js +127 -0
  98. package/dist/gateway/paths.d.ts +20 -0
  99. package/dist/gateway/paths.js +19 -0
  100. package/dist/gateway/server.d.ts +8 -0
  101. package/dist/gateway/server.js +190 -0
  102. package/dist/gateway/sse-handler.d.ts +6 -0
  103. package/dist/gateway/sse-handler.js +3 -0
  104. package/dist/gateway/types.d.ts +90 -0
  105. package/dist/gateway/types.js +1 -0
  106. package/dist/gateway/utils.d.ts +22 -0
  107. package/dist/gateway/utils.js +67 -0
  108. package/dist/gateway/voice-handler.d.ts +12 -0
  109. package/dist/gateway/voice-handler.js +18 -0
  110. package/dist/index.d.ts +5 -0
  111. package/dist/index.js +5 -0
  112. package/dist/server/agent-config/agent-config.controller.d.ts +30 -0
  113. package/dist/server/agent-config/agent-config.controller.js +83 -0
  114. package/dist/server/agent-config/agent-config.module.d.ts +2 -0
  115. package/dist/server/agent-config/agent-config.module.js +19 -0
  116. package/dist/server/agent-config/agent-config.service.d.ts +53 -0
  117. package/dist/server/agent-config/agent-config.service.js +213 -0
  118. package/dist/server/agents/agents.controller.d.ts +41 -0
  119. package/dist/server/agents/agents.controller.js +118 -0
  120. package/dist/server/agents/agents.gateway.d.ts +21 -0
  121. package/dist/server/agents/agents.gateway.js +103 -0
  122. package/dist/server/agents/agents.module.d.ts +2 -0
  123. package/dist/server/agents/agents.module.js +20 -0
  124. package/dist/server/agents/agents.service.d.ts +63 -0
  125. package/dist/server/agents/agents.service.js +169 -0
  126. package/dist/server/app.module.d.ts +2 -0
  127. package/dist/server/app.module.js +38 -0
  128. package/dist/server/auth/auth.controller.d.ts +20 -0
  129. package/dist/server/auth/auth.controller.js +64 -0
  130. package/dist/server/auth/auth.module.d.ts +2 -0
  131. package/dist/server/auth/auth.module.js +19 -0
  132. package/dist/server/bootstrap.d.ts +15 -0
  133. package/dist/server/bootstrap.js +38 -0
  134. package/dist/server/config/config.controller.d.ts +73 -0
  135. package/dist/server/config/config.controller.js +95 -0
  136. package/dist/server/config/config.module.d.ts +2 -0
  137. package/dist/server/config/config.module.js +21 -0
  138. package/dist/server/config/config.service.d.ts +82 -0
  139. package/dist/server/config/config.service.js +123 -0
  140. package/dist/server/database/database.module.d.ts +2 -0
  141. package/dist/server/database/database.module.js +18 -0
  142. package/dist/server/database/database.service.d.ts +26 -0
  143. package/dist/server/database/database.service.js +253 -0
  144. package/dist/server/main.d.ts +1 -0
  145. package/dist/server/main.js +9 -0
  146. package/dist/server/saved-items/saved-items.controller.d.ts +57 -0
  147. package/dist/server/saved-items/saved-items.controller.js +229 -0
  148. package/dist/server/saved-items/saved-items.module.d.ts +2 -0
  149. package/dist/server/saved-items/saved-items.module.js +25 -0
  150. package/dist/server/saved-items/saved-items.service.d.ts +31 -0
  151. package/dist/server/saved-items/saved-items.service.js +105 -0
  152. package/dist/server/saved-items/tags.controller.d.ts +30 -0
  153. package/dist/server/saved-items/tags.controller.js +85 -0
  154. package/dist/server/saved-items/tags.service.d.ts +24 -0
  155. package/dist/server/saved-items/tags.service.js +84 -0
  156. package/dist/server/skills/skills.controller.d.ts +63 -0
  157. package/dist/server/skills/skills.controller.js +194 -0
  158. package/dist/server/skills/skills.module.d.ts +2 -0
  159. package/dist/server/skills/skills.module.js +22 -0
  160. package/dist/server/skills/skills.service.d.ts +65 -0
  161. package/dist/server/skills/skills.service.js +388 -0
  162. package/dist/server/tasks/tasks.controller.d.ts +52 -0
  163. package/dist/server/tasks/tasks.controller.js +163 -0
  164. package/dist/server/tasks/tasks.module.d.ts +2 -0
  165. package/dist/server/tasks/tasks.module.js +23 -0
  166. package/dist/server/tasks/tasks.service.d.ts +86 -0
  167. package/dist/server/tasks/tasks.service.js +327 -0
  168. package/dist/server/usage/usage.controller.d.ts +12 -0
  169. package/dist/server/usage/usage.controller.js +46 -0
  170. package/dist/server/usage/usage.module.d.ts +2 -0
  171. package/dist/server/usage/usage.module.js +19 -0
  172. package/dist/server/usage/usage.service.d.ts +21 -0
  173. package/dist/server/usage/usage.service.js +55 -0
  174. package/dist/server/users/users.controller.d.ts +35 -0
  175. package/dist/server/users/users.controller.js +69 -0
  176. package/dist/server/users/users.module.d.ts +2 -0
  177. package/dist/server/users/users.module.js +19 -0
  178. package/dist/server/users/users.service.d.ts +39 -0
  179. package/dist/server/users/users.service.js +140 -0
  180. package/dist/server/workspace/workspace.controller.d.ts +24 -0
  181. package/dist/server/workspace/workspace.controller.js +132 -0
  182. package/dist/server/workspace/workspace.module.d.ts +2 -0
  183. package/dist/server/workspace/workspace.module.js +21 -0
  184. package/dist/server/workspace/workspace.service.d.ts +36 -0
  185. package/dist/server/workspace/workspace.service.js +142 -0
  186. package/package.json +90 -0
  187. package/skills/agent-browser/SKILL.md +207 -0
  188. package/skills/agent-browser/references/authentication.md +202 -0
  189. package/skills/agent-browser/references/commands.md +259 -0
  190. package/skills/agent-browser/references/proxy-support.md +188 -0
  191. package/skills/agent-browser/references/session-management.md +193 -0
  192. package/skills/agent-browser/references/snapshot-refs.md +194 -0
  193. package/skills/agent-browser/references/video-recording.md +173 -0
  194. package/skills/agent-browser/templates/authenticated-session.sh +97 -0
  195. package/skills/agent-browser/templates/capture-workflow.sh +69 -0
  196. package/skills/agent-browser/templates/form-automation.sh +62 -0
  197. package/skills/find-skills/SKILL.md +140 -0
  198. package/skills/url-bookmark/SKILL.md +36 -0
@@ -0,0 +1,207 @@
1
+ ---
2
+ name: agent-browser
3
+ description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction.
4
+ allowed-tools: Bash(agent-browser:*)
5
+ ---
6
+
7
+ # Browser Automation with agent-browser
8
+
9
+ ## Core Workflow
10
+
11
+ Every browser automation follows this pattern:
12
+
13
+ 1. **Navigate**: `agent-browser open <url>`
14
+ 2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
15
+ 3. **Interact**: Use refs to click, fill, select
16
+ 4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
17
+
18
+ ```bash
19
+ agent-browser open https://example.com/form
20
+ agent-browser snapshot -i
21
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
22
+
23
+ agent-browser fill @e1 "user@example.com"
24
+ agent-browser fill @e2 "password123"
25
+ agent-browser click @e3
26
+ agent-browser wait --load networkidle
27
+ agent-browser snapshot -i # Check result
28
+ ```
29
+
30
+ ## Essential Commands
31
+
32
+ ```bash
33
+ # Navigation
34
+ agent-browser open <url> # Navigate (aliases: goto, navigate)
35
+ agent-browser close # Close browser
36
+
37
+ # Snapshot
38
+ agent-browser snapshot -i # Interactive elements with refs (recommended)
39
+ agent-browser snapshot -s "#selector" # Scope to CSS selector
40
+
41
+ # Interaction (use @refs from snapshot)
42
+ agent-browser click @e1 # Click element
43
+ agent-browser fill @e2 "text" # Clear and type text
44
+ agent-browser type @e2 "text" # Type without clearing
45
+ agent-browser select @e1 "option" # Select dropdown option
46
+ agent-browser check @e1 # Check checkbox
47
+ agent-browser press Enter # Press key
48
+ agent-browser scroll down 500 # Scroll page
49
+
50
+ # Get information
51
+ agent-browser get text @e1 # Get element text
52
+ agent-browser get url # Get current URL
53
+ agent-browser get title # Get page title
54
+
55
+ # Wait
56
+ agent-browser wait @e1 # Wait for element
57
+ agent-browser wait --load networkidle # Wait for network idle
58
+ agent-browser wait --url "**/page" # Wait for URL pattern
59
+ agent-browser wait 2000 # Wait milliseconds
60
+
61
+ # Capture
62
+ agent-browser screenshot # Screenshot to temp dir
63
+ agent-browser screenshot --full # Full page screenshot
64
+ agent-browser pdf output.pdf # Save as PDF
65
+ ```
66
+
67
+ ## Common Patterns
68
+
69
+ ### Form Submission
70
+
71
+ ```bash
72
+ agent-browser open https://example.com/signup
73
+ agent-browser snapshot -i
74
+ agent-browser fill @e1 "Jane Doe"
75
+ agent-browser fill @e2 "jane@example.com"
76
+ agent-browser select @e3 "California"
77
+ agent-browser check @e4
78
+ agent-browser click @e5
79
+ agent-browser wait --load networkidle
80
+ ```
81
+
82
+ ### Authentication with State Persistence
83
+
84
+ ```bash
85
+ # Login once and save state
86
+ agent-browser open https://app.example.com/login
87
+ agent-browser snapshot -i
88
+ agent-browser fill @e1 "$USERNAME"
89
+ agent-browser fill @e2 "$PASSWORD"
90
+ agent-browser click @e3
91
+ agent-browser wait --url "**/dashboard"
92
+ agent-browser state save auth.json
93
+
94
+ # Reuse in future sessions
95
+ agent-browser state load auth.json
96
+ agent-browser open https://app.example.com/dashboard
97
+ ```
98
+
99
+ ### Data Extraction
100
+
101
+ ```bash
102
+ agent-browser open https://example.com/products
103
+ agent-browser snapshot -i
104
+ agent-browser get text @e5 # Get specific element text
105
+ agent-browser get text body > page.txt # Get all page text
106
+
107
+ # JSON output for parsing
108
+ agent-browser snapshot -i --json
109
+ agent-browser get text @e1 --json
110
+ ```
111
+
112
+ ### Parallel Sessions
113
+
114
+ ```bash
115
+ agent-browser --session site1 open https://site-a.com
116
+ agent-browser --session site2 open https://site-b.com
117
+
118
+ agent-browser --session site1 snapshot -i
119
+ agent-browser --session site2 snapshot -i
120
+
121
+ agent-browser session list
122
+ ```
123
+
124
+ ### Visual Browser (Debugging)
125
+
126
+ ```bash
127
+ agent-browser --headed open https://example.com
128
+ agent-browser highlight @e1 # Highlight element
129
+ agent-browser record start demo.webm # Record session
130
+ ```
131
+
132
+ ### iOS Simulator (Mobile Safari)
133
+
134
+ ```bash
135
+ # List available iOS simulators
136
+ agent-browser device list
137
+
138
+ # Launch Safari on a specific device
139
+ agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
140
+
141
+ # Same workflow as desktop - snapshot, interact, re-snapshot
142
+ agent-browser -p ios snapshot -i
143
+ agent-browser -p ios tap @e1 # Tap (alias for click)
144
+ agent-browser -p ios fill @e2 "text"
145
+ agent-browser -p ios swipe up # Mobile-specific gesture
146
+
147
+ # Take screenshot
148
+ agent-browser -p ios screenshot mobile.png
149
+
150
+ # Close session (shuts down simulator)
151
+ agent-browser -p ios close
152
+ ```
153
+
154
+ **Requirements:** macOS with Xcode, Appium (`npm install -g appium && appium driver install xcuitest`)
155
+
156
+ **Real devices:** Works with physical iOS devices if pre-configured. Use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`.
157
+
158
+ ## Ref Lifecycle (Important)
159
+
160
+ Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after:
161
+
162
+ - Clicking links or buttons that navigate
163
+ - Form submissions
164
+ - Dynamic content loading (dropdowns, modals)
165
+
166
+ ```bash
167
+ agent-browser click @e5 # Navigates to new page
168
+ agent-browser snapshot -i # MUST re-snapshot
169
+ agent-browser click @e1 # Use new refs
170
+ ```
171
+
172
+ ## Semantic Locators (Alternative to Refs)
173
+
174
+ When refs are unavailable or unreliable, use semantic locators:
175
+
176
+ ```bash
177
+ agent-browser find text "Sign In" click
178
+ agent-browser find label "Email" fill "user@test.com"
179
+ agent-browser find role button click --name "Submit"
180
+ agent-browser find placeholder "Search" type "query"
181
+ agent-browser find testid "submit-btn" click
182
+ ```
183
+
184
+ ## Deep-Dive Documentation
185
+
186
+ | Reference | When to Use |
187
+ |-----------|-------------|
188
+ | [references/commands.md](references/commands.md) | Full command reference with all options |
189
+ | [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
190
+ | [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
191
+ | [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
192
+ | [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
193
+ | [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
194
+
195
+ ## Ready-to-Use Templates
196
+
197
+ | Template | Description |
198
+ |----------|-------------|
199
+ | [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation |
200
+ | [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state |
201
+ | [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots |
202
+
203
+ ```bash
204
+ ./templates/form-automation.sh https://example.com/form
205
+ ./templates/authenticated-session.sh https://app.example.com/login
206
+ ./templates/capture-workflow.sh https://example.com ./output
207
+ ```
@@ -0,0 +1,202 @@
1
+ # Authentication Patterns
2
+
3
+ Login flows, session persistence, OAuth, 2FA, and authenticated browsing.
4
+
5
+ **Related**: [session-management.md](session-management.md) for state persistence details, [SKILL.md](../SKILL.md) for quick start.
6
+
7
+ ## Contents
8
+
9
+ - [Basic Login Flow](#basic-login-flow)
10
+ - [Saving Authentication State](#saving-authentication-state)
11
+ - [Restoring Authentication](#restoring-authentication)
12
+ - [OAuth / SSO Flows](#oauth--sso-flows)
13
+ - [Two-Factor Authentication](#two-factor-authentication)
14
+ - [HTTP Basic Auth](#http-basic-auth)
15
+ - [Cookie-Based Auth](#cookie-based-auth)
16
+ - [Token Refresh Handling](#token-refresh-handling)
17
+ - [Security Best Practices](#security-best-practices)
18
+
19
+ ## Basic Login Flow
20
+
21
+ ```bash
22
+ # Navigate to login page
23
+ agent-browser open https://app.example.com/login
24
+ agent-browser wait --load networkidle
25
+
26
+ # Get form elements
27
+ agent-browser snapshot -i
28
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
29
+
30
+ # Fill credentials
31
+ agent-browser fill @e1 "user@example.com"
32
+ agent-browser fill @e2 "password123"
33
+
34
+ # Submit
35
+ agent-browser click @e3
36
+ agent-browser wait --load networkidle
37
+
38
+ # Verify login succeeded
39
+ agent-browser get url # Should be dashboard, not login
40
+ ```
41
+
42
+ ## Saving Authentication State
43
+
44
+ After logging in, save state for reuse:
45
+
46
+ ```bash
47
+ # Login first (see above)
48
+ agent-browser open https://app.example.com/login
49
+ agent-browser snapshot -i
50
+ agent-browser fill @e1 "user@example.com"
51
+ agent-browser fill @e2 "password123"
52
+ agent-browser click @e3
53
+ agent-browser wait --url "**/dashboard"
54
+
55
+ # Save authenticated state
56
+ agent-browser state save ./auth-state.json
57
+ ```
58
+
59
+ ## Restoring Authentication
60
+
61
+ Skip login by loading saved state:
62
+
63
+ ```bash
64
+ # Load saved auth state
65
+ agent-browser state load ./auth-state.json
66
+
67
+ # Navigate directly to protected page
68
+ agent-browser open https://app.example.com/dashboard
69
+
70
+ # Verify authenticated
71
+ agent-browser snapshot -i
72
+ ```
73
+
74
+ ## OAuth / SSO Flows
75
+
76
+ For OAuth redirects:
77
+
78
+ ```bash
79
+ # Start OAuth flow
80
+ agent-browser open https://app.example.com/auth/google
81
+
82
+ # Wait for the redirect to the Google sign-in page, then snapshot it
83
+ agent-browser wait --url "**/accounts.google.com**"
84
+ agent-browser snapshot -i
85
+
86
+ # Fill Google credentials
87
+ agent-browser fill @e1 "user@gmail.com"
88
+ agent-browser click @e2 # Next button
89
+ agent-browser wait 2000
90
+ agent-browser snapshot -i
91
+ agent-browser fill @e3 "password"
92
+ agent-browser click @e4 # Sign in
93
+
94
+ # Wait for redirect back
95
+ agent-browser wait --url "**/app.example.com**"
96
+ agent-browser state save ./oauth-state.json
97
+ ```
98
+
99
+ ## Two-Factor Authentication
100
+
101
+ Handle 2FA with manual intervention:
102
+
103
+ ```bash
104
+ # Login with credentials
105
+ agent-browser open https://app.example.com/login --headed # Show browser
106
+ agent-browser snapshot -i
107
+ agent-browser fill @e1 "user@example.com"
108
+ agent-browser fill @e2 "password123"
109
+ agent-browser click @e3
110
+
111
+ # Wait for user to complete 2FA manually
112
+ echo "Complete 2FA in the browser window..."
113
+ agent-browser wait --url "**/dashboard" --timeout 120000
114
+
115
+ # Save state after 2FA
116
+ agent-browser state save ./2fa-state.json
117
+ ```
118
+
119
+ ## HTTP Basic Auth
120
+
121
+ For sites using HTTP Basic Authentication:
122
+
123
+ ```bash
124
+ # Set credentials before navigation
125
+ agent-browser set credentials username password
126
+
127
+ # Navigate to protected resource
128
+ agent-browser open https://protected.example.com/api
129
+ ```
130
+
131
+ ## Cookie-Based Auth
132
+
133
+ Manually set authentication cookies:
134
+
135
+ ```bash
136
+ # Set auth cookie
137
+ agent-browser cookies set session_token "abc123xyz"
138
+
139
+ # Navigate to protected page
140
+ agent-browser open https://app.example.com/dashboard
141
+ ```
142
+
143
+ ## Token Refresh Handling
144
+
145
+ For sessions with expiring tokens:
146
+
147
+ ```bash
148
+ #!/bin/bash
149
+ # Wrapper that handles token refresh
150
+
151
+ STATE_FILE="./auth-state.json"
152
+
153
+ # Try loading existing state
154
+ if [[ -f "$STATE_FILE" ]]; then
155
+ agent-browser state load "$STATE_FILE"
156
+ agent-browser open https://app.example.com/dashboard
157
+
158
+ # Check if session is still valid
159
+ URL=$(agent-browser get url)
160
+ if [[ "$URL" == *"/login"* ]]; then
161
+ echo "Session expired, re-authenticating..."
162
+ # Perform fresh login
163
+ agent-browser snapshot -i
164
+ agent-browser fill @e1 "$USERNAME"
165
+ agent-browser fill @e2 "$PASSWORD"
166
+ agent-browser click @e3
167
+ agent-browser wait --url "**/dashboard"
168
+ agent-browser state save "$STATE_FILE"
169
+ fi
170
+ else
171
+ # First-time login
172
+ agent-browser open https://app.example.com/login
173
+ # ... login flow ...
174
+ fi
175
+ ```
176
+
177
+ ## Security Best Practices
178
+
179
+ 1. **Never commit state files** - They contain session tokens
180
+ ```bash
181
+ echo "*-state.json" >> .gitignore
182
+ ```
183
+
184
+ 2. **Use environment variables for credentials**
185
+ ```bash
186
+ agent-browser fill @e1 "$APP_USERNAME"
187
+ agent-browser fill @e2 "$APP_PASSWORD"
188
+ ```
189
+
190
+ 3. **Clean up after automation**
191
+ ```bash
192
+ agent-browser cookies clear
193
+ rm -f ./auth-state.json
194
+ ```
195
+
196
+ 4. **Use short-lived sessions for CI/CD**
197
+ ```bash
198
+ # Don't persist state in CI
199
+ agent-browser open https://app.example.com/login
200
+ # ... login and perform actions ...
201
+ agent-browser close # Session ends, nothing persisted
202
+ ```
@@ -0,0 +1,259 @@
1
+ # Command Reference
2
+
3
+ Complete reference for all agent-browser commands. For quick start and common patterns, see [SKILL.md](../SKILL.md).
4
+
5
+ ## Navigation
6
+
7
+ ```bash
8
+ agent-browser open <url> # Navigate to URL (aliases: goto, navigate)
9
+ # Supports: https://, http://, file://, about:, data:
10
+ # Auto-prepends https:// if no protocol given
11
+ agent-browser back # Go back
12
+ agent-browser forward # Go forward
13
+ agent-browser reload # Reload page
14
+ agent-browser close # Close browser (aliases: quit, exit)
15
+ agent-browser connect 9222 # Connect to browser via CDP port
16
+ ```
17
+
18
+ ## Snapshot (page analysis)
19
+
20
+ ```bash
21
+ agent-browser snapshot # Full accessibility tree
22
+ agent-browser snapshot -i # Interactive elements only (recommended)
23
+ agent-browser snapshot -c # Compact output
24
+ agent-browser snapshot -d 3 # Limit depth to 3
25
+ agent-browser snapshot -s "#main" # Scope to CSS selector
26
+ ```
27
+
28
+ ## Interactions (use @refs from snapshot)
29
+
30
+ ```bash
31
+ agent-browser click @e1 # Click
32
+ agent-browser dblclick @e1 # Double-click
33
+ agent-browser focus @e1 # Focus element
34
+ agent-browser fill @e2 "text" # Clear and type
35
+ agent-browser type @e2 "text" # Type without clearing
36
+ agent-browser press Enter # Press key (alias: key)
37
+ agent-browser press Control+a # Key combination
38
+ agent-browser keydown Shift # Hold key down
39
+ agent-browser keyup Shift # Release key
40
+ agent-browser hover @e1 # Hover
41
+ agent-browser check @e1 # Check checkbox
42
+ agent-browser uncheck @e1 # Uncheck checkbox
43
+ agent-browser select @e1 "value" # Select dropdown option
44
+ agent-browser select @e1 "a" "b" # Select multiple options
45
+ agent-browser scroll down 500 # Scroll page (default: down 300px)
46
+ agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto)
47
+ agent-browser drag @e1 @e2 # Drag and drop
48
+ agent-browser upload @e1 file.pdf # Upload files
49
+ ```
50
+
51
+ ## Get Information
52
+
53
+ ```bash
54
+ agent-browser get text @e1 # Get element text
55
+ agent-browser get html @e1 # Get innerHTML
56
+ agent-browser get value @e1 # Get input value
57
+ agent-browser get attr @e1 href # Get attribute
58
+ agent-browser get title # Get page title
59
+ agent-browser get url # Get current URL
60
+ agent-browser get count ".item" # Count matching elements
61
+ agent-browser get box @e1 # Get bounding box
62
+ agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.)
63
+ ```
64
+
65
+ ## Check State
66
+
67
+ ```bash
68
+ agent-browser is visible @e1 # Check if visible
69
+ agent-browser is enabled @e1 # Check if enabled
70
+ agent-browser is checked @e1 # Check if checked
71
+ ```
72
+
73
+ ## Screenshots and PDF
74
+
75
+ ```bash
76
+ agent-browser screenshot # Save to temporary directory
77
+ agent-browser screenshot path.png # Save to specific path
78
+ agent-browser screenshot --full # Full page
79
+ agent-browser pdf output.pdf # Save as PDF
80
+ ```
81
+
82
+ ## Video Recording
83
+
84
+ ```bash
85
+ agent-browser record start ./demo.webm # Start recording
86
+ agent-browser click @e1 # Perform actions
87
+ agent-browser record stop # Stop and save video
88
+ agent-browser record restart ./take2.webm # Stop current + start new
89
+ ```
90
+
91
+ ## Wait
92
+
93
+ ```bash
94
+ agent-browser wait @e1 # Wait for element
95
+ agent-browser wait 2000 # Wait milliseconds
96
+ agent-browser wait --text "Success" # Wait for text (or -t)
97
+ agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u)
98
+ agent-browser wait --load networkidle # Wait for network idle (or -l)
99
+ agent-browser wait --fn "window.ready" # Wait for JS condition (or -f)
100
+ ```
101
+
102
+ ## Mouse Control
103
+
104
+ ```bash
105
+ agent-browser mouse move 100 200 # Move mouse
106
+ agent-browser mouse down left # Press button
107
+ agent-browser mouse up left # Release button
108
+ agent-browser mouse wheel 100 # Scroll wheel
109
+ ```
110
+
111
+ ## Semantic Locators (alternative to refs)
112
+
113
+ ```bash
114
+ agent-browser find role button click --name "Submit"
115
+ agent-browser find text "Sign In" click
116
+ agent-browser find text "Sign In" click --exact # Exact match only
117
+ agent-browser find label "Email" fill "user@test.com"
118
+ agent-browser find placeholder "Search" type "query"
119
+ agent-browser find alt "Logo" click
120
+ agent-browser find title "Close" click
121
+ agent-browser find testid "submit-btn" click
122
+ agent-browser find first ".item" click
123
+ agent-browser find last ".item" click
124
+ agent-browser find nth 2 "a" hover
125
+ ```
126
+
127
+ ## Browser Settings
128
+
129
+ ```bash
130
+ agent-browser set viewport 1920 1080 # Set viewport size
131
+ agent-browser set device "iPhone 14" # Emulate device
132
+ agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation)
133
+ agent-browser set offline on # Toggle offline mode
134
+ agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
135
+ agent-browser set credentials user pass # HTTP basic auth (alias: auth)
136
+ agent-browser set media dark # Emulate color scheme
137
+ agent-browser set media light reduced-motion # Light mode + reduced motion
138
+ ```
139
+
140
+ ## Cookies and Storage
141
+
142
+ ```bash
143
+ agent-browser cookies # Get all cookies
144
+ agent-browser cookies set name value # Set cookie
145
+ agent-browser cookies clear # Clear cookies
146
+ agent-browser storage local # Get all localStorage
147
+ agent-browser storage local key # Get specific key
148
+ agent-browser storage local set k v # Set value
149
+ agent-browser storage local clear # Clear all
150
+ ```
151
+
152
+ ## Network
153
+
154
+ ```bash
155
+ agent-browser network route <url> # Intercept requests
156
+ agent-browser network route <url> --abort # Block requests
157
+ agent-browser network route <url> --body '{}' # Mock response
158
+ agent-browser network unroute [url] # Remove routes
159
+ agent-browser network requests # View tracked requests
160
+ agent-browser network requests --filter api # Filter requests
161
+ ```
162
+
163
+ ## Tabs and Windows
164
+
165
+ ```bash
166
+ agent-browser tab # List tabs
167
+ agent-browser tab new [url] # New tab
168
+ agent-browser tab 2 # Switch to tab by index
169
+ agent-browser tab close # Close current tab
170
+ agent-browser tab close 2 # Close tab by index
171
+ agent-browser window new # New window
172
+ ```
173
+
174
+ ## Frames
175
+
176
+ ```bash
177
+ agent-browser frame "#iframe" # Switch to iframe
178
+ agent-browser frame main # Back to main frame
179
+ ```
180
+
181
+ ## Dialogs
182
+
183
+ ```bash
184
+ agent-browser dialog accept [text] # Accept dialog
185
+ agent-browser dialog dismiss # Dismiss dialog
186
+ ```
187
+
188
+ ## JavaScript
189
+
190
+ ```bash
191
+ agent-browser eval "document.title" # Simple expressions only
192
+ agent-browser eval -b "<base64>" # Any JavaScript (base64 encoded)
193
+ agent-browser eval --stdin # Read script from stdin
194
+ ```
195
+
196
+ Use `-b`/`--base64` or `--stdin` for anything beyond a simple expression: passing a script inline requires shell escaping of nested quotes and special characters, which is error-prone.
197
+
198
+ ```bash
199
+ # Base64 encode your script, then:
200
+ agent-browser eval -b "ZG9jdW1lbnQucXVlcnlTZWxlY3RvcignW3NyYyo9Il9uZXh0Il0nKQ=="
201
+
202
+ # Or use stdin with heredoc for multiline scripts:
203
+ cat <<'EOF' | agent-browser eval --stdin
204
+ const links = document.querySelectorAll('a');
205
+ Array.from(links).map(a => a.href);
206
+ EOF
207
+ ```
208
+
209
+ ## State Management
210
+
211
+ ```bash
212
+ agent-browser state save auth.json # Save cookies, storage, auth state
213
+ agent-browser state load auth.json # Restore saved state
214
+ ```
215
+
216
+ ## Global Options
217
+
218
+ ```bash
219
+ agent-browser --session <name> ... # Isolated browser session
220
+ agent-browser --json ... # JSON output for parsing
221
+ agent-browser --headed ... # Show browser window (not headless)
222
+ agent-browser --full ... # Full page screenshot (-f)
223
+ agent-browser --cdp <port> ... # Connect via Chrome DevTools Protocol
224
+ agent-browser -p <provider> ... # Cloud browser provider (--provider)
225
+ agent-browser --proxy <url> ... # Use proxy server
226
+ agent-browser --headers <json> ... # HTTP headers scoped to URL's origin
227
+ agent-browser --executable-path <p> # Custom browser executable
228
+ agent-browser --extension <path> ... # Load browser extension (repeatable)
229
+ agent-browser --ignore-https-errors # Ignore SSL certificate errors
230
+ agent-browser --help # Show help (-h)
231
+ agent-browser --version # Show version (-V)
232
+ agent-browser <command> --help # Show detailed help for a command
233
+ ```
234
+
235
+ ## Debugging
236
+
237
+ ```bash
238
+ agent-browser --headed open example.com # Show browser window
239
+ agent-browser --cdp 9222 snapshot # Connect via CDP port
240
+ agent-browser connect 9222 # Alternative: connect command
241
+ agent-browser console # View console messages
242
+ agent-browser console --clear # Clear console
243
+ agent-browser errors # View page errors
244
+ agent-browser errors --clear # Clear errors
245
+ agent-browser highlight @e1 # Highlight element
246
+ agent-browser trace start # Start recording trace
247
+ agent-browser trace stop trace.zip # Stop and save trace
248
+ ```
249
+
250
+ ## Environment Variables
251
+
252
+ ```bash
253
+ AGENT_BROWSER_SESSION="mysession" # Default session name
254
+ AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path
255
+ AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths
256
+ AGENT_BROWSER_PROVIDER="browserbase" # Cloud browser provider
257
+ AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port
258
+ AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location
259
+ ```