@dyyz1993/agent-browser 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +907 -0
  3. package/bin/agent-browser-darwin-arm64 +0 -0
  4. package/bin/agent-browser.js +120 -0
  5. package/dist/__tests__/e2e/utils/test-helpers.d.ts +5 -0
  6. package/dist/__tests__/e2e/utils/test-helpers.d.ts.map +1 -0
  7. package/dist/__tests__/e2e/utils/test-helpers.js +22 -0
  8. package/dist/__tests__/e2e/utils/test-helpers.js.map +1 -0
  9. package/dist/__tests__/test-iframe.d.ts +2 -0
  10. package/dist/__tests__/test-iframe.d.ts.map +1 -0
  11. package/dist/__tests__/test-iframe.js +52 -0
  12. package/dist/__tests__/test-iframe.js.map +1 -0
  13. package/dist/__tests__/utils/parseCli.d.ts +20 -0
  14. package/dist/__tests__/utils/parseCli.d.ts.map +1 -0
  15. package/dist/__tests__/utils/parseCli.js +1086 -0
  16. package/dist/__tests__/utils/parseCli.js.map +1 -0
  17. package/dist/actions.d.ts +50 -0
  18. package/dist/actions.d.ts.map +1 -0
  19. package/dist/actions.js +2164 -0
  20. package/dist/actions.js.map +1 -0
  21. package/dist/browser.d.ts +556 -0
  22. package/dist/browser.d.ts.map +1 -0
  23. package/dist/browser.js +2599 -0
  24. package/dist/browser.js.map +1 -0
  25. package/dist/cli/commands.d.ts +8 -0
  26. package/dist/cli/commands.d.ts.map +1 -0
  27. package/dist/cli/commands.js +1038 -0
  28. package/dist/cli/commands.js.map +1 -0
  29. package/dist/cli/connection.d.ts +50 -0
  30. package/dist/cli/connection.d.ts.map +1 -0
  31. package/dist/cli/connection.js +595 -0
  32. package/dist/cli/connection.js.map +1 -0
  33. package/dist/cli/flags.d.ts +36 -0
  34. package/dist/cli/flags.d.ts.map +1 -0
  35. package/dist/cli/flags.js +206 -0
  36. package/dist/cli/flags.js.map +1 -0
  37. package/dist/cli/help.d.ts +4 -0
  38. package/dist/cli/help.d.ts.map +1 -0
  39. package/dist/cli/help.js +1024 -0
  40. package/dist/cli/help.js.map +1 -0
  41. package/dist/cli/output.d.ts +14 -0
  42. package/dist/cli/output.d.ts.map +1 -0
  43. package/dist/cli/output.js +456 -0
  44. package/dist/cli/output.js.map +1 -0
  45. package/dist/cli-new.d.ts +3 -0
  46. package/dist/cli-new.d.ts.map +1 -0
  47. package/dist/cli-new.js +308 -0
  48. package/dist/cli-new.js.map +1 -0
  49. package/dist/cli-old.d.ts +3 -0
  50. package/dist/cli-old.d.ts.map +1 -0
  51. package/dist/cli-old.js +1101 -0
  52. package/dist/cli-old.js.map +1 -0
  53. package/dist/cli.d.ts +3 -0
  54. package/dist/cli.d.ts.map +1 -0
  55. package/dist/cli.js +403 -0
  56. package/dist/cli.js.map +1 -0
  57. package/dist/content-detection.d.ts +18 -0
  58. package/dist/content-detection.d.ts.map +1 -0
  59. package/dist/content-detection.js +68 -0
  60. package/dist/content-detection.js.map +1 -0
  61. package/dist/daemon.d.ts +55 -0
  62. package/dist/daemon.d.ts.map +1 -0
  63. package/dist/daemon.js +426 -0
  64. package/dist/daemon.js.map +1 -0
  65. package/dist/diff.d.ts +42 -0
  66. package/dist/diff.d.ts.map +1 -0
  67. package/dist/diff.js +166 -0
  68. package/dist/diff.js.map +1 -0
  69. package/dist/human-mouse.d.ts +31 -0
  70. package/dist/human-mouse.d.ts.map +1 -0
  71. package/dist/human-mouse.js +184 -0
  72. package/dist/human-mouse.js.map +1 -0
  73. package/dist/ios-actions.d.ts +11 -0
  74. package/dist/ios-actions.d.ts.map +1 -0
  75. package/dist/ios-actions.js +228 -0
  76. package/dist/ios-actions.js.map +1 -0
  77. package/dist/ios-manager.d.ts +266 -0
  78. package/dist/ios-manager.d.ts.map +1 -0
  79. package/dist/ios-manager.js +1076 -0
  80. package/dist/ios-manager.js.map +1 -0
  81. package/dist/message-bridge.d.ts +10 -0
  82. package/dist/message-bridge.d.ts.map +1 -0
  83. package/dist/message-bridge.js +60 -0
  84. package/dist/message-bridge.js.map +1 -0
  85. package/dist/protocol.d.ts +26 -0
  86. package/dist/protocol.d.ts.map +1 -0
  87. package/dist/protocol.js +912 -0
  88. package/dist/protocol.js.map +1 -0
  89. package/dist/recorder/binding.d.ts +24 -0
  90. package/dist/recorder/binding.d.ts.map +1 -0
  91. package/dist/recorder/binding.js +215 -0
  92. package/dist/recorder/binding.js.map +1 -0
  93. package/dist/recorder/index.d.ts +4 -0
  94. package/dist/recorder/index.d.ts.map +1 -0
  95. package/dist/recorder/index.js +4 -0
  96. package/dist/recorder/index.js.map +1 -0
  97. package/dist/recorder/inject.js +1913 -0
  98. package/dist/recorder/recorder.d.ts +19 -0
  99. package/dist/recorder/recorder.d.ts.map +1 -0
  100. package/dist/recorder/recorder.js +101 -0
  101. package/dist/recorder/recorder.js.map +1 -0
  102. package/dist/recorder/store.d.ts +22 -0
  103. package/dist/recorder/store.d.ts.map +1 -0
  104. package/dist/recorder/store.js +150 -0
  105. package/dist/recorder/store.js.map +1 -0
  106. package/dist/recorder/types.d.ts +73 -0
  107. package/dist/recorder/types.d.ts.map +1 -0
  108. package/dist/recorder/types.js +5 -0
  109. package/dist/recorder/types.js.map +1 -0
  110. package/dist/snapshot.d.ts +81 -0
  111. package/dist/snapshot.d.ts.map +1 -0
  112. package/dist/snapshot.js +1348 -0
  113. package/dist/snapshot.js.map +1 -0
  114. package/dist/stream-server-standalone.d.ts +38 -0
  115. package/dist/stream-server-standalone.d.ts.map +1 -0
  116. package/dist/stream-server-standalone.js +494 -0
  117. package/dist/stream-server-standalone.js.map +1 -0
  118. package/dist/stream-server.d.ts +214 -0
  119. package/dist/stream-server.d.ts.map +1 -0
  120. package/dist/stream-server.js +811 -0
  121. package/dist/stream-server.js.map +1 -0
  122. package/dist/types.d.ts +914 -0
  123. package/dist/types.d.ts.map +1 -0
  124. package/dist/types.js +4 -0
  125. package/dist/types.js.map +1 -0
  126. package/dist/viewer-html.d.ts +2 -0
  127. package/dist/viewer-html.d.ts.map +1 -0
  128. package/dist/viewer-html.js +185 -0
  129. package/dist/viewer-html.js.map +1 -0
  130. package/dist/viewer-script.d.ts +47 -0
  131. package/dist/viewer-script.d.ts.map +1 -0
  132. package/dist/viewer-script.js +586 -0
  133. package/dist/viewer-script.js.map +1 -0
  134. package/package.json +86 -0
  135. package/scripts/build-all-platforms.sh +68 -0
  136. package/scripts/check-version-sync.js +39 -0
  137. package/scripts/check_goods_container.js +35 -0
  138. package/scripts/check_page_content.js +36 -0
  139. package/scripts/click_applause_rate.js +30 -0
  140. package/scripts/copy-native.js +36 -0
  141. package/scripts/copy-recorder.js +21 -0
  142. package/scripts/e2e-test-recorder.ts +584 -0
  143. package/scripts/explore_jd_page.js +31 -0
  144. package/scripts/extract_all_jd_data.js +80 -0
  145. package/scripts/extract_jd_product_detail.js +62 -0
  146. package/scripts/extract_jd_products_correct_links.js +78 -0
  147. package/scripts/extract_jd_products_final.js +80 -0
  148. package/scripts/extract_jd_reviews.js +48 -0
  149. package/scripts/extract_jd_seafood_final.js +78 -0
  150. package/scripts/extract_multiple_products.js +77 -0
  151. package/scripts/extract_products_no_scroll.js +68 -0
  152. package/scripts/extract_products_simple.js +68 -0
  153. package/scripts/find_applause_rate.js +26 -0
  154. package/scripts/find_jd_links.js +28 -0
  155. package/scripts/find_main_content.js +20 -0
  156. package/scripts/find_product_cards.js +38 -0
  157. package/scripts/find_root_content.js +26 -0
  158. package/scripts/find_unique_products.js +55 -0
  159. package/scripts/get_jd_product_detail.js +16 -0
  160. package/scripts/get_jd_products.js +23 -0
  161. package/scripts/get_jd_seafood_products.js +44 -0
  162. package/scripts/get_product_details_from_images.js +54 -0
  163. package/scripts/postinstall.js +235 -0
  164. package/scripts/scroll_and_get_products.js +47 -0
  165. package/scripts/scroll_deep_and_find.js +45 -0
  166. package/scripts/sync-version.js +69 -0
  167. package/scripts/verify-baidu-enter.ts +116 -0
  168. package/skills/agent-browser/SKILL.md +310 -0
  169. package/skills/agent-browser/references/authentication.md +198 -0
  170. package/skills/agent-browser/references/commands.md +471 -0
  171. package/skills/agent-browser/references/data-extraction.md +377 -0
  172. package/skills/agent-browser/references/proxy-support.md +188 -0
  173. package/skills/agent-browser/references/session-management.md +197 -0
  174. package/skills/agent-browser/references/snapshot-refs.md +379 -0
  175. package/skills/agent-browser/references/video-recording.md +173 -0
  176. package/skills/agent-browser/templates/api-interception.sh +53 -0
  177. package/skills/agent-browser/templates/authenticated-session.sh +97 -0
  178. package/skills/agent-browser/templates/capture-workflow.sh +69 -0
  179. package/skills/agent-browser/templates/data-extraction.sh +210 -0
  180. package/skills/agent-browser/templates/form-automation.sh +62 -0
  181. package/skills/skill-creator/LICENSE.txt +202 -0
  182. package/skills/skill-creator/SKILL.md +356 -0
  183. package/skills/skill-creator/references/output-patterns.md +82 -0
  184. package/skills/skill-creator/references/workflows.md +28 -0
  185. package/skills/skill-creator/scripts/init_skill.py +303 -0
  186. package/skills/skill-creator/scripts/package_skill.py +113 -0
  187. package/skills/skill-creator/scripts/quick_validate.py +95 -0
@@ -0,0 +1,197 @@
1
+ # Session Management
2
+
3
+ Multiple isolated browser sessions with state persistence and concurrent browsing.
4
+
5
+ **Related**: [authentication.md](authentication.md) for login patterns, [SKILL.md](../SKILL.md) for quick start.
6
+
7
+ ## Contents
8
+
9
+ - [Named Sessions](#named-sessions)
10
+ - [Session Isolation Properties](#session-isolation-properties)
11
+ - [Session State Persistence](#session-state-persistence)
12
+ - [Common Patterns](#common-patterns)
13
+ - [Default Session](#default-session)
14
+ - [Session Cleanup](#session-cleanup)
15
+ - [Best Practices](#best-practices)
16
+
17
+ ## Named Sessions
18
+
19
+ Use the `--session` flag to isolate browser contexts:
20
+
21
+ ```bash
22
+ # Session 1: Authentication flow
23
+ agent-browser --session auth open https://app.example.com/login
24
+
25
+ # Session 2: Public browsing (separate cookies, storage)
26
+ agent-browser --session public open https://example.com
27
+
28
+ # Commands are isolated by session
29
+ agent-browser --session auth fill @e1 "user@example.com"
30
+ agent-browser --session public get text body
31
+ ```
32
+
33
+ ## Session Isolation Properties
34
+
35
+ Each session has independent:
36
+ - Cookies
37
+ - LocalStorage / SessionStorage
38
+ - IndexedDB
39
+ - Cache
40
+ - Browsing history
41
+ - Open tabs
42
+
43
+ ## Session State Persistence
44
+
45
+ ### Save Session State
46
+
47
+ ```bash
48
+ # Save cookies, storage, and auth state
49
+ agent-browser state save /path/to/auth-state.json
50
+ ```
51
+
52
+ ### Load Session State
53
+
54
+ ```bash
55
+ # Set state path (loads at browser launch)
56
+ agent-browser state load /path/to/auth-state.json
57
+
58
+ # State loads at browser launch - use --state flag or close first
59
+ agent-browser --state /path/to/auth-state.json open https://app.example.com/dashboard
60
+
61
+ # Or: close and reopen to apply state
62
+ agent-browser state load /path/to/auth-state.json
63
+ agent-browser close
64
+ agent-browser open https://app.example.com/dashboard
65
+ ```
66
+
67
+ ### State File Contents
68
+
69
+ ```json
70
+ {
71
+ "cookies": [...],
72
+ "localStorage": {...},
73
+ "sessionStorage": {...},
74
+ "origins": [...]
75
+ }
76
+ ```
77
+
78
+ ## Common Patterns
79
+
80
+ ### Authenticated Session Reuse
81
+
82
+ ```bash
83
+ #!/bin/bash
84
+ # Save login state once, reuse many times
85
+
86
+ STATE_FILE="/tmp/auth-state.json"
87
+
88
+ # Check if we have saved state
89
+ if [[ -f "$STATE_FILE" ]]; then
90
+ agent-browser --state "$STATE_FILE" open https://app.example.com/dashboard
91
+ else
92
+ # Perform login
93
+ agent-browser open https://app.example.com/login
94
+ agent-browser snapshot -i
95
+ agent-browser fill @e1 "$USERNAME"
96
+ agent-browser fill @e2 "$PASSWORD"
97
+ agent-browser click @e3
98
+ agent-browser wait --load networkidle
99
+
100
+ # Save for future use
101
+ agent-browser state save "$STATE_FILE"
102
+ fi
103
+ ```
104
+
105
+ ### Concurrent Scraping
106
+
107
+ ```bash
108
+ #!/bin/bash
109
+ # Scrape multiple sites concurrently
110
+
111
+ # Start all sessions
112
+ agent-browser --session site1 open https://site1.com &
113
+ agent-browser --session site2 open https://site2.com &
114
+ agent-browser --session site3 open https://site3.com &
115
+ wait
116
+
117
+ # Extract from each
118
+ agent-browser --session site1 get text body > site1.txt
119
+ agent-browser --session site2 get text body > site2.txt
120
+ agent-browser --session site3 get text body > site3.txt
121
+
122
+ # Cleanup
123
+ agent-browser --session site1 close
124
+ agent-browser --session site2 close
125
+ agent-browser --session site3 close
126
+ ```
127
+
128
+ ### A/B Testing Sessions
129
+
130
+ ```bash
131
+ # Test different user experiences
132
+ agent-browser --session variant-a open "https://app.com?variant=a"
133
+ agent-browser --session variant-b open "https://app.com?variant=b"
134
+
135
+ # Compare
136
+ agent-browser --session variant-a screenshot /tmp/variant-a.png
137
+ agent-browser --session variant-b screenshot /tmp/variant-b.png
138
+ ```
139
+
140
+ ## Default Session
141
+
142
+ When `--session` is omitted, commands use the default session:
143
+
144
+ ```bash
145
+ # These use the same default session
146
+ agent-browser open https://example.com
147
+ agent-browser snapshot -i
148
+ agent-browser close # Closes default session
149
+ ```
150
+
151
+ ## Session Cleanup
152
+
153
+ ```bash
154
+ # Close specific session
155
+ agent-browser --session auth close
156
+
157
+ # List active sessions
158
+ agent-browser session list
159
+ ```
160
+
161
+ ## Best Practices
162
+
163
+ ### 1. Name Sessions Semantically
164
+
165
+ ```bash
166
+ # GOOD: Clear purpose
167
+ agent-browser --session github-auth open https://github.com
168
+ agent-browser --session docs-scrape open https://docs.example.com
169
+
170
+ # AVOID: Generic names
171
+ agent-browser --session s1 open https://github.com
172
+ ```
173
+
174
+ ### 2. Always Clean Up
175
+
176
+ ```bash
177
+ # Close sessions when done
178
+ agent-browser --session auth close
179
+ agent-browser --session scrape close
180
+ ```
181
+
182
+ ### 3. Handle State Files Securely
183
+
184
+ ```bash
185
+ # Don't commit state files (contain auth tokens!)
186
+ echo "*auth-state.json" >> .gitignore
187
+
188
+ # Delete after use
189
+ rm /tmp/auth-state.json
190
+ ```
191
+
192
+ ### 4. Timeout Long Sessions
193
+
194
+ ```bash
195
+ # Set timeout for automated scripts
196
+ timeout 60 agent-browser --session long-task get text body
197
+ ```
@@ -0,0 +1,379 @@
1
+ # Snapshot and Refs
2
+
3
+ Compact element references that reduce context usage dramatically for AI agents.
4
+
5
+ **Related**: [commands.md](commands.md) for full command reference, [SKILL.md](../SKILL.md) for quick start.
6
+
7
+ ## Contents
8
+
9
+ - [How Refs Work](#how-refs-work)
10
+ - [The Snapshot Command](#the-snapshot-command)
11
+ - [Using Refs](#using-refs)
12
+ - [Ref Lifecycle](#ref-lifecycle)
13
+ - [Converting to Shell Scripts](#converting-to-shell-scripts)
14
+ - [Best Practices](#best-practices)
15
+ - [Ref Notation Details](#ref-notation-details)
16
+ - [Troubleshooting](#troubleshooting)
17
+
18
+ ## How Refs Work
19
+
20
+ Traditional approach:
21
+ ```
22
+ Full DOM/HTML → AI parses → CSS selector → Action (~3000-5000 tokens)
23
+ ```
24
+
25
+ agent-browser approach:
26
+ ```
27
+ Compact snapshot → @refs assigned → Direct interaction (~200-400 tokens)
28
+ ```
29
+
30
+ ## The Snapshot Command
31
+
32
+ ```bash
33
+ # Basic snapshot (shows page structure)
34
+ agent-browser snapshot
35
+
36
+ # Interactive snapshot (-i flag) - RECOMMENDED
37
+ agent-browser snapshot -i
38
+
39
+ # Get element paths (xpath, cssPath)
40
+ agent-browser snapshot -s "body" --path
41
+
42
+ # Get element attributes
43
+ agent-browser snapshot -s "body" --attrs
44
+
45
+ # Get both paths and attributes
46
+ agent-browser snapshot -s "body" --path --attrs
47
+ ```
48
+
49
+ ### Path and Attributes Options
50
+
51
+ When you need to get element paths or attributes, use `--path` and `--attrs`:
52
+
53
+ ```bash
54
+ # Get xpath and cssPath for debugging or external tools
55
+ agent-browser snapshot --path
56
+ agent-browser snapshot -s "body" --path
57
+
58
+ # Get element attributes for analysis
59
+ agent-browser snapshot --attrs
60
+ agent-browser snapshot -s "body" --attrs
61
+ ```
62
+
63
+ **Note:** Using `-s` / `--selector` is optional but recommended to limit scope and prevent large responses.
64
+
65
+ **XPath Generation Rules:**
66
+ 1. Priority: `id` > `data-testid` > `data-id` > semantic class > position index
67
+ 2. Maximum 5 levels deep
68
+ 3. Filters out utility classes (Tailwind, etc.)
69
+ 4. Uses semantic tags (main, nav, form) as anchors
70
+
71
+ ### Snapshot Output Format
72
+
73
+ ```
74
+ Page: Example Site - Home
75
+ URL: https://example.com
76
+
77
+ @e1 [header]
78
+ @e2 [nav]
79
+ @e3 [a] "Home"
80
+ @e4 [a] "Products"
81
+ @e5 [a] "About"
82
+ @e6 [button] "Sign In"
83
+
84
+ @e7 [main]
85
+ @e8 [h1] "Welcome"
86
+ @e9 [form]
87
+ @e10 [input type="email"] placeholder="Email"
88
+ @e11 [input type="password"] placeholder="Password"
89
+ @e12 [button type="submit"] "Log In"
90
+
91
+ @e13 [footer]
92
+ @e14 [a] "Privacy Policy"
93
+ ```
94
+
95
+ ## Using Refs
96
+
97
+ Once you have refs, interact directly:
98
+
99
+ ```bash
100
+ # Click the "Sign In" button
101
+ agent-browser click @e6
102
+
103
+ # Fill email input
104
+ agent-browser fill @e10 "user@example.com"
105
+
106
+ # Fill password
107
+ agent-browser fill @e11 "password123"
108
+
109
+ # Submit the form
110
+ agent-browser click @e12
111
+ ```
112
+
113
+ ## Ref Lifecycle
114
+
115
+ **IMPORTANT**: Refs are invalidated when the page changes!
116
+
117
+ ```bash
118
+ # Get initial snapshot
119
+ agent-browser snapshot -i
120
+ # @e1 [button] "Next"
121
+
122
+ # Click triggers page change
123
+ agent-browser click @e1
124
+
125
+ # MUST re-snapshot to get new refs!
126
+ agent-browser snapshot -i
127
+ # @e1 [h1] "Page 2" ← Different element now!
128
+ ```
129
+
130
+ ## Converting to Shell Scripts
131
+
132
+ **CRITICAL**: Refs (`@e1`, `@e2`, etc.) are session-specific and cannot be hard-coded in standalone shell scripts!
133
+
134
+ ### The Problem
135
+
136
+ When you use `snapshot -i` during an interactive session, refs are dynamically assigned based on the current page state. These refs are stored in memory and only valid for that specific browser session. If you convert your workflow to a shell script, the refs will not match:
137
+
138
+ ```bash
139
+ # This works in interactive session
140
+ agent-browser snapshot -i
141
+ # Output: @e1 [button] "Submit"
142
+
143
+ agent-browser click @e1 # Works because ref is in memory
144
+
145
+ # But this FAILS in a shell script
146
+ #!/bin/bash
147
+ agent-browser open https://example.com
148
+ agent-browser click @e1 # ERROR: Ref @e1 not found!
149
+ ```
150
+
151
+ ### Solution: Use Alternative Locators
152
+
153
+ When creating reusable shell scripts, use one of these approaches instead of refs:
154
+
155
+ #### Option 1: Semantic Locators (Recommended)
156
+
157
+ ```bash
158
+ #!/bin/bash
159
+ agent-browser open https://example.com/login
160
+
161
+ # Use find command with semantic locators
162
+ agent-browser find label "Email" fill "user@example.com"
163
+ agent-browser find label "Password" fill "password123"
164
+ agent-browser find role button click --name "Sign In"
165
+ ```
166
+
167
+ #### Option 2: CSS Selectors
168
+
169
+ ```bash
170
+ #!/bin/bash
171
+ agent-browser open https://example.com/login
172
+
173
+ # Use CSS selectors directly
174
+ agent-browser fill "#email" "user@example.com"
175
+ agent-browser fill "#password" "password123"
176
+ agent-browser click "button[type='submit']"
177
+ ```
178
+
179
+ #### Option 3: XPath (from snapshot --path)
180
+
181
+ First, get the xpath during interactive session:
182
+
183
+ ```bash
184
+ agent-browser snapshot -s "body" --path
185
+ # Output includes: xpath="/html/body/div/form/button"
186
+ ```
187
+
188
+ Then use in script:
189
+
190
+ ```bash
191
+ #!/bin/bash
192
+ agent-browser open https://example.com/login
193
+ agent-browser fill "xpath=/html/body/div/form/input[@type='email']" "user@example.com"
194
+ agent-browser click "xpath=/html/body/div/form/button"
195
+ ```
196
+
197
+ #### Option 4: Snapshot with JSON Parsing
198
+
199
+ For dynamic pages where selectors may change, parse snapshot output:
200
+
201
+ ```bash
202
+ #!/bin/bash
203
+ agent-browser open https://example.com
204
+
205
+ # Get refs in JSON format and parse
206
+ SNAPSHOT=$(agent-browser snapshot -i --json)
207
+ BUTTON_REF=$(echo "$SNAPSHOT" | jq -r '.data.refs | to_entries[] | select(.value.name == "Submit") | .key')
208
+
209
+ # Note: This still requires the session to be active
210
+ agent-browser click "@$BUTTON_REF"
211
+ ```
212
+
213
+ #### Option 5: Use Recorder (Recommended for Complex Workflows)
214
+
215
+ The recorder captures your interactions and outputs stable selectors (xpath) that work in scripts:
216
+
217
+ ```bash
218
+ # Step 1: Start recording
219
+ agent-browser recorder start https://example.com
220
+
221
+ # Step 2: Perform your workflow (use refs normally)
222
+ agent-browser snapshot -i
223
+ agent-browser click @e1
224
+ agent-browser fill @e2 "text"
225
+ agent-browser click @e3
226
+
227
+ # Step 3: Stop recording and save
228
+ agent-browser recorder stop --output workflow.yaml
229
+ ```
230
+
231
+ The recorder outputs a YAML file with stable selectors:
232
+
233
+ ```yaml
234
+ steps:
235
+ - action: click
236
+ xpath: "//a[contains(text(), 'Learn more')]" # Use this in scripts!
237
+ - action: fill
238
+ xpath: "//input[@id='email']"
239
+ value: "text"
240
+ ```
241
+
242
+ **Important:** The recorder's "CLI Commands" section may contain incorrect syntax (like `click "first a"` or `click "@e1"`). Always use the `xpath` field from the YAML output:
243
+
244
+ ```bash
245
+ #!/bin/bash
246
+ # Convert recorder output to working script
247
+ agent-browser open https://example.com
248
+ agent-browser click 'xpath=//a[contains(text(), "Learn more")]'
249
+ agent-browser fill 'xpath=//input[@id="email"]' "text"
250
+ ```
251
+
252
+ ### Best Practice: Interactive → Script Workflow
253
+
254
+ 1. **Interactive Phase**: Use refs for quick iteration and testing
255
+ 2. **Script Phase**: Replace refs with semantic locators or CSS selectors
256
+ 3. **Verification**: Test the script independently to ensure it works
257
+
258
+ ```bash
259
+ # Interactive workflow (with refs)
260
+ agent-browser open https://example.com
261
+ agent-browser snapshot -i
262
+ agent-browser fill @e1 "test@example.com"
263
+ agent-browser click @e2
264
+
265
+ # Converted script (without refs)
266
+ #!/bin/bash
267
+ agent-browser open https://example.com
268
+ agent-browser find placeholder "Email" fill "test@example.com"
269
+ agent-browser find role button click --name "Submit"
270
+ ```
271
+
272
+ ### Quick Reference: Locator Types
273
+
274
+ | Locator Type | Example | Best For |
275
+ |-------------|---------|----------|
276
+ | Ref | `@e1` | Interactive sessions only |
277
+ | Semantic | `find label "Email"` | Reusable scripts (recommended) |
278
+ | CSS | `#email`, `.btn-submit` | Stable page structures |
279
+ | XPath | `xpath=//button[@type='submit']` | Complex queries |
280
+ | Role | `find role button --name "Submit"` | Accessibility-focused |
281
+ | Recorder | `recorder stop --output workflow.yaml` | Complex workflows, auto-capture xpath |
282
+
283
+ ## Best Practices
284
+
285
+ ### 1. Always Snapshot Before Interacting
286
+
287
+ ```bash
288
+ # CORRECT
289
+ agent-browser open https://example.com
290
+ agent-browser snapshot -i # Get refs first
291
+ agent-browser click @e1 # Use ref
292
+
293
+ # WRONG
294
+ agent-browser open https://example.com
295
+ agent-browser click @e1 # Ref doesn't exist yet!
296
+ ```
297
+
298
+ ### 2. Re-Snapshot After Navigation
299
+
300
+ ```bash
301
+ agent-browser click @e5 # Navigates to new page
302
+ agent-browser snapshot -i # Get new refs
303
+ agent-browser click @e1 # Use new refs
304
+ ```
305
+
306
+ ### 3. Re-Snapshot After Dynamic Changes
307
+
308
+ ```bash
309
+ agent-browser click @e1 # Opens dropdown
310
+ agent-browser snapshot -i # See dropdown items
311
+ agent-browser click @e7 # Select item
312
+ ```
313
+
314
+ ### 4. Snapshot Specific Regions
315
+
316
+ For complex pages, snapshot specific areas:
317
+
318
+ ```bash
319
+ # Snapshot just the form
320
+ agent-browser snapshot @e9
321
+ ```
322
+
323
+ ## Ref Notation Details
324
+
325
+ ```
326
+ @e1 [tag type="value"] "text content" placeholder="hint"
327
+ │ │ │ │ │
328
+ │ │ │ │ └─ Additional attributes
329
+ │ │ │ └─ Visible text
330
+ │ │ └─ Key attributes shown
331
+ │ └─ HTML tag name
332
+ └─ Unique ref ID
333
+ ```
334
+
335
+ ### Common Patterns
336
+
337
+ ```
338
+ @e1 [button] "Submit" # Button with text
339
+ @e2 [input type="email"] # Email input
340
+ @e3 [input type="password"] # Password input
341
+ @e4 [a href="/page"] "Link Text" # Anchor link
342
+ @e5 [select] # Dropdown
343
+ @e6 [textarea] placeholder="Message" # Text area
344
+ @e7 [div class="modal"] # Container (when relevant)
345
+ @e8 [img alt="Logo"] # Image
346
+ @e9 [checkbox] checked # Checked checkbox
347
+ @e10 [radio] selected # Selected radio
348
+ ```
349
+
350
+ ## Troubleshooting
351
+
352
+ ### "Ref not found" Error
353
+
354
+ ```bash
355
+ # Ref may have changed - re-snapshot
356
+ agent-browser snapshot -i
357
+ ```
358
+
359
+ ### Element Not Visible in Snapshot
360
+
361
+ ```bash
362
+ # Scroll to reveal element
363
+ agent-browser scroll down 500
364
+ agent-browser snapshot -i
365
+
366
+ # Or wait for dynamic content
367
+ agent-browser wait 1000
368
+ agent-browser snapshot -i
369
+ ```
370
+
371
+ ### Too Many Elements
372
+
373
+ ```bash
374
+ # Snapshot specific container
375
+ agent-browser snapshot @e5
376
+
377
+ # Or use get text for content-only extraction
378
+ agent-browser get text @e5
379
+ ```