@dyyz1993/agent-browser 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (187) hide show
  1. package/LICENSE +202 -0
  2. package/README.md +907 -0
  3. package/bin/agent-browser-darwin-arm64 +0 -0
  4. package/bin/agent-browser.js +120 -0
  5. package/dist/__tests__/e2e/utils/test-helpers.d.ts +5 -0
  6. package/dist/__tests__/e2e/utils/test-helpers.d.ts.map +1 -0
  7. package/dist/__tests__/e2e/utils/test-helpers.js +22 -0
  8. package/dist/__tests__/e2e/utils/test-helpers.js.map +1 -0
  9. package/dist/__tests__/test-iframe.d.ts +2 -0
  10. package/dist/__tests__/test-iframe.d.ts.map +1 -0
  11. package/dist/__tests__/test-iframe.js +52 -0
  12. package/dist/__tests__/test-iframe.js.map +1 -0
  13. package/dist/__tests__/utils/parseCli.d.ts +20 -0
  14. package/dist/__tests__/utils/parseCli.d.ts.map +1 -0
  15. package/dist/__tests__/utils/parseCli.js +1086 -0
  16. package/dist/__tests__/utils/parseCli.js.map +1 -0
  17. package/dist/actions.d.ts +50 -0
  18. package/dist/actions.d.ts.map +1 -0
  19. package/dist/actions.js +2164 -0
  20. package/dist/actions.js.map +1 -0
  21. package/dist/browser.d.ts +556 -0
  22. package/dist/browser.d.ts.map +1 -0
  23. package/dist/browser.js +2599 -0
  24. package/dist/browser.js.map +1 -0
  25. package/dist/cli/commands.d.ts +8 -0
  26. package/dist/cli/commands.d.ts.map +1 -0
  27. package/dist/cli/commands.js +1038 -0
  28. package/dist/cli/commands.js.map +1 -0
  29. package/dist/cli/connection.d.ts +50 -0
  30. package/dist/cli/connection.d.ts.map +1 -0
  31. package/dist/cli/connection.js +595 -0
  32. package/dist/cli/connection.js.map +1 -0
  33. package/dist/cli/flags.d.ts +36 -0
  34. package/dist/cli/flags.d.ts.map +1 -0
  35. package/dist/cli/flags.js +206 -0
  36. package/dist/cli/flags.js.map +1 -0
  37. package/dist/cli/help.d.ts +4 -0
  38. package/dist/cli/help.d.ts.map +1 -0
  39. package/dist/cli/help.js +1024 -0
  40. package/dist/cli/help.js.map +1 -0
  41. package/dist/cli/output.d.ts +14 -0
  42. package/dist/cli/output.d.ts.map +1 -0
  43. package/dist/cli/output.js +456 -0
  44. package/dist/cli/output.js.map +1 -0
  45. package/dist/cli-new.d.ts +3 -0
  46. package/dist/cli-new.d.ts.map +1 -0
  47. package/dist/cli-new.js +308 -0
  48. package/dist/cli-new.js.map +1 -0
  49. package/dist/cli-old.d.ts +3 -0
  50. package/dist/cli-old.d.ts.map +1 -0
  51. package/dist/cli-old.js +1101 -0
  52. package/dist/cli-old.js.map +1 -0
  53. package/dist/cli.d.ts +3 -0
  54. package/dist/cli.d.ts.map +1 -0
  55. package/dist/cli.js +403 -0
  56. package/dist/cli.js.map +1 -0
  57. package/dist/content-detection.d.ts +18 -0
  58. package/dist/content-detection.d.ts.map +1 -0
  59. package/dist/content-detection.js +68 -0
  60. package/dist/content-detection.js.map +1 -0
  61. package/dist/daemon.d.ts +55 -0
  62. package/dist/daemon.d.ts.map +1 -0
  63. package/dist/daemon.js +426 -0
  64. package/dist/daemon.js.map +1 -0
  65. package/dist/diff.d.ts +42 -0
  66. package/dist/diff.d.ts.map +1 -0
  67. package/dist/diff.js +166 -0
  68. package/dist/diff.js.map +1 -0
  69. package/dist/human-mouse.d.ts +31 -0
  70. package/dist/human-mouse.d.ts.map +1 -0
  71. package/dist/human-mouse.js +184 -0
  72. package/dist/human-mouse.js.map +1 -0
  73. package/dist/ios-actions.d.ts +11 -0
  74. package/dist/ios-actions.d.ts.map +1 -0
  75. package/dist/ios-actions.js +228 -0
  76. package/dist/ios-actions.js.map +1 -0
  77. package/dist/ios-manager.d.ts +266 -0
  78. package/dist/ios-manager.d.ts.map +1 -0
  79. package/dist/ios-manager.js +1076 -0
  80. package/dist/ios-manager.js.map +1 -0
  81. package/dist/message-bridge.d.ts +10 -0
  82. package/dist/message-bridge.d.ts.map +1 -0
  83. package/dist/message-bridge.js +60 -0
  84. package/dist/message-bridge.js.map +1 -0
  85. package/dist/protocol.d.ts +26 -0
  86. package/dist/protocol.d.ts.map +1 -0
  87. package/dist/protocol.js +912 -0
  88. package/dist/protocol.js.map +1 -0
  89. package/dist/recorder/binding.d.ts +24 -0
  90. package/dist/recorder/binding.d.ts.map +1 -0
  91. package/dist/recorder/binding.js +215 -0
  92. package/dist/recorder/binding.js.map +1 -0
  93. package/dist/recorder/index.d.ts +4 -0
  94. package/dist/recorder/index.d.ts.map +1 -0
  95. package/dist/recorder/index.js +4 -0
  96. package/dist/recorder/index.js.map +1 -0
  97. package/dist/recorder/inject.js +1913 -0
  98. package/dist/recorder/recorder.d.ts +19 -0
  99. package/dist/recorder/recorder.d.ts.map +1 -0
  100. package/dist/recorder/recorder.js +101 -0
  101. package/dist/recorder/recorder.js.map +1 -0
  102. package/dist/recorder/store.d.ts +22 -0
  103. package/dist/recorder/store.d.ts.map +1 -0
  104. package/dist/recorder/store.js +150 -0
  105. package/dist/recorder/store.js.map +1 -0
  106. package/dist/recorder/types.d.ts +73 -0
  107. package/dist/recorder/types.d.ts.map +1 -0
  108. package/dist/recorder/types.js +5 -0
  109. package/dist/recorder/types.js.map +1 -0
  110. package/dist/snapshot.d.ts +81 -0
  111. package/dist/snapshot.d.ts.map +1 -0
  112. package/dist/snapshot.js +1348 -0
  113. package/dist/snapshot.js.map +1 -0
  114. package/dist/stream-server-standalone.d.ts +38 -0
  115. package/dist/stream-server-standalone.d.ts.map +1 -0
  116. package/dist/stream-server-standalone.js +494 -0
  117. package/dist/stream-server-standalone.js.map +1 -0
  118. package/dist/stream-server.d.ts +214 -0
  119. package/dist/stream-server.d.ts.map +1 -0
  120. package/dist/stream-server.js +811 -0
  121. package/dist/stream-server.js.map +1 -0
  122. package/dist/types.d.ts +914 -0
  123. package/dist/types.d.ts.map +1 -0
  124. package/dist/types.js +4 -0
  125. package/dist/types.js.map +1 -0
  126. package/dist/viewer-html.d.ts +2 -0
  127. package/dist/viewer-html.d.ts.map +1 -0
  128. package/dist/viewer-html.js +185 -0
  129. package/dist/viewer-html.js.map +1 -0
  130. package/dist/viewer-script.d.ts +47 -0
  131. package/dist/viewer-script.d.ts.map +1 -0
  132. package/dist/viewer-script.js +586 -0
  133. package/dist/viewer-script.js.map +1 -0
  134. package/package.json +86 -0
  135. package/scripts/build-all-platforms.sh +68 -0
  136. package/scripts/check-version-sync.js +39 -0
  137. package/scripts/check_goods_container.js +35 -0
  138. package/scripts/check_page_content.js +36 -0
  139. package/scripts/click_applause_rate.js +30 -0
  140. package/scripts/copy-native.js +36 -0
  141. package/scripts/copy-recorder.js +21 -0
  142. package/scripts/e2e-test-recorder.ts +584 -0
  143. package/scripts/explore_jd_page.js +31 -0
  144. package/scripts/extract_all_jd_data.js +80 -0
  145. package/scripts/extract_jd_product_detail.js +62 -0
  146. package/scripts/extract_jd_products_correct_links.js +78 -0
  147. package/scripts/extract_jd_products_final.js +80 -0
  148. package/scripts/extract_jd_reviews.js +48 -0
  149. package/scripts/extract_jd_seafood_final.js +78 -0
  150. package/scripts/extract_multiple_products.js +77 -0
  151. package/scripts/extract_products_no_scroll.js +68 -0
  152. package/scripts/extract_products_simple.js +68 -0
  153. package/scripts/find_applause_rate.js +26 -0
  154. package/scripts/find_jd_links.js +28 -0
  155. package/scripts/find_main_content.js +20 -0
  156. package/scripts/find_product_cards.js +38 -0
  157. package/scripts/find_root_content.js +26 -0
  158. package/scripts/find_unique_products.js +55 -0
  159. package/scripts/get_jd_product_detail.js +16 -0
  160. package/scripts/get_jd_products.js +23 -0
  161. package/scripts/get_jd_seafood_products.js +44 -0
  162. package/scripts/get_product_details_from_images.js +54 -0
  163. package/scripts/postinstall.js +235 -0
  164. package/scripts/scroll_and_get_products.js +47 -0
  165. package/scripts/scroll_deep_and_find.js +45 -0
  166. package/scripts/sync-version.js +69 -0
  167. package/scripts/verify-baidu-enter.ts +116 -0
  168. package/skills/agent-browser/SKILL.md +310 -0
  169. package/skills/agent-browser/references/authentication.md +198 -0
  170. package/skills/agent-browser/references/commands.md +471 -0
  171. package/skills/agent-browser/references/data-extraction.md +377 -0
  172. package/skills/agent-browser/references/proxy-support.md +188 -0
  173. package/skills/agent-browser/references/session-management.md +197 -0
  174. package/skills/agent-browser/references/snapshot-refs.md +379 -0
  175. package/skills/agent-browser/references/video-recording.md +173 -0
  176. package/skills/agent-browser/templates/api-interception.sh +53 -0
  177. package/skills/agent-browser/templates/authenticated-session.sh +97 -0
  178. package/skills/agent-browser/templates/capture-workflow.sh +69 -0
  179. package/skills/agent-browser/templates/data-extraction.sh +210 -0
  180. package/skills/agent-browser/templates/form-automation.sh +62 -0
  181. package/skills/skill-creator/LICENSE.txt +202 -0
  182. package/skills/skill-creator/SKILL.md +356 -0
  183. package/skills/skill-creator/references/output-patterns.md +82 -0
  184. package/skills/skill-creator/references/workflows.md +28 -0
  185. package/skills/skill-creator/scripts/init_skill.py +303 -0
  186. package/skills/skill-creator/scripts/package_skill.py +113 -0
  187. package/skills/skill-creator/scripts/quick_validate.py +95 -0
@@ -0,0 +1,116 @@
1
+ import { chromium } from 'playwright';
2
+ import fs from 'fs';
3
+ import path from 'path';
4
+
5
+ async function verifyBaiduEnter() { // Manual check: injects the recorder script into a headed Chromium, submits a Baidu search with Enter, and logs the recorder panel/style state before and after the navigation.
6
+ console.log('=== 百度搜索 Enter 提交验证(类名检查) ===\n');
7
+
8
+ const browser = await chromium.launch({ headless: false, slowMo: 500 }); // visible window; slowMo delays each Playwright operation by 500 ms
9
+ const context = await browser.newContext();
10
+ const page = await context.newPage();
11
+
12
+ const injectScriptPath = path.join(process.cwd(), 'src/recorder/inject.js'); // resolved against the current working directory, not this file's location
13
+ const injectScript = fs.readFileSync(injectScriptPath, 'utf-8');
14
+
15
+ const steps: any[] = []; // steps received from the page through the __recorderSync binding
16
+
17
+ await context.exposeBinding('__recorderSync', async (source, payload: string) => { // callback runs when a page in this context calls the exposed __recorderSync function
18
+ if (!payload) return; // nothing to do for an empty payload
19
+
20
+ const targetPage = source.page;
21
+
22
+ try {
23
+ const step = JSON.parse(payload);
24
+ if (step && step.action) {
25
+ if (step.action === '__poll__') { // '__poll__': send the current steps back to the calling page without recording anything
26
+ await targetPage?.evaluate((s) => {
27
+ (window as any).__recorderSteps = s;
28
+ window.dispatchEvent(new CustomEvent('recorder:steps', { detail: s }));
29
+ }, steps).catch(() => {});
30
+ } else if (step.action === '__clear__') {
31
+ steps.length = 0; // '__clear__': empty the array in place
32
+ } else if (step.action !== '__update_step__') { // every other action is recorded; '__update_step__' is ignored
33
+ steps.push(step);
34
+ console.log(` [Step #${steps.length}]`, step.action, step.selector?.slice(0, 30) || '');
35
+ await targetPage?.evaluate((s) => {
36
+ (window as any).__recorderSteps = s;
37
+ window.dispatchEvent(new CustomEvent('recorder:steps', { detail: s }));
38
+ }, steps).catch(() => {});
39
+ }
40
+ }
41
+ } catch (e) {} // NOTE(review): errors here (e.g. a payload that is not valid JSON) are silently discarded
42
+ });
43
+
44
+ await context.addInitScript(injectScript); // registers the recorder script as an init script for this context
45
+
46
+ console.log('--- 步骤1:打开百度 ---');
47
+ await page.goto('https://www.baidu.com');
48
+ await page.waitForTimeout(2000); // fixed 2 s pause before inspecting the page
49
+
50
+ const status1 = await page.evaluate(() => { // snapshot of the recorder panel and its stylesheet before the search
51
+ const panel = document.getElementById('recorder-panel');
52
+ const style = document.getElementById('recorder-styles');
53
+
54
+ return {
55
+ hasPanel: !!panel,
56
+ panelClassName: panel?.className,
57
+ panelId: panel?.id,
58
+ hasStyles: !!style,
59
+ styleInHead: style ? document.head.contains(style) : false,
60
+ styleText: style ? style.textContent?.includes('.recorder-panel { position: fixed') : false,
61
+ computed: panel ? {
62
+ position: window.getComputedStyle(panel).position,
63
+ top: window.getComputedStyle(panel).top,
64
+ right: window.getComputedStyle(panel).right,
65
+ width: window.getComputedStyle(panel).width
66
+ } : null
67
+ };
68
+ });
69
+ console.log(' 初始状态:', JSON.stringify(status1, null, 2));
70
+
71
+ console.log('\n--- 步骤2:输入并提交 ---');
72
+ await page.locator('#kw').fill('playwright', { force: true }); // '#kw' is presumably Baidu's search input — confirm against the live page
73
+ await page.waitForTimeout(500);
74
+ await page.locator('#kw').press('Enter', { force: true });
75
+
76
+ await page.waitForLoadState('load').catch(() => {}); // a load-state wait failure is ignored; the fixed pause below still runs
77
+ await page.waitForTimeout(3000);
78
+
79
+ const status3 = await page.evaluate(() => { // same snapshot after the Enter submission, plus panel markup and the first CSS rule
80
+ const panel = document.getElementById('recorder-panel');
81
+ const style = document.getElementById('recorder-styles');
82
+
83
+ // Check whether the style rules have been applied
84
+ const rules = style?.sheet?.cssRules;
85
+ let firstRule = null;
86
+ if (rules && rules.length > 0) {
87
+ firstRule = rules[0].cssText?.slice(0, 200);
88
+ }
89
+
90
+ return {
91
+ hasPanel: !!panel,
92
+ panelClassName: panel?.className,
93
+ panelId: panel?.id,
94
+ panelOuterHTML: panel?.outerHTML?.slice(0, 300),
95
+ hasStyles: !!style,
96
+ styleInHead: style ? document.head.contains(style) : false,
97
+ styleText: style ? style.textContent?.includes('.recorder-panel { position: fixed') : false,
98
+ firstStyleRule: firstRule,
99
+ computed: panel ? {
100
+ position: window.getComputedStyle(panel).position,
101
+ top: window.getComputedStyle(panel).top,
102
+ right: window.getComputedStyle(panel).right,
103
+ width: window.getComputedStyle(panel).width
104
+ } : null
105
+ };
106
+ });
107
+
108
+ console.log('\n=== 导航后状态 ===');
109
+ console.log(JSON.stringify(status3, null, 2));
110
+
111
+ console.log('\n--- 浏览器将保持打开 10 秒 ---');
112
+ await page.waitForTimeout(10000);
113
+ await browser.close(); // NOTE(review): only reached when nothing above threw; there is no try/finally, so an earlier error skips this close
114
+ }
115
+
116
+ verifyBaiduEnter().catch(console.error);
@@ -0,0 +1,310 @@
1
+ ---
2
+ name: agent-browser
3
+ description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction.
4
+ allowed-tools: Bash(agent-browser:*)
5
+ ---
6
+
7
+ # Browser Automation with agent-browser
8
+
9
+ ## Core Workflow
10
+
11
+ Every browser automation follows this pattern:
12
+
13
+ 1. **Navigate**: `agent-browser open <url>`
14
+ 2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
15
+ 3. **Interact**: Use refs to click, fill, select
16
+ 4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
17
+
18
+ ```bash
19
+ agent-browser open https://example.com/form
20
+ agent-browser snapshot -i
21
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
22
+
23
+ agent-browser fill @e1 "user@example.com"
24
+ agent-browser fill @e2 "password123"
25
+ agent-browser click @e3
26
+ agent-browser wait --load networkidle
27
+ agent-browser snapshot -i # Check result
28
+ ```
29
+
30
+ ## Working with Iframes
31
+
32
+ Use `--in-frame` to operate inside iframes. The path uses iframe name/id or index:
33
+
34
+ ```bash
35
+ # Direct iframe by ID or name
36
+ agent-browser snapshot --in-frame "#my-iframe"
37
+
38
+ # Nested iframe using path (name/id or index)
39
+ agent-browser snapshot --in-frame "#outer-frame/inner-frame"
40
+
41
+ # Example: Click element inside nested cross-origin iframe
42
+ agent-browser open https://example.com
43
+ agent-browser snapshot --in-frame "#iframe-container"
44
+ agent-browser click @e1 --in-frame "#iframe-container/login-frame"
45
+ agent-browser fill "#username" "admin" --in-frame "#iframe-container/login-frame"
46
+ agent-browser get value "#username" --in-frame "#iframe-container/login-frame"
47
+ ```
48
+
49
+ ### Frame Path Syntax
50
+
51
+ The frame path supports:
52
+ - **ID/Name**: `#frame-id` or `#frame-name`
53
+ - **Index**: `#0`, `#1` (by position)
54
+ - **Nested**: `#parent/child/grandchild`
55
+
56
+ Examples:
57
+ - `#my-iframe` - Single iframe
58
+ - `#0` - First iframe
59
+ - `#outer-iframe/login-frame` - Nested iframes by name
60
+ - `#0/1` - First iframe's second child
61
+
62
+ ## Essential Commands
63
+
64
+ ```bash
65
+ # Navigation
66
+ agent-browser open <url> # Navigate (aliases: goto, navigate)
67
+ agent-browser close # Close browser
68
+
69
+ # Snapshot
70
+ agent-browser snapshot -i # Interactive elements with refs (recommended)
71
+ agent-browser snapshot -i -C # Include cursor-interactive elements (divs with onclick, cursor:pointer)
72
+ agent-browser snapshot -s "#selector" # Scope to CSS selector
73
+ agent-browser snapshot -s "body" --path # Include xpath and cssPath in refs
74
+ agent-browser snapshot -s "body" --attrs # Include element attributes in refs
75
+
76
+ # Interaction (use @refs from snapshot)
77
+ agent-browser click @e1 # Click element
78
+ agent-browser fill @e2 "text" # Clear and type text
79
+ agent-browser type @e2 "text" # Type without clearing
80
+ agent-browser select @e1 "option" # Select dropdown option
81
+ agent-browser check @e1 # Check checkbox
82
+ agent-browser press Enter # Press key
83
+ agent-browser scroll down 500 # Scroll page
84
+
85
+ # Get information
86
+ agent-browser get text @e1 # Get element text
87
+ agent-browser get url # Get current URL
88
+ agent-browser get title # Get page title
89
+
90
+ # Wait
91
+ agent-browser wait @e1 # Wait for element
92
+ agent-browser wait --load networkidle # Wait for network idle
93
+ agent-browser wait --url "**/page" # Wait for URL pattern
94
+ agent-browser wait 2000 # Wait milliseconds
95
+
96
+ # Capture
97
+ agent-browser screenshot # Screenshot to temp dir
98
+ agent-browser screenshot --full # Full page screenshot
99
+ agent-browser pdf output.pdf # Save as PDF
100
+ ```
101
+
102
+ ## Human-like Mouse Movement
103
+
104
+ Enable globally via environment variable to simulate natural mouse trajectories:
105
+
106
+ ```bash
107
+ # Enable human mode (default: arc path type)
108
+ export AGENT_BROWSER_HUMAN=1
109
+
110
+ # Or specify path type
111
+ export AGENT_BROWSER_HUMAN=bezier # Bezier curve with overshoot
112
+ export AGENT_BROWSER_HUMAN=arc # Smooth arc (default, most natural)
113
+ export AGENT_BROWSER_HUMAN=random # Random path with jitter
114
+ export AGENT_BROWSER_HUMAN=linear # Straight line (fastest)
115
+
116
+ # All interactions will use human-like movement
117
+ agent-browser click @e1
118
+ agent-browser fill @e1 "text"
119
+ agent-browser type @e1 "text"
120
+ agent-browser hover @e1
121
+ agent-browser dblclick @e1
122
+
123
+ # Wait with mouse wandering (when human mode enabled)
124
+ agent-browser wait 3000 # Wanders mouse while waiting
125
+
126
+ # Disable human mode
127
+ unset AGENT_BROWSER_HUMAN
128
+ ```
129
+
130
+ **Features:**
131
+ - Continues from last mouse position for realistic trajectories
132
+ - Natural acceleration/deceleration curves
133
+ - Randomized delays between movements
134
+ - Four trajectory types: `arc` (default), `bezier`, `random`, `linear`
135
+ - `wait <ms>` automatically does mouse wandering when enabled
136
+
137
+ ## Common Patterns
138
+
139
+ ### Form Submission
140
+
141
+ ```bash
142
+ agent-browser open https://example.com/signup
143
+ agent-browser snapshot -i
144
+ agent-browser fill @e1 "Jane Doe"
145
+ agent-browser fill @e2 "jane@example.com"
146
+ agent-browser select @e3 "California"
147
+ agent-browser check @e4
148
+ agent-browser click @e5
149
+ agent-browser wait --load networkidle
150
+ ```
151
+
152
+ ### Authentication with State Persistence
153
+
154
+ ```bash
155
+ # Login once and save state
156
+ agent-browser open https://app.example.com/login
157
+ agent-browser snapshot -i
158
+ agent-browser fill @e1 "$USERNAME"
159
+ agent-browser fill @e2 "$PASSWORD"
160
+ agent-browser click @e3
161
+ agent-browser wait --url "**/dashboard"
162
+ agent-browser state save auth.json
163
+
164
+ # Reuse in future sessions (use --state flag)
165
+ agent-browser --state auth.json open https://app.example.com/dashboard
166
+ ```
167
+
168
+ ### Data Extraction
169
+
170
+ ```bash
171
+ agent-browser open https://example.com/products
172
+ agent-browser snapshot -i
173
+ agent-browser get text @e5 # Get specific element text
174
+ agent-browser get text body > page.txt # Get all page text
175
+
176
+ # JSON output for parsing
177
+ agent-browser snapshot -i --json
178
+ agent-browser get text @e1 --json
179
+ ```
180
+
181
+ ### API Interception
182
+
183
+ Passively capture API responses without making direct requests. Useful for sites with anti-scraping measures.
184
+
185
+ ```bash
186
+ # 1. Open blank page first
187
+ agent-browser open "about:blank"
188
+
189
+ # 2. Start request listener in background
190
+ (agent-browser wait --request "api/users" --timeout 30000 > response.json) &
191
+ WAIT_PID=$!
192
+ sleep 1
193
+
194
+ # 3. Navigate to trigger the API call
195
+ agent-browser open "https://example.com/user/profile"
196
+
197
+ # 4. Wait for response
198
+ wait $WAIT_PID
199
+
200
+ # 5. Process captured data
201
+ jq '.body' response.json
202
+ ```
203
+
204
+ Example: Capture Douyin user videos
205
+ ```bash
206
+ agent-browser open "about:blank"
207
+ (agent-browser wait --request "aweme/post" --timeout 30000 > /tmp/douyin.json) &
208
+ sleep 1
209
+ agent-browser open "https://www.douyin.com/user/xxx"
210
+ sleep 5
211
+ wait
212
+ jq '.body.aweme_list[:10] | map({id, desc, stats})' /tmp/douyin.json
213
+ ```
214
+
215
+ ### Parallel Sessions
216
+
217
+ ```bash
218
+ agent-browser --session site1 open https://site-a.com
219
+ agent-browser --session site2 open https://site-b.com
220
+
221
+ agent-browser --session site1 snapshot -i
222
+ agent-browser --session site2 snapshot -i
223
+
224
+ agent-browser session list
225
+ ```
226
+
227
+ ### Visual Browser (Debugging)
228
+
229
+ ```bash
230
+ agent-browser --headed open https://example.com
231
+ agent-browser highlight @e1 # Highlight element
232
+ agent-browser record start demo.webm # Record session
233
+ ```
234
+
235
+ ### Local Files (PDFs, HTML)
236
+
237
+ ```bash
238
+ # Open local files with file:// URLs
239
+ agent-browser --allow-file-access open file:///path/to/document.pdf
240
+ agent-browser --allow-file-access open file:///path/to/page.html
241
+ agent-browser screenshot output.png
242
+ ```
243
+
244
+ ### iOS Simulator (Mobile Safari)
245
+
246
+ ```bash
247
+ # List available iOS simulators
248
+ agent-browser device list
249
+
250
+ # Launch Safari on a specific device
251
+ agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
252
+
253
+ # Same workflow as desktop - snapshot, interact, re-snapshot
254
+ agent-browser -p ios snapshot -i
255
+ agent-browser -p ios click @e1 # Click/tap element
256
+ agent-browser -p ios fill @e2 "text"
257
+ agent-browser -p ios scroll down 500 # Scroll gesture
258
+
259
+ # Take screenshot
260
+ agent-browser -p ios screenshot mobile.png
261
+
262
+ # Close session (shuts down simulator)
263
+ agent-browser -p ios close
264
+ ```
265
+
266
+ **Requirements:** macOS with Xcode, Appium (`npm install -g appium && appium driver install xcuitest`)
267
+
268
+ **Real devices:** Works with physical iOS devices if pre-configured. Use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`.
269
+
270
+ **Note:** iOS uses standard commands like `click`, `fill`, `scroll` instead of mobile-specific aliases like `tap` or `swipe`.
271
+
272
+ ## Ref Lifecycle (Important)
273
+
274
+ Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after:
275
+
276
+ - Clicking links or buttons that navigate
277
+ - Form submissions
278
+ - Dynamic content loading (dropdowns, modals)
279
+
280
+ ```bash
281
+ agent-browser click @e5 # Navigates to new page
282
+ agent-browser snapshot -i # MUST re-snapshot
283
+ agent-browser click @e1 # Use new refs
284
+ ```
285
+
286
+ **Important for Shell Scripts:** Refs are session-specific and cannot be used in standalone shell scripts. When converting interactive workflows to scripts, use semantic locators or CSS selectors instead. See [references/snapshot-refs.md](references/snapshot-refs.md#converting-to-shell-scripts) for details.
287
+
288
+ ## Semantic Locators (Alternative to Refs)
289
+
290
+ When refs are unavailable or unreliable, use semantic locators:
291
+
292
+ ```bash
293
+ agent-browser find text "Sign In" click
294
+ agent-browser find label "Email" fill "user@test.com"
295
+ agent-browser find role button click --name "Submit"
296
+ agent-browser find placeholder "Search" type "query"
297
+ agent-browser find testid "submit-btn" click
298
+ ```
299
+
300
+ ## Deep-Dive Documentation
301
+
302
+ | Reference | When to Use |
303
+ |-----------|-------------|
304
+ | [references/commands.md](references/commands.md) | Full command reference with all options |
305
+ | [references/data-extraction.md](references/data-extraction.md) | **Data extraction patterns: DOM, JS variables, API interception, infinite scroll, iframe** |
306
+ | [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
307
+ | [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
308
+ | [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
309
+ | [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
310
+ | [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
@@ -0,0 +1,198 @@
1
+ # Authentication Patterns
2
+
3
+ Login flows, session persistence, OAuth, 2FA, and authenticated browsing.
4
+
5
+ **Related**: [session-management.md](session-management.md) for state persistence details, [SKILL.md](../SKILL.md) for quick start.
6
+
7
+ ## Contents
8
+
9
+ - [Basic Login Flow](#basic-login-flow)
10
+ - [Saving Authentication State](#saving-authentication-state)
11
+ - [Restoring Authentication](#restoring-authentication)
12
+ - [OAuth / SSO Flows](#oauth--sso-flows)
13
+ - [Two-Factor Authentication](#two-factor-authentication)
14
+ - [HTTP Basic Auth](#http-basic-auth)
15
+ - [Cookie-Based Auth](#cookie-based-auth)
16
+ - [Token Refresh Handling](#token-refresh-handling)
17
+ - [Security Best Practices](#security-best-practices)
18
+
19
+ ## Basic Login Flow
20
+
21
+ ```bash
22
+ # Navigate to login page
23
+ agent-browser open https://app.example.com/login
24
+ agent-browser wait --load networkidle
25
+
26
+ # Get form elements
27
+ agent-browser snapshot -i
28
+ # Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
29
+
30
+ # Fill credentials
31
+ agent-browser fill @e1 "user@example.com"
32
+ agent-browser fill @e2 "password123"
33
+
34
+ # Submit
35
+ agent-browser click @e3
36
+ agent-browser wait --load networkidle
37
+
38
+ # Verify login succeeded
39
+ agent-browser get url # Should be dashboard, not login
40
+ ```
41
+
42
+ ## Saving Authentication State
43
+
44
+ After logging in, save state for reuse:
45
+
46
+ ```bash
47
+ # Login first (see above)
48
+ agent-browser open https://app.example.com/login
49
+ agent-browser snapshot -i
50
+ agent-browser fill @e1 "user@example.com"
51
+ agent-browser fill @e2 "password123"
52
+ agent-browser click @e3
53
+ agent-browser wait --url "**/dashboard"
54
+
55
+ # Save authenticated state
56
+ agent-browser state save ./auth-state.json
57
+ ```
58
+
59
+ ## Restoring Authentication
60
+
61
+ Skip login by loading saved state:
62
+
63
+ ```bash
64
+ # Use --state flag at browser launch
65
+ agent-browser --state ./auth-state.json open https://app.example.com/dashboard
66
+
67
+ # Verify authenticated
68
+ agent-browser snapshot -i
69
+ ```
70
+
71
+ ## OAuth / SSO Flows
72
+
73
+ For OAuth redirects:
74
+
75
+ ```bash
76
+ # Start OAuth flow
77
+ agent-browser open https://app.example.com/auth/google
78
+
79
+ # Wait for the redirect to Google's sign-in page
80
+ agent-browser wait --url "**/accounts.google.com**"
81
+ agent-browser snapshot -i
82
+
83
+ # Fill Google credentials
84
+ agent-browser fill @e1 "user@gmail.com"
85
+ agent-browser click @e2 # Next button
86
+ agent-browser wait 2000
87
+ agent-browser snapshot -i
88
+ agent-browser fill @e3 "password"
89
+ agent-browser click @e4 # Sign in
90
+
91
+ # Wait for redirect back
92
+ agent-browser wait --url "**/app.example.com**"
93
+ agent-browser state save ./oauth-state.json
94
+ ```
95
+
96
+ ## Two-Factor Authentication
97
+
98
+ Handle 2FA with manual intervention:
99
+
100
+ ```bash
101
+ # Login with credentials
102
+ agent-browser open https://app.example.com/login --headed # Show browser
103
+ agent-browser snapshot -i
104
+ agent-browser fill @e1 "user@example.com"
105
+ agent-browser fill @e2 "password123"
106
+ agent-browser click @e3
107
+
108
+ # Wait for user to complete 2FA manually
109
+ echo "Complete 2FA in the browser window..."
110
+ agent-browser wait --url "**/dashboard" --timeout 120000
111
+
112
+ # Save state after 2FA
113
+ agent-browser state save ./2fa-state.json
114
+ ```
115
+
116
+ ## HTTP Basic Auth
117
+
118
+ For sites using HTTP Basic Authentication:
119
+
120
+ ```bash
121
+ # Set credentials before navigation
122
+ agent-browser set credentials username password
123
+
124
+ # Navigate to protected resource
125
+ agent-browser open https://protected.example.com/api
126
+ ```
127
+
128
+ ## Cookie-Based Auth
129
+
130
+ Manually set authentication cookies:
131
+
132
+ ```bash
133
+ # Set auth cookie
134
+ agent-browser cookies set session_token "abc123xyz"
135
+
136
+ # Navigate to protected page
137
+ agent-browser open https://app.example.com/dashboard
138
+ ```
139
+
140
+ ## Token Refresh Handling
141
+
142
+ For sessions with expiring tokens:
143
+
144
+ ```bash
145
+ #!/bin/bash
146
+ # Wrapper that handles token refresh
147
+
148
+ STATE_FILE="./auth-state.json"
149
+
150
+ # Try loading existing state
151
+ if [[ -f "$STATE_FILE" ]]; then
152
+ agent-browser --state "$STATE_FILE" open https://app.example.com/dashboard
153
+
154
+ # Check if session is still valid
155
+ URL=$(agent-browser get url)
156
+ if [[ "$URL" == *"/login"* ]]; then
157
+ echo "Session expired, re-authenticating..."
158
+ # Perform fresh login
159
+ agent-browser snapshot -i
160
+ agent-browser fill @e1 "$USERNAME"
161
+ agent-browser fill @e2 "$PASSWORD"
162
+ agent-browser click @e3
163
+ agent-browser wait --url "**/dashboard"
164
+ agent-browser state save "$STATE_FILE"
165
+ fi
166
+ else
167
+ # First-time login
168
+ agent-browser open https://app.example.com/login
169
+ # ... login flow ...
170
+ fi
171
+ ```
172
+
173
+ ## Security Best Practices
174
+
175
+ 1. **Never commit state files** - They contain session tokens
176
+ ```bash
177
+ echo "*-state.json" >> .gitignore
178
+ ```
179
+
180
+ 2. **Use environment variables for credentials**
181
+ ```bash
182
+ agent-browser fill @e1 "$APP_USERNAME"
183
+ agent-browser fill @e2 "$APP_PASSWORD"
184
+ ```
185
+
186
+ 3. **Clean up after automation**
187
+ ```bash
188
+ agent-browser cookies clear
189
+ rm -f ./auth-state.json
190
+ ```
191
+
192
+ 4. **Use short-lived sessions for CI/CD**
193
+ ```bash
194
+ # Don't persist state in CI
195
+ agent-browser open https://app.example.com/login
196
+ # ... login and perform actions ...
197
+ agent-browser close # Session ends, nothing persisted
198
+ ```