@dyyz1993/agent-browser 0.9.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/LICENSE +202 -0
- package/README.md +907 -0
- package/bin/agent-browser-darwin-arm64 +0 -0
- package/bin/agent-browser.js +120 -0
- package/dist/__tests__/e2e/utils/test-helpers.d.ts +5 -0
- package/dist/__tests__/e2e/utils/test-helpers.d.ts.map +1 -0
- package/dist/__tests__/e2e/utils/test-helpers.js +22 -0
- package/dist/__tests__/e2e/utils/test-helpers.js.map +1 -0
- package/dist/__tests__/test-iframe.d.ts +2 -0
- package/dist/__tests__/test-iframe.d.ts.map +1 -0
- package/dist/__tests__/test-iframe.js +52 -0
- package/dist/__tests__/test-iframe.js.map +1 -0
- package/dist/__tests__/utils/parseCli.d.ts +20 -0
- package/dist/__tests__/utils/parseCli.d.ts.map +1 -0
- package/dist/__tests__/utils/parseCli.js +1086 -0
- package/dist/__tests__/utils/parseCli.js.map +1 -0
- package/dist/actions.d.ts +50 -0
- package/dist/actions.d.ts.map +1 -0
- package/dist/actions.js +2164 -0
- package/dist/actions.js.map +1 -0
- package/dist/browser.d.ts +556 -0
- package/dist/browser.d.ts.map +1 -0
- package/dist/browser.js +2599 -0
- package/dist/browser.js.map +1 -0
- package/dist/cli/commands.d.ts +8 -0
- package/dist/cli/commands.d.ts.map +1 -0
- package/dist/cli/commands.js +1038 -0
- package/dist/cli/commands.js.map +1 -0
- package/dist/cli/connection.d.ts +50 -0
- package/dist/cli/connection.d.ts.map +1 -0
- package/dist/cli/connection.js +595 -0
- package/dist/cli/connection.js.map +1 -0
- package/dist/cli/flags.d.ts +36 -0
- package/dist/cli/flags.d.ts.map +1 -0
- package/dist/cli/flags.js +206 -0
- package/dist/cli/flags.js.map +1 -0
- package/dist/cli/help.d.ts +4 -0
- package/dist/cli/help.d.ts.map +1 -0
- package/dist/cli/help.js +1024 -0
- package/dist/cli/help.js.map +1 -0
- package/dist/cli/output.d.ts +14 -0
- package/dist/cli/output.d.ts.map +1 -0
- package/dist/cli/output.js +456 -0
- package/dist/cli/output.js.map +1 -0
- package/dist/cli-new.d.ts +3 -0
- package/dist/cli-new.d.ts.map +1 -0
- package/dist/cli-new.js +308 -0
- package/dist/cli-new.js.map +1 -0
- package/dist/cli-old.d.ts +3 -0
- package/dist/cli-old.d.ts.map +1 -0
- package/dist/cli-old.js +1101 -0
- package/dist/cli-old.js.map +1 -0
- package/dist/cli.d.ts +3 -0
- package/dist/cli.d.ts.map +1 -0
- package/dist/cli.js +403 -0
- package/dist/cli.js.map +1 -0
- package/dist/content-detection.d.ts +18 -0
- package/dist/content-detection.d.ts.map +1 -0
- package/dist/content-detection.js +68 -0
- package/dist/content-detection.js.map +1 -0
- package/dist/daemon.d.ts +55 -0
- package/dist/daemon.d.ts.map +1 -0
- package/dist/daemon.js +426 -0
- package/dist/daemon.js.map +1 -0
- package/dist/diff.d.ts +42 -0
- package/dist/diff.d.ts.map +1 -0
- package/dist/diff.js +166 -0
- package/dist/diff.js.map +1 -0
- package/dist/human-mouse.d.ts +31 -0
- package/dist/human-mouse.d.ts.map +1 -0
- package/dist/human-mouse.js +184 -0
- package/dist/human-mouse.js.map +1 -0
- package/dist/ios-actions.d.ts +11 -0
- package/dist/ios-actions.d.ts.map +1 -0
- package/dist/ios-actions.js +228 -0
- package/dist/ios-actions.js.map +1 -0
- package/dist/ios-manager.d.ts +266 -0
- package/dist/ios-manager.d.ts.map +1 -0
- package/dist/ios-manager.js +1076 -0
- package/dist/ios-manager.js.map +1 -0
- package/dist/message-bridge.d.ts +10 -0
- package/dist/message-bridge.d.ts.map +1 -0
- package/dist/message-bridge.js +60 -0
- package/dist/message-bridge.js.map +1 -0
- package/dist/protocol.d.ts +26 -0
- package/dist/protocol.d.ts.map +1 -0
- package/dist/protocol.js +912 -0
- package/dist/protocol.js.map +1 -0
- package/dist/recorder/binding.d.ts +24 -0
- package/dist/recorder/binding.d.ts.map +1 -0
- package/dist/recorder/binding.js +215 -0
- package/dist/recorder/binding.js.map +1 -0
- package/dist/recorder/index.d.ts +4 -0
- package/dist/recorder/index.d.ts.map +1 -0
- package/dist/recorder/index.js +4 -0
- package/dist/recorder/index.js.map +1 -0
- package/dist/recorder/inject.js +1913 -0
- package/dist/recorder/recorder.d.ts +19 -0
- package/dist/recorder/recorder.d.ts.map +1 -0
- package/dist/recorder/recorder.js +101 -0
- package/dist/recorder/recorder.js.map +1 -0
- package/dist/recorder/store.d.ts +22 -0
- package/dist/recorder/store.d.ts.map +1 -0
- package/dist/recorder/store.js +150 -0
- package/dist/recorder/store.js.map +1 -0
- package/dist/recorder/types.d.ts +73 -0
- package/dist/recorder/types.d.ts.map +1 -0
- package/dist/recorder/types.js +5 -0
- package/dist/recorder/types.js.map +1 -0
- package/dist/snapshot.d.ts +81 -0
- package/dist/snapshot.d.ts.map +1 -0
- package/dist/snapshot.js +1348 -0
- package/dist/snapshot.js.map +1 -0
- package/dist/stream-server-standalone.d.ts +38 -0
- package/dist/stream-server-standalone.d.ts.map +1 -0
- package/dist/stream-server-standalone.js +494 -0
- package/dist/stream-server-standalone.js.map +1 -0
- package/dist/stream-server.d.ts +214 -0
- package/dist/stream-server.d.ts.map +1 -0
- package/dist/stream-server.js +811 -0
- package/dist/stream-server.js.map +1 -0
- package/dist/types.d.ts +914 -0
- package/dist/types.d.ts.map +1 -0
- package/dist/types.js +4 -0
- package/dist/types.js.map +1 -0
- package/dist/viewer-html.d.ts +2 -0
- package/dist/viewer-html.d.ts.map +1 -0
- package/dist/viewer-html.js +185 -0
- package/dist/viewer-html.js.map +1 -0
- package/dist/viewer-script.d.ts +47 -0
- package/dist/viewer-script.d.ts.map +1 -0
- package/dist/viewer-script.js +586 -0
- package/dist/viewer-script.js.map +1 -0
- package/package.json +86 -0
- package/scripts/build-all-platforms.sh +68 -0
- package/scripts/check-version-sync.js +39 -0
- package/scripts/check_goods_container.js +35 -0
- package/scripts/check_page_content.js +36 -0
- package/scripts/click_applause_rate.js +30 -0
- package/scripts/copy-native.js +36 -0
- package/scripts/copy-recorder.js +21 -0
- package/scripts/e2e-test-recorder.ts +584 -0
- package/scripts/explore_jd_page.js +31 -0
- package/scripts/extract_all_jd_data.js +80 -0
- package/scripts/extract_jd_product_detail.js +62 -0
- package/scripts/extract_jd_products_correct_links.js +78 -0
- package/scripts/extract_jd_products_final.js +80 -0
- package/scripts/extract_jd_reviews.js +48 -0
- package/scripts/extract_jd_seafood_final.js +78 -0
- package/scripts/extract_multiple_products.js +77 -0
- package/scripts/extract_products_no_scroll.js +68 -0
- package/scripts/extract_products_simple.js +68 -0
- package/scripts/find_applause_rate.js +26 -0
- package/scripts/find_jd_links.js +28 -0
- package/scripts/find_main_content.js +20 -0
- package/scripts/find_product_cards.js +38 -0
- package/scripts/find_root_content.js +26 -0
- package/scripts/find_unique_products.js +55 -0
- package/scripts/get_jd_product_detail.js +16 -0
- package/scripts/get_jd_products.js +23 -0
- package/scripts/get_jd_seafood_products.js +44 -0
- package/scripts/get_product_details_from_images.js +54 -0
- package/scripts/postinstall.js +235 -0
- package/scripts/scroll_and_get_products.js +47 -0
- package/scripts/scroll_deep_and_find.js +45 -0
- package/scripts/sync-version.js +69 -0
- package/scripts/verify-baidu-enter.ts +116 -0
- package/skills/agent-browser/SKILL.md +310 -0
- package/skills/agent-browser/references/authentication.md +198 -0
- package/skills/agent-browser/references/commands.md +471 -0
- package/skills/agent-browser/references/data-extraction.md +377 -0
- package/skills/agent-browser/references/proxy-support.md +188 -0
- package/skills/agent-browser/references/session-management.md +197 -0
- package/skills/agent-browser/references/snapshot-refs.md +379 -0
- package/skills/agent-browser/references/video-recording.md +173 -0
- package/skills/agent-browser/templates/api-interception.sh +53 -0
- package/skills/agent-browser/templates/authenticated-session.sh +97 -0
- package/skills/agent-browser/templates/capture-workflow.sh +69 -0
- package/skills/agent-browser/templates/data-extraction.sh +210 -0
- package/skills/agent-browser/templates/form-automation.sh +62 -0
- package/skills/skill-creator/LICENSE.txt +202 -0
- package/skills/skill-creator/SKILL.md +356 -0
- package/skills/skill-creator/references/output-patterns.md +82 -0
- package/skills/skill-creator/references/workflows.md +28 -0
- package/skills/skill-creator/scripts/init_skill.py +303 -0
- package/skills/skill-creator/scripts/package_skill.py +113 -0
- package/skills/skill-creator/scripts/quick_validate.py +95 -0
|
@@ -0,0 +1,116 @@
|
|
|
1
|
+
import { chromium } from 'playwright';
|
|
2
|
+
import fs from 'fs';
|
|
3
|
+
import path from 'path';
|
|
4
|
+
|
|
5
|
+
/**
 * Manual verification script for the recorder panel on Baidu.
 *
 * Launches a headed Chromium instance, injects `src/recorder/inject.js`
 * (resolved against the current working directory) into every page of the
 * context, opens Baidu, submits a search by pressing Enter in the `#kw` input,
 * and prints the state of the `#recorder-panel` element and the
 * `#recorder-styles` stylesheet before and after the resulting navigation.
 *
 * The browser is always closed, including when a step throws; the error is
 * then propagated to the caller.
 *
 * @returns A promise that resolves once the browser has been closed.
 */
async function verifyBaiduEnter() {
  console.log('=== 百度搜索 Enter 提交验证(类名检查) ===\n');

  const browser = await chromium.launch({ headless: false, slowMo: 500 });

  // Everything after launch runs inside try/finally so that a missing inject
  // script, a navigation timeout, or any other failure cannot leave the headed
  // browser process running.
  try {
    const context = await browser.newContext();
    const page = await context.newPage();

    const injectScriptPath = path.join(process.cwd(), 'src/recorder/inject.js');
    const injectScript = fs.readFileSync(injectScriptPath, 'utf-8');

    // Steps reported by the injected script, kept on the Node side.
    const steps: any[] = [];

    // Copies the current step list into the page and notifies listeners there.
    // Best effort: the evaluate call is allowed to fail (its rejection is
    // ignored), matching the original behaviour.
    const publishSteps = async (targetPage: typeof page | undefined) => {
      await targetPage?.evaluate((s) => {
        (window as any).__recorderSteps = s;
        window.dispatchEvent(new CustomEvent('recorder:steps', { detail: s }));
      }, steps).catch(() => {});
    };

    await context.exposeBinding('__recorderSync', async (source, payload: string) => {
      if (!payload) return;

      const targetPage = source.page;

      try {
        const step = JSON.parse(payload);
        if (!step || !step.action) return;

        if (step.action === '__poll__') {
          // The page asks for the current step list.
          await publishSteps(targetPage);
        } else if (step.action === '__clear__') {
          steps.length = 0;
        } else if (step.action !== '__update_step__') {
          // Any other action is a recorded step; '__update_step__' is ignored.
          steps.push(step);
          console.log(` [Step #${steps.length}]`, step.action, step.selector?.slice(0, 30) || '');
          await publishSteps(targetPage);
        }
      } catch (e) {
        // A bad payload must not break the binding, but it should be visible.
        console.warn(' [recorder] ignored invalid payload:', e);
      }
    });

    await context.addInitScript(injectScript);

    console.log('--- 步骤1:打开百度 ---');
    await page.goto('https://www.baidu.com');
    await page.waitForTimeout(2000);

    const status1 = await page.evaluate(() => {
      const panel = document.getElementById('recorder-panel');
      const style = document.getElementById('recorder-styles');
      const computedStyle = panel ? window.getComputedStyle(panel) : null;

      return {
        hasPanel: !!panel,
        panelClassName: panel?.className,
        panelId: panel?.id,
        hasStyles: !!style,
        styleInHead: style ? document.head.contains(style) : false,
        styleText: style ? style.textContent?.includes('.recorder-panel { position: fixed') : false,
        computed: computedStyle ? {
          position: computedStyle.position,
          top: computedStyle.top,
          right: computedStyle.right,
          width: computedStyle.width
        } : null
      };
    });
    console.log(' 初始状态:', JSON.stringify(status1, null, 2));

    console.log('\n--- 步骤2:输入并提交 ---');
    await page.locator('#kw').fill('playwright', { force: true });
    await page.waitForTimeout(500);
    // `force` is not an option of locator.press (only delay/noWaitAfter/timeout),
    // so it is not passed here.
    await page.locator('#kw').press('Enter');

    // The load wait is best effort; the fixed delay below follows either way.
    await page.waitForLoadState('load').catch(() => {});
    await page.waitForTimeout(3000);

    const status3 = await page.evaluate(() => {
      const panel = document.getElementById('recorder-panel');
      const style = document.getElementById('recorder-styles');
      const computedStyle = panel ? window.getComputedStyle(panel) : null;

      // Check whether the style rules have been applied: read the first rule
      // of the injected stylesheet, if it has any.
      const rules = style?.sheet?.cssRules;
      let firstRule = null;
      if (rules && rules.length > 0) {
        firstRule = rules[0].cssText?.slice(0, 200);
      }

      return {
        hasPanel: !!panel,
        panelClassName: panel?.className,
        panelId: panel?.id,
        panelOuterHTML: panel?.outerHTML?.slice(0, 300),
        hasStyles: !!style,
        styleInHead: style ? document.head.contains(style) : false,
        styleText: style ? style.textContent?.includes('.recorder-panel { position: fixed') : false,
        firstStyleRule: firstRule,
        computed: computedStyle ? {
          position: computedStyle.position,
          top: computedStyle.top,
          right: computedStyle.right,
          width: computedStyle.width
        } : null
      };
    });

    console.log('\n=== 导航后状态 ===');
    console.log(JSON.stringify(status3, null, 2));

    console.log('\n--- 浏览器将保持打开 10 秒 ---');
    await page.waitForTimeout(10000);
  } finally {
    await browser.close();
  }
}
|
|
115
|
+
|
|
116
|
+
verifyBaiduEnter().catch(console.error);
|
|
@@ -0,0 +1,310 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: agent-browser
|
|
3
|
+
description: Browser automation CLI for AI agents. Use when the user needs to interact with websites, including navigating pages, filling forms, clicking buttons, taking screenshots, extracting data, testing web apps, or automating any browser task. Triggers include requests to "open a website", "fill out a form", "click a button", "take a screenshot", "scrape data from a page", "test this web app", "login to a site", "automate browser actions", or any task requiring programmatic web interaction.
|
|
4
|
+
allowed-tools: Bash(agent-browser:*)
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Browser Automation with agent-browser
|
|
8
|
+
|
|
9
|
+
## Core Workflow
|
|
10
|
+
|
|
11
|
+
Every browser automation follows this pattern:
|
|
12
|
+
|
|
13
|
+
1. **Navigate**: `agent-browser open <url>`
|
|
14
|
+
2. **Snapshot**: `agent-browser snapshot -i` (get element refs like `@e1`, `@e2`)
|
|
15
|
+
3. **Interact**: Use refs to click, fill, select
|
|
16
|
+
4. **Re-snapshot**: After navigation or DOM changes, get fresh refs
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
agent-browser open https://example.com/form
|
|
20
|
+
agent-browser snapshot -i
|
|
21
|
+
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Submit"
|
|
22
|
+
|
|
23
|
+
agent-browser fill @e1 "user@example.com"
|
|
24
|
+
agent-browser fill @e2 "password123"
|
|
25
|
+
agent-browser click @e3
|
|
26
|
+
agent-browser wait --load networkidle
|
|
27
|
+
agent-browser snapshot -i # Check result
|
|
28
|
+
```
|
|
29
|
+
|
|
30
|
+
## Working with Iframes
|
|
31
|
+
|
|
32
|
+
Use `--in-frame` to operate inside iframes. The path uses iframe name/id or index:
|
|
33
|
+
|
|
34
|
+
```bash
|
|
35
|
+
# Direct iframe by ID or name
|
|
36
|
+
agent-browser snapshot --in-frame "#my-iframe"
|
|
37
|
+
|
|
38
|
+
# Nested iframe using path (name/id or index)
|
|
39
|
+
agent-browser snapshot --in-frame "#outer-frame/inner-frame"
|
|
40
|
+
|
|
41
|
+
# Example: Click element inside nested cross-origin iframe
|
|
42
|
+
agent-browser open https://example.com
|
|
43
|
+
agent-browser snapshot --in-frame "#iframe-container"
|
|
44
|
+
agent-browser click @e1 --in-frame "#iframe-container/login-frame"
|
|
45
|
+
agent-browser fill "#username" "admin" --in-frame "#iframe-container/login-frame"
|
|
46
|
+
agent-browser get value "#username" --in-frame "#iframe-container/login-frame"
|
|
47
|
+
```
|
|
48
|
+
|
|
49
|
+
### Frame Path Syntax
|
|
50
|
+
|
|
51
|
+
The frame path supports:
|
|
52
|
+
- **ID/Name**: `#frame-id` or `#frame-name`
|
|
53
|
+
- **Index**: `#0`, `#1` (by position)
|
|
54
|
+
- **Nested**: `#parent/child/grandchild`
|
|
55
|
+
|
|
56
|
+
Examples:
|
|
57
|
+
- `#my-iframe` - Single iframe
|
|
58
|
+
- `#0` - First iframe
|
|
59
|
+
- `#outer-iframe/login-frame` - Nested iframes by name
|
|
60
|
+
- `#0/1` - First iframe's second child
|
|
61
|
+
|
|
62
|
+
## Essential Commands
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Navigation
|
|
66
|
+
agent-browser open <url> # Navigate (aliases: goto, navigate)
|
|
67
|
+
agent-browser close # Close browser
|
|
68
|
+
|
|
69
|
+
# Snapshot
|
|
70
|
+
agent-browser snapshot -i # Interactive elements with refs (recommended)
|
|
71
|
+
agent-browser snapshot -i -C # Include cursor-interactive elements (divs with onclick, cursor:pointer)
|
|
72
|
+
agent-browser snapshot -s "#selector" # Scope to CSS selector
|
|
73
|
+
agent-browser snapshot -s "body" --path # Include xpath and cssPath in refs
|
|
74
|
+
agent-browser snapshot -s "body" --attrs # Include element attributes in refs
|
|
75
|
+
|
|
76
|
+
# Interaction (use @refs from snapshot)
|
|
77
|
+
agent-browser click @e1 # Click element
|
|
78
|
+
agent-browser fill @e2 "text" # Clear and type text
|
|
79
|
+
agent-browser type @e2 "text" # Type without clearing
|
|
80
|
+
agent-browser select @e1 "option" # Select dropdown option
|
|
81
|
+
agent-browser check @e1 # Check checkbox
|
|
82
|
+
agent-browser press Enter # Press key
|
|
83
|
+
agent-browser scroll down 500 # Scroll page
|
|
84
|
+
|
|
85
|
+
# Get information
|
|
86
|
+
agent-browser get text @e1 # Get element text
|
|
87
|
+
agent-browser get url # Get current URL
|
|
88
|
+
agent-browser get title # Get page title
|
|
89
|
+
|
|
90
|
+
# Wait
|
|
91
|
+
agent-browser wait @e1 # Wait for element
|
|
92
|
+
agent-browser wait --load networkidle # Wait for network idle
|
|
93
|
+
agent-browser wait --url "**/page" # Wait for URL pattern
|
|
94
|
+
agent-browser wait 2000 # Wait milliseconds
|
|
95
|
+
|
|
96
|
+
# Capture
|
|
97
|
+
agent-browser screenshot # Screenshot to temp dir
|
|
98
|
+
agent-browser screenshot --full # Full page screenshot
|
|
99
|
+
agent-browser pdf output.pdf # Save as PDF
|
|
100
|
+
```
|
|
101
|
+
|
|
102
|
+
## Human-like Mouse Movement
|
|
103
|
+
|
|
104
|
+
Enable globally via environment variable to simulate natural mouse trajectories:
|
|
105
|
+
|
|
106
|
+
```bash
|
|
107
|
+
# Enable human mode (default: arc path type)
|
|
108
|
+
export AGENT_BROWSER_HUMAN=1
|
|
109
|
+
|
|
110
|
+
# Or specify path type
|
|
111
|
+
export AGENT_BROWSER_HUMAN=bezier # Bezier curve with overshoot
|
|
112
|
+
export AGENT_BROWSER_HUMAN=arc # Smooth arc (default, most natural)
|
|
113
|
+
export AGENT_BROWSER_HUMAN=random # Random path with jitter
|
|
114
|
+
export AGENT_BROWSER_HUMAN=linear # Straight line (fastest)
|
|
115
|
+
|
|
116
|
+
# All interactions will use human-like movement
|
|
117
|
+
agent-browser click @e1
|
|
118
|
+
agent-browser fill @e1 "text"
|
|
119
|
+
agent-browser type @e1 "text"
|
|
120
|
+
agent-browser hover @e1
|
|
121
|
+
agent-browser dblclick @e1
|
|
122
|
+
|
|
123
|
+
# Wait with mouse wandering (when human mode enabled)
|
|
124
|
+
agent-browser wait 3000 # Wanders mouse while waiting
|
|
125
|
+
|
|
126
|
+
# Disable human mode
|
|
127
|
+
unset AGENT_BROWSER_HUMAN
|
|
128
|
+
```
|
|
129
|
+
|
|
130
|
+
**Features:**
|
|
131
|
+
- Continues from last mouse position for realistic trajectories
|
|
132
|
+
- Natural acceleration/deceleration curves
|
|
133
|
+
- Randomized delays between movements
|
|
134
|
+
- Four trajectory types: `arc` (default), `bezier`, `random`, `linear`
|
|
135
|
+
- `wait <ms>` automatically does mouse wandering when enabled
|
|
136
|
+
|
|
137
|
+
## Common Patterns
|
|
138
|
+
|
|
139
|
+
### Form Submission
|
|
140
|
+
|
|
141
|
+
```bash
|
|
142
|
+
agent-browser open https://example.com/signup
|
|
143
|
+
agent-browser snapshot -i
|
|
144
|
+
agent-browser fill @e1 "Jane Doe"
|
|
145
|
+
agent-browser fill @e2 "jane@example.com"
|
|
146
|
+
agent-browser select @e3 "California"
|
|
147
|
+
agent-browser check @e4
|
|
148
|
+
agent-browser click @e5
|
|
149
|
+
agent-browser wait --load networkidle
|
|
150
|
+
```
|
|
151
|
+
|
|
152
|
+
### Authentication with State Persistence
|
|
153
|
+
|
|
154
|
+
```bash
|
|
155
|
+
# Login once and save state
|
|
156
|
+
agent-browser open https://app.example.com/login
|
|
157
|
+
agent-browser snapshot -i
|
|
158
|
+
agent-browser fill @e1 "$USERNAME"
|
|
159
|
+
agent-browser fill @e2 "$PASSWORD"
|
|
160
|
+
agent-browser click @e3
|
|
161
|
+
agent-browser wait --url "**/dashboard"
|
|
162
|
+
agent-browser state save auth.json
|
|
163
|
+
|
|
164
|
+
# Reuse in future sessions (use --state flag)
|
|
165
|
+
agent-browser --state auth.json open https://app.example.com/dashboard
|
|
166
|
+
```
|
|
167
|
+
|
|
168
|
+
### Data Extraction
|
|
169
|
+
|
|
170
|
+
```bash
|
|
171
|
+
agent-browser open https://example.com/products
|
|
172
|
+
agent-browser snapshot -i
|
|
173
|
+
agent-browser get text @e5 # Get specific element text
|
|
174
|
+
agent-browser get text body > page.txt # Get all page text
|
|
175
|
+
|
|
176
|
+
# JSON output for parsing
|
|
177
|
+
agent-browser snapshot -i --json
|
|
178
|
+
agent-browser get text @e1 --json
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
### API Interception
|
|
182
|
+
|
|
183
|
+
Passively capture API responses without making direct requests. Useful for sites with anti-scraping measures.
|
|
184
|
+
|
|
185
|
+
```bash
|
|
186
|
+
# 1. Open blank page first
|
|
187
|
+
agent-browser open "about:blank"
|
|
188
|
+
|
|
189
|
+
# 2. Start request listener in background
|
|
190
|
+
(agent-browser wait --request "api/users" --timeout 30000 > response.json) &
|
|
191
|
+
WAIT_PID=$!
|
|
192
|
+
sleep 1
|
|
193
|
+
|
|
194
|
+
# 3. Navigate to trigger the API call
|
|
195
|
+
agent-browser open "https://example.com/user/profile"
|
|
196
|
+
|
|
197
|
+
# 4. Wait for response
|
|
198
|
+
wait "$WAIT_PID"
|
|
199
|
+
|
|
200
|
+
# 5. Process captured data
|
|
201
|
+
jq '.body' response.json
|
|
202
|
+
```
|
|
203
|
+
|
|
204
|
+
Example: Capture Douyin user videos
|
|
205
|
+
```bash
|
|
206
|
+
agent-browser open "about:blank"
|
|
207
|
+
(agent-browser wait --request "aweme/post" --timeout 30000 > /tmp/douyin.json) &
|
|
208
|
+
sleep 1
|
|
209
|
+
agent-browser open "https://www.douyin.com/user/xxx"
|
|
210
|
+
sleep 5
|
|
211
|
+
wait
|
|
212
|
+
jq '.body.aweme_list[:10] | map({id, desc, stats})' /tmp/douyin.json
|
|
213
|
+
```
|
|
214
|
+
|
|
215
|
+
### Parallel Sessions
|
|
216
|
+
|
|
217
|
+
```bash
|
|
218
|
+
agent-browser --session site1 open https://site-a.com
|
|
219
|
+
agent-browser --session site2 open https://site-b.com
|
|
220
|
+
|
|
221
|
+
agent-browser --session site1 snapshot -i
|
|
222
|
+
agent-browser --session site2 snapshot -i
|
|
223
|
+
|
|
224
|
+
agent-browser session list
|
|
225
|
+
```
|
|
226
|
+
|
|
227
|
+
### Visual Browser (Debugging)
|
|
228
|
+
|
|
229
|
+
```bash
|
|
230
|
+
agent-browser --headed open https://example.com
|
|
231
|
+
agent-browser highlight @e1 # Highlight element
|
|
232
|
+
agent-browser record start demo.webm # Record session
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
### Local Files (PDFs, HTML)
|
|
236
|
+
|
|
237
|
+
```bash
|
|
238
|
+
# Open local files with file:// URLs
|
|
239
|
+
agent-browser --allow-file-access open file:///path/to/document.pdf
|
|
240
|
+
agent-browser --allow-file-access open file:///path/to/page.html
|
|
241
|
+
agent-browser screenshot output.png
|
|
242
|
+
```
|
|
243
|
+
|
|
244
|
+
### iOS Simulator (Mobile Safari)
|
|
245
|
+
|
|
246
|
+
```bash
|
|
247
|
+
# List available iOS simulators
|
|
248
|
+
agent-browser device list
|
|
249
|
+
|
|
250
|
+
# Launch Safari on a specific device
|
|
251
|
+
agent-browser -p ios --device "iPhone 16 Pro" open https://example.com
|
|
252
|
+
|
|
253
|
+
# Same workflow as desktop - snapshot, interact, re-snapshot
|
|
254
|
+
agent-browser -p ios snapshot -i
|
|
255
|
+
agent-browser -p ios click @e1 # Click/tap element
|
|
256
|
+
agent-browser -p ios fill @e2 "text"
|
|
257
|
+
agent-browser -p ios scroll down 500 # Scroll gesture
|
|
258
|
+
|
|
259
|
+
# Take screenshot
|
|
260
|
+
agent-browser -p ios screenshot mobile.png
|
|
261
|
+
|
|
262
|
+
# Close session (shuts down simulator)
|
|
263
|
+
agent-browser -p ios close
|
|
264
|
+
```
|
|
265
|
+
|
|
266
|
+
**Requirements:** macOS with Xcode, Appium (`npm install -g appium && appium driver install xcuitest`)
|
|
267
|
+
|
|
268
|
+
**Real devices:** Works with physical iOS devices if pre-configured. Use `--device "<UDID>"` where UDID is from `xcrun xctrace list devices`.
|
|
269
|
+
|
|
270
|
+
**Note:** iOS uses standard commands like `click`, `fill`, `scroll` instead of mobile-specific aliases like `tap` or `swipe`.
|
|
271
|
+
|
|
272
|
+
## Ref Lifecycle (Important)
|
|
273
|
+
|
|
274
|
+
Refs (`@e1`, `@e2`, etc.) are invalidated when the page changes. Always re-snapshot after:
|
|
275
|
+
|
|
276
|
+
- Clicking links or buttons that navigate
|
|
277
|
+
- Form submissions
|
|
278
|
+
- Dynamic content loading (dropdowns, modals)
|
|
279
|
+
|
|
280
|
+
```bash
|
|
281
|
+
agent-browser click @e5 # Navigates to new page
|
|
282
|
+
agent-browser snapshot -i # MUST re-snapshot
|
|
283
|
+
agent-browser click @e1 # Use new refs
|
|
284
|
+
```
|
|
285
|
+
|
|
286
|
+
**Important for Shell Scripts:** Refs are session-specific and cannot be used in standalone shell scripts. When converting interactive workflows to scripts, use semantic locators or CSS selectors instead. See [references/snapshot-refs.md](references/snapshot-refs.md#converting-to-shell-scripts) for details.
|
|
287
|
+
|
|
288
|
+
## Semantic Locators (Alternative to Refs)
|
|
289
|
+
|
|
290
|
+
When refs are unavailable or unreliable, use semantic locators:
|
|
291
|
+
|
|
292
|
+
```bash
|
|
293
|
+
agent-browser find text "Sign In" click
|
|
294
|
+
agent-browser find label "Email" fill "user@test.com"
|
|
295
|
+
agent-browser find role button click --name "Submit"
|
|
296
|
+
agent-browser find placeholder "Search" type "query"
|
|
297
|
+
agent-browser find testid "submit-btn" click
|
|
298
|
+
```
|
|
299
|
+
|
|
300
|
+
## Deep-Dive Documentation
|
|
301
|
+
|
|
302
|
+
| Reference | When to Use |
|
|
303
|
+
|-----------|-------------|
|
|
304
|
+
| [references/commands.md](references/commands.md) | Full command reference with all options |
|
|
305
|
+
| [references/data-extraction.md](references/data-extraction.md) | **Data extraction patterns: DOM, JS variables, API interception, infinite scroll, iframe** |
|
|
306
|
+
| [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
|
|
307
|
+
| [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
|
|
308
|
+
| [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
|
|
309
|
+
| [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
|
|
310
|
+
| [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
|
|
@@ -0,0 +1,198 @@
|
|
|
1
|
+
# Authentication Patterns
|
|
2
|
+
|
|
3
|
+
Login flows, session persistence, OAuth, 2FA, and authenticated browsing.
|
|
4
|
+
|
|
5
|
+
**Related**: [session-management.md](session-management.md) for state persistence details, [SKILL.md](../SKILL.md) for quick start.
|
|
6
|
+
|
|
7
|
+
## Contents
|
|
8
|
+
|
|
9
|
+
- [Basic Login Flow](#basic-login-flow)
|
|
10
|
+
- [Saving Authentication State](#saving-authentication-state)
|
|
11
|
+
- [Restoring Authentication](#restoring-authentication)
|
|
12
|
+
- [OAuth / SSO Flows](#oauth--sso-flows)
|
|
13
|
+
- [Two-Factor Authentication](#two-factor-authentication)
|
|
14
|
+
- [HTTP Basic Auth](#http-basic-auth)
|
|
15
|
+
- [Cookie-Based Auth](#cookie-based-auth)
|
|
16
|
+
- [Token Refresh Handling](#token-refresh-handling)
|
|
17
|
+
- [Security Best Practices](#security-best-practices)
|
|
18
|
+
|
|
19
|
+
## Basic Login Flow
|
|
20
|
+
|
|
21
|
+
```bash
|
|
22
|
+
# Navigate to login page
|
|
23
|
+
agent-browser open https://app.example.com/login
|
|
24
|
+
agent-browser wait --load networkidle
|
|
25
|
+
|
|
26
|
+
# Get form elements
|
|
27
|
+
agent-browser snapshot -i
|
|
28
|
+
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
|
|
29
|
+
|
|
30
|
+
# Fill credentials
|
|
31
|
+
agent-browser fill @e1 "user@example.com"
|
|
32
|
+
agent-browser fill @e2 "password123"
|
|
33
|
+
|
|
34
|
+
# Submit
|
|
35
|
+
agent-browser click @e3
|
|
36
|
+
agent-browser wait --load networkidle
|
|
37
|
+
|
|
38
|
+
# Verify login succeeded
|
|
39
|
+
agent-browser get url # Should be dashboard, not login
|
|
40
|
+
```
|
|
41
|
+
|
|
42
|
+
## Saving Authentication State
|
|
43
|
+
|
|
44
|
+
After logging in, save state for reuse:
|
|
45
|
+
|
|
46
|
+
```bash
|
|
47
|
+
# Login first (see above)
|
|
48
|
+
agent-browser open https://app.example.com/login
|
|
49
|
+
agent-browser snapshot -i
|
|
50
|
+
agent-browser fill @e1 "user@example.com"
|
|
51
|
+
agent-browser fill @e2 "password123"
|
|
52
|
+
agent-browser click @e3
|
|
53
|
+
agent-browser wait --url "**/dashboard"
|
|
54
|
+
|
|
55
|
+
# Save authenticated state
|
|
56
|
+
agent-browser state save ./auth-state.json
|
|
57
|
+
```
|
|
58
|
+
|
|
59
|
+
## Restoring Authentication
|
|
60
|
+
|
|
61
|
+
Skip login by loading saved state:
|
|
62
|
+
|
|
63
|
+
```bash
|
|
64
|
+
# Use --state flag at browser launch
|
|
65
|
+
agent-browser --state ./auth-state.json open https://app.example.com/dashboard
|
|
66
|
+
|
|
67
|
+
# Verify authenticated
|
|
68
|
+
agent-browser snapshot -i
|
|
69
|
+
```
|
|
70
|
+
|
|
71
|
+
## OAuth / SSO Flows
|
|
72
|
+
|
|
73
|
+
For OAuth redirects:
|
|
74
|
+
|
|
75
|
+
```bash
|
|
76
|
+
# Start OAuth flow
|
|
77
|
+
agent-browser open https://app.example.com/auth/google
|
|
78
|
+
|
|
79
|
+
# Handle redirects automatically
|
|
80
|
+
agent-browser wait --url "**/accounts.google.com**"
|
|
81
|
+
agent-browser snapshot -i
|
|
82
|
+
|
|
83
|
+
# Fill Google credentials
|
|
84
|
+
agent-browser fill @e1 "user@gmail.com"
|
|
85
|
+
agent-browser click @e2 # Next button
|
|
86
|
+
agent-browser wait 2000
|
|
87
|
+
agent-browser snapshot -i
|
|
88
|
+
agent-browser fill @e3 "password"
|
|
89
|
+
agent-browser click @e4 # Sign in
|
|
90
|
+
|
|
91
|
+
# Wait for redirect back
|
|
92
|
+
agent-browser wait --url "**/app.example.com**"
|
|
93
|
+
agent-browser state save ./oauth-state.json
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
## Two-Factor Authentication
|
|
97
|
+
|
|
98
|
+
Handle 2FA with manual intervention:
|
|
99
|
+
|
|
100
|
+
```bash
|
|
101
|
+
# Login with credentials
|
|
102
|
+
agent-browser open https://app.example.com/login --headed # Show browser
|
|
103
|
+
agent-browser snapshot -i
|
|
104
|
+
agent-browser fill @e1 "user@example.com"
|
|
105
|
+
agent-browser fill @e2 "password123"
|
|
106
|
+
agent-browser click @e3
|
|
107
|
+
|
|
108
|
+
# Wait for user to complete 2FA manually
|
|
109
|
+
echo "Complete 2FA in the browser window..."
|
|
110
|
+
agent-browser wait --url "**/dashboard" --timeout 120000
|
|
111
|
+
|
|
112
|
+
# Save state after 2FA
|
|
113
|
+
agent-browser state save ./2fa-state.json
|
|
114
|
+
```
|
|
115
|
+
|
|
116
|
+
## HTTP Basic Auth
|
|
117
|
+
|
|
118
|
+
For sites using HTTP Basic Authentication:
|
|
119
|
+
|
|
120
|
+
```bash
|
|
121
|
+
# Set credentials before navigation
|
|
122
|
+
agent-browser set credentials username password
|
|
123
|
+
|
|
124
|
+
# Navigate to protected resource
|
|
125
|
+
agent-browser open https://protected.example.com/api
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
## Cookie-Based Auth
|
|
129
|
+
|
|
130
|
+
Manually set authentication cookies:
|
|
131
|
+
|
|
132
|
+
```bash
|
|
133
|
+
# Set auth cookie
|
|
134
|
+
agent-browser cookies set session_token "abc123xyz"
|
|
135
|
+
|
|
136
|
+
# Navigate to protected page
|
|
137
|
+
agent-browser open https://app.example.com/dashboard
|
|
138
|
+
```
|
|
139
|
+
|
|
140
|
+
## Token Refresh Handling
|
|
141
|
+
|
|
142
|
+
For sessions with expiring tokens:
|
|
143
|
+
|
|
144
|
+
```bash
|
|
145
|
+
#!/bin/bash
|
|
146
|
+
# Wrapper that handles token refresh
|
|
147
|
+
|
|
148
|
+
STATE_FILE="./auth-state.json"
|
|
149
|
+
|
|
150
|
+
# Try loading existing state
|
|
151
|
+
if [[ -f "$STATE_FILE" ]]; then
|
|
152
|
+
agent-browser --state "$STATE_FILE" open https://app.example.com/dashboard
|
|
153
|
+
|
|
154
|
+
# Check if session is still valid
|
|
155
|
+
URL=$(agent-browser get url)
|
|
156
|
+
if [[ "$URL" == *"/login"* ]]; then
|
|
157
|
+
echo "Session expired, re-authenticating..."
|
|
158
|
+
# Perform fresh login
|
|
159
|
+
agent-browser snapshot -i
|
|
160
|
+
agent-browser fill @e1 "$USERNAME"
|
|
161
|
+
agent-browser fill @e2 "$PASSWORD"
|
|
162
|
+
agent-browser click @e3
|
|
163
|
+
agent-browser wait --url "**/dashboard"
|
|
164
|
+
agent-browser state save "$STATE_FILE"
|
|
165
|
+
fi
|
|
166
|
+
else
|
|
167
|
+
# First-time login
|
|
168
|
+
agent-browser open https://app.example.com/login
|
|
169
|
+
# ... login flow ...
|
|
170
|
+
fi
|
|
171
|
+
```
|
|
172
|
+
|
|
173
|
+
## Security Best Practices
|
|
174
|
+
|
|
175
|
+
1. **Never commit state files** - They contain session tokens
|
|
176
|
+
```bash
|
|
177
|
+
echo "*-state.json" >> .gitignore
|
|
178
|
+
```
|
|
179
|
+
|
|
180
|
+
2. **Use environment variables for credentials**
|
|
181
|
+
```bash
|
|
182
|
+
agent-browser fill @e1 "$APP_USERNAME"
|
|
183
|
+
agent-browser fill @e2 "$APP_PASSWORD"
|
|
184
|
+
```
|
|
185
|
+
|
|
186
|
+
3. **Clean up after automation**
|
|
187
|
+
```bash
|
|
188
|
+
agent-browser cookies clear
|
|
189
|
+
rm -f ./auth-state.json
|
|
190
|
+
```
|
|
191
|
+
|
|
192
|
+
4. **Use short-lived sessions for CI/CD**
|
|
193
|
+
```bash
|
|
194
|
+
# Don't persist state in CI
|
|
195
|
+
agent-browser open https://app.example.com/login
|
|
196
|
+
# ... login and perform actions ...
|
|
197
|
+
agent-browser close # Session ends, nothing persisted
|
|
198
|
+
```
|