remotion-claude-agent-demo 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- package/README.md +160 -0
- package/apps/web/README.md +36 -0
- package/apps/web/env.example +20 -0
- package/apps/web/eslint.config.mjs +18 -0
- package/apps/web/next.config.ts +7 -0
- package/apps/web/package-lock.json +10348 -0
- package/apps/web/package.json +35 -0
- package/apps/web/postcss.config.mjs +7 -0
- package/apps/web/public/file.svg +1 -0
- package/apps/web/public/globe.svg +1 -0
- package/apps/web/public/next.svg +1 -0
- package/apps/web/public/vercel.svg +1 -0
- package/apps/web/public/window.svg +1 -0
- package/apps/web/src/app/.well-known/agent-card.json/route.ts +50 -0
- package/apps/web/src/app/background-tasks/[jobId]/cancel/route.ts +29 -0
- package/apps/web/src/app/events/stream/route.ts +58 -0
- package/apps/web/src/app/favicon.ico +0 -0
- package/apps/web/src/app/globals.css +174 -0
- package/apps/web/src/app/layout.tsx +34 -0
- package/apps/web/src/app/messages/answer/route.ts +57 -0
- package/apps/web/src/app/messages/stream/route.ts +381 -0
- package/apps/web/src/app/page.tsx +358 -0
- package/apps/web/src/app/tasks/[taskId]/cancel/route.ts +24 -0
- package/apps/web/src/app/tasks/[taskId]/route.ts +24 -0
- package/apps/web/src/app/tasks/route.ts +13 -0
- package/apps/web/src/components/chat/agent-blocks.tsx +111 -0
- package/apps/web/src/components/chat/ask-user-question-panel.tsx +172 -0
- package/apps/web/src/components/chat/session-sidebar.tsx +222 -0
- package/apps/web/src/components/chat/subagent-activity-sidebar.tsx +248 -0
- package/apps/web/src/components/chat/tool-blocks.tsx +550 -0
- package/apps/web/src/lib/a2a/activity-store.ts +150 -0
- package/apps/web/src/lib/a2a/client.ts +357 -0
- package/apps/web/src/lib/a2a/sse.ts +19 -0
- package/apps/web/src/lib/a2a/task-store.ts +111 -0
- package/apps/web/src/lib/a2a/types.ts +216 -0
- package/apps/web/src/lib/agent/answer-store.ts +109 -0
- package/apps/web/src/lib/agent/background-delivery.ts +343 -0
- package/apps/web/src/lib/agent/background-tool.ts +78 -0
- package/apps/web/src/lib/agent/background.ts +452 -0
- package/apps/web/src/lib/agent/chat.ts +543 -0
- package/apps/web/src/lib/agent/session-store.ts +26 -0
- package/apps/web/src/lib/chat/types.ts +44 -0
- package/apps/web/src/lib/env.ts +31 -0
- package/apps/web/src/lib/hooks/useA2AChat.ts +863 -0
- package/apps/web/src/lib/state/chat-atoms.ts +52 -0
- package/apps/web/src/lib/workspace.ts +9 -0
- package/apps/web/tsconfig.json +35 -0
- package/bin/remotion-agent.js +451 -0
- package/package.json +34 -0
- package/templates/.claude/CLAUDE.md +95 -0
- package/templates/.claude/README.md +129 -0
- package/templates/.claude/agents/composer-agent.md +188 -0
- package/templates/.claude/agents/crafter.md +181 -0
- package/templates/.claude/agents/creator.md +134 -0
- package/templates/.claude/agents/perceiver.md +92 -0
- package/templates/.claude/settings.json +36 -0
- package/templates/.claude/settings.local.json +39 -0
- package/templates/.claude/skills/agent-browser/SKILL.md +349 -0
- package/templates/.claude/skills/agent-browser/references/authentication.md +188 -0
- package/templates/.claude/skills/agent-browser/references/proxy-support.md +175 -0
- package/templates/.claude/skills/agent-browser/references/session-management.md +181 -0
- package/templates/.claude/skills/agent-browser/references/snapshot-refs.md +186 -0
- package/templates/.claude/skills/agent-browser/references/video-recording.md +162 -0
- package/templates/.claude/skills/agent-browser/templates/authenticated-session.sh +91 -0
- package/templates/.claude/skills/agent-browser/templates/capture-workflow.sh +68 -0
- package/templates/.claude/skills/agent-browser/templates/form-automation.sh +64 -0
- package/templates/.claude/skills/algorithmic-art/LICENSE.txt +202 -0
- package/templates/.claude/skills/algorithmic-art/SKILL.md +405 -0
- package/templates/.claude/skills/algorithmic-art/templates/generator_template.js +223 -0
- package/templates/.claude/skills/algorithmic-art/templates/viewer.html +599 -0
- package/templates/.claude/skills/asset-validator/SKILL.md +376 -0
- package/templates/.claude/skills/audio-video-sync/SKILL.md +219 -0
- package/templates/.claude/skills/bgm-manager/SKILL.md +334 -0
- package/templates/.claude/skills/remotion-best-practices/SKILL.md +45 -0
- package/templates/.claude/skills/remotion-best-practices/rules/3d.md +86 -0
- package/templates/.claude/skills/remotion-best-practices/rules/animations.md +29 -0
- package/templates/.claude/skills/remotion-best-practices/rules/assets/charts-bar-chart.tsx +173 -0
- package/templates/.claude/skills/remotion-best-practices/rules/assets/text-animations-typewriter.tsx +100 -0
- package/templates/.claude/skills/remotion-best-practices/rules/assets/text-animations-word-highlight.tsx +108 -0
- package/templates/.claude/skills/remotion-best-practices/rules/assets.md +78 -0
- package/templates/.claude/skills/remotion-best-practices/rules/audio.md +172 -0
- package/templates/.claude/skills/remotion-best-practices/rules/calculate-metadata.md +104 -0
- package/templates/.claude/skills/remotion-best-practices/rules/can-decode.md +75 -0
- package/templates/.claude/skills/remotion-best-practices/rules/charts.md +58 -0
- package/templates/.claude/skills/remotion-best-practices/rules/compositions.md +141 -0
- package/templates/.claude/skills/remotion-best-practices/rules/display-captions.md +126 -0
- package/templates/.claude/skills/remotion-best-practices/rules/extract-frames.md +229 -0
- package/templates/.claude/skills/remotion-best-practices/rules/fonts.md +152 -0
- package/templates/.claude/skills/remotion-best-practices/rules/get-audio-duration.md +58 -0
- package/templates/.claude/skills/remotion-best-practices/rules/get-video-dimensions.md +68 -0
- package/templates/.claude/skills/remotion-best-practices/rules/get-video-duration.md +58 -0
- package/templates/.claude/skills/remotion-best-practices/rules/gifs.md +138 -0
- package/templates/.claude/skills/remotion-best-practices/rules/images.md +130 -0
- package/templates/.claude/skills/remotion-best-practices/rules/import-srt-captions.md +67 -0
- package/templates/.claude/skills/remotion-best-practices/rules/lottie.md +68 -0
- package/templates/.claude/skills/remotion-best-practices/rules/maps.md +403 -0
- package/templates/.claude/skills/remotion-best-practices/rules/measuring-dom-nodes.md +35 -0
- package/templates/.claude/skills/remotion-best-practices/rules/measuring-text.md +143 -0
- package/templates/.claude/skills/remotion-best-practices/rules/parameters.md +98 -0
- package/templates/.claude/skills/remotion-best-practices/rules/sequencing.md +118 -0
- package/templates/.claude/skills/remotion-best-practices/rules/tailwind.md +11 -0
- package/templates/.claude/skills/remotion-best-practices/rules/text-animations.md +20 -0
- package/templates/.claude/skills/remotion-best-practices/rules/timing.md +179 -0
- package/templates/.claude/skills/remotion-best-practices/rules/transcribe-captions.md +19 -0
- package/templates/.claude/skills/remotion-best-practices/rules/transitions.md +122 -0
- package/templates/.claude/skills/remotion-best-practices/rules/trimming.md +53 -0
- package/templates/.claude/skills/remotion-best-practices/rules/videos.md +171 -0
- package/templates/.claude/skills/remotion-components/SKILL.md +453 -0
- package/templates/.claude/skills/render-config/SKILL.md +290 -0
- package/templates/.claude/skills/script-writer/SKILL.md +59 -0
- package/templates/.claude/skills/style-director/script-writer/SKILL.md +82 -0
- package/templates/.claude/skills/style-director/style-director/SKILL.md +287 -0
- package/templates/.claude/skills/style-director/style-director/references/audience-and-scenarios.md +43 -0
- package/templates/.claude/skills/style-director/style-director/references/interaction-innovation.md +26 -0
- package/templates/.claude/skills/style-director/style-director/references/motion-grammar.md +66 -0
- package/templates/.claude/skills/style-director/style-director/references/quality-checklist.md +29 -0
- package/templates/.claude/skills/style-director/style-director/references/scene-recipes.md +38 -0
- package/templates/.claude/skills/style-director/style-director/references/visual-style-system.md +148 -0
- package/templates/.claude/skills/subtitle-composer/SKILL.md +304 -0
- package/templates/.claude/skills/subtitle-processor/SKILL.md +308 -0
- package/templates/.claude/skills/timeline-generator/SKILL.md +253 -0
- package/templates/.claude/skills/video-preflight-check/SKILL.md +353 -0
- package/templates/.claude/skills/voice-synthesizer/SKILL.md +296 -0
- package/templates/.claude/skills/voice-synthesizer/scripts/synthesize_voice.py +315 -0
- package/templates/.claude/skills/voice-synthesizer/scripts/tts_cli.py +142 -0
- package/templates/.claude/skills/web-design-guidelines/SKILL.md +36 -0
- package/templates/.claude/skills/youtube-downloader/SKILL.md +99 -0
- package/templates/.claude/skills/youtube-downloader/scripts/download_video.py +145 -0
|
@@ -0,0 +1,92 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: perceiver
|
|
3
|
+
description: 浏览器感知与录屏代理。使用 agent-browser CLI 访问网页、分析内容、录制操作演示。
|
|
4
|
+
skills:
|
|
5
|
+
- agent-browser
|
|
6
|
+
---
|
|
7
|
+
|
|
8
|
+
你是感知分析代理。通过浏览器能力获取页面信息或录制操作演示,输出结构化的内容理解。
|
|
9
|
+
|
|
10
|
+
## 核心能力
|
|
11
|
+
|
|
12
|
+
1. **网页感知** - 访问 URL,分析页面结构、功能、关键元素
|
|
13
|
+
2. **操作录制** - 录制浏览器操作演示,标记关键步骤
|
|
14
|
+
3. **内容提取** - 提取文本、截图、交互元素信息
|
|
15
|
+
|
|
16
|
+
## agent-browser 核心命令
|
|
17
|
+
|
|
18
|
+
```bash
|
|
19
|
+
# 导航
|
|
20
|
+
agent-browser open <url> # 打开页面
|
|
21
|
+
agent-browser back / forward # 后退 / 前进
|
|
22
|
+
agent-browser reload # 刷新
|
|
23
|
+
|
|
24
|
+
# 页面分析
|
|
25
|
+
agent-browser snapshot -i # 获取交互元素 (返回 @e1, @e2 等 ref)
|
|
26
|
+
agent-browser screenshot [path] # 截图
|
|
27
|
+
agent-browser get text @e1 # 获取元素文本
|
|
28
|
+
agent-browser get title # 获取页面标题
|
|
29
|
+
agent-browser get url # 获取当前 URL
|
|
30
|
+
|
|
31
|
+
# 交互操作
|
|
32
|
+
agent-browser click @e1 # 点击元素
|
|
33
|
+
agent-browser fill @e2 "text" # 填充输入框
|
|
34
|
+
agent-browser type @e2 "text" # 追加输入
|
|
35
|
+
agent-browser hover @e1 # 悬停
|
|
36
|
+
agent-browser scroll down 500 # 滚动
|
|
37
|
+
|
|
38
|
+
# 等待
|
|
39
|
+
agent-browser wait @e1 # 等待元素出现
|
|
40
|
+
agent-browser wait 2000 # 等待毫秒
|
|
41
|
+
agent-browser wait --load networkidle # 等待网络空闲
|
|
42
|
+
|
|
43
|
+
# 录制
|
|
44
|
+
agent-browser record start ./demo.webm # 开始录制
|
|
45
|
+
agent-browser record stop # 停止录制
|
|
46
|
+
|
|
47
|
+
# 关闭
|
|
48
|
+
agent-browser close # 关闭浏览器
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
## 执行步骤
|
|
52
|
+
|
|
53
|
+
1. 确认输入类型(URL、录屏任务、页面状态)
|
|
54
|
+
2. 使用 agent-browser CLI:
|
|
55
|
+
- `agent-browser open <url>` - 访问页面
|
|
56
|
+
- `agent-browser snapshot -i` - 获取页面结构与可交互元素
|
|
57
|
+
- `agent-browser screenshot` - 截取屏幕
|
|
58
|
+
- `agent-browser click/fill/type @ref` - 执行操作
|
|
59
|
+
3. 提取关键内容:标题、功能点、操作流程、UI 元素
|
|
60
|
+
4. 标记关键时刻与建议的视觉元素
|
|
61
|
+
5. 输出 `content_understanding`
|
|
62
|
+
|
|
63
|
+
## 输出格式
|
|
64
|
+
|
|
65
|
+
```yaml
|
|
66
|
+
content_understanding:
|
|
67
|
+
type: "webpage" | "recording" | "analysis"
|
|
68
|
+
summary: string
|
|
69
|
+
|
|
70
|
+
key_steps:
|
|
71
|
+
- step: string
|
|
72
|
+
screenshot?: string
|
|
73
|
+
timestamp?: number
|
|
74
|
+
|
|
75
|
+
key_moments:
|
|
76
|
+
- description: string
|
|
77
|
+
visual_cue: string
|
|
78
|
+
|
|
79
|
+
ui_elements:
|
|
80
|
+
- type: "button" | "input" | "menu" | "modal"
|
|
81
|
+
description: string
|
|
82
|
+
importance: "high" | "medium" | "low"
|
|
83
|
+
|
|
84
|
+
suggestions:
|
|
85
|
+
- string
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
## 约束
|
|
89
|
+
|
|
90
|
+
- 只做分析与结构化输出,不写脚本或分镜
|
|
91
|
+
- 录屏时考虑后期剪辑需求,标记关键操作点
|
|
92
|
+
- 对 SaaS 产品重点关注:核心功能、用户价值、操作流程
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
{
|
|
2
|
+
"project": {
|
|
3
|
+
"name": "视频创作系统",
|
|
4
|
+
"description": "专注于产品教程、Demo演示、SaaS介绍的智能视频创作",
|
|
5
|
+
"version": "1.0.0"
|
|
6
|
+
},
|
|
7
|
+
"agents": {
|
|
8
|
+
"perceiver": {
|
|
9
|
+
"path": "./agents/perceiver.md",
|
|
10
|
+
"description": "感知Agent - 浏览器访问、内容分析、操作录制"
|
|
11
|
+
},
|
|
12
|
+
"creator": {
|
|
13
|
+
"path": "./agents/creator.md",
|
|
14
|
+
"description": "创造Agent - 脚本编写、分镜设计、风格配置"
|
|
15
|
+
},
|
|
16
|
+
"crafter": {
|
|
17
|
+
"path": "./agents/crafter.md",
|
|
18
|
+
"description": "执行Agent - 语音合成、素材下载、录屏调度"
|
|
19
|
+
},
|
|
20
|
+
"composer-agent": {
|
|
21
|
+
"path": "./agents/composer-agent.md",
|
|
22
|
+
"description": "合成Agent - Remotion项目生成与渲染"
|
|
23
|
+
}
|
|
24
|
+
},
|
|
25
|
+
"skills": {
|
|
26
|
+
"creation": ["script-writer", "style-director"],
|
|
27
|
+
"execution": ["voice-synthesizer", "youtube-downloader", "agent-browser"],
|
|
28
|
+
"composition": ["remotion-best-practices"]
|
|
29
|
+
},
|
|
30
|
+
"tools": {
|
|
31
|
+
"agent-browser": {
|
|
32
|
+
"url": "https://agent-browser.dev/",
|
|
33
|
+
"description": "浏览器自动化CLI,用于网页访问和录屏"
|
|
34
|
+
}
|
|
35
|
+
}
|
|
36
|
+
}
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
{
|
|
2
|
+
"permissions": {
|
|
3
|
+
"allow": [
|
|
4
|
+
"WebFetch(domain:agent-browser.dev)",
|
|
5
|
+
"WebSearch",
|
|
6
|
+
"WebFetch(domain:github.com)",
|
|
7
|
+
"WebFetch(domain:ai.google.dev)",
|
|
8
|
+
"Bash(xargs:*)",
|
|
9
|
+
"Bash(claude skill install:*)",
|
|
10
|
+
"Bash(claude skill --help:*)",
|
|
11
|
+
"Bash(claude install --help:*)",
|
|
12
|
+
"WebFetch(domain:raw.githubusercontent.com)",
|
|
13
|
+
"Bash(git clone:*)",
|
|
14
|
+
"Bash(ls:*)",
|
|
15
|
+
"WebFetch(domain:docs.anthropic.com)",
|
|
16
|
+
"Bash(edge-tts:*)",
|
|
17
|
+
"Bash(npm init -y)",
|
|
18
|
+
"Bash(npm install:*)",
|
|
19
|
+
"Bash(npx remotion render:*)",
|
|
20
|
+
"Bash(ffprobe:*)",
|
|
21
|
+
"WebFetch(domain:www.anthropic.com)",
|
|
22
|
+
"WebFetch(domain:www.datacamp.com)",
|
|
23
|
+
"WebFetch(domain:nader.substack.com)",
|
|
24
|
+
"Bash(npx tsx:*)",
|
|
25
|
+
"Bash(npx tsc:*)",
|
|
26
|
+
"Bash(npx eslint:*)",
|
|
27
|
+
"Bash(test:*)",
|
|
28
|
+
"Bash(python3:*)",
|
|
29
|
+
"Bash(chmod:*)",
|
|
30
|
+
"Bash(pip3 install:*)",
|
|
31
|
+
"Bash(curl:*)",
|
|
32
|
+
"WebFetch(domain:mp.weixin.qq.com)",
|
|
33
|
+
"Bash(node:*)",
|
|
34
|
+
"Bash(npm --version:*)",
|
|
35
|
+
"Bash(npm run render)",
|
|
36
|
+
"Bash(mediainfo:*)"
|
|
37
|
+
]
|
|
38
|
+
}
|
|
39
|
+
}
|
|
@@ -0,0 +1,349 @@
|
|
|
1
|
+
---
|
|
2
|
+
name: agent-browser
|
|
3
|
+
description: Automates browser interactions for web testing, form filling, screenshots, and data extraction. Use when the user needs to navigate websites, interact with web pages, fill forms, take screenshots, test web applications, or extract information from web pages.
|
|
4
|
+
allowed-tools: Bash(agent-browser:*)
|
|
5
|
+
---
|
|
6
|
+
|
|
7
|
+
# Browser Automation with agent-browser
|
|
8
|
+
|
|
9
|
+
## Quick start
|
|
10
|
+
|
|
11
|
+
```bash
|
|
12
|
+
agent-browser open <url> # Navigate to page
|
|
13
|
+
agent-browser snapshot -i # Get interactive elements with refs
|
|
14
|
+
agent-browser click @e1 # Click element by ref
|
|
15
|
+
agent-browser fill @e2 "text" # Fill input by ref
|
|
16
|
+
agent-browser close # Close browser
|
|
17
|
+
```
|
|
18
|
+
|
|
19
|
+
## Core workflow
|
|
20
|
+
|
|
21
|
+
1. Navigate: `agent-browser open <url>`
|
|
22
|
+
2. Snapshot: `agent-browser snapshot -i` (returns elements with refs like `@e1`, `@e2`)
|
|
23
|
+
3. Interact using refs from the snapshot
|
|
24
|
+
4. Re-snapshot after navigation or significant DOM changes
|
|
25
|
+
|
|
26
|
+
## Commands
|
|
27
|
+
|
|
28
|
+
### Navigation
|
|
29
|
+
|
|
30
|
+
```bash
|
|
31
|
+
agent-browser open <url> # Navigate to URL (aliases: goto, navigate)
|
|
32
|
+
# Supports: https://, http://, file://, about:, data:
|
|
33
|
+
# Auto-prepends https:// if no protocol given
|
|
34
|
+
agent-browser back # Go back
|
|
35
|
+
agent-browser forward # Go forward
|
|
36
|
+
agent-browser reload # Reload page
|
|
37
|
+
agent-browser close # Close browser (aliases: quit, exit)
|
|
38
|
+
agent-browser connect 9222 # Connect to browser via CDP port
|
|
39
|
+
```
|
|
40
|
+
|
|
41
|
+
### Snapshot (page analysis)
|
|
42
|
+
|
|
43
|
+
```bash
|
|
44
|
+
agent-browser snapshot # Full accessibility tree
|
|
45
|
+
agent-browser snapshot -i # Interactive elements only (recommended)
|
|
46
|
+
agent-browser snapshot -c # Compact output
|
|
47
|
+
agent-browser snapshot -d 3 # Limit depth to 3
|
|
48
|
+
agent-browser snapshot -s "#main" # Scope to CSS selector
|
|
49
|
+
```
|
|
50
|
+
|
|
51
|
+
### Interactions (use @refs from snapshot)
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
agent-browser click @e1 # Click
|
|
55
|
+
agent-browser dblclick @e1 # Double-click
|
|
56
|
+
agent-browser focus @e1 # Focus element
|
|
57
|
+
agent-browser fill @e2 "text" # Clear and type
|
|
58
|
+
agent-browser type @e2 "text" # Type without clearing
|
|
59
|
+
agent-browser press Enter # Press key (alias: key)
|
|
60
|
+
agent-browser press Control+a # Key combination
|
|
61
|
+
agent-browser keydown Shift # Hold key down
|
|
62
|
+
agent-browser keyup Shift # Release key
|
|
63
|
+
agent-browser hover @e1 # Hover
|
|
64
|
+
agent-browser check @e1 # Check checkbox
|
|
65
|
+
agent-browser uncheck @e1 # Uncheck checkbox
|
|
66
|
+
agent-browser select @e1 "value" # Select dropdown option
|
|
67
|
+
agent-browser select @e1 "a" "b" # Select multiple options
|
|
68
|
+
agent-browser scroll down 500 # Scroll page (default: down 300px)
|
|
69
|
+
agent-browser scrollintoview @e1 # Scroll element into view (alias: scrollinto)
|
|
70
|
+
agent-browser drag @e1 @e2 # Drag and drop
|
|
71
|
+
agent-browser upload @e1 file.pdf # Upload files
|
|
72
|
+
```
|
|
73
|
+
|
|
74
|
+
### Get information
|
|
75
|
+
|
|
76
|
+
```bash
|
|
77
|
+
agent-browser get text @e1 # Get element text
|
|
78
|
+
agent-browser get html @e1 # Get innerHTML
|
|
79
|
+
agent-browser get value @e1 # Get input value
|
|
80
|
+
agent-browser get attr @e1 href # Get attribute
|
|
81
|
+
agent-browser get title # Get page title
|
|
82
|
+
agent-browser get url # Get current URL
|
|
83
|
+
agent-browser get count ".item" # Count matching elements
|
|
84
|
+
agent-browser get box @e1 # Get bounding box
|
|
85
|
+
agent-browser get styles @e1 # Get computed styles (font, color, bg, etc.)
|
|
86
|
+
```
|
|
87
|
+
|
|
88
|
+
### Check state
|
|
89
|
+
|
|
90
|
+
```bash
|
|
91
|
+
agent-browser is visible @e1 # Check if visible
|
|
92
|
+
agent-browser is enabled @e1 # Check if enabled
|
|
93
|
+
agent-browser is checked @e1 # Check if checked
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Screenshots & PDF
|
|
97
|
+
|
|
98
|
+
```bash
|
|
99
|
+
agent-browser screenshot # Screenshot to stdout
|
|
100
|
+
agent-browser screenshot path.png # Save to file
|
|
101
|
+
agent-browser screenshot --full # Full page
|
|
102
|
+
agent-browser pdf output.pdf # Save as PDF
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
### Video recording
|
|
106
|
+
|
|
107
|
+
```bash
|
|
108
|
+
agent-browser record start ./demo.webm # Start recording (uses current URL + state)
|
|
109
|
+
agent-browser click @e1 # Perform actions
|
|
110
|
+
agent-browser record stop # Stop and save video
|
|
111
|
+
agent-browser record restart ./take2.webm # Stop current + start new recording
|
|
112
|
+
```
|
|
113
|
+
|
|
114
|
+
Recording creates a fresh context but preserves cookies/storage from your session. If no URL is provided, it
|
|
115
|
+
automatically returns to your current page. For smooth demos, explore first, then start recording.
|
|
116
|
+
|
|
117
|
+
### Wait
|
|
118
|
+
|
|
119
|
+
```bash
|
|
120
|
+
agent-browser wait @e1 # Wait for element
|
|
121
|
+
agent-browser wait 2000 # Wait milliseconds
|
|
122
|
+
agent-browser wait --text "Success" # Wait for text (or -t)
|
|
123
|
+
agent-browser wait --url "**/dashboard" # Wait for URL pattern (or -u)
|
|
124
|
+
agent-browser wait --load networkidle # Wait for network idle (or -l)
|
|
125
|
+
agent-browser wait --fn "window.ready" # Wait for JS condition (or -f)
|
|
126
|
+
```
|
|
127
|
+
|
|
128
|
+
### Mouse control
|
|
129
|
+
|
|
130
|
+
```bash
|
|
131
|
+
agent-browser mouse move 100 200 # Move mouse
|
|
132
|
+
agent-browser mouse down left # Press button
|
|
133
|
+
agent-browser mouse up left # Release button
|
|
134
|
+
agent-browser mouse wheel 100 # Scroll wheel
|
|
135
|
+
```
|
|
136
|
+
|
|
137
|
+
### Semantic locators (alternative to refs)
|
|
138
|
+
|
|
139
|
+
```bash
|
|
140
|
+
agent-browser find role button click --name "Submit"
|
|
141
|
+
agent-browser find text "Sign In" click
|
|
142
|
+
agent-browser find text "Sign In" click --exact # Exact match only
|
|
143
|
+
agent-browser find label "Email" fill "user@test.com"
|
|
144
|
+
agent-browser find placeholder "Search" type "query"
|
|
145
|
+
agent-browser find alt "Logo" click
|
|
146
|
+
agent-browser find title "Close" click
|
|
147
|
+
agent-browser find testid "submit-btn" click
|
|
148
|
+
agent-browser find first ".item" click
|
|
149
|
+
agent-browser find last ".item" click
|
|
150
|
+
agent-browser find nth 2 "a" hover
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
### Browser settings
|
|
154
|
+
|
|
155
|
+
```bash
|
|
156
|
+
agent-browser set viewport 1920 1080 # Set viewport size
|
|
157
|
+
agent-browser set device "iPhone 14" # Emulate device
|
|
158
|
+
agent-browser set geo 37.7749 -122.4194 # Set geolocation (alias: geolocation)
|
|
159
|
+
agent-browser set offline on # Toggle offline mode
|
|
160
|
+
agent-browser set headers '{"X-Key":"v"}' # Extra HTTP headers
|
|
161
|
+
agent-browser set credentials user pass # HTTP basic auth (alias: auth)
|
|
162
|
+
agent-browser set media dark # Emulate color scheme
|
|
163
|
+
agent-browser set media light reduced-motion # Light mode + reduced motion
|
|
164
|
+
```
|
|
165
|
+
|
|
166
|
+
### Cookies & Storage
|
|
167
|
+
|
|
168
|
+
```bash
|
|
169
|
+
agent-browser cookies # Get all cookies
|
|
170
|
+
agent-browser cookies set name value # Set cookie
|
|
171
|
+
agent-browser cookies clear # Clear cookies
|
|
172
|
+
agent-browser storage local # Get all localStorage
|
|
173
|
+
agent-browser storage local key # Get specific key
|
|
174
|
+
agent-browser storage local set k v # Set value
|
|
175
|
+
agent-browser storage local clear # Clear all
|
|
176
|
+
```
|
|
177
|
+
|
|
178
|
+
### Network
|
|
179
|
+
|
|
180
|
+
```bash
|
|
181
|
+
agent-browser network route <url> # Intercept requests
|
|
182
|
+
agent-browser network route <url> --abort # Block requests
|
|
183
|
+
agent-browser network route <url> --body '{}' # Mock response
|
|
184
|
+
agent-browser network unroute [url] # Remove routes
|
|
185
|
+
agent-browser network requests # View tracked requests
|
|
186
|
+
agent-browser network requests --filter api # Filter requests
|
|
187
|
+
```
|
|
188
|
+
|
|
189
|
+
### Tabs & Windows
|
|
190
|
+
|
|
191
|
+
```bash
|
|
192
|
+
agent-browser tab # List tabs
|
|
193
|
+
agent-browser tab new [url] # New tab
|
|
194
|
+
agent-browser tab 2 # Switch to tab by index
|
|
195
|
+
agent-browser tab close # Close current tab
|
|
196
|
+
agent-browser tab close 2 # Close tab by index
|
|
197
|
+
agent-browser window new # New window
|
|
198
|
+
```
|
|
199
|
+
|
|
200
|
+
### Frames
|
|
201
|
+
|
|
202
|
+
```bash
|
|
203
|
+
agent-browser frame "#iframe" # Switch to iframe
|
|
204
|
+
agent-browser frame main # Back to main frame
|
|
205
|
+
```
|
|
206
|
+
|
|
207
|
+
### Dialogs
|
|
208
|
+
|
|
209
|
+
```bash
|
|
210
|
+
agent-browser dialog accept [text] # Accept dialog
|
|
211
|
+
agent-browser dialog dismiss # Dismiss dialog
|
|
212
|
+
```
|
|
213
|
+
|
|
214
|
+
### JavaScript
|
|
215
|
+
|
|
216
|
+
```bash
|
|
217
|
+
agent-browser eval "document.title" # Run JavaScript
|
|
218
|
+
```
|
|
219
|
+
|
|
220
|
+
## Global options
|
|
221
|
+
|
|
222
|
+
```bash
|
|
223
|
+
agent-browser --session <name> ... # Isolated browser session
|
|
224
|
+
agent-browser --json ... # JSON output for parsing
|
|
225
|
+
agent-browser --headed ... # Show browser window (not headless)
|
|
226
|
+
agent-browser --full ... # Full page screenshot (-f)
|
|
227
|
+
agent-browser --cdp <port> ... # Connect via Chrome DevTools Protocol
|
|
228
|
+
agent-browser -p <provider> ... # Cloud browser provider (--provider)
|
|
229
|
+
agent-browser --proxy <url> ... # Use proxy server
|
|
230
|
+
agent-browser --headers <json> ... # HTTP headers scoped to URL's origin
|
|
231
|
+
agent-browser --executable-path <p> # Custom browser executable
|
|
232
|
+
agent-browser --extension <path> ... # Load browser extension (repeatable)
|
|
233
|
+
agent-browser --help # Show help (-h)
|
|
234
|
+
agent-browser --version # Show version (-V)
|
|
235
|
+
agent-browser <command> --help # Show detailed help for a command
|
|
236
|
+
```
|
|
237
|
+
|
|
238
|
+
### Proxy support
|
|
239
|
+
|
|
240
|
+
```bash
|
|
241
|
+
agent-browser --proxy http://proxy.com:8080 open example.com
|
|
242
|
+
agent-browser --proxy http://user:pass@proxy.com:8080 open example.com
|
|
243
|
+
agent-browser --proxy socks5://proxy.com:1080 open example.com
|
|
244
|
+
```
|
|
245
|
+
|
|
246
|
+
## Environment variables
|
|
247
|
+
|
|
248
|
+
```bash
|
|
249
|
+
AGENT_BROWSER_SESSION="mysession" # Default session name
|
|
250
|
+
AGENT_BROWSER_EXECUTABLE_PATH="/path/chrome" # Custom browser path
|
|
251
|
+
AGENT_BROWSER_EXTENSIONS="/ext1,/ext2" # Comma-separated extension paths
|
|
252
|
+
AGENT_BROWSER_PROVIDER="browserbase" # Cloud browser provider
|
|
253
|
+
AGENT_BROWSER_STREAM_PORT="9223" # WebSocket streaming port
|
|
254
|
+
AGENT_BROWSER_HOME="/path/to/agent-browser" # Custom install location (for daemon.js)
|
|
255
|
+
```
|
|
256
|
+
|
|
257
|
+
## Example: Form submission
|
|
258
|
+
|
|
259
|
+
```bash
|
|
260
|
+
agent-browser open https://example.com/form
|
|
261
|
+
agent-browser snapshot -i
|
|
262
|
+
# Output shows: textbox "Email" [ref=e1], textbox "Password" [ref=e2], button "Submit" [ref=e3]
|
|
263
|
+
|
|
264
|
+
agent-browser fill @e1 "user@example.com"
|
|
265
|
+
agent-browser fill @e2 "password123"
|
|
266
|
+
agent-browser click @e3
|
|
267
|
+
agent-browser wait --load networkidle
|
|
268
|
+
agent-browser snapshot -i # Check result
|
|
269
|
+
```
|
|
270
|
+
|
|
271
|
+
## Example: Authentication with saved state
|
|
272
|
+
|
|
273
|
+
```bash
|
|
274
|
+
# Login once
|
|
275
|
+
agent-browser open https://app.example.com/login
|
|
276
|
+
agent-browser snapshot -i
|
|
277
|
+
agent-browser fill @e1 "username"
|
|
278
|
+
agent-browser fill @e2 "password"
|
|
279
|
+
agent-browser click @e3
|
|
280
|
+
agent-browser wait --url "**/dashboard"
|
|
281
|
+
agent-browser state save auth.json
|
|
282
|
+
|
|
283
|
+
# Later sessions: load saved state
|
|
284
|
+
agent-browser state load auth.json
|
|
285
|
+
agent-browser open https://app.example.com/dashboard
|
|
286
|
+
```
|
|
287
|
+
|
|
288
|
+
## Sessions (parallel browsers)
|
|
289
|
+
|
|
290
|
+
```bash
|
|
291
|
+
agent-browser --session test1 open site-a.com
|
|
292
|
+
agent-browser --session test2 open site-b.com
|
|
293
|
+
agent-browser session list
|
|
294
|
+
```
|
|
295
|
+
|
|
296
|
+
## JSON output (for parsing)
|
|
297
|
+
|
|
298
|
+
Add `--json` for machine-readable output:
|
|
299
|
+
|
|
300
|
+
```bash
|
|
301
|
+
agent-browser snapshot -i --json
|
|
302
|
+
agent-browser get text @e1 --json
|
|
303
|
+
```
|
|
304
|
+
|
|
305
|
+
## Debugging
|
|
306
|
+
|
|
307
|
+
```bash
|
|
308
|
+
agent-browser --headed open example.com # Show browser window
|
|
309
|
+
agent-browser --cdp 9222 snapshot # Connect via CDP port
|
|
310
|
+
agent-browser connect 9222 # Alternative: connect command
|
|
311
|
+
agent-browser console # View console messages
|
|
312
|
+
agent-browser console --clear # Clear console
|
|
313
|
+
agent-browser errors # View page errors
|
|
314
|
+
agent-browser errors --clear # Clear errors
|
|
315
|
+
agent-browser highlight @e1 # Highlight element
|
|
316
|
+
agent-browser trace start # Start recording trace
|
|
317
|
+
agent-browser trace stop trace.zip # Stop and save trace
|
|
318
|
+
agent-browser record start ./debug.webm # Record video from current page
|
|
319
|
+
agent-browser record stop # Save recording
|
|
320
|
+
```
|
|
321
|
+
|
|
322
|
+
## Deep-dive documentation
|
|
323
|
+
|
|
324
|
+
For detailed patterns and best practices, see:
|
|
325
|
+
|
|
326
|
+
| Reference | Description |
|
|
327
|
+
|-----------|-------------|
|
|
328
|
+
| [references/snapshot-refs.md](references/snapshot-refs.md) | Ref lifecycle, invalidation rules, troubleshooting |
|
|
329
|
+
| [references/session-management.md](references/session-management.md) | Parallel sessions, state persistence, concurrent scraping |
|
|
330
|
+
| [references/authentication.md](references/authentication.md) | Login flows, OAuth, 2FA handling, state reuse |
|
|
331
|
+
| [references/video-recording.md](references/video-recording.md) | Recording workflows for debugging and documentation |
|
|
332
|
+
| [references/proxy-support.md](references/proxy-support.md) | Proxy configuration, geo-testing, rotating proxies |
|
|
333
|
+
|
|
334
|
+
## Ready-to-use templates
|
|
335
|
+
|
|
336
|
+
Executable workflow scripts for common patterns:
|
|
337
|
+
|
|
338
|
+
| Template | Description |
|
|
339
|
+
|----------|-------------|
|
|
340
|
+
| [templates/form-automation.sh](templates/form-automation.sh) | Form filling with validation |
|
|
341
|
+
| [templates/authenticated-session.sh](templates/authenticated-session.sh) | Login once, reuse state |
|
|
342
|
+
| [templates/capture-workflow.sh](templates/capture-workflow.sh) | Content extraction with screenshots |
|
|
343
|
+
|
|
344
|
+
Usage:
|
|
345
|
+
```bash
|
|
346
|
+
./templates/form-automation.sh https://example.com/form
|
|
347
|
+
./templates/authenticated-session.sh https://app.example.com/login
|
|
348
|
+
./templates/capture-workflow.sh https://example.com ./output
|
|
349
|
+
```
|
|
@@ -0,0 +1,188 @@
|
|
|
1
|
+
# Authentication Patterns
|
|
2
|
+
|
|
3
|
+
Patterns for handling login flows, session persistence, and authenticated browsing.
|
|
4
|
+
|
|
5
|
+
## Basic Login Flow
|
|
6
|
+
|
|
7
|
+
```bash
|
|
8
|
+
# Navigate to login page
|
|
9
|
+
agent-browser open https://app.example.com/login
|
|
10
|
+
agent-browser wait --load networkidle
|
|
11
|
+
|
|
12
|
+
# Get form elements
|
|
13
|
+
agent-browser snapshot -i
|
|
14
|
+
# Output: @e1 [input type="email"], @e2 [input type="password"], @e3 [button] "Sign In"
|
|
15
|
+
|
|
16
|
+
# Fill credentials
|
|
17
|
+
agent-browser fill @e1 "user@example.com"
|
|
18
|
+
agent-browser fill @e2 "password123"
|
|
19
|
+
|
|
20
|
+
# Submit
|
|
21
|
+
agent-browser click @e3
|
|
22
|
+
agent-browser wait --load networkidle
|
|
23
|
+
|
|
24
|
+
# Verify login succeeded
|
|
25
|
+
agent-browser get url # Should be dashboard, not login
|
|
26
|
+
```
|
|
27
|
+
|
|
28
|
+
## Saving Authentication State
|
|
29
|
+
|
|
30
|
+
After logging in, save state for reuse:
|
|
31
|
+
|
|
32
|
+
```bash
|
|
33
|
+
# Login first (see above)
|
|
34
|
+
agent-browser open https://app.example.com/login
|
|
35
|
+
agent-browser snapshot -i
|
|
36
|
+
agent-browser fill @e1 "user@example.com"
|
|
37
|
+
agent-browser fill @e2 "password123"
|
|
38
|
+
agent-browser click @e3
|
|
39
|
+
agent-browser wait --url "**/dashboard"
|
|
40
|
+
|
|
41
|
+
# Save authenticated state
|
|
42
|
+
agent-browser state save ./auth-state.json
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
## Restoring Authentication
|
|
46
|
+
|
|
47
|
+
Skip login by loading saved state:
|
|
48
|
+
|
|
49
|
+
```bash
|
|
50
|
+
# Load saved auth state
|
|
51
|
+
agent-browser state load ./auth-state.json
|
|
52
|
+
|
|
53
|
+
# Navigate directly to protected page
|
|
54
|
+
agent-browser open https://app.example.com/dashboard
|
|
55
|
+
|
|
56
|
+
# Verify authenticated
|
|
57
|
+
agent-browser snapshot -i
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## OAuth / SSO Flows
|
|
61
|
+
|
|
62
|
+
For OAuth redirects:
|
|
63
|
+
|
|
64
|
+
```bash
|
|
65
|
+
# Start OAuth flow
|
|
66
|
+
agent-browser open https://app.example.com/auth/google
|
|
67
|
+
|
|
68
|
+
# Wait for the redirect to Google's sign-in page
|
|
69
|
+
agent-browser wait --url "**/accounts.google.com**"
|
|
70
|
+
agent-browser snapshot -i
|
|
71
|
+
|
|
72
|
+
# Fill Google credentials
|
|
73
|
+
agent-browser fill @e1 "user@gmail.com"
|
|
74
|
+
agent-browser click @e2 # Next button
|
|
75
|
+
agent-browser wait 2000
|
|
76
|
+
agent-browser snapshot -i
|
|
77
|
+
agent-browser fill @e3 "password"
|
|
78
|
+
agent-browser click @e4 # Sign in
|
|
79
|
+
|
|
80
|
+
# Wait for redirect back
|
|
81
|
+
agent-browser wait --url "**/app.example.com**"
|
|
82
|
+
agent-browser state save ./oauth-state.json
|
|
83
|
+
```
|
|
84
|
+
|
|
85
|
+
## Two-Factor Authentication
|
|
86
|
+
|
|
87
|
+
Handle 2FA with manual intervention:
|
|
88
|
+
|
|
89
|
+
```bash
|
|
90
|
+
# Login with credentials
|
|
91
|
+
agent-browser open https://app.example.com/login --headed # Show browser
|
|
92
|
+
agent-browser snapshot -i
|
|
93
|
+
agent-browser fill @e1 "user@example.com"
|
|
94
|
+
agent-browser fill @e2 "password123"
|
|
95
|
+
agent-browser click @e3
|
|
96
|
+
|
|
97
|
+
# Wait for user to complete 2FA manually
|
|
98
|
+
echo "Complete 2FA in the browser window..."
|
|
99
|
+
agent-browser wait --url "**/dashboard" --timeout 120000
|
|
100
|
+
|
|
101
|
+
# Save state after 2FA
|
|
102
|
+
agent-browser state save ./2fa-state.json
|
|
103
|
+
```
|
|
104
|
+
|
|
105
|
+
## HTTP Basic Auth
|
|
106
|
+
|
|
107
|
+
For sites using HTTP Basic Authentication:
|
|
108
|
+
|
|
109
|
+
```bash
|
|
110
|
+
# Set credentials before navigation
|
|
111
|
+
agent-browser set credentials username password
|
|
112
|
+
|
|
113
|
+
# Navigate to protected resource
|
|
114
|
+
agent-browser open https://protected.example.com/api
|
|
115
|
+
```
|
|
116
|
+
|
|
117
|
+
## Cookie-Based Auth
|
|
118
|
+
|
|
119
|
+
Manually set authentication cookies:
|
|
120
|
+
|
|
121
|
+
```bash
|
|
122
|
+
# Set auth cookie
|
|
123
|
+
agent-browser cookies set session_token "abc123xyz"
|
|
124
|
+
|
|
125
|
+
# Navigate to protected page
|
|
126
|
+
agent-browser open https://app.example.com/dashboard
|
|
127
|
+
```
|
|
128
|
+
|
|
129
|
+
## Token Refresh Handling
|
|
130
|
+
|
|
131
|
+
For sessions with expiring tokens:
|
|
132
|
+
|
|
133
|
+
```bash
|
|
134
|
+
#!/bin/bash
|
|
135
|
+
# Wrapper that reuses saved auth state and logs in again when the session has expired
|
|
136
|
+
|
|
137
|
+
STATE_FILE="./auth-state.json"
|
|
138
|
+
|
|
139
|
+
# Try loading existing state
|
|
140
|
+
if [[ -f "$STATE_FILE" ]]; then
|
|
141
|
+
agent-browser state load "$STATE_FILE"
|
|
142
|
+
agent-browser open https://app.example.com/dashboard
|
|
143
|
+
|
|
144
|
+
# Check if session is still valid
|
|
145
|
+
URL=$(agent-browser get url)
|
|
146
|
+
if [[ "$URL" == *"/login"* ]]; then
|
|
147
|
+
echo "Session expired, re-authenticating..."
|
|
148
|
+
# Perform fresh login
|
|
149
|
+
agent-browser snapshot -i
|
|
150
|
+
agent-browser fill @e1 "$USERNAME"
|
|
151
|
+
agent-browser fill @e2 "$PASSWORD"
|
|
152
|
+
agent-browser click @e3
|
|
153
|
+
agent-browser wait --url "**/dashboard"
|
|
154
|
+
agent-browser state save "$STATE_FILE"
|
|
155
|
+
fi
|
|
156
|
+
else
|
|
157
|
+
# First-time login
|
|
158
|
+
agent-browser open https://app.example.com/login
|
|
159
|
+
# ... login flow ...
|
|
160
|
+
fi
|
|
161
|
+
```
|
|
162
|
+
|
|
163
|
+
## Security Best Practices
|
|
164
|
+
|
|
165
|
+
1. **Never commit state files** - They contain session tokens
|
|
166
|
+
```bash
|
|
167
|
+
echo "*-state.json" >> .gitignore
|
|
168
|
+
```
|
|
169
|
+
|
|
170
|
+
2. **Use environment variables for credentials**
|
|
171
|
+
```bash
|
|
172
|
+
agent-browser fill @e1 "$APP_USERNAME"
|
|
173
|
+
agent-browser fill @e2 "$APP_PASSWORD"
|
|
174
|
+
```
|
|
175
|
+
|
|
176
|
+
3. **Clean up after automation**
|
|
177
|
+
```bash
|
|
178
|
+
agent-browser cookies clear
|
|
179
|
+
rm -f ./auth-state.json
|
|
180
|
+
```
|
|
181
|
+
|
|
182
|
+
4. **Use short-lived sessions for CI/CD**
|
|
183
|
+
```bash
|
|
184
|
+
# Don't persist state in CI
|
|
185
|
+
agent-browser open https://app.example.com/login
|
|
186
|
+
# ... login and perform actions ...
|
|
187
|
+
agent-browser close # Session ends, nothing persisted
|
|
188
|
+
```
|